From 04bafbaac5bf7bc58d6b59c150ed70aa403445c0 Mon Sep 17 00:00:00 2001 From: phlrain Date: Mon, 21 Nov 2022 14:01:25 +0000 Subject: [PATCH 01/16] change staticRNN to while --- python/paddle/fluid/layers/rnn.py | 78 ++++-- .../fluid/tests/unittests/rnn/test_rnn_api.py | 240 ++++++++++++++++++ 2 files changed, 297 insertions(+), 21 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 60b0eb5da67d0..3d0bbc959fc5c 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -687,29 +687,65 @@ def _switch_grad(x, stop=False): mask = tensor.reverse(mask, axis=[0]) if sequence_length else None # StaticRNN - rnn = control_flow.StaticRNN() - with rnn.step(): - inputs = map_structure(rnn.step_input, inputs) - states = map_structure(rnn.memory, initial_states) - copy_states = map_structure(lambda x: x, states) - outputs, new_states = cell(inputs, copy_states, **kwargs) - assert_same_structure(states, new_states) - if sequence_length: - step_mask = rnn.step_input(mask) - new_states = map_structure( - partial(_maybe_copy, step_mask=step_mask), states, new_states - ) + # rnn = control_flow.StaticRNN() + # with rnn.step(): + # inputs = map_structure(rnn.step_input, inputs) + # states = map_structure(rnn.memory, initial_states) + # copy_states = map_structure(lambda x: x, states) + # outputs, new_states = cell(inputs, copy_states, **kwargs) + # assert_same_structure(states, new_states) + # if sequence_length: + # step_mask = rnn.step_input(mask) + # new_states = map_structure( + # partial(_maybe_copy, step_mask=step_mask), states, new_states + # ) + + # map_structure(rnn.update_memory, states, new_states) + # flat_outputs = flatten(outputs) + # map_structure(rnn.step_output, outputs) + # map_structure(rnn.step_output, new_states) + + # rnn_out = rnn() + start_i = paddle.ones([1], dtype="int64") + if isinstance(inputs, (list, tuple)): + end = paddle.shape(inputs[0])[0] + else: + end = paddle.shape(inputs)[0] + + end = paddle.cast(end, "int64") + cond = start_i < end + print(type(cond)) + while_op = control_flow.While(cond) + + out_array = control_flow.create_array(dtype="float64") # change here + with while_op.block(): + step_in = inputs[start_i] + outputs, new_states = cell(step_in, initial_states, **kwargs) + + for ns, os in zip(new_states, initial_states): + paddle.assign(ns, os) + + paddle.fluid.layers.Print(outputs) + control_flow.array_write(outputs, start_i, out_array) + + print("outs", outputs) + + new_st_i = start_i + 1 + new_cond = new_st_i < end + paddle.assign(new_st_i, start_i) + print(new_st_i.dtype) + print(end.dtype) + paddle.fluid.layers.less_than(x=new_st_i, y=end, cond=cond) - map_structure(rnn.update_memory, states, new_states) - flat_outputs = flatten(outputs) - map_structure(rnn.step_output, outputs) - map_structure(rnn.step_output, new_states) + print("test", out_array.shape) - rnn_out = rnn() - final_outputs = rnn_out[: len(flat_outputs)] - final_outputs = pack_sequence_as(outputs, final_outputs) - final_states = map_structure(lambda x: x[-1], rnn_out[len(flat_outputs) :]) - final_states = pack_sequence_as(new_states, final_states) + out, _ = paddle.fluid.layers.tensor_array_to_tensor(out_array) + final_outputs = out + # final_outputs = rnn_out[: len(flat_outputs)] + # final_outputs = pack_sequence_as(outputs, final_outputs) + # final_states = map_structure(lambda x: x[-1], rnn_out[len(flat_outputs) :]) + final_states = initial_states + # final_states = pack_sequence_as(new_states, final_states) if is_reverse: final_outputs = map_structure( diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py new file mode 100644 index 0000000000000..1006d6efd884e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py @@ -0,0 +1,240 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import paddle + +paddle.set_default_dtype("float64") + +import numpy as np +import unittest + +from paddle.fluid import framework +from paddle import fluid + + +bidirectional_list = ["bidirectional", "bidirect"] + + +class TestSimpleRNN(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): + super().__init__("runTest") + self.time_major = time_major + self.direction = direction + self.num_directions = 2 if direction in bidirectional_list else 1 + self.place = place + self.batch_size = 4 + self.input_size = 16 + self.hidden_size = 16 + self.seq_len = 12 + self.seed = 1234 + + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. + + place = paddle.set_device(self.place) + paddle.disable_static(self.place) + paddle.seed(self.seed) + paddle.framework.random._manual_program_seed(self.seed) + cell_dy = paddle.nn.SimpleRNNCell(self.input_size, self.hidden_size) + self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) + + paddle.enable_static() + + with paddle.fluid.unique_name.guard(): + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + with paddle.static.program_guard( + main_program=main_program, startup_program=startup_program + ): + paddle.seed(self.seed) + paddle.framework.random._manual_program_seed(self.seed) + + self.exe = fluid.Executor( + fluid.CPUPlace() + if self.place == "cpu" + else fluid.CUDAPlace(0) + ) + + rnn_in_data = paddle.static.data( + "x", + [None, self.batch_size, self.hidden_size], + dtype="float64", + ) + pre_h_data = paddle.static.data( + "pre_h", + [self.batch_size, self.hidden_size], + dtype="float64", + ) + seq_len_data = paddle.static.data( + "seq_len", [self.batch_size], dtype="int64" + ) + cell_st = paddle.nn.SimpleRNNCell( + self.input_size, self.hidden_size + ) + self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) + st_out, st_last_h = self.rnn_st( + rnn_in_data, pre_h_data, sequence_length=seq_len_data + ) + + self.fetch_list = [st_out, st_last_h] + + self.exe.run(framework.default_startup_program()) + + self.main_program = framework.default_main_program() + + paddle.disable_static(self.place) + + def test_base(self, test_seq_len=False): + x = np.random.randn(12, 4, 16) + if not self.time_major: + x = np.transpose(x, [1, 0, 2]) + prev_h = np.random.randn(4, 16) + + paddle.disable_static(self.place) + if test_seq_len: + seq_len = np.array([9, 10, 8, 12]) + else: + seq_len = np.array([12, 12, 12, 12]) + + y1, h1 = self.rnn_net( + paddle.to_tensor(x), + paddle.to_tensor(prev_h), + sequence_length=paddle.to_tensor(seq_len), + ) + + paddle.enable_static() + out = self.exe.run( + self.main_program, + feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, + fetch_list=[self.fetch_list], + ) + + y2, h2 = out + + np.testing.assert_allclose(y1.numpy(), y2, atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(h1.numpy(), h2, atol=1e-8, rtol=1e-5) + + def runTest(self): + self.test_base() + self.test_base(True) + + +class TestGRU(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): + super().__init__("runTest") + self.time_major = time_major + self.direction = direction + self.num_directions = 2 if direction in bidirectional_list else 1 + self.place = place + self.batch_size = 4 + self.input_size = 16 + self.hidden_size = 16 + self.seq_len = 12 + self.seed = 1234 + + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. + + place = paddle.set_device(self.place) + paddle.disable_static(self.place) + paddle.seed(self.seed) + paddle.framework.random._manual_program_seed(self.seed) + cell_dy = paddle.nn.GRUCell(self.input_size, self.hidden_size) + self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) + + paddle.enable_static() + + with paddle.fluid.unique_name.guard(): + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + with paddle.static.program_guard( + main_program=main_program, startup_program=startup_program + ): + paddle.seed(self.seed) + paddle.framework.random._manual_program_seed(self.seed) + + self.exe = fluid.Executor( + fluid.CPUPlace() + if self.place == "cpu" + else fluid.CUDAPlace(0) + ) + + rnn_in_data = paddle.static.data( + "x", + [None, self.batch_size, self.hidden_size], + dtype="float64", + ) + pre_h_data = paddle.static.data( + "pre_h", + [self.batch_size, self.hidden_size], + dtype="float64", + ) + seq_len_data = paddle.static.data( + "seq_len", [self.batch_size], dtype="int64" + ) + cell_st = paddle.nn.GRUCell(self.input_size, self.hidden_size) + self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) + st_out, st_last_h = self.rnn_st( + rnn_in_data, pre_h_data, sequence_length=seq_len_data + ) + + self.fetch_list = [st_out, st_last_h] + + self.exe.run(framework.default_startup_program()) + + self.main_program = framework.default_main_program() + + paddle.disable_static(self.place) + + def test_base(self, test_seq_len=False): + x = np.random.randn(12, 4, 16) + if not self.time_major: + x = np.transpose(x, [1, 0, 2]) + prev_h = np.random.randn(4, 16) + + paddle.disable_static(self.place) + if test_seq_len: + seq_len = np.array([9, 10, 8, 12]) + else: + seq_len = np.array([12, 12, 12, 12]) + + y1, h1 = self.rnn_net( + paddle.to_tensor(x), + paddle.to_tensor(prev_h), + sequence_length=paddle.to_tensor(seq_len), + ) + + paddle.enable_static() + out = self.exe.run( + self.main_program, + feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, + fetch_list=[self.fetch_list], + ) + + y2, h2 = out + + np.testing.assert_allclose(y1.numpy(), y2, atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(h1.numpy(), h2, atol=1e-8, rtol=1e-5) + + def runTest(self): + self.test_base() + self.test_base(True) + + +if __name__ == "__main__": + paddle.enable_static() + unittest.main() From 8790ec404f489a5342c1328e676b18f11fff99ed Mon Sep 17 00:00:00 2001 From: phlrain Date: Mon, 21 Nov 2022 14:06:14 +0000 Subject: [PATCH 02/16] update code --- python/paddle/fluid/layers/rnn.py | 75 +++++++++++-------------------- 1 file changed, 27 insertions(+), 48 deletions(-) diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 3d0bbc959fc5c..da7e4028862dc 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -674,8 +674,8 @@ def _switch_grad(x, stop=False): if not time_major: inputs = map_structure(_transpose_batch_time, inputs) + max_seq_len = nn.shape(flatten(inputs)[0])[0] if sequence_length: - max_seq_len = nn.shape(flatten(inputs)[0])[0] mask = sequence_lod.sequence_mask( sequence_length, maxlen=max_seq_len, @@ -686,66 +686,45 @@ def _switch_grad(x, stop=False): inputs = map_structure(lambda x: tensor.reverse(x, axis=[0]), inputs) mask = tensor.reverse(mask, axis=[0]) if sequence_length else None - # StaticRNN - # rnn = control_flow.StaticRNN() - # with rnn.step(): - # inputs = map_structure(rnn.step_input, inputs) - # states = map_structure(rnn.memory, initial_states) - # copy_states = map_structure(lambda x: x, states) - # outputs, new_states = cell(inputs, copy_states, **kwargs) - # assert_same_structure(states, new_states) - # if sequence_length: - # step_mask = rnn.step_input(mask) - # new_states = map_structure( - # partial(_maybe_copy, step_mask=step_mask), states, new_states - # ) - - # map_structure(rnn.update_memory, states, new_states) - # flat_outputs = flatten(outputs) - # map_structure(rnn.step_output, outputs) - # map_structure(rnn.step_output, new_states) - - # rnn_out = rnn() - start_i = paddle.ones([1], dtype="int64") - if isinstance(inputs, (list, tuple)): - end = paddle.shape(inputs[0])[0] - else: - end = paddle.shape(inputs)[0] + with paddle.fluid.framework.device_guard("cpu"): + start_i = paddle.zeros([1], dtype="int64") + end = max_seq_len - end = paddle.cast(end, "int64") - cond = start_i < end - print(type(cond)) + end = paddle.cast(end, "int64") + cond = start_i < end while_op = control_flow.While(cond) - out_array = control_flow.create_array(dtype="float64") # change here + out_array = control_flow.create_array(dtype=flatten(inputs)[0].dtype) + with while_op.block(): + step_in = inputs[start_i] outputs, new_states = cell(step_in, initial_states, **kwargs) + assert isinstance(outputs, paddle.fluid.framework.Variable) - for ns, os in zip(new_states, initial_states): - paddle.assign(ns, os) + assert_same_structure(new_states, initial_states) + if sequence_length: + step_mask = mask[start_i] + new_states = map_structure( + partial(_maybe_copy, step_mask=step_mask), + initial_states, + new_states, + ) + map_structure(paddle.assign, new_states, initial_states) - paddle.fluid.layers.Print(outputs) control_flow.array_write(outputs, start_i, out_array) - print("outs", outputs) - - new_st_i = start_i + 1 - new_cond = new_st_i < end - paddle.assign(new_st_i, start_i) - print(new_st_i.dtype) - print(end.dtype) - paddle.fluid.layers.less_than(x=new_st_i, y=end, cond=cond) + with paddle.fluid.framework.device_guard("cpu"): + new_st_i = start_i + 1 + new_cond = new_st_i < end + paddle.assign(new_st_i, start_i) + paddle.fluid.layers.less_than(x=new_st_i, y=end, cond=cond) - print("test", out_array.shape) - - out, _ = paddle.fluid.layers.tensor_array_to_tensor(out_array) + out, _ = paddle.fluid.layers.tensor_array_to_tensor( + out_array, axis=0, use_stack=True + ) final_outputs = out - # final_outputs = rnn_out[: len(flat_outputs)] - # final_outputs = pack_sequence_as(outputs, final_outputs) - # final_states = map_structure(lambda x: x[-1], rnn_out[len(flat_outputs) :]) final_states = initial_states - # final_states = pack_sequence_as(new_states, final_states) if is_reverse: final_outputs = map_structure( From 5de707cfec0376a2c90738d61c21db637ca8a031 Mon Sep 17 00:00:00 2001 From: phlrain Date: Wed, 23 Nov 2022 05:22:37 +0000 Subject: [PATCH 03/16] fix rnn bug --- .../fluid/operators/controlflow/while_op.cc | 9 +- python/paddle/fluid/layers/rnn.py | 58 +- .../fluid/tests/unittests/rnn/test_rnn_api.py | 359 +++-- .../test_eager_deletion_recurrent_op.py | 1420 ++++++++--------- .../tests/unittests/test_rnn_cell_api.py | 2 + 5 files changed, 1004 insertions(+), 844 deletions(-) diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index b5e30d8aaf05c..8f87fdf932f54 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -293,7 +293,12 @@ class WhileGradOp : public framework::OperatorBase { auto &outside_tensor = og_outside.Get(); auto &inside_tensor = *og_inside.GetMutable(); inside_tensor.set_lod(outside_tensor.lod()); - inside_tensor.ShareDataWith(outside_tensor); + if (outside_tensor.IsInitialized()) { + // (todo hongyu, need to update here) + VLOG(8) << "skip data share " << outside_og_name; + inside_tensor.ShareDataWith(outside_tensor); + } else { + } } else if (og_outside.IsType()) { auto outside_array = og_outside.GetMutable(); @@ -492,6 +497,7 @@ class WhileGradOpMaker : public framework::SingleGradOpMaker { block_ins.insert(o); } std::unordered_set output_grads; + for (const auto *op : grad_block->AllOps()) { for (auto &input_name : op->InputArgumentNames()) { // If the input of Op has been recorded or is generated by the forward @@ -504,7 +510,6 @@ class WhileGradOpMaker : public framework::SingleGradOpMaker { parent_block->FindVarRecursive(input_name) != nullptr)) { continue; } - output_grads.insert(input_name); } for (auto &output_name : op->OutputArgumentNames()) { diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index da7e4028862dc..bf9d496403eff 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -696,35 +696,69 @@ def _switch_grad(x, stop=False): out_array = control_flow.create_array(dtype=flatten(inputs)[0].dtype) + init_array = control_flow.create_array(dtype=flatten(inputs)[0].dtype) + init_array = map_structure( + lambda x: control_flow.create_array(dtype=x.dtype), initial_states + ) + + map_structure( + lambda x, y: control_flow.array_write(x, start_i, y), + initial_states, + init_array, + ) + with while_op.block(): step_in = inputs[start_i] - outputs, new_states = cell(step_in, initial_states, **kwargs) + # step_in = paddle.fluid.layers.Print( step_in, message="step in") + pre_state = map_structure( + lambda x: control_flow.array_read(x, start_i), init_array + ) + # pre_state = paddle.fluid.layers.Print( pre_state, message="pre") + outputs, new_states = cell(step_in, pre_state, **kwargs) assert isinstance(outputs, paddle.fluid.framework.Variable) - - assert_same_structure(new_states, initial_states) + assert_same_structure(new_states, pre_state) if sequence_length: - step_mask = mask[start_i] + step_mask = paddle.unsqueeze(mask[start_i], 1) + # paddle.fluid.layers.Print( step_mask, message="mask") + # new_states = map_structure( + # partial(_maybe_copy, step_mask=step_mask), + # pre_state, new_states + # ) new_states = map_structure( - partial(_maybe_copy, step_mask=step_mask), - initial_states, + lambda x, y: (x * step_mask + y * (1.0 - step_mask)), new_states, + pre_state, ) - map_structure(paddle.assign, new_states, initial_states) control_flow.array_write(outputs, start_i, out_array) with paddle.fluid.framework.device_guard("cpu"): - new_st_i = start_i + 1 - new_cond = new_st_i < end - paddle.assign(new_st_i, start_i) - paddle.fluid.layers.less_than(x=new_st_i, y=end, cond=cond) + + start_i = paddle.fluid.layers.increment( + x=start_i, value=1, in_place=True + ) + map_structure( + lambda x, y: control_flow.array_write(x, start_i, y), + new_states, + init_array, + ) + + with paddle.fluid.framework.device_guard("cpu"): + paddle.fluid.layers.less_than(x=start_i, y=end, cond=cond) out, _ = paddle.fluid.layers.tensor_array_to_tensor( out_array, axis=0, use_stack=True ) + + all_state = map_structure( + lambda x: paddle.fluid.layers.tensor_array_to_tensor( + x, axis=0, use_stack=True + )[0], + init_array, + ) final_outputs = out - final_states = initial_states + final_states = map_structure(lambda x: x[-1], all_state) if is_reverse: final_outputs = map_structure( diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py index 1006d6efd884e..cc03b7bf33cba 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py @@ -17,17 +17,227 @@ paddle.set_default_dtype("float64") +import unittest +from paddle import fluid + import numpy as np import unittest from paddle.fluid import framework -from paddle import fluid bidirectional_list = ["bidirectional", "bidirect"] -class TestSimpleRNN(unittest.TestCase): +# class TestSimpleRNN(unittest.TestCase): +# def __init__(self, time_major=True, direction="forward", place="cpu"): +# super().__init__("runTest") +# self.time_major = time_major +# self.direction = direction +# self.num_directions = 2 if direction in bidirectional_list else 1 +# self.place = place +# self.batch_size = 4 +# self.input_size = 16 +# self.hidden_size = 16 +# self.seq_len = 12 +# self.seed = 1234 + +# def setUp(self): +# # Since `set_device` is global, set `set_device` in `setUp` rather than +# # `__init__` to avoid using an error device set by another test case. + +# place = paddle.set_device(self.place) +# paddle.disable_static(self.place) +# paddle.seed(self.seed) +# paddle.framework.random._manual_program_seed(self.seed) +# cell_dy = paddle.nn.SimpleRNNCell(self.input_size, self.hidden_size) +# self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) + +# paddle.enable_static() + +# with paddle.fluid.unique_name.guard(): +# main_program = paddle.static.Program() +# startup_program = paddle.static.Program() +# with paddle.static.program_guard( +# main_program=main_program, startup_program=startup_program +# ): +# paddle.seed(self.seed) +# paddle.framework.random._manual_program_seed(self.seed) + +# self.exe = fluid.Executor( +# fluid.CPUPlace() +# if self.place == "cpu" +# else fluid.CUDAPlace(0) +# ) + +# rnn_in_data = paddle.static.data( +# "x", +# [None, self.batch_size, self.hidden_size], +# dtype="float64", +# ) +# pre_h_data = paddle.static.data( +# "pre_h", +# [self.batch_size, self.hidden_size], +# dtype="float64", +# ) +# seq_len_data = paddle.static.data( +# "seq_len", [self.batch_size], dtype="int64" +# ) +# cell_st = paddle.nn.SimpleRNNCell( +# self.input_size, self.hidden_size +# ) +# self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) +# st_out, st_last_h = self.rnn_st( +# rnn_in_data, pre_h_data, sequence_length=seq_len_data +# ) + +# self.fetch_list = [st_out, st_last_h] + +# self.exe.run(framework.default_startup_program()) + +# self.main_program = framework.default_main_program() + +# paddle.disable_static(self.place) + +# def test_base(self, test_seq_len=False): +# x = np.random.randn(12, 4, 16) +# if not self.time_major: +# x = np.transpose(x, [1, 0, 2]) +# prev_h = np.random.randn(4, 16) + +# paddle.disable_static(self.place) +# if test_seq_len: +# seq_len = np.array([9, 10, 8, 12]) +# else: +# seq_len = np.array([12, 12, 12, 12]) + +# y1, h1 = self.rnn_net( +# paddle.to_tensor(x), +# paddle.to_tensor(prev_h), +# sequence_length=paddle.to_tensor(seq_len), +# ) + +# paddle.enable_static() +# out = self.exe.run( +# self.main_program, +# feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, +# fetch_list=[self.fetch_list], +# ) + +# y2, h2 = out + +# np.testing.assert_allclose(y1.numpy(), y2, atol=1e-8, rtol=1e-5) +# np.testing.assert_allclose(h1.numpy(), h2, atol=1e-8, rtol=1e-5) + +# def runTest(self): +# self.test_base() +# self.test_base(True) + + +# class TestGRU(unittest.TestCase): +# def __init__(self, time_major=True, direction="forward", place="cpu"): +# super().__init__("runTest") +# self.time_major = time_major +# self.direction = direction +# self.num_directions = 2 if direction in bidirectional_list else 1 +# self.place = place +# self.batch_size = 4 +# self.input_size = 16 +# self.hidden_size = 16 +# self.seq_len = 12 +# self.seed = 1234 + +# def setUp(self): +# # Since `set_device` is global, set `set_device` in `setUp` rather than +# # `__init__` to avoid using an error device set by another test case. + +# place = paddle.set_device(self.place) +# paddle.disable_static(self.place) +# paddle.seed(self.seed) +# paddle.framework.random._manual_program_seed(self.seed) +# cell_dy = paddle.nn.GRUCell(self.input_size, self.hidden_size) +# self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) + +# paddle.enable_static() + +# with paddle.fluid.unique_name.guard(): +# main_program = paddle.static.Program() +# startup_program = paddle.static.Program() +# with paddle.static.program_guard( +# main_program=main_program, startup_program=startup_program +# ): +# paddle.seed(self.seed) +# paddle.framework.random._manual_program_seed(self.seed) + +# self.exe = fluid.Executor( +# fluid.CPUPlace() +# if self.place == "cpu" +# else fluid.CUDAPlace(0) +# ) + +# rnn_in_data = paddle.static.data( +# "x", +# [None, self.batch_size, self.hidden_size], +# dtype="float64", +# ) +# pre_h_data = paddle.static.data( +# "pre_h", +# [self.batch_size, self.hidden_size], +# dtype="float64", +# ) +# seq_len_data = paddle.static.data( +# "seq_len", [self.batch_size], dtype="int64" +# ) +# cell_st = paddle.nn.GRUCell(self.input_size, self.hidden_size) +# self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) +# st_out, st_last_h = self.rnn_st( +# rnn_in_data, pre_h_data, sequence_length=seq_len_data +# ) + +# self.fetch_list = [st_out, st_last_h] + +# self.exe.run(framework.default_startup_program()) + +# self.main_program = framework.default_main_program() + +# paddle.disable_static(self.place) + +# def test_base(self, test_seq_len=False): +# x = np.random.randn(12, 4, 16) +# if not self.time_major: +# x = np.transpose(x, [1, 0, 2]) +# prev_h = np.random.randn(4, 16) + +# paddle.disable_static(self.place) +# if test_seq_len: +# seq_len = np.array([9, 10, 8, 12]) +# else: +# seq_len = np.array([12, 12, 12, 12]) + +# y1, h1 = self.rnn_net( +# paddle.to_tensor(x), +# paddle.to_tensor(prev_h), +# sequence_length=paddle.to_tensor(seq_len), +# ) + +# paddle.enable_static() +# out = self.exe.run( +# self.main_program, +# feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, +# fetch_list=[self.fetch_list], +# ) + +# y2, h2 = out + +# np.testing.assert_allclose(y1.numpy(), y2, atol=1e-8, rtol=1e-5) +# np.testing.assert_allclose(h1.numpy(), h2, atol=1e-8, rtol=1e-5) + +# def runTest(self): +# self.test_base() +# self.test_base(True) + + +class TestGRUBackward(unittest.TestCase): def __init__(self, time_major=True, direction="forward", place="cpu"): super().__init__("runTest") self.time_major = time_major @@ -35,8 +245,8 @@ def __init__(self, time_major=True, direction="forward", place="cpu"): self.num_directions = 2 if direction in bidirectional_list else 1 self.place = place self.batch_size = 4 - self.input_size = 16 - self.hidden_size = 16 + self.input_size = 4 + self.hidden_size = 4 self.seq_len = 12 self.seed = 1234 @@ -48,7 +258,7 @@ def setUp(self): paddle.disable_static(self.place) paddle.seed(self.seed) paddle.framework.random._manual_program_seed(self.seed) - cell_dy = paddle.nn.SimpleRNNCell(self.input_size, self.hidden_size) + cell_dy = paddle.nn.GRUCell(self.input_size, self.hidden_size) self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) paddle.enable_static() @@ -62,7 +272,7 @@ def setUp(self): paddle.seed(self.seed) paddle.framework.random._manual_program_seed(self.seed) - self.exe = fluid.Executor( + self.exe = paddle.fluid.Executor( fluid.CPUPlace() if self.place == "cpu" else fluid.CUDAPlace(0) @@ -81,118 +291,20 @@ def setUp(self): seq_len_data = paddle.static.data( "seq_len", [self.batch_size], dtype="int64" ) - cell_st = paddle.nn.SimpleRNNCell( - self.input_size, self.hidden_size - ) - self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) - st_out, st_last_h = self.rnn_st( - rnn_in_data, pre_h_data, sequence_length=seq_len_data - ) - self.fetch_list = [st_out, st_last_h] + pre_h_data.stop_gradient = False + rnn_in_data.stop_gradient = False - self.exe.run(framework.default_startup_program()) - - self.main_program = framework.default_main_program() - - paddle.disable_static(self.place) - - def test_base(self, test_seq_len=False): - x = np.random.randn(12, 4, 16) - if not self.time_major: - x = np.transpose(x, [1, 0, 2]) - prev_h = np.random.randn(4, 16) - - paddle.disable_static(self.place) - if test_seq_len: - seq_len = np.array([9, 10, 8, 12]) - else: - seq_len = np.array([12, 12, 12, 12]) - - y1, h1 = self.rnn_net( - paddle.to_tensor(x), - paddle.to_tensor(prev_h), - sequence_length=paddle.to_tensor(seq_len), - ) - - paddle.enable_static() - out = self.exe.run( - self.main_program, - feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, - fetch_list=[self.fetch_list], - ) - - y2, h2 = out - - np.testing.assert_allclose(y1.numpy(), y2, atol=1e-8, rtol=1e-5) - np.testing.assert_allclose(h1.numpy(), h2, atol=1e-8, rtol=1e-5) - - def runTest(self): - self.test_base() - self.test_base(True) - - -class TestGRU(unittest.TestCase): - def __init__(self, time_major=True, direction="forward", place="cpu"): - super().__init__("runTest") - self.time_major = time_major - self.direction = direction - self.num_directions = 2 if direction in bidirectional_list else 1 - self.place = place - self.batch_size = 4 - self.input_size = 16 - self.hidden_size = 16 - self.seq_len = 12 - self.seed = 1234 - - def setUp(self): - # Since `set_device` is global, set `set_device` in `setUp` rather than - # `__init__` to avoid using an error device set by another test case. - - place = paddle.set_device(self.place) - paddle.disable_static(self.place) - paddle.seed(self.seed) - paddle.framework.random._manual_program_seed(self.seed) - cell_dy = paddle.nn.GRUCell(self.input_size, self.hidden_size) - self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) - - paddle.enable_static() - - with paddle.fluid.unique_name.guard(): - main_program = paddle.static.Program() - startup_program = paddle.static.Program() - with paddle.static.program_guard( - main_program=main_program, startup_program=startup_program - ): - paddle.seed(self.seed) - paddle.framework.random._manual_program_seed(self.seed) - - self.exe = fluid.Executor( - fluid.CPUPlace() - if self.place == "cpu" - else fluid.CUDAPlace(0) - ) - - rnn_in_data = paddle.static.data( - "x", - [None, self.batch_size, self.hidden_size], - dtype="float64", - ) - pre_h_data = paddle.static.data( - "pre_h", - [self.batch_size, self.hidden_size], - dtype="float64", - ) - seq_len_data = paddle.static.data( - "seq_len", [self.batch_size], dtype="int64" - ) cell_st = paddle.nn.GRUCell(self.input_size, self.hidden_size) self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) + st_out, st_last_h = self.rnn_st( rnn_in_data, pre_h_data, sequence_length=seq_len_data ) - - self.fetch_list = [st_out, st_last_h] + loss = paddle.sum(st_out) + sgd = paddle.optimizer.SGD(0.1) + sgd.minimize(loss) + self.fetch_list = [st_out, st_last_h, "pre_h@GRAD", "x@GRAD"] self.exe.run(framework.default_startup_program()) @@ -201,10 +313,10 @@ def setUp(self): paddle.disable_static(self.place) def test_base(self, test_seq_len=False): - x = np.random.randn(12, 4, 16) + x = np.random.randn(12, 4, self.hidden_size) if not self.time_major: x = np.transpose(x, [1, 0, 2]) - prev_h = np.random.randn(4, 16) + prev_h = np.random.randn(4, self.hidden_size) paddle.disable_static(self.place) if test_seq_len: @@ -212,11 +324,20 @@ def test_base(self, test_seq_len=False): else: seq_len = np.array([12, 12, 12, 12]) + x_in = paddle.to_tensor(x) + h_in = paddle.to_tensor(prev_h) + x_in.stop_gradient = False + h_in.stop_gradient = False y1, h1 = self.rnn_net( - paddle.to_tensor(x), - paddle.to_tensor(prev_h), + x_in, + h_in, sequence_length=paddle.to_tensor(seq_len), ) + loss = y1.sum() + loss.backward() + + # print( x_in.gradient()) + print(h_in.gradient()) paddle.enable_static() out = self.exe.run( @@ -225,14 +346,14 @@ def test_base(self, test_seq_len=False): fetch_list=[self.fetch_list], ) - y2, h2 = out + y2, h2, g1, g2 = out - np.testing.assert_allclose(y1.numpy(), y2, atol=1e-8, rtol=1e-5) - np.testing.assert_allclose(h1.numpy(), h2, atol=1e-8, rtol=1e-5) + print(g1) def runTest(self): + self.test_base() - self.test_base(True) + # self.test_base(True) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py index 21ebf05038e8c..ef1479f8098d0 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py @@ -15,14 +15,8 @@ import os import numpy as np import paddle.fluid as fluid -import paddle.fluid.core as core -import paddle.fluid.layers as layers import unittest -from paddle.fluid import ParamAttr -from paddle.fluid.framework import Program, grad_var_name -from paddle.fluid.executor import Executor -from paddle.fluid.backward import append_backward import paddle paddle.enable_static() @@ -32,712 +26,712 @@ fluid.core._set_eager_deletion_mode(0.0, 1.0, True) -class PyRNNBase: - def __init__(self, input_shape, output_shape): - self.x = np.ones(shape=input_shape).astype("float32") - self.y = np.zeros(shape=output_shape).astype("float32") +# class PyRNNBase: +# def __init__(self, input_shape, output_shape): +# self.x = np.ones(shape=input_shape).astype("float32") +# self.y = np.zeros(shape=output_shape).astype("float32") - def step(self, step_id, x): - raise NotImplementedError +# def step(self, step_id, x): +# raise NotImplementedError - def forward(self): - for step_id in range(self.x.shape[0]): - self.step(step_id, self.x[step_id]) - return np.array([np.mean(self.y)]) - - def segment_inputs(self): - return [self.x[i] for i in range(self.x.shape[0])] - - -class PySimpleRNN1(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - - seq_len, batch_size, input_dim = input_shape - self.h_boot = np.random.normal(size=(batch_size, input_dim)).astype( - "float32" - ) - - self.scale = 1.0 / 2.0 - men_dim = (seq_len, batch_size, input_dim) - self.mems = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id == 0: - pre_mem = self.h_boot - else: - pre_mem = self.mems[step_id - 1] - self.mems[step_id] = (pre_mem + x) * self.scale - self.y[step_id] = self.mems[step_id] - - -class PySimpleRNN2(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - - seq_len, batch_size, input_dim = input_shape - self.W = np.ones(shape=(input_dim, input_dim)).astype("float32") - self.U = np.zeros(shape=(input_dim, input_dim)).astype("float32") - self.h_boot = np.ones(shape=(batch_size, input_dim)).astype("float32") - - men_dim = (seq_len, batch_size, input_dim) - self.mems = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id > 0: - pre_mem = self.mems[step_id - 1] - else: - pre_mem = self.h_boot - xW = np.matmul(x, self.W).astype("float32") - hU = np.matmul(pre_mem, self.U).astype("float32") - - def py_sigmoid(x): - return 1.0 / (1.0 + np.exp(-x)) - - self.mems[step_id] = py_sigmoid(xW + hU) - self.y[step_id] = self.mems[step_id] - - -def create_tensor(np_data, place): - tensor = core.LoDTensor() - tensor.set(np_data, place) - return tensor - - -class EagerDeletionRecurrentOpTest1(unittest.TestCase): - ''' - Test RNNOp - equation: - h_t = ( x_t + h_{t-1} ) / scale - vars: - - x - memories: - - h - outputs: - - h - ''' - - input_dim = 2 - batch_size = 1 - sent_len = 1 - - def setup_program(self): - self.main_program = Program() - self.startup_program = Program() - self.place = core.CPUPlace() - - def setUp(self): - self.setup_program() - self.data_field = {"x", "h_boot"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False, - ) - x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot' - ) - h_boot.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre = rnn.memory(init=h_boot) - x_t = rnn.step_input(x) - - h = layers.scale( - x=layers.elementwise_add(x=h_pre, y=x_t), - scale=self.py_rnn.scale, - ) - - rnn.update_memory(h_pre, h) - rnn.output(h) - - return rnn() - - def forward(self): - gc_vars = core._get_eager_deletion_vars( - self.main_program.desc, [self.output.name] - ) - self.assertEqual(len(gc_vars), self.main_program.num_blocks) - self.feed_map = { - x: create_tensor(getattr(self.py_rnn, x), self.place) - for x in self.data_field - } - exe = Executor(self.place) - out = exe.run( - self.main_program, feed=self.feed_map, fetch_list=[self.output] - ) - - return out[0] - - def backward(self): - self.feed_map = { - x: create_tensor(getattr(self.py_rnn, x), self.place) - for x in self.data_field - } - fetch_list = [ - self.main_program.global_block().var(grad_var_name(x)) - for x in self.data_field - ] - - gc_vars = core._get_eager_deletion_vars( - self.main_program.desc, [var.name for var in fetch_list] - ) - self.assertEqual(len(gc_vars), self.main_program.num_blocks) - - exe = Executor(self.place) - return exe.run( - self.main_program, - feed=self.feed_map, - fetch_list=fetch_list, - return_numpy=False, - ) - - def test_backward(self, rtol=0.01): - self.check_forward() - num_grad = self.get_numerical_gradient() - - with fluid.program_guard(self.main_program, self.startup_program): - append_backward(self.output) - - ana_grad = [np.array(x) for x in self.backward()] - - for idx, name in enumerate(self.data_field): - self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape) - np.testing.assert_allclose( - num_grad[idx], - ana_grad[idx], - rtol=rtol, - err_msg='num_grad (' - + name - + ') has diff at ' - + str(self.place) - + '\nExpect ' - + str(num_grad[idx]) - + '\n' - + 'But Got' - + str(ana_grad[idx]) - + ' in class ' - + self.__class__.__name__, - ) - - def check_forward(self): - pd_output = self.forward() - py_output = self.py_rnn.forward() - self.assertEqual(pd_output.shape, py_output.shape) - np.testing.assert_allclose(pd_output, py_output, rtol=0.01) - - def get_numerical_gradient(self, delta=0.005): - dloss_dout = 1.0 - feed_list = [getattr(self.py_rnn, x) for x in self.data_field] - grad_list = [np.zeros_like(x) for x in feed_list] - for feed, grad in zip(feed_list, grad_list): - for f, g in np.nditer([feed, grad], op_flags=['readwrite']): - o = float(f) - f[...] = o + delta - y_pos = self.forward() - - f[...] = o - delta - y_neg = self.forward() - - f[...] = o - dout_dfeed = (y_pos - y_neg) / (delta * 2) - g[...] = dout_dfeed[0] - - return grad_list - - -class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1): - r''' - Test RNNOp - equation: - h_t = \sigma (W x_t + U h_{t-1}) - weights: - - W - - U - vars: - - x - memories: - - h - outputs: - - h - ''' - - input_dim = 2 - batch_size = 10 - sent_len = 2 - - def setUp(self): - self.setup_program() - - self.data_field = {"x", "h_boot", "W", "U"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False, - ) - x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot' - ) - h_boot.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre = rnn.memory(init=h_boot) - x_t = rnn.step_input(x) - - temp_l = layers.fc( - input=x_t, - size=self.input_dim, - param_attr=ParamAttr( - name='W', - initializer=fluid.initializer.ConstantInitializer(1.0), - ), - bias_attr=False, - ) - temp_r = layers.fc( - input=h_pre, - size=self.input_dim, - param_attr=ParamAttr( - name='U', - initializer=fluid.initializer.ConstantInitializer(0.0), - ), - bias_attr=False, - ) - - h = paddle.nn.functional.sigmoid( - x=layers.elementwise_add(x=temp_l, y=temp_r) - ) - - rnn.update_memory(h_pre, h) - rnn.output(h) - - return rnn() - - def test_backward(self): - super().test_backward(rtol=0.01) - - -class EagerDeletionRecurrentOpMultipleMemoryTest(EagerDeletionRecurrentOpTest1): - ''' - Test RNNOp with two memories - equation: - h_1 = h_pre_1 - h_2 = h_pre_2 - y = h_1 + h_2 - vars: - - x - memories: - - h_1, h_2 - outputs: - - y - ''' - - class PySimpleRNN3(PyRNNBase): - def __init__(self, input_shape, output_shape): - super( - EagerDeletionRecurrentOpMultipleMemoryTest.PySimpleRNN3, self - ).__init__(input_shape, output_shape) - - seq_len, batch_size, input_dim = input_shape - self.h_boot1 = np.random.normal( - size=(batch_size, input_dim) - ).astype("float32") - self.h_boot2 = np.random.normal( - size=(batch_size, input_dim) - ).astype("float32") - - men_dim = (seq_len, batch_size, input_dim) - self.mems1 = np.zeros(shape=men_dim).astype("float32") - self.mems2 = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id == 0: - pre_mem1 = self.h_boot1 - pre_mem2 = self.h_boot2 - else: - pre_mem1 = self.mems1[step_id - 1] - pre_mem2 = self.mems2[step_id - 1] - self.mems1[step_id] = pre_mem1 - self.mems2[step_id] = pre_mem2 - self.y[step_id] = self.mems1[step_id] + self.mems2[step_id] + x - - input_dim = 1 - batch_size = 1 - sent_len = 2 - - def setUp(self): - self.setup_program() - - self.data_field = {"x", "h_boot1", "h_boot2"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = EagerDeletionRecurrentOpMultipleMemoryTest.PySimpleRNN3( - self.input_shape, self.output_shape - ) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False, - ) - x.stop_gradient = False - h_boot1 = layers.data( - shape=[self.batch_size, self.input_dim], - dtype='float32', - name='h_boot1', - append_batch_size=False, - ) - h_boot1.stop_gradient = False - h_boot2 = layers.data( - shape=[self.batch_size, self.input_dim], - dtype='float32', - name='h_boot2', - append_batch_size=False, - ) - h_boot2.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre1 = rnn.memory(init=h_boot1) - h_pre2 = rnn.memory(init=h_boot2) - x_t = rnn.step_input(x) - - mem1 = layers.scale(x=h_pre1, scale=1.0) - mem2 = layers.scale(x=h_pre2, scale=1.0) - out = layers.sums(input=[mem1, x_t, mem2]) - - rnn.update_memory(h_pre1, mem1) - rnn.update_memory(h_pre2, mem2) - rnn.output(out) - - return rnn() - - -class EagerDeletionRecurrentOpNoMemBootTest(EagerDeletionRecurrentOpTest1): - ''' - Test RNNOp without memory boot - equation: - mem = x + mem_pre - y = mem - vars: - - x - memories: - - mem - outputs: - - y - ''' - - class PySimpleRNN4(PyRNNBase): - def __init__(self, input_shape, output_shape): - super( - EagerDeletionRecurrentOpNoMemBootTest.PySimpleRNN4, self - ).__init__(input_shape, output_shape) - men_dim = input_shape - self.mems = np.zeros(shape=men_dim).astype("float32") - - def step(self, step_id, x): - if step_id == 0: - pre_mem = np.zeros_like(x) - else: - pre_mem = self.mems[step_id - 1] - self.mems[step_id] = pre_mem + x - self.y[step_id] = self.mems[step_id] - - input_dim = 1 - batch_size = 1 - sent_len = 2 - - def setUp(self): - self.setup_program() - - self.data_field = {"x"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = EagerDeletionRecurrentOpNoMemBootTest.PySimpleRNN4( - self.input_shape, self.output_shape - ) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False, - ) - x.stop_gradient = False - - rnn = layers.StaticRNN() - with rnn.step(): - mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x) - x_t = rnn.step_input(x) - mem = layers.elementwise_add(x=mem_pre, y=x_t) - rnn.update_memory(mem_pre, mem) - rnn.output(mem) - - return rnn() - - -class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1): - ''' - Test RNNOp with two recurrent ops - equation: - first_rnn: - mem_inside = x + mem_pre_inside - first_inside_out = mem_inside - second_rnn: - mem = x + reduce_sum(rnn_inside_out) - y = mem + mem_pre - vars: - - x - memories: - - mem_inside - - mem - outputs: - - y - ''' - - class PySimpleRNN5(PyRNNBase): - def __init__(self, input_shape, output_shape): - super().__init__(input_shape, output_shape) - self.mem_0 = np.zeros(shape=input_shape).astype("float32") - self.mem_1 = np.zeros(shape=input_shape).astype("float32") - self.rnn_0_output = np.zeros(shape=input_shape).astype("float32") - - def step(self, step_id, x): - # First Rnn - for step in range(self.x.shape[0]): - x_t = self.x[step] - pre_mem = ( - np.zeros_like(x_t) if step == 0 else self.mem_0[step - 1] - ) - self.mem_0[step] = x_t + pre_mem - self.rnn_0_output[step] = self.mem_0[step] - # Second RNN - pre_mem = ( - np.zeros_like(x) if step_id == 0 else self.mem_1[step_id - 1] - ) - self.mem_1[step_id] = x + np.sum(self.rnn_0_output) - self.y[step_id] = self.mem_1[step_id] + pre_mem - - input_dim = 1 - batch_size = 1 - sent_len = 1 - - def setUp(self): - self.setup_program() - - self.data_field = {"x"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = EagerDeletionTwoRecurrentOpsTest.PySimpleRNN5( - self.input_shape, self.output_shape - ) - - with fluid.program_guard(self.main_program, self.startup_program): - self.output = paddle.mean(self.create_rnn_op()) - - def create_rnn_op(self): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False, - ) - x.stop_gradient = False - - rnn_0 = layers.StaticRNN() - with rnn_0.step(): - x_t = rnn_0.step_input(x) - mem_pre = rnn_0.memory(shape=[-1, self.input_dim], batch_ref=x) - mem = layers.elementwise_add(x=mem_pre, y=x_t) - rnn_0.update_memory(mem_pre, mem) - rnn_0.output(mem) - - rnn_1 = layers.StaticRNN() - with rnn_1.step(): - mem_pre = rnn_1.memory(shape=[-1, self.input_dim], batch_ref=x) - x_t = rnn_1.step_input(x) - last_rnn_output = rnn_0() - last_rnn_sum = fluid.layers.reduce_sum(last_rnn_output) - mem = layers.elementwise_add(x=x_t, y=last_rnn_sum) - y = layers.elementwise_add(x=mem_pre, y=mem) - rnn_1.update_memory(mem_pre, mem) - rnn_1.output(y) - return rnn_1() - - -class EagerDeletionRecurrentOpParallelExecutorTest( - EagerDeletionRecurrentOpTest1 -): - ''' - Test RNNOp with ParallelExecutor - equation: - h_t = ( x_t + h_{t-1} ) / scale - vars: - - x - memories: - - h - outputs: - - h - ''' - - def forward(self): - self.feed_map = { - x: create_tensor(getattr(self.py_rnn, x), self.place) - for x in self.data_field - } - - build_strategy = fluid.BuildStrategy() - build_strategy.enable_inplace = True - exec_strategy = fluid.ExecutionStrategy() - parallel_exe = fluid.ParallelExecutor( - use_cuda=False, - main_program=self.main_program, - build_strategy=build_strategy, - exec_strategy=exec_strategy, - ) - out = parallel_exe.run(feed=self.feed_map, fetch_list=[self.output]) - return out[0] - - def backward(self): - self.feed_map = { - x: create_tensor(getattr(self.py_rnn, x), self.place) - for x in self.data_field - } - fetch_list = [ - self.main_program.global_block().var(grad_var_name(x)) - for x in self.data_field - ] - - build_strategy = fluid.BuildStrategy() - build_strategy.enable_inplace = True - exec_strategy = fluid.ExecutionStrategy() - parallel_exe = fluid.ParallelExecutor( - use_cuda=False, - loss_name=self.output.name, - main_program=self.main_program, - build_strategy=build_strategy, - exec_strategy=exec_strategy, - ) - return parallel_exe.run( - feed=self.feed_map, fetch_list=fetch_list, return_numpy=False - ) - - -class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest( - EagerDeletionRecurrentOpTest1 -): - ''' - Test one forward only RNN and one backward RNN in one program - ''' - - def setUp(self): - self.setup_program() - self.data_field = {"x", "h_boot"} - - self.input_shape = (self.sent_len, self.batch_size, self.input_dim) - self.output_shape = (self.sent_len, self.batch_size, self.input_dim) - self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) - - with fluid.program_guard(self.main_program, self.startup_program): - x = layers.data( - shape=[self.sent_len, self.batch_size, self.input_dim], - dtype='float32', - name='x', - append_batch_size=False, - ) - x.stop_gradient = False - h_boot = layers.data( - shape=[self.input_dim], dtype='float32', name='h_boot' - ) - h_boot.stop_gradient = False - - forward_only_rnn = layers.StaticRNN() - with forward_only_rnn.step(): - h_pre = forward_only_rnn.memory(init=h_boot) - x_t = forward_only_rnn.step_input(x) - - h = layers.scale( - x=layers.elementwise_add(x=h_pre, y=x_t), - scale=self.py_rnn.scale, - ) - - forward_only_rnn.update_memory(h_pre, h) - forward_only_rnn.output(h) - forward_only_output = forward_only_rnn() - forward_only_output.stop_gradient = True - self.forward_only_output = paddle.mean(forward_only_output) - - rnn = layers.StaticRNN() - with rnn.step(): - h_pre = rnn.memory(init=h_boot) - x_t = rnn.step_input(x) - - h = layers.scale( - x=layers.elementwise_add(x=h_pre, y=x_t), - scale=self.py_rnn.scale, - ) - - rnn.update_memory(h_pre, h) - rnn.output(h) - - self.output = paddle.mean(rnn()) - - def forward_two_rnn(self): - self.feed_map = { - x: create_tensor(getattr(self.py_rnn, x), self.place) - for x in self.data_field - } - exe = Executor(self.place) - out = exe.run( - self.main_program, - feed=self.feed_map, - fetch_list=[self.forward_only_output, self.output], - ) - - return out[0], out[1] - - def check_forward(self): - forward_only_output, pd_output = self.forward_two_rnn() - py_output = self.py_rnn.forward() - self.assertEqual(forward_only_output.shape, py_output.shape) - self.assertEqual(pd_output.shape, py_output.shape) - np.testing.assert_allclose(forward_only_output, py_output, rtol=0.01) - np.testing.assert_allclose(pd_output, py_output, rtol=0.01) +# def forward(self): +# for step_id in range(self.x.shape[0]): +# self.step(step_id, self.x[step_id]) +# return np.array([np.mean(self.y)]) + +# def segment_inputs(self): +# return [self.x[i] for i in range(self.x.shape[0])] + + +# class PySimpleRNN1(PyRNNBase): +# def __init__(self, input_shape, output_shape): +# super().__init__(input_shape, output_shape) + +# seq_len, batch_size, input_dim = input_shape +# self.h_boot = np.random.normal(size=(batch_size, input_dim)).astype( +# "float32" +# ) + +# self.scale = 1.0 / 2.0 +# men_dim = (seq_len, batch_size, input_dim) +# self.mems = np.zeros(shape=men_dim).astype("float32") + +# def step(self, step_id, x): +# if step_id == 0: +# pre_mem = self.h_boot +# else: +# pre_mem = self.mems[step_id - 1] +# self.mems[step_id] = (pre_mem + x) * self.scale +# self.y[step_id] = self.mems[step_id] + + +# class PySimpleRNN2(PyRNNBase): +# def __init__(self, input_shape, output_shape): +# super().__init__(input_shape, output_shape) + +# seq_len, batch_size, input_dim = input_shape +# self.W = np.ones(shape=(input_dim, input_dim)).astype("float32") +# self.U = np.zeros(shape=(input_dim, input_dim)).astype("float32") +# self.h_boot = np.ones(shape=(batch_size, input_dim)).astype("float32") + +# men_dim = (seq_len, batch_size, input_dim) +# self.mems = np.zeros(shape=men_dim).astype("float32") + +# def step(self, step_id, x): +# if step_id > 0: +# pre_mem = self.mems[step_id - 1] +# else: +# pre_mem = self.h_boot +# xW = np.matmul(x, self.W).astype("float32") +# hU = np.matmul(pre_mem, self.U).astype("float32") + +# def py_sigmoid(x): +# return 1.0 / (1.0 + np.exp(-x)) + +# self.mems[step_id] = py_sigmoid(xW + hU) +# self.y[step_id] = self.mems[step_id] + + +# def create_tensor(np_data, place): +# tensor = core.LoDTensor() +# tensor.set(np_data, place) +# return tensor + + +# class EagerDeletionRecurrentOpTest1(unittest.TestCase): +# ''' +# Test RNNOp +# equation: +# h_t = ( x_t + h_{t-1} ) / scale +# vars: +# - x +# memories: +# - h +# outputs: +# - h +# ''' + +# input_dim = 2 +# batch_size = 1 +# sent_len = 1 + +# def setup_program(self): +# self.main_program = Program() +# self.startup_program = Program() +# self.place = core.CPUPlace() + +# def setUp(self): +# self.setup_program() +# self.data_field = {"x", "h_boot"} + +# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) + +# with fluid.program_guard(self.main_program, self.startup_program): +# self.output = paddle.mean(self.create_rnn_op()) + +# def create_rnn_op(self): +# x = layers.data( +# shape=[self.sent_len, self.batch_size, self.input_dim], +# dtype='float32', +# name='x', +# append_batch_size=False, +# ) +# x.stop_gradient = False +# h_boot = layers.data( +# shape=[self.input_dim], dtype='float32', name='h_boot' +# ) +# h_boot.stop_gradient = False + +# rnn = layers.StaticRNN() +# with rnn.step(): +# h_pre = rnn.memory(init=h_boot) +# x_t = rnn.step_input(x) + +# h = layers.scale( +# x=layers.elementwise_add(x=h_pre, y=x_t), +# scale=self.py_rnn.scale, +# ) + +# rnn.update_memory(h_pre, h) +# rnn.output(h) + +# return rnn() + +# def forward(self): +# gc_vars = core._get_eager_deletion_vars( +# self.main_program.desc, [self.output.name] +# ) +# self.assertEqual(len(gc_vars), self.main_program.num_blocks) +# self.feed_map = { +# x: create_tensor(getattr(self.py_rnn, x), self.place) +# for x in self.data_field +# } +# exe = Executor(self.place) +# out = exe.run( +# self.main_program, feed=self.feed_map, fetch_list=[self.output] +# ) + +# return out[0] + +# def backward(self): +# self.feed_map = { +# x: create_tensor(getattr(self.py_rnn, x), self.place) +# for x in self.data_field +# } +# fetch_list = [ +# self.main_program.global_block().var(grad_var_name(x)) +# for x in self.data_field +# ] + +# gc_vars = core._get_eager_deletion_vars( +# self.main_program.desc, [var.name for var in fetch_list] +# ) +# self.assertEqual(len(gc_vars), self.main_program.num_blocks) + +# exe = Executor(self.place) +# return exe.run( +# self.main_program, +# feed=self.feed_map, +# fetch_list=fetch_list, +# return_numpy=False, +# ) + +# def test_backward(self, rtol=0.01): +# self.check_forward() +# num_grad = self.get_numerical_gradient() + +# with fluid.program_guard(self.main_program, self.startup_program): +# append_backward(self.output) + +# ana_grad = [np.array(x) for x in self.backward()] + +# for idx, name in enumerate(self.data_field): +# self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape) +# np.testing.assert_allclose( +# num_grad[idx], +# ana_grad[idx], +# rtol=rtol, +# err_msg='num_grad (' +# + name +# + ') has diff at ' +# + str(self.place) +# + '\nExpect ' +# + str(num_grad[idx]) +# + '\n' +# + 'But Got' +# + str(ana_grad[idx]) +# + ' in class ' +# + self.__class__.__name__, +# ) + +# def check_forward(self): +# pd_output = self.forward() +# py_output = self.py_rnn.forward() +# self.assertEqual(pd_output.shape, py_output.shape) +# np.testing.assert_allclose(pd_output, py_output, rtol=0.01) + +# def get_numerical_gradient(self, delta=0.005): +# dloss_dout = 1.0 +# feed_list = [getattr(self.py_rnn, x) for x in self.data_field] +# grad_list = [np.zeros_like(x) for x in feed_list] +# for feed, grad in zip(feed_list, grad_list): +# for f, g in np.nditer([feed, grad], op_flags=['readwrite']): +# o = float(f) +# f[...] = o + delta +# y_pos = self.forward() + +# f[...] = o - delta +# y_neg = self.forward() + +# f[...] = o +# dout_dfeed = (y_pos - y_neg) / (delta * 2) +# g[...] = dout_dfeed[0] + +# return grad_list + + +# class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1): +# r''' +# Test RNNOp +# equation: +# h_t = \sigma (W x_t + U h_{t-1}) +# weights: +# - W +# - U +# vars: +# - x +# memories: +# - h +# outputs: +# - h +# ''' + +# input_dim = 2 +# batch_size = 10 +# sent_len = 2 + +# def setUp(self): +# self.setup_program() + +# self.data_field = {"x", "h_boot", "W", "U"} + +# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape) + +# with fluid.program_guard(self.main_program, self.startup_program): +# self.output = paddle.mean(self.create_rnn_op()) + +# def create_rnn_op(self): +# x = layers.data( +# shape=[self.sent_len, self.batch_size, self.input_dim], +# dtype='float32', +# name='x', +# append_batch_size=False, +# ) +# x.stop_gradient = False +# h_boot = layers.data( +# shape=[self.input_dim], dtype='float32', name='h_boot' +# ) +# h_boot.stop_gradient = False + +# rnn = layers.StaticRNN() +# with rnn.step(): +# h_pre = rnn.memory(init=h_boot) +# x_t = rnn.step_input(x) + +# temp_l = layers.fc( +# input=x_t, +# size=self.input_dim, +# param_attr=ParamAttr( +# name='W', +# initializer=fluid.initializer.ConstantInitializer(1.0), +# ), +# bias_attr=False, +# ) +# temp_r = layers.fc( +# input=h_pre, +# size=self.input_dim, +# param_attr=ParamAttr( +# name='U', +# initializer=fluid.initializer.ConstantInitializer(0.0), +# ), +# bias_attr=False, +# ) + +# h = paddle.nn.functional.sigmoid( +# x=layers.elementwise_add(x=temp_l, y=temp_r) +# ) + +# rnn.update_memory(h_pre, h) +# rnn.output(h) + +# return rnn() + +# def test_backward(self): +# super().test_backward(rtol=0.01) + + +# class EagerDeletionRecurrentOpMultipleMemoryTest(EagerDeletionRecurrentOpTest1): +# ''' +# Test RNNOp with two memories +# equation: +# h_1 = h_pre_1 +# h_2 = h_pre_2 +# y = h_1 + h_2 +# vars: +# - x +# memories: +# - h_1, h_2 +# outputs: +# - y +# ''' + +# class PySimpleRNN3(PyRNNBase): +# def __init__(self, input_shape, output_shape): +# super( +# EagerDeletionRecurrentOpMultipleMemoryTest.PySimpleRNN3, self +# ).__init__(input_shape, output_shape) + +# seq_len, batch_size, input_dim = input_shape +# self.h_boot1 = np.random.normal( +# size=(batch_size, input_dim) +# ).astype("float32") +# self.h_boot2 = np.random.normal( +# size=(batch_size, input_dim) +# ).astype("float32") + +# men_dim = (seq_len, batch_size, input_dim) +# self.mems1 = np.zeros(shape=men_dim).astype("float32") +# self.mems2 = np.zeros(shape=men_dim).astype("float32") + +# def step(self, step_id, x): +# if step_id == 0: +# pre_mem1 = self.h_boot1 +# pre_mem2 = self.h_boot2 +# else: +# pre_mem1 = self.mems1[step_id - 1] +# pre_mem2 = self.mems2[step_id - 1] +# self.mems1[step_id] = pre_mem1 +# self.mems2[step_id] = pre_mem2 +# self.y[step_id] = self.mems1[step_id] + self.mems2[step_id] + x + +# input_dim = 1 +# batch_size = 1 +# sent_len = 2 + +# def setUp(self): +# self.setup_program() + +# self.data_field = {"x", "h_boot1", "h_boot2"} + +# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.py_rnn = EagerDeletionRecurrentOpMultipleMemoryTest.PySimpleRNN3( +# self.input_shape, self.output_shape +# ) + +# with fluid.program_guard(self.main_program, self.startup_program): +# self.output = paddle.mean(self.create_rnn_op()) + +# def create_rnn_op(self): +# x = layers.data( +# shape=[self.sent_len, self.batch_size, self.input_dim], +# dtype='float32', +# name='x', +# append_batch_size=False, +# ) +# x.stop_gradient = False +# h_boot1 = layers.data( +# shape=[self.batch_size, self.input_dim], +# dtype='float32', +# name='h_boot1', +# append_batch_size=False, +# ) +# h_boot1.stop_gradient = False +# h_boot2 = layers.data( +# shape=[self.batch_size, self.input_dim], +# dtype='float32', +# name='h_boot2', +# append_batch_size=False, +# ) +# h_boot2.stop_gradient = False + +# rnn = layers.StaticRNN() +# with rnn.step(): +# h_pre1 = rnn.memory(init=h_boot1) +# h_pre2 = rnn.memory(init=h_boot2) +# x_t = rnn.step_input(x) + +# mem1 = layers.scale(x=h_pre1, scale=1.0) +# mem2 = layers.scale(x=h_pre2, scale=1.0) +# out = layers.sums(input=[mem1, x_t, mem2]) + +# rnn.update_memory(h_pre1, mem1) +# rnn.update_memory(h_pre2, mem2) +# rnn.output(out) + +# return rnn() + + +# class EagerDeletionRecurrentOpNoMemBootTest(EagerDeletionRecurrentOpTest1): +# ''' +# Test RNNOp without memory boot +# equation: +# mem = x + mem_pre +# y = mem +# vars: +# - x +# memories: +# - mem +# outputs: +# - y +# ''' + +# class PySimpleRNN4(PyRNNBase): +# def __init__(self, input_shape, output_shape): +# super( +# EagerDeletionRecurrentOpNoMemBootTest.PySimpleRNN4, self +# ).__init__(input_shape, output_shape) +# men_dim = input_shape +# self.mems = np.zeros(shape=men_dim).astype("float32") + +# def step(self, step_id, x): +# if step_id == 0: +# pre_mem = np.zeros_like(x) +# else: +# pre_mem = self.mems[step_id - 1] +# self.mems[step_id] = pre_mem + x +# self.y[step_id] = self.mems[step_id] + +# input_dim = 1 +# batch_size = 1 +# sent_len = 2 + +# def setUp(self): +# self.setup_program() + +# self.data_field = {"x"} + +# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.py_rnn = EagerDeletionRecurrentOpNoMemBootTest.PySimpleRNN4( +# self.input_shape, self.output_shape +# ) + +# with fluid.program_guard(self.main_program, self.startup_program): +# self.output = paddle.mean(self.create_rnn_op()) + +# def create_rnn_op(self): +# x = layers.data( +# shape=[self.sent_len, self.batch_size, self.input_dim], +# dtype='float32', +# name='x', +# append_batch_size=False, +# ) +# x.stop_gradient = False + +# rnn = layers.StaticRNN() +# with rnn.step(): +# mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x) +# x_t = rnn.step_input(x) +# mem = layers.elementwise_add(x=mem_pre, y=x_t) +# rnn.update_memory(mem_pre, mem) +# rnn.output(mem) + +# return rnn() + + +# class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1): +# ''' +# Test RNNOp with two recurrent ops +# equation: +# first_rnn: +# mem_inside = x + mem_pre_inside +# first_inside_out = mem_inside +# second_rnn: +# mem = x + reduce_sum(rnn_inside_out) +# y = mem + mem_pre +# vars: +# - x +# memories: +# - mem_inside +# - mem +# outputs: +# - y +# ''' + +# class PySimpleRNN5(PyRNNBase): +# def __init__(self, input_shape, output_shape): +# super().__init__(input_shape, output_shape) +# self.mem_0 = np.zeros(shape=input_shape).astype("float32") +# self.mem_1 = np.zeros(shape=input_shape).astype("float32") +# self.rnn_0_output = np.zeros(shape=input_shape).astype("float32") + +# def step(self, step_id, x): +# # First Rnn +# for step in range(self.x.shape[0]): +# x_t = self.x[step] +# pre_mem = ( +# np.zeros_like(x_t) if step == 0 else self.mem_0[step - 1] +# ) +# self.mem_0[step] = x_t + pre_mem +# self.rnn_0_output[step] = self.mem_0[step] +# # Second RNN +# pre_mem = ( +# np.zeros_like(x) if step_id == 0 else self.mem_1[step_id - 1] +# ) +# self.mem_1[step_id] = x + np.sum(self.rnn_0_output) +# self.y[step_id] = self.mem_1[step_id] + pre_mem + +# input_dim = 1 +# batch_size = 1 +# sent_len = 1 + +# def setUp(self): +# self.setup_program() + +# self.data_field = {"x"} + +# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.py_rnn = EagerDeletionTwoRecurrentOpsTest.PySimpleRNN5( +# self.input_shape, self.output_shape +# ) + +# with fluid.program_guard(self.main_program, self.startup_program): +# self.output = paddle.mean(self.create_rnn_op()) + +# def create_rnn_op(self): +# x = layers.data( +# shape=[self.sent_len, self.batch_size, self.input_dim], +# dtype='float32', +# name='x', +# append_batch_size=False, +# ) +# x.stop_gradient = False + +# rnn_0 = layers.StaticRNN() +# with rnn_0.step(): +# x_t = rnn_0.step_input(x) +# mem_pre = rnn_0.memory(shape=[-1, self.input_dim], batch_ref=x) +# mem = layers.elementwise_add(x=mem_pre, y=x_t) +# rnn_0.update_memory(mem_pre, mem) +# rnn_0.output(mem) + +# rnn_1 = layers.StaticRNN() +# with rnn_1.step(): +# mem_pre = rnn_1.memory(shape=[-1, self.input_dim], batch_ref=x) +# x_t = rnn_1.step_input(x) +# last_rnn_output = rnn_0() +# last_rnn_sum = fluid.layers.reduce_sum(last_rnn_output) +# mem = layers.elementwise_add(x=x_t, y=last_rnn_sum) +# y = layers.elementwise_add(x=mem_pre, y=mem) +# rnn_1.update_memory(mem_pre, mem) +# rnn_1.output(y) +# return rnn_1() + + +# class EagerDeletionRecurrentOpParallelExecutorTest( +# EagerDeletionRecurrentOpTest1 +# ): +# ''' +# Test RNNOp with ParallelExecutor +# equation: +# h_t = ( x_t + h_{t-1} ) / scale +# vars: +# - x +# memories: +# - h +# outputs: +# - h +# ''' + +# def forward(self): +# self.feed_map = { +# x: create_tensor(getattr(self.py_rnn, x), self.place) +# for x in self.data_field +# } + +# build_strategy = fluid.BuildStrategy() +# build_strategy.enable_inplace = True +# exec_strategy = fluid.ExecutionStrategy() +# parallel_exe = fluid.ParallelExecutor( +# use_cuda=False, +# main_program=self.main_program, +# build_strategy=build_strategy, +# exec_strategy=exec_strategy, +# ) +# out = parallel_exe.run(feed=self.feed_map, fetch_list=[self.output]) +# return out[0] + +# def backward(self): +# self.feed_map = { +# x: create_tensor(getattr(self.py_rnn, x), self.place) +# for x in self.data_field +# } +# fetch_list = [ +# self.main_program.global_block().var(grad_var_name(x)) +# for x in self.data_field +# ] + +# build_strategy = fluid.BuildStrategy() +# build_strategy.enable_inplace = True +# exec_strategy = fluid.ExecutionStrategy() +# parallel_exe = fluid.ParallelExecutor( +# use_cuda=False, +# loss_name=self.output.name, +# main_program=self.main_program, +# build_strategy=build_strategy, +# exec_strategy=exec_strategy, +# ) +# return parallel_exe.run( +# feed=self.feed_map, fetch_list=fetch_list, return_numpy=False +# ) + + +# class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest( +# EagerDeletionRecurrentOpTest1 +# ): +# ''' +# Test one forward only RNN and one backward RNN in one program +# ''' + +# def setUp(self): +# self.setup_program() +# self.data_field = {"x", "h_boot"} + +# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) +# self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) + +# with fluid.program_guard(self.main_program, self.startup_program): +# x = layers.data( +# shape=[self.sent_len, self.batch_size, self.input_dim], +# dtype='float32', +# name='x', +# append_batch_size=False, +# ) +# x.stop_gradient = False +# h_boot = layers.data( +# shape=[self.input_dim], dtype='float32', name='h_boot' +# ) +# h_boot.stop_gradient = False + +# forward_only_rnn = layers.StaticRNN() +# with forward_only_rnn.step(): +# h_pre = forward_only_rnn.memory(init=h_boot) +# x_t = forward_only_rnn.step_input(x) + +# h = layers.scale( +# x=layers.elementwise_add(x=h_pre, y=x_t), +# scale=self.py_rnn.scale, +# ) + +# forward_only_rnn.update_memory(h_pre, h) +# forward_only_rnn.output(h) +# forward_only_output = forward_only_rnn() +# forward_only_output.stop_gradient = True +# self.forward_only_output = paddle.mean(forward_only_output) + +# rnn = layers.StaticRNN() +# with rnn.step(): +# h_pre = rnn.memory(init=h_boot) +# x_t = rnn.step_input(x) + +# h = layers.scale( +# x=layers.elementwise_add(x=h_pre, y=x_t), +# scale=self.py_rnn.scale, +# ) + +# rnn.update_memory(h_pre, h) +# rnn.output(h) + +# self.output = paddle.mean(rnn()) + +# def forward_two_rnn(self): +# self.feed_map = { +# x: create_tensor(getattr(self.py_rnn, x), self.place) +# for x in self.data_field +# } +# exe = Executor(self.place) +# out = exe.run( +# self.main_program, +# feed=self.feed_map, +# fetch_list=[self.forward_only_output, self.output], +# ) + +# return out[0], out[1] + +# def check_forward(self): +# forward_only_output, pd_output = self.forward_two_rnn() +# py_output = self.py_rnn.forward() +# self.assertEqual(forward_only_output.shape, py_output.shape) +# self.assertEqual(pd_output.shape, py_output.shape) +# np.testing.assert_allclose(forward_only_output, py_output, rtol=0.01) +# np.testing.assert_allclose(pd_output, py_output, rtol=0.01) class RecurrentNet(paddle.nn.Layer): @@ -761,20 +755,24 @@ def tearDown(self): def test_recurrent_backward(self): net = RecurrentNet() - inputs = paddle.rand((4, 23, 16)) + inputs = paddle.rand((4, 2, 16)) inputs.stop_gradient = False prev_h = paddle.randn((4, 32)) prev_h.stop_gradient = False outputs, final_states = net(inputs, prev_h) + # print( final_states ) outputs.backward() dy_grad = inputs.gradient() inputs.clear_gradient() + print("oreh", prev_h.gradient()) net = paddle.jit.to_static(net) outputs, final_states = net(inputs, prev_h) + # print( final_states) outputs.backward() st_grad = inputs.gradient() + print("preh ", prev_h.gradient()) np.testing.assert_allclose(dy_grad, st_grad) diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py index 1d04b4310539f..588f03330ddce 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py @@ -30,6 +30,7 @@ import paddle.fluid.layers.utils as utils import numpy as np +import paddle class TestLSTMCellError(unittest.TestCase): @@ -744,4 +745,5 @@ def test_seq2seq_model(self): if __name__ == '__main__': + paddle.enable_static() unittest.main() From c8f184c652cddf6a768bed6fff890be6fbabad42 Mon Sep 17 00:00:00 2001 From: phlrain Date: Wed, 23 Nov 2022 09:21:00 +0000 Subject: [PATCH 04/16] update --- paddle/fluid/framework/op_proto_maker.cc | 68 ++++---- .../fluid/operators/controlflow/while_op.cc | 1 + python/paddle/fluid/backward.py | 1 + .../fluid/tests/unittests/rnn/test_rnn_api.py | 146 +++++++++++++++++- 4 files changed, 181 insertions(+), 35 deletions(-) diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc index fbad45e889156..c98ffd30e808e 100644 --- a/paddle/fluid/framework/op_proto_maker.cc +++ b/paddle/fluid/framework/op_proto_maker.cc @@ -77,43 +77,45 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto, } op_checker_->InitDefaultAttributeMap(extra_attrs_ptr); - AddAttr(OpRoleAttrName(), "The role of this operator") - .InEnum( - {static_cast(OpRole::kForward), - static_cast(OpRole::kBackward), - static_cast(OpRole::kOptimize), - static_cast(OpRole::kRPC), - static_cast(OpRole::kDist), - static_cast(OpRole::kLRSched), - static_cast(OpRole::kLoss) | static_cast(OpRole::kForward), - static_cast(OpRole::kLoss) | - static_cast(OpRole::kBackward), - static_cast(OpRole::kOptimize) | - static_cast(OpRole::kLRSched), - static_cast(OpRole::kNotSpecified)}) - .SetDefault(static_cast(OpRole::kNotSpecified)) - .AsExtra(); - AddAttr>(OpRoleVarAttrName(), - "Optimized for variable") - .SetDefault({}) - .AsExtra(); - - AddAttr(OpNamescopeAttrName(), "Operator name with namescope.") - .SetDefault("") - .AsExtra(); - - AddAttr>(OpCreationCallstackAttrName(), - "Callstack for Op Creation.") - .SetDefault({}) - .AsExtra(); + // AddAttr(OpRoleAttrName(), "The role of this operator") + // .InEnum( + // {static_cast(OpRole::kForward), + // static_cast(OpRole::kBackward), + // static_cast(OpRole::kOptimize), + // static_cast(OpRole::kRPC), + // static_cast(OpRole::kDist), + // static_cast(OpRole::kLRSched), + // static_cast(OpRole::kLoss) | + // static_cast(OpRole::kForward), + // static_cast(OpRole::kLoss) | + // static_cast(OpRole::kBackward), + // static_cast(OpRole::kOptimize) | + // static_cast(OpRole::kLRSched), + // static_cast(OpRole::kNotSpecified)}) + // .SetDefault(static_cast(OpRole::kNotSpecified)) + // .AsExtra(); + // AddAttr>(OpRoleVarAttrName(), + // "Optimized for variable") + // .SetDefault({}) + // .AsExtra(); + + // AddAttr(OpNamescopeAttrName(), "Operator name with + // namescope.") + // .SetDefault("") + // .AsExtra(); + + // AddAttr>(OpCreationCallstackAttrName(), + // "Callstack for Op Creation.") + // .SetDefault({}) + // .AsExtra(); AddAttr(OpDeviceAttrName(), "Device type of this operator.") .SetDefault("") .AsExtra(); - AddAttr(OpWithQuantAttrName(), - "Whether the operator has attributes used by quantization. ") - .SetDefault(false) - .AsExtra(); + // AddAttr(OpWithQuantAttrName(), + // "Whether the operator has attributes used by quantization. ") + // .SetDefault(false) + // .AsExtra(); Validate(); } diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index 8f87fdf932f54..de474ca99e59e 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -472,6 +472,7 @@ class WhileGradOpMaker : public framework::SingleGradOpMaker { // Ignore IGs that is not generated by the inside block. std::unordered_set inner_op_outputs; for (const auto *op : grad_block->AllOps()) { + LOG(ERROR) << op->Type(); for (auto &oname : op->OutputArgumentNames()) { inner_op_outputs.insert(oname); } diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 8f8b4bfa73115..159ea6fb0be6e 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -1348,6 +1348,7 @@ def update_distop_context( pre_input_grad_names_set = copy.copy(input_grad_names_set) input_grad_names_set = None sub_block_path = op_path_dict[op._block_attr_id("sub_block")] + print("sub lock", sub_block_path) _append_backward_ops_( sub_block, sub_block_path, diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py index cc03b7bf33cba..4502c1f10a954 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py @@ -258,7 +258,7 @@ def setUp(self): paddle.disable_static(self.place) paddle.seed(self.seed) paddle.framework.random._manual_program_seed(self.seed) - cell_dy = paddle.nn.GRUCell(self.input_size, self.hidden_size) + cell_dy = paddle.nn.SimpleRNNCell(self.input_size, self.hidden_size) self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) paddle.enable_static() @@ -295,7 +295,9 @@ def setUp(self): pre_h_data.stop_gradient = False rnn_in_data.stop_gradient = False - cell_st = paddle.nn.GRUCell(self.input_size, self.hidden_size) + cell_st = paddle.nn.SimpleRNNCell( + self.input_size, self.hidden_size + ) self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) st_out, st_last_h = self.rnn_st( @@ -349,6 +351,7 @@ def test_base(self, test_seq_len=False): y2, h2, g1, g2 = out print(g1) + print(self.main_program) def runTest(self): @@ -356,6 +359,145 @@ def runTest(self): # self.test_base(True) +# class TinyRNN(RNNCellBase): +# def __init__( self, +# hidden_size): +# super().__init__() +# self.hidden_size = hidden_size + +# def forward( self, input, state=None): +# out = input + input +# return out, state + +# @property +# def state_shape(self): +# r""" +# The `state_shape` of GRUCell is a shape `[hidden_size]` (-1 for batch +# size would be automatically inserted into shape). The shape corresponds +# to the shape of :math:`h_{t-1}`. +# """ +# return (self.hidden_size,) + + +# class TestTinyRNNBackward(unittest.TestCase): +# def __init__(self, time_major=True, direction="forward", place="cpu"): +# super().__init__("runTest") +# self.time_major = time_major +# self.direction = direction +# self.num_directions = 2 if direction in bidirectional_list else 1 +# self.place = place +# self.batch_size = 4 +# self.input_size = 4 +# self.hidden_size = 4 +# self.seq_len = 12 +# self.seed = 1234 + +# def setUp(self): +# # Since `set_device` is global, set `set_device` in `setUp` rather than +# # `__init__` to avoid using an error device set by another test case. + +# place = paddle.set_device(self.place) +# paddle.disable_static(self.place) +# paddle.seed(self.seed) +# paddle.framework.random._manual_program_seed(self.seed) +# cell_dy = TinyRNN(self.input_size) +# self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) + +# paddle.enable_static() + +# with paddle.fluid.unique_name.guard(): +# main_program = paddle.static.Program() +# startup_program = paddle.static.Program() +# with paddle.static.program_guard( +# main_program=main_program, startup_program=startup_program +# ): +# paddle.seed(self.seed) +# paddle.framework.random._manual_program_seed(self.seed) + +# self.exe = paddle.fluid.Executor( +# fluid.CPUPlace() +# if self.place == "cpu" +# else fluid.CUDAPlace(0) +# ) + +# rnn_in_data = paddle.static.data( +# "x", +# [None, self.batch_size, self.hidden_size], +# dtype="float64", +# ) +# pre_h_data = paddle.static.data( +# "pre_h", +# [self.batch_size, self.hidden_size], +# dtype="float64", +# ) +# seq_len_data = paddle.static.data( +# "seq_len", [self.batch_size], dtype="int64" +# ) + +# pre_h_data.stop_gradient = False +# rnn_in_data.stop_gradient = False + +# cell_st = TinyRNN(self.input_size) +# self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) + +# st_out, st_last_h = self.rnn_st( +# rnn_in_data, pre_h_data, sequence_length=seq_len_data +# ) +# loss = paddle.sum(st_out) +# sgd = paddle.optimizer.SGD(0.1) +# sgd.minimize(loss) +# self.fetch_list = [st_out, st_last_h, "x@GRAD"] + +# self.exe.run(framework.default_startup_program()) + +# self.main_program = framework.default_main_program() + +# paddle.disable_static(self.place) + +# def test_base(self, test_seq_len=False): +# x = np.random.randn(12, 4, self.hidden_size) +# if not self.time_major: +# x = np.transpose(x, [1, 0, 2]) +# prev_h = np.random.randn(4, self.hidden_size) + +# paddle.disable_static(self.place) +# if test_seq_len: +# seq_len = np.array([9, 10, 8, 12]) +# else: +# seq_len = np.array([12, 12, 12, 12]) + +# x_in = paddle.to_tensor(x) +# h_in = paddle.to_tensor(prev_h) +# x_in.stop_gradient = False +# h_in.stop_gradient = False +# y1, h1 = self.rnn_net( +# x_in, +# h_in, +# sequence_length=paddle.to_tensor(seq_len), +# ) +# loss = y1.sum() +# loss.backward() + +# print( x_in.gradient()) +# # print(h_in.gradient()) + +# paddle.enable_static() +# out = self.exe.run( +# self.main_program, +# feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, +# fetch_list=[self.fetch_list], +# ) + +# y2, h2, g1 = out + +# print(g1) + +# def runTest(self): + +# self.test_base(True) +# # self.test_base(True) + + if __name__ == "__main__": paddle.enable_static() unittest.main() From 25968b49038caa7bd1ee32e77f9151da18fdce6a Mon Sep 17 00:00:00 2001 From: xiongkun Date: Wed, 23 Nov 2022 13:55:06 +0000 Subject: [PATCH 05/16] fix _find_op_path_ bugs in append_backward. --- python/paddle/fluid/backward.py | 4 ++++ python/paddle/fluid/layers/rnn.py | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 159ea6fb0be6e..a83e98033bcb9 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -2221,6 +2221,10 @@ def _find_op_path_( op.desc.output_arg_names(), output_names ): relevant_op_flags[i] = True + if core.has_non_empty_grad_op_maker(op.type): + for name in op.desc.input_arg_names(): + if name not in no_grad_set: + output_names.add(name) op_path = [ block.ops[i] for i in range(len(block.ops)) if relevant_op_flags[i] diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 230f4b5907667..edb526288a96a 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -694,7 +694,6 @@ def _switch_grad(x, stop=False): out_array = control_flow.create_array(dtype=flatten(inputs)[0].dtype) - init_array = control_flow.create_array(dtype=flatten(inputs)[0].dtype) init_array = map_structure( lambda x: control_flow.create_array(dtype=x.dtype), initial_states ) From caf97edbdc16e890cc95556eab2e1b2752dabe1d Mon Sep 17 00:00:00 2001 From: phlrain Date: Thu, 24 Nov 2022 10:27:54 +0000 Subject: [PATCH 06/16] polish code --- .../fluid/operators/controlflow/while_op.cc | 2 - python/paddle/fluid/backward.py | 1 - .../fluid/tests/unittests/rnn/test_rnn_api.py | 563 +++++++----------- 3 files changed, 212 insertions(+), 354 deletions(-) diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index de474ca99e59e..f91c1f575c761 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -297,7 +297,6 @@ class WhileGradOp : public framework::OperatorBase { // (todo hongyu, need to update here) VLOG(8) << "skip data share " << outside_og_name; inside_tensor.ShareDataWith(outside_tensor); - } else { } } else if (og_outside.IsType()) { auto outside_array = @@ -472,7 +471,6 @@ class WhileGradOpMaker : public framework::SingleGradOpMaker { // Ignore IGs that is not generated by the inside block. std::unordered_set inner_op_outputs; for (const auto *op : grad_block->AllOps()) { - LOG(ERROR) << op->Type(); for (auto &oname : op->OutputArgumentNames()) { inner_op_outputs.insert(oname); } diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index a83e98033bcb9..df975d06a45d4 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -1348,7 +1348,6 @@ def update_distop_context( pre_input_grad_names_set = copy.copy(input_grad_names_set) input_grad_names_set = None sub_block_path = op_path_dict[op._block_attr_id("sub_block")] - print("sub lock", sub_block_path) _append_backward_ops_( sub_block, sub_block_path, diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py index 4502c1f10a954..c020cc0dcf8d6 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py @@ -29,212 +29,212 @@ bidirectional_list = ["bidirectional", "bidirect"] -# class TestSimpleRNN(unittest.TestCase): -# def __init__(self, time_major=True, direction="forward", place="cpu"): -# super().__init__("runTest") -# self.time_major = time_major -# self.direction = direction -# self.num_directions = 2 if direction in bidirectional_list else 1 -# self.place = place -# self.batch_size = 4 -# self.input_size = 16 -# self.hidden_size = 16 -# self.seq_len = 12 -# self.seed = 1234 - -# def setUp(self): -# # Since `set_device` is global, set `set_device` in `setUp` rather than -# # `__init__` to avoid using an error device set by another test case. - -# place = paddle.set_device(self.place) -# paddle.disable_static(self.place) -# paddle.seed(self.seed) -# paddle.framework.random._manual_program_seed(self.seed) -# cell_dy = paddle.nn.SimpleRNNCell(self.input_size, self.hidden_size) -# self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) - -# paddle.enable_static() - -# with paddle.fluid.unique_name.guard(): -# main_program = paddle.static.Program() -# startup_program = paddle.static.Program() -# with paddle.static.program_guard( -# main_program=main_program, startup_program=startup_program -# ): -# paddle.seed(self.seed) -# paddle.framework.random._manual_program_seed(self.seed) - -# self.exe = fluid.Executor( -# fluid.CPUPlace() -# if self.place == "cpu" -# else fluid.CUDAPlace(0) -# ) - -# rnn_in_data = paddle.static.data( -# "x", -# [None, self.batch_size, self.hidden_size], -# dtype="float64", -# ) -# pre_h_data = paddle.static.data( -# "pre_h", -# [self.batch_size, self.hidden_size], -# dtype="float64", -# ) -# seq_len_data = paddle.static.data( -# "seq_len", [self.batch_size], dtype="int64" -# ) -# cell_st = paddle.nn.SimpleRNNCell( -# self.input_size, self.hidden_size -# ) -# self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) -# st_out, st_last_h = self.rnn_st( -# rnn_in_data, pre_h_data, sequence_length=seq_len_data -# ) - -# self.fetch_list = [st_out, st_last_h] - -# self.exe.run(framework.default_startup_program()) - -# self.main_program = framework.default_main_program() - -# paddle.disable_static(self.place) - -# def test_base(self, test_seq_len=False): -# x = np.random.randn(12, 4, 16) -# if not self.time_major: -# x = np.transpose(x, [1, 0, 2]) -# prev_h = np.random.randn(4, 16) - -# paddle.disable_static(self.place) -# if test_seq_len: -# seq_len = np.array([9, 10, 8, 12]) -# else: -# seq_len = np.array([12, 12, 12, 12]) - -# y1, h1 = self.rnn_net( -# paddle.to_tensor(x), -# paddle.to_tensor(prev_h), -# sequence_length=paddle.to_tensor(seq_len), -# ) - -# paddle.enable_static() -# out = self.exe.run( -# self.main_program, -# feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, -# fetch_list=[self.fetch_list], -# ) - -# y2, h2 = out - -# np.testing.assert_allclose(y1.numpy(), y2, atol=1e-8, rtol=1e-5) -# np.testing.assert_allclose(h1.numpy(), h2, atol=1e-8, rtol=1e-5) - -# def runTest(self): -# self.test_base() -# self.test_base(True) - - -# class TestGRU(unittest.TestCase): -# def __init__(self, time_major=True, direction="forward", place="cpu"): -# super().__init__("runTest") -# self.time_major = time_major -# self.direction = direction -# self.num_directions = 2 if direction in bidirectional_list else 1 -# self.place = place -# self.batch_size = 4 -# self.input_size = 16 -# self.hidden_size = 16 -# self.seq_len = 12 -# self.seed = 1234 - -# def setUp(self): -# # Since `set_device` is global, set `set_device` in `setUp` rather than -# # `__init__` to avoid using an error device set by another test case. - -# place = paddle.set_device(self.place) -# paddle.disable_static(self.place) -# paddle.seed(self.seed) -# paddle.framework.random._manual_program_seed(self.seed) -# cell_dy = paddle.nn.GRUCell(self.input_size, self.hidden_size) -# self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) - -# paddle.enable_static() - -# with paddle.fluid.unique_name.guard(): -# main_program = paddle.static.Program() -# startup_program = paddle.static.Program() -# with paddle.static.program_guard( -# main_program=main_program, startup_program=startup_program -# ): -# paddle.seed(self.seed) -# paddle.framework.random._manual_program_seed(self.seed) - -# self.exe = fluid.Executor( -# fluid.CPUPlace() -# if self.place == "cpu" -# else fluid.CUDAPlace(0) -# ) - -# rnn_in_data = paddle.static.data( -# "x", -# [None, self.batch_size, self.hidden_size], -# dtype="float64", -# ) -# pre_h_data = paddle.static.data( -# "pre_h", -# [self.batch_size, self.hidden_size], -# dtype="float64", -# ) -# seq_len_data = paddle.static.data( -# "seq_len", [self.batch_size], dtype="int64" -# ) -# cell_st = paddle.nn.GRUCell(self.input_size, self.hidden_size) -# self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) -# st_out, st_last_h = self.rnn_st( -# rnn_in_data, pre_h_data, sequence_length=seq_len_data -# ) - -# self.fetch_list = [st_out, st_last_h] - -# self.exe.run(framework.default_startup_program()) - -# self.main_program = framework.default_main_program() - -# paddle.disable_static(self.place) - -# def test_base(self, test_seq_len=False): -# x = np.random.randn(12, 4, 16) -# if not self.time_major: -# x = np.transpose(x, [1, 0, 2]) -# prev_h = np.random.randn(4, 16) - -# paddle.disable_static(self.place) -# if test_seq_len: -# seq_len = np.array([9, 10, 8, 12]) -# else: -# seq_len = np.array([12, 12, 12, 12]) - -# y1, h1 = self.rnn_net( -# paddle.to_tensor(x), -# paddle.to_tensor(prev_h), -# sequence_length=paddle.to_tensor(seq_len), -# ) - -# paddle.enable_static() -# out = self.exe.run( -# self.main_program, -# feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, -# fetch_list=[self.fetch_list], -# ) - -# y2, h2 = out - -# np.testing.assert_allclose(y1.numpy(), y2, atol=1e-8, rtol=1e-5) -# np.testing.assert_allclose(h1.numpy(), h2, atol=1e-8, rtol=1e-5) - -# def runTest(self): -# self.test_base() -# self.test_base(True) +class TestSimpleRNN(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): + super().__init__("runTest") + self.time_major = time_major + self.direction = direction + self.num_directions = 2 if direction in bidirectional_list else 1 + self.place = place + self.batch_size = 4 + self.input_size = 16 + self.hidden_size = 16 + self.seq_len = 12 + self.seed = 1234 + + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. + + place = paddle.set_device(self.place) + paddle.disable_static(self.place) + paddle.seed(self.seed) + paddle.framework.random._manual_program_seed(self.seed) + cell_dy = paddle.nn.SimpleRNNCell(self.input_size, self.hidden_size) + self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) + + paddle.enable_static() + + with paddle.fluid.unique_name.guard(): + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + with paddle.static.program_guard( + main_program=main_program, startup_program=startup_program + ): + paddle.seed(self.seed) + paddle.framework.random._manual_program_seed(self.seed) + + self.exe = fluid.Executor( + fluid.CPUPlace() + if self.place == "cpu" + else fluid.CUDAPlace(0) + ) + + rnn_in_data = paddle.static.data( + "x", + [None, self.batch_size, self.hidden_size], + dtype="float64", + ) + pre_h_data = paddle.static.data( + "pre_h", + [self.batch_size, self.hidden_size], + dtype="float64", + ) + seq_len_data = paddle.static.data( + "seq_len", [self.batch_size], dtype="int64" + ) + cell_st = paddle.nn.SimpleRNNCell( + self.input_size, self.hidden_size + ) + self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) + st_out, st_last_h = self.rnn_st( + rnn_in_data, pre_h_data, sequence_length=seq_len_data + ) + + self.fetch_list = [st_out, st_last_h] + + self.exe.run(framework.default_startup_program()) + + self.main_program = framework.default_main_program() + + paddle.disable_static(self.place) + + def test_base(self, test_seq_len=False): + x = np.random.randn(12, 4, 16) + if not self.time_major: + x = np.transpose(x, [1, 0, 2]) + prev_h = np.random.randn(4, 16) + + paddle.disable_static(self.place) + if test_seq_len: + seq_len = np.array([9, 10, 8, 12]) + else: + seq_len = np.array([12, 12, 12, 12]) + + y1, h1 = self.rnn_net( + paddle.to_tensor(x), + paddle.to_tensor(prev_h), + sequence_length=paddle.to_tensor(seq_len), + ) + + paddle.enable_static() + out = self.exe.run( + self.main_program, + feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, + fetch_list=[self.fetch_list], + ) + + y2, h2 = out + + np.testing.assert_allclose(y1.numpy(), y2, atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(h1.numpy(), h2, atol=1e-8, rtol=1e-5) + + def runTest(self): + self.test_base() + self.test_base(True) + + +class TestGRU(unittest.TestCase): + def __init__(self, time_major=True, direction="forward", place="cpu"): + super().__init__("runTest") + self.time_major = time_major + self.direction = direction + self.num_directions = 2 if direction in bidirectional_list else 1 + self.place = place + self.batch_size = 4 + self.input_size = 16 + self.hidden_size = 16 + self.seq_len = 12 + self.seed = 1234 + + def setUp(self): + # Since `set_device` is global, set `set_device` in `setUp` rather than + # `__init__` to avoid using an error device set by another test case. + + place = paddle.set_device(self.place) + paddle.disable_static(self.place) + paddle.seed(self.seed) + paddle.framework.random._manual_program_seed(self.seed) + cell_dy = paddle.nn.GRUCell(self.input_size, self.hidden_size) + self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) + + paddle.enable_static() + + with paddle.fluid.unique_name.guard(): + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + with paddle.static.program_guard( + main_program=main_program, startup_program=startup_program + ): + paddle.seed(self.seed) + paddle.framework.random._manual_program_seed(self.seed) + + self.exe = fluid.Executor( + fluid.CPUPlace() + if self.place == "cpu" + else fluid.CUDAPlace(0) + ) + + rnn_in_data = paddle.static.data( + "x", + [None, self.batch_size, self.hidden_size], + dtype="float64", + ) + pre_h_data = paddle.static.data( + "pre_h", + [self.batch_size, self.hidden_size], + dtype="float64", + ) + seq_len_data = paddle.static.data( + "seq_len", [self.batch_size], dtype="int64" + ) + cell_st = paddle.nn.GRUCell(self.input_size, self.hidden_size) + self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) + st_out, st_last_h = self.rnn_st( + rnn_in_data, pre_h_data, sequence_length=seq_len_data + ) + + self.fetch_list = [st_out, st_last_h] + + self.exe.run(framework.default_startup_program()) + + self.main_program = framework.default_main_program() + + paddle.disable_static(self.place) + + def test_base(self, test_seq_len=False): + x = np.random.randn(12, 4, 16) + if not self.time_major: + x = np.transpose(x, [1, 0, 2]) + prev_h = np.random.randn(4, 16) + + paddle.disable_static(self.place) + if test_seq_len: + seq_len = np.array([9, 10, 8, 12]) + else: + seq_len = np.array([12, 12, 12, 12]) + + y1, h1 = self.rnn_net( + paddle.to_tensor(x), + paddle.to_tensor(prev_h), + sequence_length=paddle.to_tensor(seq_len), + ) + + paddle.enable_static() + out = self.exe.run( + self.main_program, + feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, + fetch_list=[self.fetch_list], + ) + + y2, h2 = out + + np.testing.assert_allclose(y1.numpy(), y2, atol=1e-8, rtol=1e-5) + np.testing.assert_allclose(h1.numpy(), h2, atol=1e-8, rtol=1e-5) + + def runTest(self): + self.test_base() + self.test_base(True) class TestGRUBackward(unittest.TestCase): @@ -304,7 +304,7 @@ def setUp(self): rnn_in_data, pre_h_data, sequence_length=seq_len_data ) loss = paddle.sum(st_out) - sgd = paddle.optimizer.SGD(0.1) + sgd = paddle.optimizer.SGD(0.0) sgd.minimize(loss) self.fetch_list = [st_out, st_last_h, "pre_h@GRAD", "x@GRAD"] @@ -338,8 +338,7 @@ def test_base(self, test_seq_len=False): loss = y1.sum() loss.backward() - # print( x_in.gradient()) - print(h_in.gradient()) + h1_grad = h_in.gradient() paddle.enable_static() out = self.exe.run( @@ -350,152 +349,14 @@ def test_base(self, test_seq_len=False): y2, h2, g1, g2 = out - print(g1) - print(self.main_program) + np.testing.assert_allclose(h1_grad, g1, atol=1e-8, rtol=1e-5) def runTest(self): + self.test_base(True) + self.test_base() self.test_base() - # self.test_base(True) - - -# class TinyRNN(RNNCellBase): -# def __init__( self, -# hidden_size): -# super().__init__() -# self.hidden_size = hidden_size - -# def forward( self, input, state=None): -# out = input + input -# return out, state - -# @property -# def state_shape(self): -# r""" -# The `state_shape` of GRUCell is a shape `[hidden_size]` (-1 for batch -# size would be automatically inserted into shape). The shape corresponds -# to the shape of :math:`h_{t-1}`. -# """ -# return (self.hidden_size,) - - -# class TestTinyRNNBackward(unittest.TestCase): -# def __init__(self, time_major=True, direction="forward", place="cpu"): -# super().__init__("runTest") -# self.time_major = time_major -# self.direction = direction -# self.num_directions = 2 if direction in bidirectional_list else 1 -# self.place = place -# self.batch_size = 4 -# self.input_size = 4 -# self.hidden_size = 4 -# self.seq_len = 12 -# self.seed = 1234 - -# def setUp(self): -# # Since `set_device` is global, set `set_device` in `setUp` rather than -# # `__init__` to avoid using an error device set by another test case. - -# place = paddle.set_device(self.place) -# paddle.disable_static(self.place) -# paddle.seed(self.seed) -# paddle.framework.random._manual_program_seed(self.seed) -# cell_dy = TinyRNN(self.input_size) -# self.rnn_net = paddle.nn.RNN(cell_dy, time_major=self.time_major) - -# paddle.enable_static() - -# with paddle.fluid.unique_name.guard(): -# main_program = paddle.static.Program() -# startup_program = paddle.static.Program() -# with paddle.static.program_guard( -# main_program=main_program, startup_program=startup_program -# ): -# paddle.seed(self.seed) -# paddle.framework.random._manual_program_seed(self.seed) - -# self.exe = paddle.fluid.Executor( -# fluid.CPUPlace() -# if self.place == "cpu" -# else fluid.CUDAPlace(0) -# ) - -# rnn_in_data = paddle.static.data( -# "x", -# [None, self.batch_size, self.hidden_size], -# dtype="float64", -# ) -# pre_h_data = paddle.static.data( -# "pre_h", -# [self.batch_size, self.hidden_size], -# dtype="float64", -# ) -# seq_len_data = paddle.static.data( -# "seq_len", [self.batch_size], dtype="int64" -# ) - -# pre_h_data.stop_gradient = False -# rnn_in_data.stop_gradient = False - -# cell_st = TinyRNN(self.input_size) -# self.rnn_st = paddle.nn.RNN(cell_st, time_major=self.time_major) - -# st_out, st_last_h = self.rnn_st( -# rnn_in_data, pre_h_data, sequence_length=seq_len_data -# ) -# loss = paddle.sum(st_out) -# sgd = paddle.optimizer.SGD(0.1) -# sgd.minimize(loss) -# self.fetch_list = [st_out, st_last_h, "x@GRAD"] - -# self.exe.run(framework.default_startup_program()) - -# self.main_program = framework.default_main_program() - -# paddle.disable_static(self.place) - -# def test_base(self, test_seq_len=False): -# x = np.random.randn(12, 4, self.hidden_size) -# if not self.time_major: -# x = np.transpose(x, [1, 0, 2]) -# prev_h = np.random.randn(4, self.hidden_size) - -# paddle.disable_static(self.place) -# if test_seq_len: -# seq_len = np.array([9, 10, 8, 12]) -# else: -# seq_len = np.array([12, 12, 12, 12]) - -# x_in = paddle.to_tensor(x) -# h_in = paddle.to_tensor(prev_h) -# x_in.stop_gradient = False -# h_in.stop_gradient = False -# y1, h1 = self.rnn_net( -# x_in, -# h_in, -# sequence_length=paddle.to_tensor(seq_len), -# ) -# loss = y1.sum() -# loss.backward() - -# print( x_in.gradient()) -# # print(h_in.gradient()) - -# paddle.enable_static() -# out = self.exe.run( -# self.main_program, -# feed={"x": x, "pre_h": prev_h, "seq_len": seq_len}, -# fetch_list=[self.fetch_list], -# ) - -# y2, h2, g1 = out - -# print(g1) - -# def runTest(self): - -# self.test_base(True) -# # self.test_base(True) + self.test_base(True) if __name__ == "__main__": From 61b0f4fc9b2ad3e741cfa5916c78a4ad88db84d8 Mon Sep 17 00:00:00 2001 From: phlrain Date: Thu, 24 Nov 2022 12:43:23 +0000 Subject: [PATCH 07/16] revert op proto --- .gitignore | 1 + paddle/fluid/framework/op_proto_maker.cc | 68 ++++++++++++------------ 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/.gitignore b/.gitignore index 14109003e478a..5430ec91c5486 100644 --- a/.gitignore +++ b/.gitignore @@ -80,3 +80,4 @@ paddle/fluid/pybind/tmp_eager_op_function_impl.h paddle/fluid/pybind/eager_op_function_impl.h paddle/fluid/pybind/eager_op_function_impl.h paddle/fluid/pybind/op_function_impl.h +paddle/fluid/pybind/*final_state_op_function_impl.h diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc index c98ffd30e808e..fbad45e889156 100644 --- a/paddle/fluid/framework/op_proto_maker.cc +++ b/paddle/fluid/framework/op_proto_maker.cc @@ -77,45 +77,43 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto, } op_checker_->InitDefaultAttributeMap(extra_attrs_ptr); - // AddAttr(OpRoleAttrName(), "The role of this operator") - // .InEnum( - // {static_cast(OpRole::kForward), - // static_cast(OpRole::kBackward), - // static_cast(OpRole::kOptimize), - // static_cast(OpRole::kRPC), - // static_cast(OpRole::kDist), - // static_cast(OpRole::kLRSched), - // static_cast(OpRole::kLoss) | - // static_cast(OpRole::kForward), - // static_cast(OpRole::kLoss) | - // static_cast(OpRole::kBackward), - // static_cast(OpRole::kOptimize) | - // static_cast(OpRole::kLRSched), - // static_cast(OpRole::kNotSpecified)}) - // .SetDefault(static_cast(OpRole::kNotSpecified)) - // .AsExtra(); - // AddAttr>(OpRoleVarAttrName(), - // "Optimized for variable") - // .SetDefault({}) - // .AsExtra(); - - // AddAttr(OpNamescopeAttrName(), "Operator name with - // namescope.") - // .SetDefault("") - // .AsExtra(); - - // AddAttr>(OpCreationCallstackAttrName(), - // "Callstack for Op Creation.") - // .SetDefault({}) - // .AsExtra(); + AddAttr(OpRoleAttrName(), "The role of this operator") + .InEnum( + {static_cast(OpRole::kForward), + static_cast(OpRole::kBackward), + static_cast(OpRole::kOptimize), + static_cast(OpRole::kRPC), + static_cast(OpRole::kDist), + static_cast(OpRole::kLRSched), + static_cast(OpRole::kLoss) | static_cast(OpRole::kForward), + static_cast(OpRole::kLoss) | + static_cast(OpRole::kBackward), + static_cast(OpRole::kOptimize) | + static_cast(OpRole::kLRSched), + static_cast(OpRole::kNotSpecified)}) + .SetDefault(static_cast(OpRole::kNotSpecified)) + .AsExtra(); + AddAttr>(OpRoleVarAttrName(), + "Optimized for variable") + .SetDefault({}) + .AsExtra(); + + AddAttr(OpNamescopeAttrName(), "Operator name with namescope.") + .SetDefault("") + .AsExtra(); + + AddAttr>(OpCreationCallstackAttrName(), + "Callstack for Op Creation.") + .SetDefault({}) + .AsExtra(); AddAttr(OpDeviceAttrName(), "Device type of this operator.") .SetDefault("") .AsExtra(); - // AddAttr(OpWithQuantAttrName(), - // "Whether the operator has attributes used by quantization. ") - // .SetDefault(false) - // .AsExtra(); + AddAttr(OpWithQuantAttrName(), + "Whether the operator has attributes used by quantization. ") + .SetDefault(false) + .AsExtra(); Validate(); } From 360735dcb931444a0f67d7b8c5a26ed3e8102a83 Mon Sep 17 00:00:00 2001 From: phlrain Date: Tue, 29 Nov 2022 05:41:43 +0000 Subject: [PATCH 08/16] update --- paddle/fluid/operators/controlflow/while_op.cc | 8 +++----- python/paddle/fluid/tests/unittests/test_while_loop_op.py | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index f91c1f575c761..f98a6b5795b63 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -293,11 +293,9 @@ class WhileGradOp : public framework::OperatorBase { auto &outside_tensor = og_outside.Get(); auto &inside_tensor = *og_inside.GetMutable(); inside_tensor.set_lod(outside_tensor.lod()); - if (outside_tensor.IsInitialized()) { - // (todo hongyu, need to update here) - VLOG(8) << "skip data share " << outside_og_name; - inside_tensor.ShareDataWith(outside_tensor); - } + + inside_tensor.ShareDataWith(outside_tensor); + } else if (og_outside.IsType()) { auto outside_array = og_outside.GetMutable(); diff --git a/python/paddle/fluid/tests/unittests/test_while_loop_op.py b/python/paddle/fluid/tests/unittests/test_while_loop_op.py index deaebf4a45d7f..ebfeab91c21d3 100644 --- a/python/paddle/fluid/tests/unittests/test_while_loop_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_loop_op.py @@ -296,7 +296,7 @@ def body(i, x): res = exe.run( main_program, feed={'i': feed_i, 'x': feed_x}, - fetch_list=[mean.name, i.grad_name, x.grad_name], + fetch_list=[mean.name, x.grad_name], ) np.testing.assert_allclose(np.asarray(res[0]), data, rtol=1e-05) np.testing.assert_allclose(np.asarray(res[1]), i_grad, rtol=1e-05) From 5bb85e8df3771f3ca76a30f6cda442b637e2fe06 Mon Sep 17 00:00:00 2001 From: phlrain Date: Thu, 1 Dec 2022 03:15:52 +0000 Subject: [PATCH 09/16] udpate while --- .../fluid/operators/controlflow/while_op.cc | 78 +- .../test_eager_deletion_recurrent_op.py | 697 +----------------- .../tests/unittests/test_rnn_cell_api.py | 1 - 3 files changed, 67 insertions(+), 709 deletions(-) diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index 587428fb24cbf..7a3e271fc67e1 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -367,6 +367,7 @@ class WhileGradOp : public framework::OperatorBase { auto *block = Attr(kStepBlock); auto *program = block->Program(); + auto *parent_block = block->ParentBlock(); auto &skip_vars = Attr>(kSkipEagerDeletionVars); VLOG(2) << GetSkipEagerDeletionVarsDebugString(skip_vars); @@ -428,15 +429,46 @@ class WhileGradOp : public framework::OperatorBase { continue; } + if (cur_scope_iter == step_scopes->rbegin()) { + auto &og_outside = *scope.FindVar(outside_og_name); + if (og_outside.IsType() && + !og_outside.GetMutable()->IsInitialized()) { + auto *var_desc = parent_block->FindVarRecursive(outside_og_name); + PADDLE_ENFORCE_NOT_NULL(var_desc, + platform::errors::PreconditionNotMet( + "Var `%s` is not found in parent " + "block, can't fill constant.", + outside_og_name)); + auto shape = var_desc->GetShape(); + VLOG(8) << "Found uninitialized tensor " << outside_og_name + << " in step 0, fill it with 0.0f. dims=" + << phi::make_ddim(shape); + framework::AttributeMap attrs; + attrs["dtype"] = var_desc->GetDataType(); + attrs["shape"] = phi::vectorize(phi::make_ddim(shape)); + attrs["value"] = 0.0f; + + auto var_name = outside_og_name; + auto zero_op = + framework::OpRegistry::CreateOp("fill_constant", + framework::VariableNameMap{}, + {{"Out", {var_name}}}, + attrs); + zero_op->Run(scope, dev_place); + } + } + auto &og_outside = *scope.FindVar(outside_og_name); auto &og_inside = *cur_scope.Var(inside_og_name); if (og_outside.IsType()) { auto &outside_tensor = og_outside.Get(); auto &inside_tensor = *og_inside.GetMutable(); inside_tensor.set_lod(outside_tensor.lod()); - - inside_tensor.ShareDataWith(outside_tensor); - + if (outside_tensor.IsInitialized()) { + // (todo hongyu, need to update here) + VLOG(8) << "skip data share " << outside_og_name; + inside_tensor.ShareDataWith(outside_tensor); + } } else if (og_outside.IsType()) { auto outside_array = og_outside.GetMutable(); @@ -536,9 +568,10 @@ class WhileGradOp : public framework::OperatorBase { // continue; // } - auto var_iter = std::find(outside_og_names.begin(), - outside_og_names.end(), - pg_ig_names[param_id]); + auto is_var_input_and_output = + std::find(outside_og_names.begin(), + outside_og_names.end(), + pg_ig_names[param_id]) != outside_og_names.end(); // zero gradient variable in step 0 if (cur_scope_iter == step_scopes->rbegin()) { @@ -557,8 +590,7 @@ class WhileGradOp : public framework::OperatorBase { inside_grad_name, framework::ToTypeName(var->Type()))); - if ((var_iter == outside_og_names.end()) && - var->IsType()) { + if (!is_var_input_and_output && var->IsType()) { auto &inside_tensor = var->Get(); framework::AttributeMap attrs; attrs["dtype"] = @@ -577,10 +609,7 @@ class WhileGradOp : public framework::OperatorBase { inside_tensor.lod()); } } - auto var_outside = scope.FindVar(pg_ig_names[param_id]); - if ((var_iter == outside_og_names.end()) || - ((var_iter != outside_og_names.end()) && - var_outside->IsType())) { + if (!is_var_input_and_output) { auto new_inside_name = cur_scope.Rename(inside_grad_name); auto sum_op = framework::OpRegistry::CreateOp( "sum", @@ -589,6 +618,8 @@ class WhileGradOp : public framework::OperatorBase { framework::AttributeMap{{"use_mkldnn", {false}}}); sum_op->Run(cur_scope, dev_place); cur_scope.Rename(new_inside_name, inside_grad_name); + } else { + ShareVariable(cur_scope, scope, pg_ig_names[param_id]); } } dev_ctx.Wait(); @@ -597,6 +628,29 @@ class WhileGradOp : public framework::OperatorBase { step_scopes->clear(); } + void ShareVariable(const framework::Scope &source, + const framework::Scope &dest, + std::string name) const { + auto from_var = source.FindVar(name); + auto to_var = dest.FindVar(name); + if (from_var->IsType()) { + if (from_var->Get().IsInitialized()) { + to_var->GetMutable()->ShareDataWith( + from_var->Get()); + } + } else if (from_var->IsType()) { + auto from_arr = from_var->GetMutable(); + auto to_arr = to_var->GetMutable(); + to_arr->clear(); + to_arr->resize(from_arr->size()); + for (size_t i = 0; i < to_arr->size(); ++i) { + if (from_arr->at(i).IsInitialized()) { + to_arr->at(i).ShareDataWith(from_arr->at(i)); + } + } + } + } + private: mutable std::shared_ptr executor_{nullptr}; mutable std::unique_ptr ctx_{nullptr}; diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py index 7abc456203229..b7952c3736b26 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py @@ -50,697 +50,6 @@ def segment_inputs(self): return [self.x[i] for i in range(self.x.shape[0])] -# class PySimpleRNN1(PyRNNBase): -# def __init__(self, input_shape, output_shape): -# super().__init__(input_shape, output_shape) - -# seq_len, batch_size, input_dim = input_shape -# self.h_boot = np.random.normal(size=(batch_size, input_dim)).astype( -# "float32" -# ) - -# self.scale = 1.0 / 2.0 -# men_dim = (seq_len, batch_size, input_dim) -# self.mems = np.zeros(shape=men_dim).astype("float32") - -# def step(self, step_id, x): -# if step_id == 0: -# pre_mem = self.h_boot -# else: -# pre_mem = self.mems[step_id - 1] -# self.mems[step_id] = (pre_mem + x) * self.scale -# self.y[step_id] = self.mems[step_id] - - -# class PySimpleRNN2(PyRNNBase): -# def __init__(self, input_shape, output_shape): -# super().__init__(input_shape, output_shape) - -# seq_len, batch_size, input_dim = input_shape -# self.W = np.ones(shape=(input_dim, input_dim)).astype("float32") -# self.U = np.zeros(shape=(input_dim, input_dim)).astype("float32") -# self.h_boot = np.ones(shape=(batch_size, input_dim)).astype("float32") - -# men_dim = (seq_len, batch_size, input_dim) -# self.mems = np.zeros(shape=men_dim).astype("float32") - -# def step(self, step_id, x): -# if step_id > 0: -# pre_mem = self.mems[step_id - 1] -# else: -# pre_mem = self.h_boot -# xW = np.matmul(x, self.W).astype("float32") -# hU = np.matmul(pre_mem, self.U).astype("float32") - -# def py_sigmoid(x): -# return 1.0 / (1.0 + np.exp(-x)) - -# self.mems[step_id] = py_sigmoid(xW + hU) -# self.y[step_id] = self.mems[step_id] - - -# def create_tensor(np_data, place): -# tensor = core.LoDTensor() -# tensor.set(np_data, place) -# return tensor - - -# class EagerDeletionRecurrentOpTest1(unittest.TestCase): -# ''' -# Test RNNOp -# equation: -# h_t = ( x_t + h_{t-1} ) / scale -# vars: -# - x -# memories: -# - h -# outputs: -# - h -# ''' - -# input_dim = 2 -# batch_size = 1 -# sent_len = 1 - -# def setup_program(self): -# self.main_program = Program() -# self.startup_program = Program() -# self.place = core.CPUPlace() - -# def setUp(self): -# self.setup_program() -# self.data_field = {"x", "h_boot"} - -# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) - -# with fluid.program_guard(self.main_program, self.startup_program): -# self.output = paddle.mean(self.create_rnn_op()) - -# def create_rnn_op(self): -# x = layers.data( -# shape=[self.sent_len, self.batch_size, self.input_dim], -# dtype='float32', -# name='x', -# append_batch_size=False, -# ) -# x.stop_gradient = False -# h_boot = layers.data( -# shape=[self.input_dim], dtype='float32', name='h_boot' -# ) -# h_boot.stop_gradient = False - -# rnn = layers.StaticRNN() -# with rnn.step(): -# h_pre = rnn.memory(init=h_boot) -# x_t = rnn.step_input(x) - -# h = layers.scale( -# x=layers.elementwise_add(x=h_pre, y=x_t), -# scale=self.py_rnn.scale, -# ) - -# rnn.update_memory(h_pre, h) -# rnn.output(h) - -# return rnn() - -# def forward(self): -# gc_vars = core._get_eager_deletion_vars( -# self.main_program.desc, [self.output.name] -# ) -# self.assertEqual(len(gc_vars), self.main_program.num_blocks) -# self.feed_map = { -# x: create_tensor(getattr(self.py_rnn, x), self.place) -# for x in self.data_field -# } -# exe = Executor(self.place) -# out = exe.run( -# self.main_program, feed=self.feed_map, fetch_list=[self.output] -# ) - -# return out[0] - -# def backward(self): -# self.feed_map = { -# x: create_tensor(getattr(self.py_rnn, x), self.place) -# for x in self.data_field -# } -# fetch_list = [ -# self.main_program.global_block().var(grad_var_name(x)) -# for x in self.data_field -# ] - -# gc_vars = core._get_eager_deletion_vars( -# self.main_program.desc, [var.name for var in fetch_list] -# ) -# self.assertEqual(len(gc_vars), self.main_program.num_blocks) - -# exe = Executor(self.place) -# return exe.run( -# self.main_program, -# feed=self.feed_map, -# fetch_list=fetch_list, -# return_numpy=False, -# ) - -# def test_backward(self, rtol=0.01): -# self.check_forward() -# num_grad = self.get_numerical_gradient() - -# with fluid.program_guard(self.main_program, self.startup_program): -# append_backward(self.output) - -# ana_grad = [np.array(x) for x in self.backward()] - -# for idx, name in enumerate(self.data_field): -# self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape) -# np.testing.assert_allclose( -# num_grad[idx], -# ana_grad[idx], -# rtol=rtol, -# err_msg='num_grad (' -# + name -# + ') has diff at ' -# + str(self.place) -# + '\nExpect ' -# + str(num_grad[idx]) -# + '\n' -# + 'But Got' -# + str(ana_grad[idx]) -# + ' in class ' -# + self.__class__.__name__, -# ) - -# def check_forward(self): -# pd_output = self.forward() -# py_output = self.py_rnn.forward() -# self.assertEqual(pd_output.shape, py_output.shape) -# np.testing.assert_allclose(pd_output, py_output, rtol=0.01) - -# def get_numerical_gradient(self, delta=0.005): -# dloss_dout = 1.0 -# feed_list = [getattr(self.py_rnn, x) for x in self.data_field] -# grad_list = [np.zeros_like(x) for x in feed_list] -# for feed, grad in zip(feed_list, grad_list): -# for f, g in np.nditer([feed, grad], op_flags=['readwrite']): -# o = float(f) -# f[...] = o + delta -# y_pos = self.forward() - -# f[...] = o - delta -# y_neg = self.forward() - -# f[...] = o -# dout_dfeed = (y_pos - y_neg) / (delta * 2) -# g[...] = dout_dfeed[0] - -# return grad_list - - -# class EagerDeletionRecurrentOpTest2(EagerDeletionRecurrentOpTest1): -# r''' -# Test RNNOp -# equation: -# h_t = \sigma (W x_t + U h_{t-1}) -# weights: -# - W -# - U -# vars: -# - x -# memories: -# - h -# outputs: -# - h -# ''' - -# input_dim = 2 -# batch_size = 10 -# sent_len = 2 - -# def setUp(self): -# self.setup_program() - -# self.data_field = {"x", "h_boot", "W", "U"} - -# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.py_rnn = PySimpleRNN2(self.input_shape, self.output_shape) - -# with fluid.program_guard(self.main_program, self.startup_program): -# self.output = paddle.mean(self.create_rnn_op()) - -# def create_rnn_op(self): -# x = layers.data( -# shape=[self.sent_len, self.batch_size, self.input_dim], -# dtype='float32', -# name='x', -# append_batch_size=False, -# ) -# x.stop_gradient = False -# h_boot = layers.data( -# shape=[self.input_dim], dtype='float32', name='h_boot' -# ) -# h_boot.stop_gradient = False - -# rnn = layers.StaticRNN() -# with rnn.step(): -# h_pre = rnn.memory(init=h_boot) -# x_t = rnn.step_input(x) - -# temp_l = layers.fc( -# input=x_t, -# size=self.input_dim, -# param_attr=ParamAttr( -# name='W', -# initializer=fluid.initializer.ConstantInitializer(1.0), -# ), -# bias_attr=False, -# ) -# temp_r = layers.fc( -# input=h_pre, -# size=self.input_dim, -# param_attr=ParamAttr( -# name='U', -# initializer=fluid.initializer.ConstantInitializer(0.0), -# ), -# bias_attr=False, -# ) - -# h = paddle.nn.functional.sigmoid( -# x=layers.elementwise_add(x=temp_l, y=temp_r) -# ) - -# rnn.update_memory(h_pre, h) -# rnn.output(h) - -# return rnn() - -# def test_backward(self): -# super().test_backward(rtol=0.01) - - -# class EagerDeletionRecurrentOpMultipleMemoryTest(EagerDeletionRecurrentOpTest1): -# ''' -# Test RNNOp with two memories -# equation: -# h_1 = h_pre_1 -# h_2 = h_pre_2 -# y = h_1 + h_2 -# vars: -# - x -# memories: -# - h_1, h_2 -# outputs: -# - y -# ''' - -# class PySimpleRNN3(PyRNNBase): -# def __init__(self, input_shape, output_shape): -# super( -# EagerDeletionRecurrentOpMultipleMemoryTest.PySimpleRNN3, self -# ).__init__(input_shape, output_shape) - -# seq_len, batch_size, input_dim = input_shape -# self.h_boot1 = np.random.normal( -# size=(batch_size, input_dim) -# ).astype("float32") -# self.h_boot2 = np.random.normal( -# size=(batch_size, input_dim) -# ).astype("float32") - -# men_dim = (seq_len, batch_size, input_dim) -# self.mems1 = np.zeros(shape=men_dim).astype("float32") -# self.mems2 = np.zeros(shape=men_dim).astype("float32") - -# def step(self, step_id, x): -# if step_id == 0: -# pre_mem1 = self.h_boot1 -# pre_mem2 = self.h_boot2 -# else: -# pre_mem1 = self.mems1[step_id - 1] -# pre_mem2 = self.mems2[step_id - 1] -# self.mems1[step_id] = pre_mem1 -# self.mems2[step_id] = pre_mem2 -# self.y[step_id] = self.mems1[step_id] + self.mems2[step_id] + x - -# input_dim = 1 -# batch_size = 1 -# sent_len = 2 - -# def setUp(self): -# self.setup_program() - -# self.data_field = {"x", "h_boot1", "h_boot2"} - -# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.py_rnn = EagerDeletionRecurrentOpMultipleMemoryTest.PySimpleRNN3( -# self.input_shape, self.output_shape -# ) - -# with fluid.program_guard(self.main_program, self.startup_program): -# self.output = paddle.mean(self.create_rnn_op()) - -# def create_rnn_op(self): -# x = layers.data( -# shape=[self.sent_len, self.batch_size, self.input_dim], -# dtype='float32', -# name='x', -# append_batch_size=False, -# ) -# x.stop_gradient = False -# h_boot1 = layers.data( -# shape=[self.batch_size, self.input_dim], -# dtype='float32', -# name='h_boot1', -# append_batch_size=False, -# ) -# h_boot1.stop_gradient = False -# h_boot2 = layers.data( -# shape=[self.batch_size, self.input_dim], -# dtype='float32', -# name='h_boot2', -# append_batch_size=False, -# ) -# h_boot2.stop_gradient = False - -# rnn = layers.StaticRNN() -# with rnn.step(): -# h_pre1 = rnn.memory(init=h_boot1) -# h_pre2 = rnn.memory(init=h_boot2) -# x_t = rnn.step_input(x) - -# mem1 = layers.scale(x=h_pre1, scale=1.0) -# mem2 = layers.scale(x=h_pre2, scale=1.0) -# out = layers.sums(input=[mem1, x_t, mem2]) - -# rnn.update_memory(h_pre1, mem1) -# rnn.update_memory(h_pre2, mem2) -# rnn.output(out) - -# return rnn() - - -# class EagerDeletionRecurrentOpNoMemBootTest(EagerDeletionRecurrentOpTest1): -# ''' -# Test RNNOp without memory boot -# equation: -# mem = x + mem_pre -# y = mem -# vars: -# - x -# memories: -# - mem -# outputs: -# - y -# ''' - -# class PySimpleRNN4(PyRNNBase): -# def __init__(self, input_shape, output_shape): -# super( -# EagerDeletionRecurrentOpNoMemBootTest.PySimpleRNN4, self -# ).__init__(input_shape, output_shape) -# men_dim = input_shape -# self.mems = np.zeros(shape=men_dim).astype("float32") - -# def step(self, step_id, x): -# if step_id == 0: -# pre_mem = np.zeros_like(x) -# else: -# pre_mem = self.mems[step_id - 1] -# self.mems[step_id] = pre_mem + x -# self.y[step_id] = self.mems[step_id] - -# input_dim = 1 -# batch_size = 1 -# sent_len = 2 - -# def setUp(self): -# self.setup_program() - -# self.data_field = {"x"} - -# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.py_rnn = EagerDeletionRecurrentOpNoMemBootTest.PySimpleRNN4( -# self.input_shape, self.output_shape -# ) - -# with fluid.program_guard(self.main_program, self.startup_program): -# self.output = paddle.mean(self.create_rnn_op()) - -# def create_rnn_op(self): -# x = layers.data( -# shape=[self.sent_len, self.batch_size, self.input_dim], -# dtype='float32', -# name='x', -# append_batch_size=False, -# ) -# x.stop_gradient = False - -# rnn = layers.StaticRNN() -# with rnn.step(): -# mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x) -# x_t = rnn.step_input(x) -# mem = layers.elementwise_add(x=mem_pre, y=x_t) -# rnn.update_memory(mem_pre, mem) -# rnn.output(mem) - -# return rnn() - - -# class EagerDeletionTwoRecurrentOpsTest(EagerDeletionRecurrentOpTest1): -# ''' -# Test RNNOp with two recurrent ops -# equation: -# first_rnn: -# mem_inside = x + mem_pre_inside -# first_inside_out = mem_inside -# second_rnn: -# mem = x + reduce_sum(rnn_inside_out) -# y = mem + mem_pre -# vars: -# - x -# memories: -# - mem_inside -# - mem -# outputs: -# - y -# ''' - -# class PySimpleRNN5(PyRNNBase): -# def __init__(self, input_shape, output_shape): -# super().__init__(input_shape, output_shape) -# self.mem_0 = np.zeros(shape=input_shape).astype("float32") -# self.mem_1 = np.zeros(shape=input_shape).astype("float32") -# self.rnn_0_output = np.zeros(shape=input_shape).astype("float32") - -# def step(self, step_id, x): -# # First Rnn -# for step in range(self.x.shape[0]): -# x_t = self.x[step] -# pre_mem = ( -# np.zeros_like(x_t) if step == 0 else self.mem_0[step - 1] -# ) -# self.mem_0[step] = x_t + pre_mem -# self.rnn_0_output[step] = self.mem_0[step] -# # Second RNN -# pre_mem = ( -# np.zeros_like(x) if step_id == 0 else self.mem_1[step_id - 1] -# ) -# self.mem_1[step_id] = x + np.sum(self.rnn_0_output) -# self.y[step_id] = self.mem_1[step_id] + pre_mem - -# input_dim = 1 -# batch_size = 1 -# sent_len = 1 - -# def setUp(self): -# self.setup_program() - -# self.data_field = {"x"} - -# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.py_rnn = EagerDeletionTwoRecurrentOpsTest.PySimpleRNN5( -# self.input_shape, self.output_shape -# ) - -# with fluid.program_guard(self.main_program, self.startup_program): -# self.output = paddle.mean(self.create_rnn_op()) - -# def create_rnn_op(self): -# x = layers.data( -# shape=[self.sent_len, self.batch_size, self.input_dim], -# dtype='float32', -# name='x', -# append_batch_size=False, -# ) -# x.stop_gradient = False - -# rnn_0 = layers.StaticRNN() -# with rnn_0.step(): -# x_t = rnn_0.step_input(x) -# mem_pre = rnn_0.memory(shape=[-1, self.input_dim], batch_ref=x) -# mem = layers.elementwise_add(x=mem_pre, y=x_t) -# rnn_0.update_memory(mem_pre, mem) -# rnn_0.output(mem) - -# rnn_1 = layers.StaticRNN() -# with rnn_1.step(): -# mem_pre = rnn_1.memory(shape=[-1, self.input_dim], batch_ref=x) -# x_t = rnn_1.step_input(x) -# last_rnn_output = rnn_0() -# last_rnn_sum = fluid.layers.reduce_sum(last_rnn_output) -# mem = layers.elementwise_add(x=x_t, y=last_rnn_sum) -# y = layers.elementwise_add(x=mem_pre, y=mem) -# rnn_1.update_memory(mem_pre, mem) -# rnn_1.output(y) -# return rnn_1() - - -# class EagerDeletionRecurrentOpParallelExecutorTest( -# EagerDeletionRecurrentOpTest1 -# ): -# ''' -# Test RNNOp with ParallelExecutor -# equation: -# h_t = ( x_t + h_{t-1} ) / scale -# vars: -# - x -# memories: -# - h -# outputs: -# - h -# ''' - -# def forward(self): -# self.feed_map = { -# x: create_tensor(getattr(self.py_rnn, x), self.place) -# for x in self.data_field -# } - -# build_strategy = fluid.BuildStrategy() -# build_strategy.enable_inplace = True -# exec_strategy = fluid.ExecutionStrategy() -# parallel_exe = fluid.ParallelExecutor( -# use_cuda=False, -# main_program=self.main_program, -# build_strategy=build_strategy, -# exec_strategy=exec_strategy, -# ) -# out = parallel_exe.run(feed=self.feed_map, fetch_list=[self.output]) -# return out[0] - -# def backward(self): -# self.feed_map = { -# x: create_tensor(getattr(self.py_rnn, x), self.place) -# for x in self.data_field -# } -# fetch_list = [ -# self.main_program.global_block().var(grad_var_name(x)) -# for x in self.data_field -# ] - -# build_strategy = fluid.BuildStrategy() -# build_strategy.enable_inplace = True -# exec_strategy = fluid.ExecutionStrategy() -# parallel_exe = fluid.ParallelExecutor( -# use_cuda=False, -# loss_name=self.output.name, -# main_program=self.main_program, -# build_strategy=build_strategy, -# exec_strategy=exec_strategy, -# ) -# return parallel_exe.run( -# feed=self.feed_map, fetch_list=fetch_list, return_numpy=False -# ) - - -# class EagerDeletionFarwardOnlyRnnAndBackwardRnnTest( -# EagerDeletionRecurrentOpTest1 -# ): -# ''' -# Test one forward only RNN and one backward RNN in one program -# ''' - -# def setUp(self): -# self.setup_program() -# self.data_field = {"x", "h_boot"} - -# self.input_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.output_shape = (self.sent_len, self.batch_size, self.input_dim) -# self.py_rnn = PySimpleRNN1(self.input_shape, self.output_shape) - -# with fluid.program_guard(self.main_program, self.startup_program): -# x = layers.data( -# shape=[self.sent_len, self.batch_size, self.input_dim], -# dtype='float32', -# name='x', -# append_batch_size=False, -# ) -# x.stop_gradient = False -# h_boot = layers.data( -# shape=[self.input_dim], dtype='float32', name='h_boot' -# ) -# h_boot.stop_gradient = False - -# forward_only_rnn = layers.StaticRNN() -# with forward_only_rnn.step(): -# h_pre = forward_only_rnn.memory(init=h_boot) -# x_t = forward_only_rnn.step_input(x) - -# h = layers.scale( -# x=layers.elementwise_add(x=h_pre, y=x_t), -# scale=self.py_rnn.scale, -# ) - -# forward_only_rnn.update_memory(h_pre, h) -# forward_only_rnn.output(h) -# forward_only_output = forward_only_rnn() -# forward_only_output.stop_gradient = True -# self.forward_only_output = paddle.mean(forward_only_output) - -# rnn = layers.StaticRNN() -# with rnn.step(): -# h_pre = rnn.memory(init=h_boot) -# x_t = rnn.step_input(x) - -# h = layers.scale( -# x=layers.elementwise_add(x=h_pre, y=x_t), -# scale=self.py_rnn.scale, -# ) - -# rnn.update_memory(h_pre, h) -# rnn.output(h) - -# self.output = paddle.mean(rnn()) - -# def forward_two_rnn(self): -# self.feed_map = { -# x: create_tensor(getattr(self.py_rnn, x), self.place) -# for x in self.data_field -# } -# exe = Executor(self.place) -# out = exe.run( -# self.main_program, -# feed=self.feed_map, -# fetch_list=[self.forward_only_output, self.output], -# ) - -# return out[0], out[1] - -# def check_forward(self): -# forward_only_output, pd_output = self.forward_two_rnn() -# py_output = self.py_rnn.forward() -# self.assertEqual(forward_only_output.shape, py_output.shape) -# self.assertEqual(pd_output.shape, py_output.shape) -# np.testing.assert_allclose(forward_only_output, py_output, rtol=0.01) -# np.testing.assert_allclose(pd_output, py_output, rtol=0.01) - - class PySimpleRNN1(PyRNNBase): def __init__(self, input_shape, output_shape): super().__init__(input_shape, output_shape) @@ -1451,24 +760,20 @@ def tearDown(self): def test_recurrent_backward(self): net = RecurrentNet() - inputs = paddle.rand((4, 2, 16)) + inputs = paddle.rand((4, 23, 16)) inputs.stop_gradient = False prev_h = paddle.randn((4, 32)) prev_h.stop_gradient = False outputs, final_states = net(inputs, prev_h) - # print( final_states ) outputs.backward() dy_grad = inputs.gradient() inputs.clear_gradient() - print("oreh", prev_h.gradient()) net = paddle.jit.to_static(net) outputs, final_states = net(inputs, prev_h) - # print( final_states) outputs.backward() st_grad = inputs.gradient() - print("preh ", prev_h.gradient()) np.testing.assert_allclose(dy_grad, st_grad) diff --git a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py index 3673baab6ba10..73995d0ee00db 100644 --- a/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py +++ b/python/paddle/fluid/tests/unittests/test_rnn_cell_api.py @@ -742,5 +742,4 @@ def test_seq2seq_model(self): if __name__ == '__main__': - paddle.enable_static() unittest.main() From 5e03fda508f5c67c2a999d25a27e5bb5e55326d0 Mon Sep 17 00:00:00 2001 From: phlrain Date: Thu, 1 Dec 2022 03:23:44 +0000 Subject: [PATCH 10/16] format --- paddle/fluid/operators/controlflow/while_op.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index 7a3e271fc67e1..24060ce7ebfdb 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -305,6 +305,7 @@ class WhileOp : public framework::OperatorBase { cond_data = GetCondData( scope.FindVar(Input(kCondition))->Get()); } + scope.DeleteScope(¤t_scope); } } From 2ed574c3dcb14f387f88c66392f94a31cda2f4d0 Mon Sep 17 00:00:00 2001 From: phlrain Date: Fri, 2 Dec 2022 08:44:33 +0000 Subject: [PATCH 11/16] revert test while loop op --- python/paddle/fluid/tests/unittests/test_while_loop_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_while_loop_op.py b/python/paddle/fluid/tests/unittests/test_while_loop_op.py index 6ebf383984188..0b4cc57c77f49 100644 --- a/python/paddle/fluid/tests/unittests/test_while_loop_op.py +++ b/python/paddle/fluid/tests/unittests/test_while_loop_op.py @@ -297,7 +297,7 @@ def body(i, x): res = exe.run( main_program, feed={'i': feed_i, 'x': feed_x}, - fetch_list=[mean.name, x.grad_name], + fetch_list=[mean.name, i.grad_name, x.grad_name], ) np.testing.assert_allclose(np.asarray(res[0]), data, rtol=1e-05) np.testing.assert_allclose(np.asarray(res[1]), i_grad, rtol=1e-05) From 11e5767f44b57ad5058999b66090b8d0e7b1e055 Mon Sep 17 00:00:00 2001 From: phlrain Date: Fri, 2 Dec 2022 11:34:43 +0000 Subject: [PATCH 12/16] fix create array --- python/paddle/fluid/layers/rnn.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 1d283dc9cfeed..ac9398a7f8463 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -692,10 +692,10 @@ def _switch_grad(x, stop=False): cond = start_i < end while_op = control_flow.While(cond) - out_array = control_flow.create_array(dtype=flatten(inputs)[0].dtype) + out_array = paddle.tensor.create_array(dtype=flatten(inputs)[0].dtype) init_array = map_structure( - lambda x: control_flow.create_array(dtype=x.dtype), initial_states + lambda x: paddle.tensor.create_array(dtype=x.dtype), initial_states ) map_structure( @@ -709,7 +709,7 @@ def _switch_grad(x, stop=False): step_in = inputs[start_i] # step_in = paddle.fluid.layers.Print( step_in, message="step in") pre_state = map_structure( - lambda x: control_flow.array_read(x, start_i), init_array + lambda x: paddle.tensor.array_read(x, start_i), init_array ) # pre_state = paddle.fluid.layers.Print( pre_state, message="pre") outputs, new_states = cell(step_in, pre_state, **kwargs) @@ -728,7 +728,7 @@ def _switch_grad(x, stop=False): pre_state, ) - control_flow.array_write(outputs, start_i, out_array) + paddle.tensor.array_write(outputs, start_i, out_array) with paddle.fluid.framework.device_guard("cpu"): @@ -736,13 +736,14 @@ def _switch_grad(x, stop=False): x=start_i, value=1, in_place=True ) map_structure( - lambda x, y: control_flow.array_write(x, start_i, y), + lambda x, y: paddle.tensor.array_write(x, start_i, y), new_states, init_array, ) with paddle.fluid.framework.device_guard("cpu"): - paddle.fluid.layers.less_than(x=start_i, y=end, cond=cond) + new_cond = paddle.tensor.less_than(start_i, end) + paddle.fluid.layers.assign(new_cond, cond) out, _ = paddle.fluid.layers.tensor_array_to_tensor( out_array, axis=0, use_stack=True From 2cedb0d9d76a7621d89964633c946ef01c757586 Mon Sep 17 00:00:00 2001 From: phlrain Date: Thu, 8 Dec 2022 06:17:40 +0000 Subject: [PATCH 13/16] fix windows error --- .../fluid/tests/unittests/rnn/test_rnn_api.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py b/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py index c020cc0dcf8d6..f138ab3216eb4 100644 --- a/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py +++ b/python/paddle/fluid/tests/unittests/rnn/test_rnn_api.py @@ -18,14 +18,12 @@ paddle.set_default_dtype("float64") import unittest -from paddle import fluid import numpy as np -import unittest +from paddle import fluid from paddle.fluid import framework - bidirectional_list = ["bidirectional", "bidirect"] @@ -107,9 +105,9 @@ def test_base(self, test_seq_len=False): paddle.disable_static(self.place) if test_seq_len: - seq_len = np.array([9, 10, 8, 12]) + seq_len = np.array([9, 10, 8, 12], "int64") else: - seq_len = np.array([12, 12, 12, 12]) + seq_len = np.array([12, 12, 12, 12], "int64") y1, h1 = self.rnn_net( paddle.to_tensor(x), @@ -210,9 +208,9 @@ def test_base(self, test_seq_len=False): paddle.disable_static(self.place) if test_seq_len: - seq_len = np.array([9, 10, 8, 12]) + seq_len = np.array([9, 10, 8, 12], "int64") else: - seq_len = np.array([12, 12, 12, 12]) + seq_len = np.array([12, 12, 12, 12], "int64") y1, h1 = self.rnn_net( paddle.to_tensor(x), @@ -322,9 +320,9 @@ def test_base(self, test_seq_len=False): paddle.disable_static(self.place) if test_seq_len: - seq_len = np.array([9, 10, 8, 12]) + seq_len = np.array([9, 10, 8, 12], "int64") else: - seq_len = np.array([12, 12, 12, 12]) + seq_len = np.array([12, 12, 12, 12], "int64") x_in = paddle.to_tensor(x) h_in = paddle.to_tensor(prev_h) From a4bb359cba035ca307e7c6a6fc0230bd5d805139 Mon Sep 17 00:00:00 2001 From: phlrain Date: Sun, 11 Dec 2022 14:01:37 +0000 Subject: [PATCH 14/16] fix bug --- python/paddle/fluid/layers/rnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 60e90b54aefef..cd229f41da227 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -663,7 +663,7 @@ def _switch_grad(x, stop=False): if not time_major: inputs = map_structure(_transpose_batch_time, inputs) - max_seq_len = nn.shape(flatten(inputs)[0])[0] + max_seq_len = paddle.shape(flatten(inputs)[0])[0] if sequence_length: mask = sequence_lod.sequence_mask( sequence_length, From 24f24cad1d60b59f8b110d83146fd72beccc77c4 Mon Sep 17 00:00:00 2001 From: phlrain Date: Tue, 13 Dec 2022 14:38:42 +0000 Subject: [PATCH 15/16] update --- python/paddle/nn/layer/rnn.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index bbe0f0c4760b7..9b44ca0da927b 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -308,9 +308,7 @@ def _switch_grad(x, stop=False): with paddle.fluid.framework.device_guard("cpu"): - start_i = paddle.fluid.layers.increment( - x=start_i, value=1, in_place=True - ) + start_i = paddle.tensor.increment(x=start_i, value=1) map_structure( lambda x, y: paddle.tensor.array_write(x, start_i, y), new_states, From f9967c6cba5108e6a948f9754b8d0a0abeb584a3 Mon Sep 17 00:00:00 2001 From: phlrain Date: Wed, 14 Dec 2022 14:40:14 +0000 Subject: [PATCH 16/16] fix array write bug --- python/paddle/nn/layer/rnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index 9b44ca0da927b..7fbce5abc0b13 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -275,7 +275,7 @@ def _switch_grad(x, stop=False): ) map_structure( - lambda x, y: control_flow.array_write(x, start_i, y), + lambda x, y: paddle.tensor.array_write(x, start_i, y), initial_states, init_array, )