From ea9d783a958a211df6cfae934060262b71fa4b77 Mon Sep 17 00:00:00 2001
From: pangyoki
Date: Tue, 30 Mar 2021 03:18:38 +0000
Subject: [PATCH 01/10] support save load for NPU

---
 paddle/fluid/framework/tensor_util.cc         | 34 +++++++++++++++++--
 paddle/fluid/operators/load_combine_op_npu.cc | 27 +++++++++++++++
 paddle/fluid/operators/load_op_npu.cc         | 26 ++++++++++++++
 paddle/fluid/operators/save_combine_op_npu.cc | 26 ++++++++++++++
 paddle/fluid/operators/save_op_npu.cc         | 30 ++++++++++++++++
 5 files changed, 140 insertions(+), 3 deletions(-)
 create mode 100644 paddle/fluid/operators/load_combine_op_npu.cc
 create mode 100644 paddle/fluid/operators/load_op_npu.cc
 create mode 100644 paddle/fluid/operators/save_combine_op_npu.cc
 create mode 100644 paddle/fluid/operators/save_op_npu.cc

diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
index d8e79d40c23eb..52c7999e2a56b 100644
--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -822,6 +822,29 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
 #else
     PADDLE_THROW(platform::errors::Unimplemented(
         "XPUPlace is not supported when not compiled with XPU"));
+#endif
+  } else if (platform::is_npu_place(tensor.place())) {
+#ifdef PADDLE_WITH_ASCEND_CL
+    constexpr size_t kBufSize = 1024 * 1024 * 64;  // 64MB
+    std::unique_ptr<char[]> buf(new char[kBufSize]);
+    auto& npu_dev_ctx =
+        static_cast<const platform::NPUDeviceContext&>(dev_ctx);
+    platform::CPUPlace cpu;
+    uintptr_t data = reinterpret_cast<uintptr_t>(data_ptr);
+    while (size != 0) {
+      size_t size_to_write = std::min(kBufSize, static_cast<size_t>(size));
+      memory::Copy(cpu, buf.get(),
+                   BOOST_GET_CONST(platform::NPUPlace, tensor.place()),
+                   reinterpret_cast<const void*>(data), size_to_write,
+                   npu_dev_ctx.stream());
+      npu_dev_ctx.Wait();
+      os.write(buf.get(), size_to_write);
+      data += size_to_write;
+      size -= size_to_write;
+    }
+#else
+    PADDLE_THROW(platform::errors::Unimplemented(
+        "NPUPlace is not supported when not compiled with NPU"));
 #endif
   } else {
     os.write(static_cast<const char*>(data_ptr),
@@ -877,8 +900,10 @@ void TensorFromStream(std::istream& is, Tensor* tensor,
   auto ctx = platform::CPUDeviceContext();
   size_t size = tensor->numel() * framework::SizeOfType(desc.data_type());
   if (platform::is_gpu_place(dev_ctx.GetPlace()) ||
-      platform::is_xpu_place(dev_ctx.GetPlace())) {
-#if defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU
+      platform::is_xpu_place(dev_ctx.GetPlace()) ||
+      platform::is_npu_place(dev_ctx.GetPlace())) {
+#if defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU || \
+    defined PADDLE_WITH_ASCEND_CL
     Tensor cpu_tensor;
     cpu_tensor.Resize(framework::make_ddim(shape));
     framework::VisitDataType(
@@ -891,9 +916,12 @@
     if (platform::is_gpu_place(dev_ctx.GetPlace())) {
       PADDLE_THROW(platform::errors::Unimplemented(
           "CUDAPlace is not supported when not compiled with CUDA"));
-    } else {
+    } else if (platform::is_xpu_place(dev_ctx.GetPlace())) {
       PADDLE_THROW(platform::errors::Unimplemented(
           "XPUPlace is not supported when not compiled with XPU"));
+    } else {
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "NPUPlace is not supported when not compiled with NPU"));
     }
 #endif
   } else {
diff --git a/paddle/fluid/operators/load_combine_op_npu.cc b/paddle/fluid/operators/load_combine_op_npu.cc
new file mode 100644
index 0000000000000..11f0bdaf76e55
--- /dev/null
+++ b/paddle/fluid/operators/load_combine_op_npu.cc
@@ -0,0 +1,27 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_ASCEND_CL +#include "paddle/fluid/operators/load_combine_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_NPU_KERNEL( + load_combine, + ops::LoadCombineOpKernel, + ops::LoadCombineOpKernel, + ops::LoadCombineOpKernel, + ops::LoadCombineOpKernel, + ops::LoadCombineOpKernel); +#endif diff --git a/paddle/fluid/operators/load_op_npu.cc b/paddle/fluid/operators/load_op_npu.cc new file mode 100644 index 0000000000000..17384c40974f5 --- /dev/null +++ b/paddle/fluid/operators/load_op_npu.cc @@ -0,0 +1,26 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_ASCEND_CL +#include "paddle/fluid/operators/load_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_NPU_KERNEL( + load, ops::LoadOpKernel, + ops::LoadOpKernel, + ops::LoadOpKernel, + ops::LoadOpKernel, + ops::LoadOpKernel); +#endif diff --git a/paddle/fluid/operators/save_combine_op_npu.cc b/paddle/fluid/operators/save_combine_op_npu.cc new file mode 100644 index 0000000000000..30905bf00bc2e --- /dev/null +++ b/paddle/fluid/operators/save_combine_op_npu.cc @@ -0,0 +1,26 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_ASCEND_CL +#include "paddle/fluid/operators/save_combine_op.h" + +namespace ops = paddle::operators; + +REGISTER_OP_NPU_KERNEL( + save_combine, + ops::SaveCombineOpKernel, + ops::SaveCombineOpKernel, + ops::SaveCombineOpKernel, + ops::SaveCombineOpKernel); +#endif diff --git a/paddle/fluid/operators/save_op_npu.cc b/paddle/fluid/operators/save_op_npu.cc new file mode 100644 index 0000000000000..89458ee735fd8 --- /dev/null +++ b/paddle/fluid/operators/save_op_npu.cc @@ -0,0 +1,30 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef PADDLE_WITH_ASCEND_CL +#include "paddle/fluid/operators/save_op.h" +#include "paddle/fluid/platform/float16.h" + +namespace ops = paddle::operators; + +REGISTER_OP_NPU_KERNEL( + save, ops::SaveOpKernel, + ops::SaveOpKernel, + ops::SaveOpKernel, + ops::SaveOpKernel, + ops::SaveOpKernel, + ops::SaveOpKernel, + ops::SaveOpKernel); +#endif From 7b856773a7eb92d59d6f60c2353d62e80dcd4ad9 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Tue, 30 Mar 2021 03:29:25 +0000 Subject: [PATCH 02/10] add save load npu unittest --- .../tests/unittests/npu/test_save_load_npu.py | 1507 +++++++++++++++++ 1 file changed, 1507 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py diff --git a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py new file mode 100644 index 0000000000000..dc485cb6ab038 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py @@ -0,0 +1,1507 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
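+
+# This unittest adapts the static-graph save/load tests (fluid.save/fluid.load,
+# fluid.io.save_persistables and fluid.load_program_state) to NPU: the place is
+# paddle.NPUPlace(0) when Paddle is compiled with NPU support, and
+# fluid.CPUPlace() otherwise.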
+ +from __future__ import print_function + +import unittest +import sys +sys.path.append("..") +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.dygraph.nn import Embedding +import paddle.fluid.framework as framework +from paddle.fluid.optimizer import Adam +from paddle.fluid.dygraph.base import to_variable +from test_imperative_base import new_program_scope +from paddle.fluid.executor import global_scope +import numpy as np +import six +import pickle +import os +import errno + + +class SimpleLSTMRNN(fluid.Layer): + def __init__(self, + name_scope, + hidden_size, + num_steps, + num_layers=2, + init_scale=0.1, + dropout=None): + super(SimpleLSTMRNN, self).__init__() + self._hidden_size = hidden_size + self._num_layers = num_layers + self._init_scale = init_scale + self._dropout = dropout + self._input = None + self._num_steps = num_steps + self.cell_array = [] + self.hidden_array = [] + + self.weight_1_arr = [] + self.weight_2_arr = [] + self.bias_arr = [] + self.mask_array = [] + + for i in range(self._num_layers): + weight_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 2, self._hidden_size * 4], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)) + self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) + bias_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 4], + dtype="float32", + default_initializer=fluid.initializer.Constant(0.0)) + self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) + + def forward(self, input_embedding, init_hidden=None, init_cell=None): + self.cell_array = [] + self.hidden_array = [] + + for i in range(self._num_layers): + pre_hidden = fluid.layers.slice( + init_hidden, axes=[0], starts=[i], ends=[i + 1]) + pre_cell = fluid.layers.slice( + init_cell, axes=[0], starts=[i], ends=[i + 1]) + pre_hidden = fluid.layers.reshape( + pre_hidden, shape=[-1, self._hidden_size]) + pre_cell = fluid.layers.reshape( + pre_cell, shape=[-1, self._hidden_size]) + self.hidden_array.append(pre_hidden) + self.cell_array.append(pre_cell) + + res = [] + for index in range(self._num_steps): + self._input = fluid.layers.slice( + input_embedding, axes=[1], starts=[index], ends=[index + 1]) + self._input = fluid.layers.reshape( + self._input, shape=[-1, self._hidden_size]) + for k in range(self._num_layers): + pre_hidden = self.hidden_array[k] + pre_cell = self.cell_array[k] + weight_1 = self.weight_1_arr[k] + bias = self.bias_arr[k] + + nn = fluid.layers.concat([self._input, pre_hidden], 1) + gate_input = fluid.layers.matmul(x=nn, y=weight_1) + + gate_input = fluid.layers.elementwise_add(gate_input, bias) + i, j, f, o = fluid.layers.split( + gate_input, num_or_sections=4, dim=-1) + c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( + i) * fluid.layers.tanh(j) + m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) + self.hidden_array[k] = m + self.cell_array[k] = c + self._input = m + + if self._dropout is not None and self._dropout > 0.0: + self._input = fluid.layers.dropout( + self._input, + dropout_prob=self._dropout, + dropout_implementation='upscale_in_train') + res.append( + fluid.layers.reshape( + self._input, shape=[1, -1, self._hidden_size])) + real_res = 
fluid.layers.concat(res, 0) + real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + last_hidden = fluid.layers.concat(self.hidden_array, 1) + last_hidden = fluid.layers.reshape( + last_hidden, shape=[-1, self._num_layers, self._hidden_size]) + last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_cell = fluid.layers.concat(self.cell_array, 1) + last_cell = fluid.layers.reshape( + last_cell, shape=[-1, self._num_layers, self._hidden_size]) + last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + return real_res, last_hidden, last_cell + + +class PtbModel(fluid.Layer): + def __init__(self, + name_scope, + hidden_size, + vocab_size, + num_layers=2, + num_steps=20, + init_scale=0.1, + dropout=None): + super(PtbModel, self).__init__() + self.hidden_size = hidden_size + self.vocab_size = vocab_size + self.init_scale = init_scale + self.num_layers = num_layers + self.num_steps = num_steps + self.dropout = dropout + self.simple_lstm_rnn = SimpleLSTMRNN( + self.full_name(), + hidden_size, + num_steps, + num_layers=num_layers, + init_scale=init_scale, + dropout=dropout) + self.embedding = Embedding( + size=[vocab_size, hidden_size], + dtype='float32', + is_sparse=False, + param_attr=fluid.ParamAttr( + name='embedding_para', + initializer=fluid.initializer.UniformInitializer( + low=-init_scale, high=init_scale))) + self.softmax_weight = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.hidden_size, self.vocab_size], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self.init_scale, high=self.init_scale)) + self.softmax_bias = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.vocab_size], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self.init_scale, high=self.init_scale)) + + def forward(self, input, label, init_hidden, init_cell): + init_h = fluid.layers.reshape( + init_hidden, shape=[self.num_layers, -1, self.hidden_size]) + + init_c = fluid.layers.reshape( + init_cell, shape=[self.num_layers, -1, self.hidden_size]) + + x_emb = self.embedding(input) + x_emb = fluid.layers.reshape( + x_emb, shape=[-1, self.num_steps, self.hidden_size]) + if self.dropout is not None and self.dropout > 0.0: + x_emb = fluid.layers.dropout( + x_emb, + dropout_prob=self.drop_out, + dropout_implementation='upscale_in_train') + rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, + init_c) + + rnn_out = fluid.layers.reshape( + rnn_out, shape=[-1, self.num_steps, self.hidden_size]) + projection = fluid.layers.matmul(rnn_out, self.softmax_weight) + projection = fluid.layers.elementwise_add(projection, self.softmax_bias) + projection = fluid.layers.reshape( + projection, shape=[-1, self.vocab_size]) + loss = fluid.layers.softmax_with_cross_entropy( + logits=projection, label=label, soft_label=False) + loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) + loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = fluid.layers.reduce_sum(loss) + + return loss, last_hidden, last_cell + + +""" +class TestSaveLoadBase(unittest.TestCase): + def test_ptb_rnn_cpu_float32(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + 
num_steps=num_steps, + init_scale=init_scale) + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + "init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + fluid.save(main_program, "./test_1") + + # set var to zero + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + fluid.load(main_program, "./test_1.pdparams", exe) + + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + +class TestSaveLoadPartial(unittest.TestCase): + def test_ptb_rnn_cpu_float32(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() if not core.is_compiled_with_npu( + ) 
else paddle.NPUPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + + test_program = fluid.default_main_program().clone(for_test=True) + + add_1 = fluid.layers.fc(static_last_hidden, + size=hidden_size, + num_flatten_dims=2, + bias_attr=False) + + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + "init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + fluid.save(main_program, "./test_1") + + # set var to zero + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + fluid.load(test_program, "./test_1.pdopt", None) + + for var in test_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + fluid.load(test_program, "./test_1.pdmodel", None) + + +class TestSaveLoadSetStateDict(unittest.TestCase): + def test_ptb_rnn_cpu_float32(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() if not 
core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + "init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + fluid.save(main_program, "./test_1") + + # set var to zero + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + fluid.load(main_program, "./test_1", exe) + + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + +class TestProgramStatePartial(unittest.TestCase): + def test_ptb_rnn_cpu_float32(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = 
fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + + test_program = fluid.default_main_program().clone(for_test=True) + + add_1 = fluid.layers.fc(static_last_hidden, + size=hidden_size, + num_flatten_dims=2, + bias_attr=False) + + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + "init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + fluid.save(main_program, os.path.join('some_dir', 'test_1')) + + # set var to zero + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + #fluid.load(test_program, "./test_1", None ) + program_state = fluid.load_program_state( + os.path.join('some_dir', 'test_1')) + + program_state_1 = fluid.load_program_state( + os.path.join('some_dir', 'test_1.pdparams')) + + program_state_2 = fluid.load_program_state( + os.path.join('some_dir', 'test_1.pdopt')) + + program_state_3 = fluid.load_program_state( + os.path.join('some_dir', 'test_1.pdmodel')) + + fluid.set_program_state(test_program, program_state) + + for var in test_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + # check 1 + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + 
fluid.set_program_state(test_program, program_state_1) + + for var in test_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + # check 2 + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + fluid.set_program_state(test_program, program_state_2) + + for var in test_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + # check 3 + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + fluid.set_program_state(test_program, program_state_3) + + for var in test_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + +class TestVariableInit(unittest.TestCase): + def test_variable_init(self): + + x = fluid.data(name="x", shape=[10, 10], dtype='float32') + y = fluid.layers.fc(x, 10) + z = fluid.layers.fc(y, 10) + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + fluid.save(fluid.default_main_program(), "./test_path") + + def set_var(var, ndarray): + t = var.get_tensor() + p = t._place() + if p.is_cpu_place(): + place = paddle.fluid.CPUPlace() + else: + place = paddle.NPUPlace(0) + + t.set(ndarray, place) + + program = fluid.default_main_program() + new_scope = fluid.core.Scope() + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) + exe = fluid.Executor(place) + parameter_list = list( + filter(fluid.io.is_parameter, program.list_vars())) + + fluid.core._create_loaded_parameter(parameter_list, new_scope, + exe._default_executor) + parameter_file_name = "./test_path.pdparams" + with open(parameter_file_name, 'rb') as f: + load_dict = pickle.load(f) + + for v in parameter_list: + assert v.name in load_dict, \ + "Can not find [{}] in model file [{}]".format( + v.name, parameter_file_name) + new_v = new_scope.find_var(v.name) + set_var(new_v, load_dict[v.name]) + + opt_list = list( + filter(fluid.io.is_belong_to_optimizer, program.list_vars())) + + fluid.core._create_loaded_parameter(opt_list, new_scope, + exe._default_executor) + opt_file_name = "./test_path.pdopt" + with open(opt_file_name, 'rb') as f: + load_dict = 
pickle.load(f) + + for v in opt_list: + assert v.name in load_dict, \ + "Can not find [{}] in model file [{}]".format( + v.name, opt_file_name) + + new_v = new_scope.find_var(v.name) + set_var(new_v, load_dict[v.name]) + + base_map = {} + for var in program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been update + base_map[var.name] = t + + for var in program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(new_scope.find_var(var.name).get_tensor()) + base_t = base_map[var.name] + + self.assertTrue(np.array_equal(new_t, base_t)) +""" + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestLoadFromOldInterface(unittest.TestCase): + def setUp(self): + if os.path.exists("test_path.pdparams"): + os.remove("test_path.pdparams") + + if os.path.exists("test_static_load_var_list.pdparams"): + os.remove("test_static_load_var_list.pdparams") + + def test_load_from_old_interface(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + + test_clone_program = fluid.default_main_program().clone() + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + "init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure 
all the paramerter or optimizer var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + #fluid.save(main_program, "./test_1") + fluid.io.save_persistables(exe, "test_path", main_program) + + # set var to zero + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + fluid.load(main_program, "test_path", exe) + + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + old_shape = np.array(ten).shape + new_shape = [e + 10 for e in old_shape] + + var.desc.set_shape(new_shape) + with self.assertRaises(RuntimeError): + fluid.load(main_program, "test_path", exe) + + # check unused parameter + + fluid.load(test_clone_program, "test_path", exe) + + def test_load_from_old_interface_var_list(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + + test_clone_program = fluid.default_main_program().clone() + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + "init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + 
static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + #fluid.save(main_program, "./test_1") + fluid.io.save_persistables(exe, "test_static_load_var_list", + main_program) + + # set var to zero + var_list = [] + for i, var in enumerate(main_program.list_vars()): + if isinstance(var, framework.Parameter) or var.persistable: + if i % 2 == 0: + var_list.append(var) + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + fluid.load(main_program, "test_static_load_var_list", exe, var_list) + var_list_names = [var.name for var in var_list] + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + if var.name in var_list_names: + # loaded vars + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + else: + #not loaded vars + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + +""" +class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): + def test_load_from_old_interface(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + 
"init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + #fluid.save(main_program, "./test_1") + fluid.io.save_persistables( + exe, "test_path", main_program, filename="model_single") + + # set var to zero + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + file_model_path = os.path.join("test_path", "model_single") + fluid.load(main_program, file_model_path, exe, + fluid.io.get_program_persistable_vars(main_program)) + + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + # test exception + # change shape + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + old_shape = np.array(ten).shape + new_shape = [e + 10 for e in old_shape] + + var.desc.set_shape(new_shape) + + with self.assertRaises(RuntimeError): + fluid.load(main_program, file_model_path, exe, + fluid.io.get_program_persistable_vars(main_program)) + + fluid.io.save_params( + exe, "test_path", main_program, filename="model_single") + with self.assertRaises(RuntimeError): + fluid.load(main_program, file_model_path, exe, + fluid.io.get_program_persistable_vars(main_program)) + + # check when executor is None + with self.assertRaises(ValueError): + fluid.load(main_program, file_model_path, None, + fluid.io.get_program_persistable_vars(main_program)) + + # check when var list is None + with self.assertRaises(ValueError): + fluid.load(main_program, file_model_path, exe, None) + + # check save params, load var_list = get_program_persistable_vars + with self.assertRaises(RuntimeError): + temp_var = framework.Variable( + main_program.global_block(), + shape=[1], + name="test_temp_var") + all_var_list = list(main_program.list_vars()) + fluid.load(main_program, file_model_path, exe, + all_var_list + [temp_var]) + + +class TestProgramStateOldSave(unittest.TestCase): + def test_ptb_rnn_cpu_float32(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() 
if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + + test_program = fluid.default_main_program().clone(for_test=True) + + add_1 = fluid.layers.fc(static_last_hidden, + size=hidden_size, + num_flatten_dims=2, + bias_attr=False) + + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + "init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + fluid.io.save_persistables(exe, "test_program_1", main_program) + + # set var to zero + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + # case 1: load basic + program_state = fluid.load_program_state("test_program_1") + fluid.set_program_state(main_program, program_state) + self.check_in_static(main_program, base_map) + + # case 2: load with no need file + def symlink_force(target, link_name): + try: + os.symlink(target, link_name) + except OSError as e: + if e.errno == errno.EEXIST: + os.remove(link_name) + os.symlink(target, link_name) + else: + raise e + + orig_filepath = './test_program_1/fc_0.w_0' + symlink_filepath = './test_program_1/link_fc_0.w_0' + # create a needless link file for coverage + symlink_force(orig_filepath, symlink_filepath) + program_state = fluid.load_program_state("test_program_1") + fluid.set_program_state(main_program, program_state) + self.check_in_static(main_program, base_map) + + # case 3: load with var_list + program_state = fluid.load_program_state( + "test_program_1", main_program.all_parameters()) + fluid.set_program_state(main_program, 
program_state) + self.check_in_static(main_program, base_map) + + # make sure `load_program_state` can be used in dynamic graph mode + with fluid.dygraph.guard(place): + load_state = fluid.load_program_state("test_program_1") + for k, v in load_state.items(): + self.assertTrue(np.array_equal(base_map[k], v)) + + def check_in_static(self, main_program, base_map): + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + +class TestStaticSaveLoadLargeParameters(unittest.TestCase): + def test_large_parameters_static_save(self): + # enable static mode + paddle.enable_static() + LARGE_PARAM = 2**26 + with new_program_scope(): + # create network + x = paddle.static.data( + name="static_save_load_large_x", + shape=[None, 10], + dtype='float32') + z = paddle.static.nn.fc(x, LARGE_PARAM) + place = paddle.CPUPlace() + exe = paddle.static.Executor(place) + exe.run(paddle.static.default_startup_program()) + prog = paddle.static.default_main_program() + + inputs = np.random.randn(1, 10).astype("float32") + result_z = exe.run(program=prog, + feed={"static_save_load_large_x": inputs}, + fetch_list=[z.name]) + path = "test_static_save_load_large_param/static_save" + paddle.fluid.save(prog, path) + + paddle.fluid.load(prog, path) + result_load = exe.run(program=prog, + feed={"static_save_load_large_x": inputs}, + fetch_list=[z.name]) + # compare results before and after saving + self.assertTrue( + np.sum(np.abs(result_z[0] - result_load[0])) < 1e-15) + + +class TestProgramStateOldSaveSingleModel(unittest.TestCase): + def test_ptb_rnn_cpu_float32(self): + seed = 90 + hidden_size = 10 + vocab_size = 1000 + num_layers = 1 + num_steps = 3 + init_scale = 0.1 + batch_size = 4 + batch_num = 200 + + with new_program_scope(): + fluid.default_startup_program().random_seed = seed + fluid.default_main_program().random_seed = seed + ptb_model = PtbModel( + "ptb_model", + hidden_size=hidden_size, + vocab_size=vocab_size, + num_layers=num_layers, + num_steps=num_steps, + init_scale=init_scale) + + # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + # ) else fluid.CUDAPlace(0) + place = fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) + exe = fluid.Executor(place) + sgd = Adam(learning_rate=1e-3) + x = fluid.layers.data( + name="x", shape=[-1, num_steps], dtype='int64') + y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') + init_hidden = fluid.layers.data( + name="init_hidden", shape=[1], dtype='float32') + init_cell = fluid.layers.data( + name="init_cell", shape=[1], dtype='float32') + + static_loss, static_last_hidden, static_last_cell = ptb_model( + x, y, init_hidden, init_cell) + + test_program = fluid.default_main_program().clone(for_test=True) + + add_1 = fluid.layers.fc(static_last_hidden, + size=hidden_size, + num_flatten_dims=2, + bias_attr=False) + + sgd.minimize(static_loss) + static_param_updated = dict() + static_param_init = dict() + + out = exe.run(framework.default_startup_program()) + + static_loss_value = None + static_last_cell_value = None + static_last_hidden_value = None + for i in range(batch_num): + x_data = np.arange(12).reshape(4, 3).astype('int64') + y_data = np.arange(1, 13).reshape(4, 3).astype('int64') + x_data = x_data.reshape((-1, num_steps, 1)) + y_data = y_data.reshape((-1, 1)) + init_hidden_data = np.zeros( + (num_layers, batch_size, 
hidden_size), dtype='float32') + init_cell_data = np.zeros( + (num_layers, batch_size, hidden_size), dtype='float32') + fetch_list = [static_loss, static_last_hidden, static_last_cell] + out = exe.run(fluid.default_main_program(), + feed={ + "x": x_data, + "y": y_data, + "init_hidden": init_hidden_data, + "init_cell": init_cell_data + }, + fetch_list=fetch_list) + static_loss_value = out[0] + static_last_hidden_value = out[1] + static_last_cell_value = out[2] + + # get value before save + main_program = framework.default_main_program() + base_map = {} + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been update + self.assertTrue(np.sum(np.abs(t)) != 0) + base_map[var.name] = t + + fluid.io.save_persistables( + exe, "test_program_2", main_program, filename="model_1") + + # set var to zero + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + ten = fluid.global_scope().find_var(var.name).get_tensor() + ten.set(np.zeros_like(np.array(ten)), place) + + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + # make sure all the paramerter or optimizer var have been set to zero + self.assertTrue(np.sum(np.abs(new_t)) == 0) + + #fluid.load(test_program, "./test_1", None ) + program_state = fluid.load_program_state( + os.path.join("test_program_2", "model_1"), + var_list=fluid.io.get_program_persistable_vars(main_program)) + fluid.set_program_state(main_program, program_state) + + for var in main_program.list_vars(): + if isinstance(var, framework.Parameter) or var.persistable: + new_t = np.array(fluid.global_scope().find_var(var.name) + .get_tensor()) + base_t = base_map[var.name] + self.assertTrue(np.array_equal(new_t, base_t)) + + with self.assertRaises(ValueError): + fluid.load_program_state( + os.path.join("test_program_2", "model_1")) + + with self.assertRaises(TypeError): + fluid.load_program_state( + os.path.join("test_program_2", "model_1"), + var_list=["str"]) + + with self.assertRaises(RuntimeError): + fluid.load_program_state( + os.path.join("test_program_2", "model_1"), + var_list=[ + main_program.global_block().create_var( + name="fake_var_name", persistable=True) + ]) +""" + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() From 9e86ed76d28458429a1adba8d5a1f5bcb259d997 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Tue, 30 Mar 2021 08:12:44 +0000 Subject: [PATCH 03/10] support np.array transform in NPU --- paddle/fluid/pybind/tensor_py.h | 26 ++++++++++++++++++- .../tests/unittests/npu/test_save_load_npu.py | 2 ++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 51fc3439c9a59..7d5cff770ff24 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -644,6 +644,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, } bool is_gpu_tensor = platform::is_gpu_place(tensor.place()); bool is_xpu_tensor = platform::is_xpu_place(tensor.place()); + bool is_npu_tensor = platform::is_npu_place(tensor.place()); const auto &tensor_dims = tensor.dims(); auto tensor_dtype = tensor.type(); size_t sizeof_dtype = framework::SizeOfType(tensor_dtype); @@ -662,7 +663,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, std::string py_dtype_str = details::TensorDTypeToPyDTypeStr(tensor.type()); - if 
(!is_gpu_tensor && !is_xpu_tensor) { + if (!is_gpu_tensor && !is_xpu_tensor && !is_npu_tensor) { if (!need_deep_copy) { auto base = py::cast(std::move(tensor)); return py::array(py::dtype(py_dtype_str.c_str()), py_dims, py_strides, @@ -729,6 +730,29 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, PADDLE_THROW(platform::errors::PermissionDenied( "Cannot use CUDAPlace in CPU only version, " "Please recompile or reinstall Paddle with CUDA support.")); +#endif + } else if (is_npu_tensor) { +#ifdef PADDLE_WITH_ASCEND_CL + py::array py_arr(py::dtype(py_dtype_str.c_str()), py_dims, py_strides); + PADDLE_ENFORCE_EQ(py_arr.writeable(), true, + platform::errors::InvalidArgument( + "PyArray is not writable, in which case memory leak " + "or double free would occur")); + PADDLE_ENFORCE_EQ( + py_arr.owndata(), true, + platform::errors::InvalidArgument( + "PyArray does not own data, in which case memory leak " + "or double free would occur")); + + size_t copy_bytes = sizeof_dtype * numel; + auto p = BOOST_GET_CONST(platform::NPUPlace, tensor.place()); + paddle::memory::Copy(platform::CPUPlace(), py_arr.mutable_data(), p, + tensor_buf_ptr, copy_bytes); + return py_arr; +#else + PADDLE_THROW(platform::errors::PermissionDenied( + "Cannot use NPUPlace in CPU/GPU/XPU version, " + "Please recompile or reinstall Paddle with NPU support.")); #endif } PADDLE_THROW(platform::errors::Unimplemented("Place is not supported")); diff --git a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py index dc485cb6ab038..3b06341f635f8 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py @@ -32,6 +32,8 @@ import os import errno +paddle.enable_static() + class SimpleLSTMRNN(fluid.Layer): def __init__(self, From fa651acf4eda5dae5ae21163e7d931bb346f85e9 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Tue, 30 Mar 2021 10:37:17 +0000 Subject: [PATCH 04/10] fix errors --- paddle/fluid/pybind/tensor_py.h | 8 +++- python/paddle/fluid/io.py | 12 ++++- .../tests/unittests/npu/test_save_load_npu.py | 45 ++++++++----------- 3 files changed, 35 insertions(+), 30 deletions(-) diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 7d5cff770ff24..a2524912f2497 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -746,8 +746,12 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, size_t copy_bytes = sizeof_dtype * numel; auto p = BOOST_GET_CONST(platform::NPUPlace, tensor.place()); - paddle::memory::Copy(platform::CPUPlace(), py_arr.mutable_data(), p, - tensor_buf_ptr, copy_bytes); + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &ctx = *pool.Get(tensor.place()); + paddle::memory::Copy( + platform::CPUPlace(), py_arr.mutable_data(), p, tensor_buf_ptr, + copy_bytes, + reinterpret_cast(ctx).stream()); return py_arr; #else PADDLE_THROW(platform::errors::PermissionDenied( diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index d5963675a82a0..560abad626405 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -1973,6 +1973,10 @@ def set_var(var, ndarray): p = paddle.fluid.core.Place() p.set_place(t._place()) place = paddle.fluid.XPUPlace(p.xpu_device_id()) + elif p.is_npu_place(): + p = paddle.fluid.core.Place() + p.set_place(t._place()) + place = paddle.fluid.NPUPlace(p.npu_device_id()) else: p = 
paddle.fluid.core.Place() p.set_place(t._place()) @@ -2115,8 +2119,8 @@ def _load_vars_with_try_catch(exe, error_str = "Failed to load model/variables `%s`, please make sure " \ "model/variables file is saved with the following APIs: " \ "save_params, save_persistables, save_vars." - filenames = [var.name for var in vars - ] if filename is None else filename + filenames = [var.name for var in + vars] if filename is None else filename if raise_error: raise RuntimeError(error_str % filenames) else: @@ -2256,6 +2260,10 @@ def set_program_state(program, state_dict): p = paddle.fluid.core.Place() p.set_place(ten_place) py_place = paddle.fluid.XPUPlace(p.xpu_device_id()) + elif ten_place.is_npu_place(): + p = paddle.fluid.core.Place() + p.set_place(ten_place) + py_place = paddle.fluid.NPUPlace(p.npu_device_id()) ten.set(new_para_np, py_place) diff --git a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py index 3b06341f635f8..be93dba9c03bd 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py @@ -216,7 +216,8 @@ def forward(self, input, label, init_hidden, init_cell): return loss, last_hidden, last_cell -""" +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") class TestSaveLoadBase(unittest.TestCase): def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -239,8 +240,6 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ -320,6 +319,8 @@ def test_ptb_rnn_cpu_float32(self): self.assertTrue(np.array_equal(new_t, base_t)) +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") class TestSaveLoadPartial(unittest.TestCase): def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -342,8 +343,6 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ -432,6 +431,8 @@ def test_ptb_rnn_cpu_float32(self): fluid.load(test_program, "./test_1.pdmodel", None) +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") class TestSaveLoadSetStateDict(unittest.TestCase): def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -454,8 +455,6 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ -535,6 +534,8 @@ def test_ptb_rnn_cpu_float32(self): self.assertTrue(np.array_equal(new_t, base_t)) +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") class TestProgramStatePartial(unittest.TestCase): def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -557,8 +558,6 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ 
-719,6 +718,8 @@ def test_ptb_rnn_cpu_float32(self): self.assertTrue(np.array_equal(new_t, base_t)) +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") class TestVariableInit(unittest.TestCase): def test_variable_init(self): @@ -726,8 +727,6 @@ def test_variable_init(self): y = fluid.layers.fc(x, 10) z = fluid.layers.fc(y, 10) - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ -748,8 +747,6 @@ def set_var(var, ndarray): program = fluid.default_main_program() new_scope = fluid.core.Scope() - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ -800,7 +797,6 @@ def set_var(var, ndarray): base_t = base_map[var.name] self.assertTrue(np.array_equal(new_t, base_t)) -""" @unittest.skipIf(not paddle.is_compiled_with_npu(), @@ -834,8 +830,6 @@ def test_load_from_old_interface(self): num_steps=num_steps, init_scale=init_scale) - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ -952,8 +946,6 @@ def test_load_from_old_interface_var_list(self): num_steps=num_steps, init_scale=init_scale) - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ -1045,7 +1037,8 @@ def test_load_from_old_interface_var_list(self): self.assertTrue(np.sum(np.abs(new_t)) == 0) -""" +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): def test_load_from_old_interface(self): seed = 90 @@ -1068,8 +1061,6 @@ def test_load_from_old_interface(self): num_steps=num_steps, init_scale=init_scale) - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ -1192,6 +1183,8 @@ def test_load_from_old_interface(self): all_var_list + [temp_var]) +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") class TestProgramStateOldSave(unittest.TestCase): def test_ptb_rnn_cpu_float32(self): seed = 90 @@ -1214,8 +1207,6 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ -1338,6 +1329,8 @@ def check_in_static(self, main_program, base_map): self.assertTrue(np.array_equal(new_t, base_t)) +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") class TestStaticSaveLoadLargeParameters(unittest.TestCase): def test_large_parameters_static_save(self): # enable static mode @@ -1371,6 +1364,8 @@ def test_large_parameters_static_save(self): np.sum(np.abs(result_z[0] - result_load[0])) < 1e-15) +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") class TestProgramStateOldSaveSingleModel(unittest.TestCase): def 
test_ptb_rnn_cpu_float32(self): seed = 90 @@ -1393,8 +1388,6 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - # place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - # ) else fluid.CUDAPlace(0) place = fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) exe = fluid.Executor(place) @@ -1502,7 +1495,7 @@ def test_ptb_rnn_cpu_float32(self): main_program.global_block().create_var( name="fake_var_name", persistable=True) ]) -""" + if __name__ == '__main__': paddle.enable_static() From bfcc7cd75d7b947b66353ebe4fb02db556c834ca Mon Sep 17 00:00:00 2001 From: pangyoki Date: Tue, 30 Mar 2021 13:58:28 +0000 Subject: [PATCH 05/10] delete dygraph in unittest --- .../paddle/fluid/tests/unittests/npu/test_save_load_npu.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py index be93dba9c03bd..7b6bd72f65c5f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py @@ -1314,12 +1314,6 @@ def symlink_force(target, link_name): fluid.set_program_state(main_program, program_state) self.check_in_static(main_program, base_map) - # make sure `load_program_state` can be used in dynamic graph mode - with fluid.dygraph.guard(place): - load_state = fluid.load_program_state("test_program_1") - for k, v in load_state.items(): - self.assertTrue(np.array_equal(base_map[k], v)) - def check_in_static(self, main_program, base_map): for var in main_program.list_vars(): if isinstance(var, framework.Parameter) or var.persistable: From ea0e15b2d1aafdd239d91f2e1eeb98e8c4e1dc2c Mon Sep 17 00:00:00 2001 From: pangyoki Date: Thu, 8 Apr 2021 13:00:08 +0000 Subject: [PATCH 06/10] add Wait --- paddle/fluid/framework/tensor_util.cc | 17 +- paddle/fluid/pybind/tensor_py.h | 1 + .../tests/unittests/npu/test_save_load_npu.py | 182 +----------------- 3 files changed, 16 insertions(+), 184 deletions(-) diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index 52c7999e2a56b..21c87681f6877 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -912,6 +912,9 @@ void TensorFromStream(std::istream& is, Tensor* tensor, is.read(static_cast(buf), size); auto dst_place = dev_ctx.GetPlace(); framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor); + if (platform::is_npu_place(dev_ctx.GetPlace())) { + dev_ctx.Wait(); + } #else if (platform::is_gpu_place(dev_ctx.GetPlace())) { PADDLE_THROW(platform::errors::Unimplemented( @@ -962,8 +965,10 @@ void TensorFromStream(std::istream& is, Tensor* tensor, auto ctx = platform::CPUDeviceContext(); size_t size = tensor->numel() * framework::SizeOfType(desc.data_type()); if (platform::is_gpu_place(dev_ctx.GetPlace()) || - platform::is_xpu_place(dev_ctx.GetPlace())) { -#if defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU + platform::is_xpu_place(dev_ctx.GetPlace()) || + platform::is_npu_place(dev_ctx.GetPlace())) { +#if defined PADDLE_WITH_CUDA || defined PADDLE_WITH_XPU || \ + defined PADDLE_WITH_ASCEND_CL Tensor cpu_tensor; cpu_tensor.Resize(framework::make_ddim(dims)); framework::VisitDataType( @@ -972,13 +977,19 @@ void TensorFromStream(std::istream& is, Tensor* tensor, is.read(static_cast(buf), size); auto dst_place = dev_ctx.GetPlace(); framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor); + if 
(platform::is_npu_place(dev_ctx.GetPlace())) { + dev_ctx.Wait(); + } #else if (platform::is_gpu_place(dev_ctx.GetPlace())) { PADDLE_THROW(platform::errors::Unimplemented( "CUDAPlace is not supported when not compiled with CUDA")); - } else { + } else if (platform::is_xpu_place(dev_ctx.GetPlace())) { PADDLE_THROW(platform::errors::Unimplemented( "XPUPlace is not supported when not compiled with XPU")); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "NPUPlace is not supported when not compiled with NPU")); } #endif } else { diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index a2524912f2497..d0b7cdb84af48 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -752,6 +752,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, platform::CPUPlace(), py_arr.mutable_data(), p, tensor_buf_ptr, copy_bytes, reinterpret_cast(ctx).stream()); + ctx->Wait(); return py_arr; #else PADDLE_THROW(platform::errors::PermissionDenied( diff --git a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py index 7b6bd72f65c5f..a031fc025ab84 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py @@ -31,191 +31,11 @@ import pickle import os import errno +from ..unittests.test_static_save_load import SimpleLSTMRNN, PtbModel paddle.enable_static() -class SimpleLSTMRNN(fluid.Layer): - def __init__(self, - name_scope, - hidden_size, - num_steps, - num_layers=2, - init_scale=0.1, - dropout=None): - super(SimpleLSTMRNN, self).__init__() - self._hidden_size = hidden_size - self._num_layers = num_layers - self._init_scale = init_scale - self._dropout = dropout - self._input = None - self._num_steps = num_steps - self.cell_array = [] - self.hidden_array = [] - - self.weight_1_arr = [] - self.weight_2_arr = [] - self.bias_arr = [] - self.mask_array = [] - - for i in range(self._num_layers): - weight_1 = self.create_parameter( - attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( - low=-self._init_scale, high=self._init_scale)), - shape=[self._hidden_size * 2, self._hidden_size * 4], - dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( - low=-self._init_scale, high=self._init_scale)) - self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) - bias_1 = self.create_parameter( - attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( - low=-self._init_scale, high=self._init_scale)), - shape=[self._hidden_size * 4], - dtype="float32", - default_initializer=fluid.initializer.Constant(0.0)) - self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) - - def forward(self, input_embedding, init_hidden=None, init_cell=None): - self.cell_array = [] - self.hidden_array = [] - - for i in range(self._num_layers): - pre_hidden = fluid.layers.slice( - init_hidden, axes=[0], starts=[i], ends=[i + 1]) - pre_cell = fluid.layers.slice( - init_cell, axes=[0], starts=[i], ends=[i + 1]) - pre_hidden = fluid.layers.reshape( - pre_hidden, shape=[-1, self._hidden_size]) - pre_cell = fluid.layers.reshape( - pre_cell, shape=[-1, self._hidden_size]) - self.hidden_array.append(pre_hidden) - self.cell_array.append(pre_cell) - - res = [] - for index in range(self._num_steps): - self._input = fluid.layers.slice( - input_embedding, axes=[1], starts=[index], ends=[index + 1]) - self._input = fluid.layers.reshape( - self._input, 
shape=[-1, self._hidden_size]) - for k in range(self._num_layers): - pre_hidden = self.hidden_array[k] - pre_cell = self.cell_array[k] - weight_1 = self.weight_1_arr[k] - bias = self.bias_arr[k] - - nn = fluid.layers.concat([self._input, pre_hidden], 1) - gate_input = fluid.layers.matmul(x=nn, y=weight_1) - - gate_input = fluid.layers.elementwise_add(gate_input, bias) - i, j, f, o = fluid.layers.split( - gate_input, num_or_sections=4, dim=-1) - c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( - i) * fluid.layers.tanh(j) - m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) - self.hidden_array[k] = m - self.cell_array[k] = c - self._input = m - - if self._dropout is not None and self._dropout > 0.0: - self._input = fluid.layers.dropout( - self._input, - dropout_prob=self._dropout, - dropout_implementation='upscale_in_train') - res.append( - fluid.layers.reshape( - self._input, shape=[1, -1, self._hidden_size])) - real_res = fluid.layers.concat(res, 0) - real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) - last_hidden = fluid.layers.concat(self.hidden_array, 1) - last_hidden = fluid.layers.reshape( - last_hidden, shape=[-1, self._num_layers, self._hidden_size]) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) - last_cell = fluid.layers.concat(self.cell_array, 1) - last_cell = fluid.layers.reshape( - last_cell, shape=[-1, self._num_layers, self._hidden_size]) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) - return real_res, last_hidden, last_cell - - -class PtbModel(fluid.Layer): - def __init__(self, - name_scope, - hidden_size, - vocab_size, - num_layers=2, - num_steps=20, - init_scale=0.1, - dropout=None): - super(PtbModel, self).__init__() - self.hidden_size = hidden_size - self.vocab_size = vocab_size - self.init_scale = init_scale - self.num_layers = num_layers - self.num_steps = num_steps - self.dropout = dropout - self.simple_lstm_rnn = SimpleLSTMRNN( - self.full_name(), - hidden_size, - num_steps, - num_layers=num_layers, - init_scale=init_scale, - dropout=dropout) - self.embedding = Embedding( - size=[vocab_size, hidden_size], - dtype='float32', - is_sparse=False, - param_attr=fluid.ParamAttr( - name='embedding_para', - initializer=fluid.initializer.UniformInitializer( - low=-init_scale, high=init_scale))) - self.softmax_weight = self.create_parameter( - attr=fluid.ParamAttr(), - shape=[self.hidden_size, self.vocab_size], - dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( - low=-self.init_scale, high=self.init_scale)) - self.softmax_bias = self.create_parameter( - attr=fluid.ParamAttr(), - shape=[self.vocab_size], - dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( - low=-self.init_scale, high=self.init_scale)) - - def forward(self, input, label, init_hidden, init_cell): - init_h = fluid.layers.reshape( - init_hidden, shape=[self.num_layers, -1, self.hidden_size]) - - init_c = fluid.layers.reshape( - init_cell, shape=[self.num_layers, -1, self.hidden_size]) - - x_emb = self.embedding(input) - x_emb = fluid.layers.reshape( - x_emb, shape=[-1, self.num_steps, self.hidden_size]) - if self.dropout is not None and self.dropout > 0.0: - x_emb = fluid.layers.dropout( - x_emb, - dropout_prob=self.drop_out, - dropout_implementation='upscale_in_train') - rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, - init_c) - - rnn_out = fluid.layers.reshape( - rnn_out, shape=[-1, self.num_steps, self.hidden_size]) - projection = fluid.layers.matmul(rnn_out, 
self.softmax_weight) - projection = fluid.layers.elementwise_add(projection, self.softmax_bias) - projection = fluid.layers.reshape( - projection, shape=[-1, self.vocab_size]) - loss = fluid.layers.softmax_with_cross_entropy( - logits=projection, label=label, soft_label=False) - loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) - loss = fluid.layers.reduce_mean(loss, dim=[0]) - loss = fluid.layers.reduce_sum(loss) - - return loss, last_hidden, last_cell - - @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestSaveLoadBase(unittest.TestCase): From 741380c893e6464af56ee085637082ae45b79134 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Thu, 8 Apr 2021 13:46:20 +0000 Subject: [PATCH 07/10] fix unittest --- paddle/fluid/pybind/tensor_py.h | 2 +- .../tests/unittests/npu/test_save_load_npu.py | 184 +++++++++++++++++- 2 files changed, 183 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index d0b7cdb84af48..f1ec8c3ea67c7 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -752,7 +752,7 @@ inline py::array TensorToPyArray(const framework::Tensor &tensor, platform::CPUPlace(), py_arr.mutable_data(), p, tensor_buf_ptr, copy_bytes, reinterpret_cast(ctx).stream()); - ctx->Wait(); + ctx.Wait(); return py_arr; #else PADDLE_THROW(platform::errors::PermissionDenied( diff --git a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py index a031fc025ab84..483b76bd716f4 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py @@ -20,7 +20,7 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.dygraph.nn import Embedding +from paddle.nn import Embedding import paddle.fluid.framework as framework from paddle.fluid.optimizer import Adam from paddle.fluid.dygraph.base import to_variable @@ -31,11 +31,191 @@ import pickle import os import errno -from ..unittests.test_static_save_load import SimpleLSTMRNN, PtbModel paddle.enable_static() +class SimpleLSTMRNN(fluid.Layer): + def __init__(self, + name_scope, + hidden_size, + num_steps, + num_layers=2, + init_scale=0.1, + dropout=None): + super(SimpleLSTMRNN, self).__init__() + self._hidden_size = hidden_size + self._num_layers = num_layers + self._init_scale = init_scale + self._dropout = dropout + self._input = None + self._num_steps = num_steps + self.cell_array = [] + self.hidden_array = [] + + self.weight_1_arr = [] + self.weight_2_arr = [] + self.bias_arr = [] + self.mask_array = [] + + for i in range(self._num_layers): + weight_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 2, self._hidden_size * 4], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)) + self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) + bias_1 = self.create_parameter( + attr=fluid.ParamAttr( + initializer=fluid.initializer.UniformInitializer( + low=-self._init_scale, high=self._init_scale)), + shape=[self._hidden_size * 4], + dtype="float32", + default_initializer=fluid.initializer.Constant(0.0)) + self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) + + def forward(self, input_embedding, init_hidden=None, 
init_cell=None): + self.cell_array = [] + self.hidden_array = [] + + for i in range(self._num_layers): + pre_hidden = fluid.layers.slice( + init_hidden, axes=[0], starts=[i], ends=[i + 1]) + pre_cell = fluid.layers.slice( + init_cell, axes=[0], starts=[i], ends=[i + 1]) + pre_hidden = fluid.layers.reshape( + pre_hidden, shape=[-1, self._hidden_size]) + pre_cell = fluid.layers.reshape( + pre_cell, shape=[-1, self._hidden_size]) + self.hidden_array.append(pre_hidden) + self.cell_array.append(pre_cell) + + res = [] + for index in range(self._num_steps): + self._input = fluid.layers.slice( + input_embedding, axes=[1], starts=[index], ends=[index + 1]) + self._input = fluid.layers.reshape( + self._input, shape=[-1, self._hidden_size]) + for k in range(self._num_layers): + pre_hidden = self.hidden_array[k] + pre_cell = self.cell_array[k] + weight_1 = self.weight_1_arr[k] + bias = self.bias_arr[k] + + nn = fluid.layers.concat([self._input, pre_hidden], 1) + gate_input = fluid.layers.matmul(x=nn, y=weight_1) + + gate_input = fluid.layers.elementwise_add(gate_input, bias) + i, j, f, o = fluid.layers.split( + gate_input, num_or_sections=4, dim=-1) + c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( + i) * fluid.layers.tanh(j) + m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) + self.hidden_array[k] = m + self.cell_array[k] = c + self._input = m + + if self._dropout is not None and self._dropout > 0.0: + self._input = fluid.layers.dropout( + self._input, + dropout_prob=self._dropout, + dropout_implementation='upscale_in_train') + res.append( + fluid.layers.reshape( + self._input, shape=[1, -1, self._hidden_size])) + real_res = fluid.layers.concat(res, 0) + real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) + last_hidden = fluid.layers.concat(self.hidden_array, 1) + last_hidden = fluid.layers.reshape( + last_hidden, shape=[-1, self._num_layers, self._hidden_size]) + last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) + last_cell = fluid.layers.concat(self.cell_array, 1) + last_cell = fluid.layers.reshape( + last_cell, shape=[-1, self._num_layers, self._hidden_size]) + last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) + return real_res, last_hidden, last_cell + + +class PtbModel(fluid.Layer): + def __init__(self, + name_scope, + hidden_size, + vocab_size, + num_layers=2, + num_steps=20, + init_scale=0.1, + dropout=None): + super(PtbModel, self).__init__() + self.hidden_size = hidden_size + self.vocab_size = vocab_size + self.init_scale = init_scale + self.num_layers = num_layers + self.num_steps = num_steps + self.dropout = dropout + self.simple_lstm_rnn = SimpleLSTMRNN( + self.full_name(), + hidden_size, + num_steps, + num_layers=num_layers, + init_scale=init_scale, + dropout=dropout) + self.embedding = Embedding( + num_embeddings=vocab_size, + embedding_dim=hidden_size, + weight_attr=fluid.ParamAttr( + name='embedding_para', + initializer=fluid.initializer.UniformInitializer( + low=-init_scale, high=init_scale))) + self.softmax_weight = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.hidden_size, self.vocab_size], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self.init_scale, high=self.init_scale)) + self.softmax_bias = self.create_parameter( + attr=fluid.ParamAttr(), + shape=[self.vocab_size], + dtype="float32", + default_initializer=fluid.initializer.UniformInitializer( + low=-self.init_scale, high=self.init_scale)) + + def forward(self, input, label, init_hidden, init_cell): + init_h = 
fluid.layers.reshape( + init_hidden, shape=[self.num_layers, -1, self.hidden_size]) + + init_c = fluid.layers.reshape( + init_cell, shape=[self.num_layers, -1, self.hidden_size]) + + input = fluid.layers.cast(input, "int32") + x_emb = self.embedding(input) + x_emb = fluid.layers.reshape( + x_emb, shape=[-1, self.num_steps, self.hidden_size]) + if self.dropout is not None and self.dropout > 0.0: + x_emb = fluid.layers.dropout( + x_emb, + dropout_prob=self.drop_out, + dropout_implementation='upscale_in_train') + rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, + init_c) + + rnn_out = fluid.layers.reshape( + rnn_out, shape=[-1, self.num_steps, self.hidden_size]) + projection = fluid.layers.matmul(rnn_out, self.softmax_weight) + projection = fluid.layers.elementwise_add(projection, self.softmax_bias) + projection = fluid.layers.reshape( + projection, shape=[-1, self.vocab_size]) + loss = fluid.layers.softmax_with_cross_entropy( + logits=projection, label=label, soft_label=False) + loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps]) + loss = fluid.layers.reduce_mean(loss, dim=[0]) + loss = fluid.layers.reduce_sum(loss) + + return loss, last_hidden, last_cell + + @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestSaveLoadBase(unittest.TestCase): From 9b3e05393a3c2a6ac1d7f7f870647d311167d5d0 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Fri, 9 Apr 2021 06:38:19 +0000 Subject: [PATCH 08/10] fix review comment --- paddle/fluid/operators/load_combine_op_npu.cc | 2 - paddle/fluid/operators/load_op_npu.cc | 4 +- paddle/fluid/operators/save_combine_op_npu.cc | 2 - paddle/fluid/operators/save_op_npu.cc | 2 - .../tests/unittests/npu/test_save_load_npu.py | 1380 +---------------- .../tests/unittests/test_static_save_load.py | 82 +- 6 files changed, 93 insertions(+), 1379 deletions(-) diff --git a/paddle/fluid/operators/load_combine_op_npu.cc b/paddle/fluid/operators/load_combine_op_npu.cc index 11f0bdaf76e55..4b9b96c23b0b7 100644 --- a/paddle/fluid/operators/load_combine_op_npu.cc +++ b/paddle/fluid/operators/load_combine_op_npu.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/operators/load_combine_op.h" namespace ops = paddle::operators; @@ -24,4 +23,3 @@ REGISTER_OP_NPU_KERNEL( ops::LoadCombineOpKernel, ops::LoadCombineOpKernel, ops::LoadCombineOpKernel); -#endif diff --git a/paddle/fluid/operators/load_op_npu.cc b/paddle/fluid/operators/load_op_npu.cc index 17384c40974f5..1f53280345831 100644 --- a/paddle/fluid/operators/load_op_npu.cc +++ b/paddle/fluid/operators/load_op_npu.cc @@ -1,4 +1,4 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/operators/load_op.h" namespace ops = paddle::operators; @@ -23,4 +22,3 @@ REGISTER_OP_NPU_KERNEL( ops::LoadOpKernel, ops::LoadOpKernel, ops::LoadOpKernel); -#endif diff --git a/paddle/fluid/operators/save_combine_op_npu.cc b/paddle/fluid/operators/save_combine_op_npu.cc index 30905bf00bc2e..1fb136a5110db 100644 --- a/paddle/fluid/operators/save_combine_op_npu.cc +++ b/paddle/fluid/operators/save_combine_op_npu.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/operators/save_combine_op.h" namespace ops = paddle::operators; @@ -23,4 +22,3 @@ REGISTER_OP_NPU_KERNEL( ops::SaveCombineOpKernel, ops::SaveCombineOpKernel, ops::SaveCombineOpKernel); -#endif diff --git a/paddle/fluid/operators/save_op_npu.cc b/paddle/fluid/operators/save_op_npu.cc index 89458ee735fd8..90db1a0bb85d6 100644 --- a/paddle/fluid/operators/save_op_npu.cc +++ b/paddle/fluid/operators/save_op_npu.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/operators/save_op.h" #include "paddle/fluid/platform/float16.h" @@ -27,4 +26,3 @@ REGISTER_OP_NPU_KERNEL( ops::SaveOpKernel, ops::SaveOpKernel); -#endif diff --git a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py index 483b76bd716f4..f73a190254657 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -31,114 +31,11 @@ import pickle import os import errno +from test_static_save_load import * paddle.enable_static() -class SimpleLSTMRNN(fluid.Layer): - def __init__(self, - name_scope, - hidden_size, - num_steps, - num_layers=2, - init_scale=0.1, - dropout=None): - super(SimpleLSTMRNN, self).__init__() - self._hidden_size = hidden_size - self._num_layers = num_layers - self._init_scale = init_scale - self._dropout = dropout - self._input = None - self._num_steps = num_steps - self.cell_array = [] - self.hidden_array = [] - - self.weight_1_arr = [] - self.weight_2_arr = [] - self.bias_arr = [] - self.mask_array = [] - - for i in range(self._num_layers): - weight_1 = self.create_parameter( - attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( - low=-self._init_scale, high=self._init_scale)), - shape=[self._hidden_size * 2, self._hidden_size * 4], - dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( - low=-self._init_scale, high=self._init_scale)) - self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) - bias_1 = self.create_parameter( - attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( - low=-self._init_scale, high=self._init_scale)), - shape=[self._hidden_size * 4], - dtype="float32", - default_initializer=fluid.initializer.Constant(0.0)) - self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) - - def forward(self, input_embedding, init_hidden=None, init_cell=None): - self.cell_array = [] - self.hidden_array = [] - - for i in range(self._num_layers): - pre_hidden = fluid.layers.slice( - init_hidden, axes=[0], starts=[i], ends=[i + 1]) - pre_cell = fluid.layers.slice( - init_cell, axes=[0], starts=[i], ends=[i + 1]) - pre_hidden = fluid.layers.reshape( - pre_hidden, shape=[-1, self._hidden_size]) - pre_cell = fluid.layers.reshape( - pre_cell, shape=[-1, self._hidden_size]) - self.hidden_array.append(pre_hidden) - self.cell_array.append(pre_cell) - - res = [] - for index in range(self._num_steps): - self._input = fluid.layers.slice( - input_embedding, axes=[1], starts=[index], ends=[index + 1]) - self._input = fluid.layers.reshape( - self._input, shape=[-1, self._hidden_size]) - for k in range(self._num_layers): - pre_hidden = self.hidden_array[k] - pre_cell = self.cell_array[k] - weight_1 = self.weight_1_arr[k] - bias = self.bias_arr[k] - - nn = fluid.layers.concat([self._input, pre_hidden], 1) - gate_input = fluid.layers.matmul(x=nn, y=weight_1) - - gate_input = fluid.layers.elementwise_add(gate_input, bias) - i, j, f, o = fluid.layers.split( - gate_input, num_or_sections=4, dim=-1) - c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid( - i) * fluid.layers.tanh(j) - m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o) - self.hidden_array[k] = m - self.cell_array[k] = c - self._input = m - - if self._dropout is not None and self._dropout > 0.0: - self._input = fluid.layers.dropout( - self._input, - dropout_prob=self._dropout, - dropout_implementation='upscale_in_train') - res.append( - fluid.layers.reshape( - self._input, shape=[1, -1, self._hidden_size])) - real_res = fluid.layers.concat(res, 0) - real_res = fluid.layers.transpose(x=real_res, perm=[1, 0, 2]) - last_hidden = fluid.layers.concat(self.hidden_array, 1) - last_hidden = fluid.layers.reshape( - last_hidden, shape=[-1, self._num_layers, self._hidden_size]) - last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2]) - last_cell = fluid.layers.concat(self.cell_array, 1) - last_cell = fluid.layers.reshape( - last_cell, 
shape=[-1, self._num_layers, self._hidden_size]) - last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2]) - return real_res, last_hidden, last_cell - - class PtbModel(fluid.Layer): def __init__(self, name_scope, @@ -218,1277 +115,74 @@ def forward(self, input, label, init_hidden, init_cell): @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestSaveLoadBase(unittest.TestCase): - def test_ptb_rnn_cpu_float32(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with new_program_scope(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') - - static_loss, static_last_hidden, static_last_cell = ptb_model( - x, y, init_hidden, init_cell) - sgd.minimize(static_loss) - static_param_updated = dict() - static_param_init = dict() - - out = exe.run(framework.default_startup_program()) - - static_loss_value = None - static_last_cell_value = None - static_last_hidden_value = None - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - x_data = x_data.reshape((-1, num_steps, 1)) - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - fetch_list = [static_loss, static_last_hidden, static_last_cell] - out = exe.run(fluid.default_main_program(), - feed={ - "x": x_data, - "y": y_data, - "init_hidden": init_hidden_data, - "init_cell": init_cell_data - }, - fetch_list=fetch_list) - static_loss_value = out[0] - static_last_hidden_value = out[1] - static_last_cell_value = out[2] - - # get value before save - main_program = framework.default_main_program() - base_map = {} - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been update - self.assertTrue(np.sum(np.abs(t)) != 0) - base_map[var.name] = t - - fluid.save(main_program, "./test_1") - - # set var to zero - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - fluid.load(main_program, "./test_1.pdparams", exe) - - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - 
base_t = base_map[var.name] - self.assertTrue(np.array_equal(new_t, base_t)) - - -@unittest.skipIf(not paddle.is_compiled_with_npu(), - "core is not compiled with NPU") -class TestSaveLoadPartial(unittest.TestCase): - def test_ptb_rnn_cpu_float32(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with new_program_scope(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') - - static_loss, static_last_hidden, static_last_cell = ptb_model( - x, y, init_hidden, init_cell) - - test_program = fluid.default_main_program().clone(for_test=True) - - add_1 = fluid.layers.fc(static_last_hidden, - size=hidden_size, - num_flatten_dims=2, - bias_attr=False) - - sgd.minimize(static_loss) - static_param_updated = dict() - static_param_init = dict() - - out = exe.run(framework.default_startup_program()) - - static_loss_value = None - static_last_cell_value = None - static_last_hidden_value = None - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - x_data = x_data.reshape((-1, num_steps, 1)) - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - fetch_list = [static_loss, static_last_hidden, static_last_cell] - out = exe.run(fluid.default_main_program(), - feed={ - "x": x_data, - "y": y_data, - "init_hidden": init_hidden_data, - "init_cell": init_cell_data - }, - fetch_list=fetch_list) - static_loss_value = out[0] - static_last_hidden_value = out[1] - static_last_cell_value = out[2] - - # get value before save - main_program = framework.default_main_program() - base_map = {} - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been update - self.assertTrue(np.sum(np.abs(t)) != 0) - base_map[var.name] = t - - fluid.save(main_program, "./test_1") - - # set var to zero - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - fluid.load(test_program, "./test_1.pdopt", None) - - for var in test_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - base_t = base_map[var.name] - 
self.assertTrue(np.array_equal(new_t, base_t)) - fluid.load(test_program, "./test_1.pdmodel", None) +class TestNPUSaveLoadBase(TestSaveLoadBase): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestSaveLoadSetStateDict(unittest.TestCase): - def test_ptb_rnn_cpu_float32(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with new_program_scope(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') - - static_loss, static_last_hidden, static_last_cell = ptb_model( - x, y, init_hidden, init_cell) - sgd.minimize(static_loss) - static_param_updated = dict() - static_param_init = dict() - - out = exe.run(framework.default_startup_program()) - - static_loss_value = None - static_last_cell_value = None - static_last_hidden_value = None - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - x_data = x_data.reshape((-1, num_steps, 1)) - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - fetch_list = [static_loss, static_last_hidden, static_last_cell] - out = exe.run(fluid.default_main_program(), - feed={ - "x": x_data, - "y": y_data, - "init_hidden": init_hidden_data, - "init_cell": init_cell_data - }, - fetch_list=fetch_list) - static_loss_value = out[0] - static_last_hidden_value = out[1] - static_last_cell_value = out[2] - - # get value before save - main_program = framework.default_main_program() - base_map = {} - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been update - self.assertTrue(np.sum(np.abs(t)) != 0) - base_map[var.name] = t - - fluid.save(main_program, "./test_1") - - # set var to zero - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - fluid.load(main_program, "./test_1", exe) - - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - base_t = base_map[var.name] - 
self.assertTrue(np.array_equal(new_t, base_t)) +class TestNPUSaveLoadPartial(TestSaveLoadPartial): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestProgramStatePartial(unittest.TestCase): - def test_ptb_rnn_cpu_float32(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with new_program_scope(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') - - static_loss, static_last_hidden, static_last_cell = ptb_model( - x, y, init_hidden, init_cell) - - test_program = fluid.default_main_program().clone(for_test=True) - - add_1 = fluid.layers.fc(static_last_hidden, - size=hidden_size, - num_flatten_dims=2, - bias_attr=False) - - sgd.minimize(static_loss) - static_param_updated = dict() - static_param_init = dict() - - out = exe.run(framework.default_startup_program()) - - static_loss_value = None - static_last_cell_value = None - static_last_hidden_value = None - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - x_data = x_data.reshape((-1, num_steps, 1)) - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - fetch_list = [static_loss, static_last_hidden, static_last_cell] - out = exe.run(fluid.default_main_program(), - feed={ - "x": x_data, - "y": y_data, - "init_hidden": init_hidden_data, - "init_cell": init_cell_data - }, - fetch_list=fetch_list) - static_loss_value = out[0] - static_last_hidden_value = out[1] - static_last_cell_value = out[2] - - # get value before save - main_program = framework.default_main_program() - base_map = {} - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been update - self.assertTrue(np.sum(np.abs(t)) != 0) - base_map[var.name] = t - - fluid.save(main_program, os.path.join('some_dir', 'test_1')) - - # set var to zero - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - #fluid.load(test_program, "./test_1", None ) - program_state = fluid.load_program_state( - 
os.path.join('some_dir', 'test_1')) - - program_state_1 = fluid.load_program_state( - os.path.join('some_dir', 'test_1.pdparams')) - - program_state_2 = fluid.load_program_state( - os.path.join('some_dir', 'test_1.pdopt')) - - program_state_3 = fluid.load_program_state( - os.path.join('some_dir', 'test_1.pdmodel')) - - fluid.set_program_state(test_program, program_state) - - for var in test_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - base_t = base_map[var.name] - self.assertTrue(np.array_equal(new_t, base_t)) - - # check 1 - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - fluid.set_program_state(test_program, program_state_1) - - for var in test_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - base_t = base_map[var.name] - self.assertTrue(np.array_equal(new_t, base_t)) - - # check 2 - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - fluid.set_program_state(test_program, program_state_2) - - for var in test_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - base_t = base_map[var.name] - self.assertTrue(np.array_equal(new_t, base_t)) - - # check 3 - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - fluid.set_program_state(test_program, program_state_3) - - for var in test_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - base_t = base_map[var.name] - self.assertTrue(np.array_equal(new_t, base_t)) +class TestNPUSaveLoadSetStateDict(TestSaveLoadSetStateDict): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestVariableInit(unittest.TestCase): - def test_variable_init(self): - - x = fluid.data(name="x", shape=[10, 10], dtype='float32') - y = fluid.layers.fc(x, 10) - z = fluid.layers.fc(y, 10) - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - - fluid.save(fluid.default_main_program(), "./test_path") - - def set_var(var, ndarray): - t = 
var.get_tensor() - p = t._place() - if p.is_cpu_place(): - place = paddle.fluid.CPUPlace() - else: - place = paddle.NPUPlace(0) - - t.set(ndarray, place) - - program = fluid.default_main_program() - new_scope = fluid.core.Scope() - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( +class TestNPUProgramStatePartial(TestProgramStatePartial): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_npu( ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - parameter_list = list( - filter(fluid.io.is_parameter, program.list_vars())) - - fluid.core._create_loaded_parameter(parameter_list, new_scope, - exe._default_executor) - parameter_file_name = "./test_path.pdparams" - with open(parameter_file_name, 'rb') as f: - load_dict = pickle.load(f) - - for v in parameter_list: - assert v.name in load_dict, \ - "Can not find [{}] in model file [{}]".format( - v.name, parameter_file_name) - new_v = new_scope.find_var(v.name) - set_var(new_v, load_dict[v.name]) - - opt_list = list( - filter(fluid.io.is_belong_to_optimizer, program.list_vars())) - - fluid.core._create_loaded_parameter(opt_list, new_scope, - exe._default_executor) - opt_file_name = "./test_path.pdopt" - with open(opt_file_name, 'rb') as f: - load_dict = pickle.load(f) - - for v in opt_list: - assert v.name in load_dict, \ - "Can not find [{}] in model file [{}]".format( - v.name, opt_file_name) - - new_v = new_scope.find_var(v.name) - set_var(new_v, load_dict[v.name]) - - base_map = {} - for var in program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been update - base_map[var.name] = t - - for var in program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(new_scope.find_var(var.name).get_tensor()) - base_t = base_map[var.name] - - self.assertTrue(np.array_equal(new_t, base_t)) @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestLoadFromOldInterface(unittest.TestCase): - def setUp(self): - if os.path.exists("test_path.pdparams"): - os.remove("test_path.pdparams") - - if os.path.exists("test_static_load_var_list.pdparams"): - os.remove("test_static_load_var_list.pdparams") - - def test_load_from_old_interface(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with new_program_scope(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') - - static_loss, static_last_hidden, static_last_cell = ptb_model( - x, y, init_hidden, init_cell) - - test_clone_program = fluid.default_main_program().clone() - sgd.minimize(static_loss) - static_param_updated = dict() - static_param_init = dict() - - out = 
exe.run(framework.default_startup_program()) - - static_loss_value = None - static_last_cell_value = None - static_last_hidden_value = None - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - x_data = x_data.reshape((-1, num_steps, 1)) - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - fetch_list = [static_loss, static_last_hidden, static_last_cell] - out = exe.run(fluid.default_main_program(), - feed={ - "x": x_data, - "y": y_data, - "init_hidden": init_hidden_data, - "init_cell": init_cell_data - }, - fetch_list=fetch_list) - static_loss_value = out[0] - static_last_hidden_value = out[1] - static_last_cell_value = out[2] - - # get value before save - main_program = framework.default_main_program() - base_map = {} - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been update - self.assertTrue(np.sum(np.abs(t)) != 0) - base_map[var.name] = t - - #fluid.save(main_program, "./test_1") - fluid.io.save_persistables(exe, "test_path", main_program) - - # set var to zero - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - fluid.load(main_program, "test_path", exe) - - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - base_t = base_map[var.name] - self.assertTrue(np.array_equal(new_t, base_t)) - - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - old_shape = np.array(ten).shape - new_shape = [e + 10 for e in old_shape] - - var.desc.set_shape(new_shape) - with self.assertRaises(RuntimeError): - fluid.load(main_program, "test_path", exe) - - # check unused parameter - - fluid.load(test_clone_program, "test_path", exe) - - def test_load_from_old_interface_var_list(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with new_program_scope(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') - - static_loss, static_last_hidden, 
static_last_cell = ptb_model( - x, y, init_hidden, init_cell) - - test_clone_program = fluid.default_main_program().clone() - sgd.minimize(static_loss) - static_param_updated = dict() - static_param_init = dict() - - out = exe.run(framework.default_startup_program()) - - static_loss_value = None - static_last_cell_value = None - static_last_hidden_value = None - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - x_data = x_data.reshape((-1, num_steps, 1)) - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - fetch_list = [static_loss, static_last_hidden, static_last_cell] - out = exe.run(fluid.default_main_program(), - feed={ - "x": x_data, - "y": y_data, - "init_hidden": init_hidden_data, - "init_cell": init_cell_data - }, - fetch_list=fetch_list) - static_loss_value = out[0] - static_last_hidden_value = out[1] - static_last_cell_value = out[2] - - # get value before save - main_program = framework.default_main_program() - base_map = {} - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been update - self.assertTrue(np.sum(np.abs(t)) != 0) - base_map[var.name] = t - - #fluid.save(main_program, "./test_1") - fluid.io.save_persistables(exe, "test_static_load_var_list", - main_program) - - # set var to zero - var_list = [] - for i, var in enumerate(main_program.list_vars()): - if isinstance(var, framework.Parameter) or var.persistable: - if i % 2 == 0: - var_list.append(var) - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - fluid.load(main_program, "test_static_load_var_list", exe, var_list) - var_list_names = [var.name for var in var_list] - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - if var.name in var_list_names: - # loaded vars - base_t = base_map[var.name] - self.assertTrue(np.array_equal(new_t, base_t)) - else: - #not loaded vars - self.assertTrue(np.sum(np.abs(new_t)) == 0) +class TestNPUVariableInit(TestVariableInit): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): - def test_load_from_old_interface(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with new_program_scope(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - sgd = 
Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') - - static_loss, static_last_hidden, static_last_cell = ptb_model( - x, y, init_hidden, init_cell) - sgd.minimize(static_loss) - static_param_updated = dict() - static_param_init = dict() - - out = exe.run(framework.default_startup_program()) - - static_loss_value = None - static_last_cell_value = None - static_last_hidden_value = None - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - x_data = x_data.reshape((-1, num_steps, 1)) - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - fetch_list = [static_loss, static_last_hidden, static_last_cell] - out = exe.run(fluid.default_main_program(), - feed={ - "x": x_data, - "y": y_data, - "init_hidden": init_hidden_data, - "init_cell": init_cell_data - }, - fetch_list=fetch_list) - static_loss_value = out[0] - static_last_hidden_value = out[1] - static_last_cell_value = out[2] - - # get value before save - main_program = framework.default_main_program() - base_map = {} - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been update - self.assertTrue(np.sum(np.abs(t)) != 0) - base_map[var.name] = t - - #fluid.save(main_program, "./test_1") - fluid.io.save_persistables( - exe, "test_path", main_program, filename="model_single") - - # set var to zero - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - file_model_path = os.path.join("test_path", "model_single") - fluid.load(main_program, file_model_path, exe, - fluid.io.get_program_persistable_vars(main_program)) - - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - base_t = base_map[var.name] - self.assertTrue(np.array_equal(new_t, base_t)) - - # test exception - # change shape - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - old_shape = np.array(ten).shape - new_shape = [e + 10 for e in old_shape] - - var.desc.set_shape(new_shape) - - with self.assertRaises(RuntimeError): - fluid.load(main_program, file_model_path, exe, - fluid.io.get_program_persistable_vars(main_program)) - - fluid.io.save_params( - exe, "test_path", main_program, filename="model_single") - with self.assertRaises(RuntimeError): - fluid.load(main_program, file_model_path, exe, - fluid.io.get_program_persistable_vars(main_program)) - - # check when executor is None - with 
self.assertRaises(ValueError): - fluid.load(main_program, file_model_path, None, - fluid.io.get_program_persistable_vars(main_program)) - - # check when var list is None - with self.assertRaises(ValueError): - fluid.load(main_program, file_model_path, exe, None) - - # check save params, load var_list = get_program_persistable_vars - with self.assertRaises(RuntimeError): - temp_var = framework.Variable( - main_program.global_block(), - shape=[1], - name="test_temp_var") - all_var_list = list(main_program.list_vars()) - fluid.load(main_program, file_model_path, exe, - all_var_list + [temp_var]) +class TestNPULoadFromOldInterface(TestLoadFromOldInterface): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestProgramStateOldSave(unittest.TestCase): - def test_ptb_rnn_cpu_float32(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale = 0.1 - batch_size = 4 - batch_num = 200 - - with new_program_scope(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') - - static_loss, static_last_hidden, static_last_cell = ptb_model( - x, y, init_hidden, init_cell) - - test_program = fluid.default_main_program().clone(for_test=True) - - add_1 = fluid.layers.fc(static_last_hidden, - size=hidden_size, - num_flatten_dims=2, - bias_attr=False) - - sgd.minimize(static_loss) - static_param_updated = dict() - static_param_init = dict() - - out = exe.run(framework.default_startup_program()) - - static_loss_value = None - static_last_cell_value = None - static_last_hidden_value = None - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - x_data = x_data.reshape((-1, num_steps, 1)) - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - fetch_list = [static_loss, static_last_hidden, static_last_cell] - out = exe.run(fluid.default_main_program(), - feed={ - "x": x_data, - "y": y_data, - "init_hidden": init_hidden_data, - "init_cell": init_cell_data - }, - fetch_list=fetch_list) - static_loss_value = out[0] - static_last_hidden_value = out[1] - static_last_cell_value = out[2] - - # get value before save - main_program = framework.default_main_program() - base_map = {} - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been update - self.assertTrue(np.sum(np.abs(t)) != 0) - base_map[var.name] = t - - fluid.io.save_persistables(exe, 
"test_program_1", main_program) - - # set var to zero - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - # case 1: load basic - program_state = fluid.load_program_state("test_program_1") - fluid.set_program_state(main_program, program_state) - self.check_in_static(main_program, base_map) - - # case 2: load with no need file - def symlink_force(target, link_name): - try: - os.symlink(target, link_name) - except OSError as e: - if e.errno == errno.EEXIST: - os.remove(link_name) - os.symlink(target, link_name) - else: - raise e - - orig_filepath = './test_program_1/fc_0.w_0' - symlink_filepath = './test_program_1/link_fc_0.w_0' - # create a needless link file for coverage - symlink_force(orig_filepath, symlink_filepath) - program_state = fluid.load_program_state("test_program_1") - fluid.set_program_state(main_program, program_state) - self.check_in_static(main_program, base_map) - - # case 3: load with var_list - program_state = fluid.load_program_state( - "test_program_1", main_program.all_parameters()) - fluid.set_program_state(main_program, program_state) - self.check_in_static(main_program, base_map) - - def check_in_static(self, main_program, base_map): - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - base_t = base_map[var.name] - self.assertTrue(np.array_equal(new_t, base_t)) +class TestNPULoadFromOldInterfaceSingleFile(TestLoadFromOldInterfaceSingleFile): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestStaticSaveLoadLargeParameters(unittest.TestCase): - def test_large_parameters_static_save(self): - # enable static mode - paddle.enable_static() - LARGE_PARAM = 2**26 - with new_program_scope(): - # create network - x = paddle.static.data( - name="static_save_load_large_x", - shape=[None, 10], - dtype='float32') - z = paddle.static.nn.fc(x, LARGE_PARAM) - place = paddle.CPUPlace() - exe = paddle.static.Executor(place) - exe.run(paddle.static.default_startup_program()) - prog = paddle.static.default_main_program() - - inputs = np.random.randn(1, 10).astype("float32") - result_z = exe.run(program=prog, - feed={"static_save_load_large_x": inputs}, - fetch_list=[z.name]) - path = "test_static_save_load_large_param/static_save" - paddle.fluid.save(prog, path) - - paddle.fluid.load(prog, path) - result_load = exe.run(program=prog, - feed={"static_save_load_large_x": inputs}, - fetch_list=[z.name]) - # compare results before and after saving - self.assertTrue( - np.sum(np.abs(result_z[0] - result_load[0])) < 1e-15) +class TestNPUProgramStateOldSave(TestProgramStateOldSave): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") -class TestProgramStateOldSaveSingleModel(unittest.TestCase): - def test_ptb_rnn_cpu_float32(self): - seed = 90 - hidden_size = 10 - vocab_size = 1000 - num_layers = 1 - num_steps = 3 - init_scale 
= 0.1 - batch_size = 4 - batch_num = 200 - - with new_program_scope(): - fluid.default_startup_program().random_seed = seed - fluid.default_main_program().random_seed = seed - ptb_model = PtbModel( - "ptb_model", - hidden_size=hidden_size, - vocab_size=vocab_size, - num_layers=num_layers, - num_steps=num_steps, - init_scale=init_scale) - - place = fluid.CPUPlace() if not core.is_compiled_with_npu( - ) else paddle.NPUPlace(0) - exe = fluid.Executor(place) - sgd = Adam(learning_rate=1e-3) - x = fluid.layers.data( - name="x", shape=[-1, num_steps], dtype='int64') - y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32') - init_hidden = fluid.layers.data( - name="init_hidden", shape=[1], dtype='float32') - init_cell = fluid.layers.data( - name="init_cell", shape=[1], dtype='float32') - - static_loss, static_last_hidden, static_last_cell = ptb_model( - x, y, init_hidden, init_cell) - - test_program = fluid.default_main_program().clone(for_test=True) - - add_1 = fluid.layers.fc(static_last_hidden, - size=hidden_size, - num_flatten_dims=2, - bias_attr=False) - - sgd.minimize(static_loss) - static_param_updated = dict() - static_param_init = dict() - - out = exe.run(framework.default_startup_program()) - - static_loss_value = None - static_last_cell_value = None - static_last_hidden_value = None - for i in range(batch_num): - x_data = np.arange(12).reshape(4, 3).astype('int64') - y_data = np.arange(1, 13).reshape(4, 3).astype('int64') - x_data = x_data.reshape((-1, num_steps, 1)) - y_data = y_data.reshape((-1, 1)) - init_hidden_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - init_cell_data = np.zeros( - (num_layers, batch_size, hidden_size), dtype='float32') - fetch_list = [static_loss, static_last_hidden, static_last_cell] - out = exe.run(fluid.default_main_program(), - feed={ - "x": x_data, - "y": y_data, - "init_hidden": init_hidden_data, - "init_cell": init_cell_data - }, - fetch_list=fetch_list) - static_loss_value = out[0] - static_last_hidden_value = out[1] - static_last_cell_value = out[2] - - # get value before save - main_program = framework.default_main_program() - base_map = {} - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been update - self.assertTrue(np.sum(np.abs(t)) != 0) - base_map[var.name] = t - - fluid.io.save_persistables( - exe, "test_program_2", main_program, filename="model_1") - - # set var to zero - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - ten = fluid.global_scope().find_var(var.name).get_tensor() - ten.set(np.zeros_like(np.array(ten)), place) - - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - # make sure all the paramerter or optimizer var have been set to zero - self.assertTrue(np.sum(np.abs(new_t)) == 0) - - #fluid.load(test_program, "./test_1", None ) - program_state = fluid.load_program_state( - os.path.join("test_program_2", "model_1"), - var_list=fluid.io.get_program_persistable_vars(main_program)) - fluid.set_program_state(main_program, program_state) - - for var in main_program.list_vars(): - if isinstance(var, framework.Parameter) or var.persistable: - new_t = np.array(fluid.global_scope().find_var(var.name) - .get_tensor()) - base_t = base_map[var.name] - self.assertTrue(np.array_equal(new_t, base_t)) - - with self.assertRaises(ValueError): - 
fluid.load_program_state( - os.path.join("test_program_2", "model_1")) - - with self.assertRaises(TypeError): - fluid.load_program_state( - os.path.join("test_program_2", "model_1"), - var_list=["str"]) - - with self.assertRaises(RuntimeError): - fluid.load_program_state( - os.path.join("test_program_2", "model_1"), - var_list=[ - main_program.global_block().create_var( - name="fake_var_name", persistable=True) - ]) +class TestNPUProgramStateOldSaveSingleModel(TestProgramStateOldSaveSingleModel): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_npu( + ) else paddle.NPUPlace(0) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index 0f4fca6d7f848..aba7d4883ae9a 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -213,6 +213,10 @@ def forward(self, input, label, init_hidden, init_cell): class TestSaveLoadBase(unittest.TestCase): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + def test_ptb_rnn_cpu_float32(self): seed = 90 hidden_size = 10 @@ -234,8 +238,7 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) x = fluid.layers.data( @@ -314,6 +317,10 @@ def test_ptb_rnn_cpu_float32(self): class TestSaveLoadPartial(unittest.TestCase): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + def test_ptb_rnn_cpu_float32(self): seed = 90 hidden_size = 10 @@ -335,8 +342,7 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) x = fluid.layers.data( @@ -424,6 +430,10 @@ def test_ptb_rnn_cpu_float32(self): class TestSaveLoadSetStateDict(unittest.TestCase): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + def test_ptb_rnn_cpu_float32(self): seed = 90 hidden_size = 10 @@ -445,8 +455,7 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) x = fluid.layers.data( @@ -525,6 +534,10 @@ def test_ptb_rnn_cpu_float32(self): class TestProgramStatePartial(unittest.TestCase): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + def test_ptb_rnn_cpu_float32(self): seed = 90 hidden_size = 10 @@ -546,8 +559,7 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) x = fluid.layers.data( @@ -707,14 +719,17 @@ def test_ptb_rnn_cpu_float32(self): class TestVariableInit(unittest.TestCase): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + def test_variable_init(self): x = 
fluid.data(name="x", shape=[10, 10], dtype='float32') y = fluid.layers.fc(x, 10) z = fluid.layers.fc(y, 10) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) @@ -727,6 +742,8 @@ def set_var(var, ndarray): place = paddle.fluid.CPUPlace() elif p.is_cuda_pinned_place(): place = paddle.fluid.CUDAPinnedPlace() + elif p.is_npu_place(): + place = paddle.NPUPlace(0) else: p = paddle.fluid.core.Place() p.set_place(t._place()) @@ -737,8 +754,7 @@ def set_var(var, ndarray): program = fluid.default_main_program() new_scope = fluid.core.Scope() - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) parameter_list = list( filter(fluid.io.is_parameter, program.list_vars())) @@ -797,6 +813,10 @@ def setUp(self): if os.path.exists("test_static_load_var_list.pdparams"): os.remove("test_static_load_var_list.pdparams") + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + def test_load_from_old_interface(self): seed = 90 hidden_size = 10 @@ -818,8 +838,7 @@ def test_load_from_old_interface(self): num_steps=num_steps, init_scale=init_scale) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) x = fluid.layers.data( @@ -934,8 +953,7 @@ def test_load_from_old_interface_var_list(self): num_steps=num_steps, init_scale=init_scale) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) x = fluid.layers.data( @@ -1026,6 +1044,10 @@ def test_load_from_old_interface_var_list(self): class TestLoadFromOldInterfaceSingleFile(unittest.TestCase): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + def test_load_from_old_interface(self): seed = 90 hidden_size = 10 @@ -1047,8 +1069,7 @@ def test_load_from_old_interface(self): num_steps=num_steps, init_scale=init_scale) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) x = fluid.layers.data( @@ -1170,6 +1191,10 @@ def test_load_from_old_interface(self): class TestProgramStateOldSave(unittest.TestCase): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + def test_ptb_rnn_cpu_float32(self): seed = 90 hidden_size = 10 @@ -1191,8 +1216,7 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) x = fluid.layers.data( @@ -1298,11 +1322,12 @@ def symlink_force(target, link_name): fluid.set_program_state(main_program, program_state) self.check_in_static(main_program, base_map) - # make sure `load_program_state` can be used in dynamic graph mode - with fluid.dygraph.guard(place): - load_state = fluid.load_program_state("test_program_1") - for k, v in load_state.items(): - self.assertTrue(np.array_equal(base_map[k], v)) + if not core.is_compiled_with_npu(): + # make sure `load_program_state` can be used in dynamic graph 
mode + with fluid.dygraph.guard(place): + load_state = fluid.load_program_state("test_program_1") + for k, v in load_state.items(): + self.assertTrue(np.array_equal(base_map[k], v)) def check_in_static(self, main_program, base_map): for var in main_program.list_vars(): @@ -1347,6 +1372,10 @@ def test_large_parameters_static_save(self): class TestProgramStateOldSaveSingleModel(unittest.TestCase): + def set_place(self): + return fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + def test_ptb_rnn_cpu_float32(self): seed = 90 hidden_size = 10 @@ -1368,8 +1397,7 @@ def test_ptb_rnn_cpu_float32(self): num_steps=num_steps, init_scale=init_scale) - place = fluid.CPUPlace() if not core.is_compiled_with_cuda( - ) else fluid.CUDAPlace(0) + place = self.set_place() exe = fluid.Executor(place) sgd = Adam(learning_rate=1e-3) x = fluid.layers.data( From a8ba41832f594678a275606f5409a55adce3703b Mon Sep 17 00:00:00 2001 From: pangyoki Date: Fri, 9 Apr 2021 07:47:58 +0000 Subject: [PATCH 09/10] fix unittest problem --- .../tests/unittests/npu/test_save_load_npu.py | 90 +------------------ .../tests/unittests/test_static_save_load.py | 53 +++-------- 2 files changed, 14 insertions(+), 129 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py index f73a190254657..04fbf24d23e17 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py @@ -33,85 +33,6 @@ import errno from test_static_save_load import * -paddle.enable_static() - - -class PtbModel(fluid.Layer): - def __init__(self, - name_scope, - hidden_size, - vocab_size, - num_layers=2, - num_steps=20, - init_scale=0.1, - dropout=None): - super(PtbModel, self).__init__() - self.hidden_size = hidden_size - self.vocab_size = vocab_size - self.init_scale = init_scale - self.num_layers = num_layers - self.num_steps = num_steps - self.dropout = dropout - self.simple_lstm_rnn = SimpleLSTMRNN( - self.full_name(), - hidden_size, - num_steps, - num_layers=num_layers, - init_scale=init_scale, - dropout=dropout) - self.embedding = Embedding( - num_embeddings=vocab_size, - embedding_dim=hidden_size, - weight_attr=fluid.ParamAttr( - name='embedding_para', - initializer=fluid.initializer.UniformInitializer( - low=-init_scale, high=init_scale))) - self.softmax_weight = self.create_parameter( - attr=fluid.ParamAttr(), - shape=[self.hidden_size, self.vocab_size], - dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( - low=-self.init_scale, high=self.init_scale)) - self.softmax_bias = self.create_parameter( - attr=fluid.ParamAttr(), - shape=[self.vocab_size], - dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( - low=-self.init_scale, high=self.init_scale)) - - def forward(self, input, label, init_hidden, init_cell): - init_h = fluid.layers.reshape( - init_hidden, shape=[self.num_layers, -1, self.hidden_size]) - - init_c = fluid.layers.reshape( - init_cell, shape=[self.num_layers, -1, self.hidden_size]) - - input = fluid.layers.cast(input, "int32") - x_emb = self.embedding(input) - x_emb = fluid.layers.reshape( - x_emb, shape=[-1, self.num_steps, self.hidden_size]) - if self.dropout is not None and self.dropout > 0.0: - x_emb = fluid.layers.dropout( - x_emb, - dropout_prob=self.drop_out, - dropout_implementation='upscale_in_train') - rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, - init_c) - - 
rnn_out = fluid.layers.reshape(
-            rnn_out, shape=[-1, self.num_steps, self.hidden_size])
-        projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
-        projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
-        projection = fluid.layers.reshape(
-            projection, shape=[-1, self.vocab_size])
-        loss = fluid.layers.softmax_with_cross_entropy(
-            logits=projection, label=label, soft_label=False)
-        loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps])
-        loss = fluid.layers.reduce_mean(loss, dim=[0])
-        loss = fluid.layers.reduce_sum(loss)
-
-        return loss, last_hidden, last_cell
-
 
 @unittest.skipIf(not paddle.is_compiled_with_npu(),
                  "core is not compiled with NPU")
@@ -145,14 +66,6 @@ def set_place(self):
     ) else paddle.NPUPlace(0)
 
 
-@unittest.skipIf(not paddle.is_compiled_with_npu(),
-                 "core is not compiled with NPU")
-class TestNPUVariableInit(TestVariableInit):
-    def set_place(self):
-        return fluid.CPUPlace() if not core.is_compiled_with_npu(
-        ) else paddle.NPUPlace(0)
-
-
 @unittest.skipIf(not paddle.is_compiled_with_npu(),
                  "core is not compiled with NPU")
 class TestNPULoadFromOldInterface(TestLoadFromOldInterface):
@@ -172,6 +85,9 @@ def set_place(self):
 @unittest.skipIf(not paddle.is_compiled_with_npu(),
                  "core is not compiled with NPU")
 class TestNPUProgramStateOldSave(TestProgramStateOldSave):
+    def setUp(self):
+        self.test_dygraph = False
+
     def set_place(self):
         return fluid.CPUPlace() if not core.is_compiled_with_npu(
         ) else paddle.NPUPlace(0)
diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py
index aba7d4883ae9a..0dae204c89eef 100644
--- a/python/paddle/fluid/tests/unittests/test_static_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py
@@ -18,7 +18,7 @@
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
-from paddle.fluid.dygraph.nn import Embedding
+from paddle.nn import Embedding
 import paddle.fluid.framework as framework
 from paddle.fluid.optimizer import Adam
 from paddle.fluid.dygraph.base import to_variable
@@ -158,11 +158,10 @@ def __init__(self,
             num_layers=num_layers,
             init_scale=init_scale,
             dropout=dropout)
-        self.embedding = Embedding(
-            size=[vocab_size, hidden_size],
-            dtype='float32',
-            is_sparse=False,
-            param_attr=fluid.ParamAttr(
+        self.embedding = paddle.nn.Embedding(
+            num_embeddings=vocab_size,
+            embedding_dim=hidden_size,
+            weight_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale)))
@@ -186,6 +185,8 @@ def forward(self, input, label, init_hidden, init_cell):
         init_c = fluid.layers.reshape(
             init_cell, shape=[self.num_layers, -1, self.hidden_size])
 
+        # NPU 'top_k' kernel only supports `int32` dtype, so cast `input` from `int64` to `int32`.
+        input = fluid.layers.cast(input, "int32")
         x_emb = self.embedding(input)
         x_emb = fluid.layers.reshape(
             x_emb, shape=[-1, self.num_steps, self.hidden_size])
@@ -742,8 +743,6 @@ def set_var(var, ndarray):
                 place = paddle.fluid.CPUPlace()
             elif p.is_cuda_pinned_place():
                 place = paddle.fluid.CUDAPinnedPlace()
-            elif p.is_npu_place():
-                place = paddle.NPUPlace(0)
             else:
                 p = paddle.fluid.core.Place()
                 p.set_place(t._place())
@@ -1191,6 +1190,9 @@ def test_load_from_old_interface(self):
 
 
 class TestProgramStateOldSave(unittest.TestCase):
+    def setUp(self):
+        self.test_dygraph = True
+
     def set_place(self):
         return fluid.CPUPlace() if not core.is_compiled_with_cuda(
         ) else fluid.CUDAPlace(0)
@@ -1322,7 +1324,7 @@ def symlink_force(target, link_name):
         fluid.set_program_state(main_program, program_state)
         self.check_in_static(main_program, base_map)
 
-        if not core.is_compiled_with_npu():
+        if self.test_dygraph:
             # make sure `load_program_state` can be used in dynamic graph mode
             with fluid.dygraph.guard(place):
                 load_state = fluid.load_program_state("test_program_1")
@@ -1338,39 +1340,6 @@ def check_in_static(self, main_program, base_map):
                 self.assertTrue(np.array_equal(new_t, base_t))
 
 
-class TestStaticSaveLoadLargeParameters(unittest.TestCase):
-    def test_large_parameters_static_save(self):
-        # enable static mode
-        paddle.enable_static()
-        LARGE_PARAM = 2**26
-        with new_program_scope():
-            # create network
-            x = paddle.static.data(
-                name="static_save_load_large_x",
-                shape=[None, 10],
-                dtype='float32')
-            z = paddle.static.nn.fc(x, LARGE_PARAM)
-            place = paddle.CPUPlace()
-            exe = paddle.static.Executor(place)
-            exe.run(paddle.static.default_startup_program())
-            prog = paddle.static.default_main_program()
-
-            inputs = np.random.randn(1, 10).astype("float32")
-            result_z = exe.run(program=prog,
-                               feed={"static_save_load_large_x": inputs},
-                               fetch_list=[z.name])
-            path = "test_static_save_load_large_param/static_save"
-            paddle.fluid.save(prog, path)
-
-            paddle.fluid.load(prog, path)
-            result_load = exe.run(program=prog,
-                                  feed={"static_save_load_large_x": inputs},
-                                  fetch_list=[z.name])
-            # compare results before and after saving
-            self.assertTrue(
-                np.sum(np.abs(result_z[0] - result_load[0])) < 1e-15)
-
-
 class TestProgramStateOldSaveSingleModel(unittest.TestCase):
     def set_place(self):
         return fluid.CPUPlace() if not core.is_compiled_with_cuda(
         ) else fluid.CUDAPlace(0)
 
     def test_ptb_rnn_cpu_float32(self):
         seed = 90
         hidden_size = 10

From b1f914a737f31024e93a358e503187f06d17c3b5 Mon Sep 17 00:00:00 2001
From: pangyoki
Date: Fri, 9 Apr 2021 07:56:13 +0000
Subject: [PATCH 10/10] fix little problem

---
 python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py | 2 ++
 python/paddle/fluid/tests/unittests/test_static_save_load.py  | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py
index 04fbf24d23e17..e7e7fb39c913b 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_save_load_npu.py
@@ -33,6 +33,8 @@ import errno
 
 from test_static_save_load import *
 
+paddle.enable_static()
+
 
 @unittest.skipIf(not paddle.is_compiled_with_npu(),
                  "core is not compiled with NPU")
diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py
index 0dae204c89eef..200e6fd35fdd3 100644
--- a/python/paddle/fluid/tests/unittests/test_static_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py
@@ -30,6 +30,8 @@
 import os
 import errno
 
+paddle.enable_static()
+
 
 class SimpleLSTMRNN(fluid.Layer):
     def __init__(self,
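
A minimal sketch of the pattern this patch series converges on, for readers following the diffs above: the CUDA/CPU base test keeps all of the save/load logic and exposes a set_place() hook, and the NPU test file only subclasses it, overriding set_place() and switching off a test_dygraph flag where the dygraph check is not run on Ascend. This sketch is not taken from the patches; the class names (SaveLoadPlaceBase, SaveLoadPlaceNPU), the save path, and the tiny fc network are illustrative assumptions, not identifiers added by the PR.

# Illustrative sketch only; assumes the fluid static-graph save/load APIs
# exercised by the tests above (fluid.save/fluid.load/load_program_state).
import unittest

import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core

paddle.enable_static()


class SaveLoadPlaceBase(unittest.TestCase):
    def setUp(self):
        # The NPU subclass switches this off; the dygraph check below is the
        # one piece that is not exercised on Ascend.
        self.test_dygraph = True

    def set_place(self):
        # Default device selection: CPU, or CUDA when compiled with it.
        return fluid.CPUPlace() if not core.is_compiled_with_cuda(
        ) else fluid.CUDAPlace(0)

    def test_round_trip(self):
        main, startup = fluid.Program(), fluid.Program()
        with fluid.program_guard(main, startup):
            x = fluid.data(name="x", shape=[None, 10], dtype='float32')
            fluid.layers.fc(x, 10)

        exe = fluid.Executor(self.set_place())
        exe.run(startup)

        # Snapshot the initialized parameters, save, zero them in place,
        # reload, and check the values survive the round trip.
        scope = fluid.global_scope()
        before = {
            p.name: np.array(scope.find_var(p.name).get_tensor())
            for p in main.all_parameters()
        }
        fluid.save(main, "./save_load_place_test")
        for p in main.all_parameters():
            t = scope.find_var(p.name).get_tensor()
            t.set(np.zeros_like(np.array(t)), self.set_place())
        fluid.load(main, "./save_load_place_test", exe)
        for name, old in before.items():
            new = np.array(scope.find_var(name).get_tensor())
            self.assertTrue(np.array_equal(new, old))

        if self.test_dygraph:
            # Same check through `load_program_state` under a dygraph guard;
            # this is what TestNPUProgramStateOldSave opts out of via setUp().
            with fluid.dygraph.guard(self.set_place()):
                state = fluid.load_program_state(
                    "./save_load_place_test.pdparams")
                for name, old in before.items():
                    self.assertTrue(np.array_equal(state[name], old))


@unittest.skipIf(not paddle.is_compiled_with_npu(),
                 "core is not compiled with NPU")
class SaveLoadPlaceNPU(SaveLoadPlaceBase):
    def setUp(self):
        self.test_dygraph = False

    def set_place(self):
        return fluid.CPUPlace() if not core.is_compiled_with_npu(
        ) else paddle.NPUPlace(0)


if __name__ == '__main__':
    unittest.main()

Keeping device selection in a single overridable hook is what lets the NPU test file shrink from full copies of every test class to the thin subclasses seen in patch 09 above.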