From 64c317830db91179bcf989d08df43535ac65112c Mon Sep 17 00:00:00 2001 From: ronny1996 <524019753@qq.com> Date: Mon, 19 Jul 2021 11:29:08 +0000 Subject: [PATCH 1/3] add one_hot_op and tests --- paddle/fluid/operators/one_hot_op_npu.cc | 87 ++++++++ .../unittests/npu/test_one_hot_op_npu.py | 205 ++++++++++++++++++ 2 files changed, 292 insertions(+) create mode 100644 paddle/fluid/operators/one_hot_op_npu.cc create mode 100644 python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py diff --git a/paddle/fluid/operators/one_hot_op_npu.cc b/paddle/fluid/operators/one_hot_op_npu.cc new file mode 100644 index 0000000000000..482c729f60f2a --- /dev/null +++ b/paddle/fluid/operators/one_hot_op_npu.cc @@ -0,0 +1,87 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/one_hot_op.h" + +#include "paddle/fluid/operators/npu_op_runner.h" + +namespace paddle { +namespace operators { +using Tensor = framework::Tensor; + +template +class OneHotNPUKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& dev_ctx = + ctx.template device_context(); + auto* in = ctx.Input("X"); + auto* out = ctx.Output("Out"); + int depth = ctx.Attr("depth"); + + if (ctx.HasInput("depth_tensor")) { + auto* depth_tensor = ctx.Input("depth_tensor"); + std::vector depth_data; + framework::TensorToVector(*depth_tensor, dev_ctx, &depth_data); + depth = depth_data[0]; + auto in_dims = in->dims(); + framework::DDim out_dims(in_dims); + out_dims[out_dims.size() - 1] = depth; + out->Resize(out_dims); + } + out->mutable_data(ctx.GetPlace()); + + Tensor on_value, off_value; + on_value.mutable_data(framework::make_ddim({1}), ctx.GetPlace()); + off_value.mutable_data(framework::make_ddim({1}), ctx.GetPlace()); + FillNpuTensorWithConstant(&on_value, 1.0f); + FillNpuTensorWithConstant(&off_value, 0.0f); + + if (in->type() == framework::proto::VarType::INT32) { + NpuOpRunner runner; + runner.SetType("OneHot") + .AddInput(*in) + .AddInput(std::vector({static_cast(depth)})) + .AddInput(on_value) + .AddInput(off_value) + .AddAttr("axis", -1) + .AddOutput(*out); + runner.Run(dev_ctx.stream()); + } else { + Tensor transformed_in; + transformed_in.mutable_data(in->dims(), dev_ctx.GetPlace()); + const auto& cast_runner = NpuOpRunner("Cast", {*in}, {transformed_in}, + {{"dst_type", ACL_INT32}}); + cast_runner.Run(dev_ctx.stream()); + NpuOpRunner runner; + runner.SetType("OneHot") + .AddInput(transformed_in) + .AddInput(std::vector({static_cast(depth)})) + .AddInput(on_value) + .AddInput(off_value) + .AddAttr("axis", -1) + .AddOutput(*out); + runner.Run(dev_ctx.stream()); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +namespace plat = paddle::platform; + +REGISTER_OP_NPU_KERNEL(one_hot, ops::OneHotNPUKernel, + ops::OneHotNPUKernel); diff --git a/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py new file mode 100644 index 0000000000000..e0ee842e6509a --- /dev/null +++ b/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py @@ -0,0 +1,205 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import sys +import unittest +import numpy as np +sys.path.append("..") + +from op_test import OpTest +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.framework import Program, program_guard + +paddle.enable_static() + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestOneHotOp(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + depth_np = np.array(10).astype('int32') + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32)} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestOneHotOp_attr(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'dtype': int(core.VarDesc.VarType.FP32), 'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestOneHotOp_default_dtype(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + depth_np = np.array(10).astype('int32') + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod), 'depth_tensor': depth_np} + self.attrs = {} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestOneHotOp_default_dtype_attr(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestOneHotOp_out_of_range(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + x_lod = [[4, 1, 3, 3]] + x = [np.random.choice([-1, depth]) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int32').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'depth': depth, 'allow_out_of_range': True} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +@unittest.skipIf(not paddle.is_compiled_with_npu(), + "core is not compiled with NPU") +class TestOneHotOp_dtype_int64(OpTest): + def set_npu(self): + self.__class__.use_npu = True + + def setUp(self): + self.set_npu() + self.op_type = 'one_hot' + depth = 10 + dimension = 12 + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in range(sum(x_lod[0]))] + x = np.array(x).astype('int64').reshape([sum(x_lod[0]), 1]) + + out = np.zeros(shape=(np.product(x.shape[:-1]), + depth)).astype('float32') + + for i in range(np.product(x.shape)): + out[i, x[i]] = 1.0 + + self.inputs = {'X': (x, x_lod)} + self.attrs = {'depth': depth} + self.outputs = {'Out': (out, x_lod)} + + def test_check_output(self): + self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() From 37490c554bb1be3223f274428bd2913150bc9a3d Mon Sep 17 00:00:00 2001 From: ronny1996 <524019753@qq.com> Date: Tue, 20 Jul 2021 07:23:07 +0000 Subject: [PATCH 2/3] update --- .../fluid/tests/unittests/npu/test_one_hot_op_npu.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py index e0ee842e6509a..c92fffb2d26cb 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_one_hot_op_npu.py @@ -28,8 +28,6 @@ paddle.enable_static() -@unittest.skipIf(not paddle.is_compiled_with_npu(), - "core is not compiled with NPU") class TestOneHotOp(OpTest): def set_npu(self): self.__class__.use_npu = True @@ -58,8 +56,6 @@ def test_check_output(self): self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) -@unittest.skipIf(not paddle.is_compiled_with_npu(), - "core is not compiled with NPU") class TestOneHotOp_attr(OpTest): def set_npu(self): self.__class__.use_npu = True @@ -87,8 +83,6 @@ def test_check_output(self): self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) -@unittest.skipIf(not paddle.is_compiled_with_npu(), - "core is not compiled with NPU") class TestOneHotOp_default_dtype(OpTest): def set_npu(self): self.__class__.use_npu = True @@ -117,8 +111,6 @@ def test_check_output(self): self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) -@unittest.skipIf(not paddle.is_compiled_with_npu(), - "core is not compiled with NPU") class TestOneHotOp_default_dtype_attr(OpTest): def set_npu(self): self.__class__.use_npu = True @@ -146,8 +138,6 @@ def test_check_output(self): self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) -@unittest.skipIf(not paddle.is_compiled_with_npu(), - "core is not compiled with NPU") class TestOneHotOp_out_of_range(OpTest): def set_npu(self): self.__class__.use_npu = True @@ -171,8 +161,6 @@ def test_check_output(self): self.check_output_with_place(paddle.NPUPlace(0), check_dygraph=False) -@unittest.skipIf(not paddle.is_compiled_with_npu(), - "core is not compiled with NPU") class TestOneHotOp_dtype_int64(OpTest): def set_npu(self): self.__class__.use_npu = True From 094e4b5847a3e1f73c4f0a90e460f4d920208cb4 Mon Sep 17 00:00:00 2001 From: ronny1996 <524019753@qq.com> Date: Mon, 26 Jul 2021 09:12:38 +0000 Subject: [PATCH 3/3] make code clear --- paddle/fluid/operators/npu_op_runner.cc | 32 ++++++++++++++++++++++++ paddle/fluid/operators/npu_op_runner.h | 4 +++ paddle/fluid/operators/one_hot_op_npu.cc | 15 ++++------- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/operators/npu_op_runner.cc b/paddle/fluid/operators/npu_op_runner.cc index 4461941e85c2a..a134b542c246e 100644 --- a/paddle/fluid/operators/npu_op_runner.cc +++ b/paddle/fluid/operators/npu_op_runner.cc @@ -240,6 +240,38 @@ NpuOpRunner &NpuOpRunner::AddInput(std::vector &&dims) { return *this; } +NpuOpRunner &NpuOpRunner::AddInput(std::vector &&values) { + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto *dev_ctx = + static_cast(pool.Get(platform::CPUPlace())); + Tensor host_tensor; + TensorFromVector(values, *dev_ctx, &host_tensor); + host_tensors_.emplace_back(host_tensor); + + // create aclTensorDesc + input_descs_.emplace_back(CreateTensorDesc(host_tensor, ACL_MEMTYPE_HOST)); + // create aclDataBuffer + input_buffers_.emplace_back(CreateDataBuffer(host_tensor)); + + return *this; +} + +NpuOpRunner &NpuOpRunner::AddInput(std::vector &&values) { + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto *dev_ctx = + static_cast(pool.Get(platform::CPUPlace())); + Tensor host_tensor; + TensorFromVector(values, *dev_ctx, &host_tensor); + host_tensors_.emplace_back(host_tensor); + + // create aclTensorDesc + input_descs_.emplace_back(CreateTensorDesc(host_tensor, ACL_MEMTYPE_HOST)); + // create aclDataBuffer + input_buffers_.emplace_back(CreateDataBuffer(host_tensor)); + + return *this; +} + NpuOpRunner &NpuOpRunner::AddOutput(const Tensor &tensor) { // create aclTensorDesc output_descs_.emplace_back(CreateTensorDesc(tensor)); diff --git a/paddle/fluid/operators/npu_op_runner.h b/paddle/fluid/operators/npu_op_runner.h index 2257c209550d6..45e973970a956 100644 --- a/paddle/fluid/operators/npu_op_runner.h +++ b/paddle/fluid/operators/npu_op_runner.h @@ -71,6 +71,10 @@ class NpuOpRunner { NpuOpRunner &AddInput(std::vector &&dims); + NpuOpRunner &AddInput(std::vector &&values); + + NpuOpRunner &AddInput(std::vector &&values); + NpuOpRunner &AddOutput(const Tensor &tensor); NpuOpRunner &AddInputs(const std::vector &tensors); diff --git a/paddle/fluid/operators/one_hot_op_npu.cc b/paddle/fluid/operators/one_hot_op_npu.cc index 482c729f60f2a..1cf99d844c888 100644 --- a/paddle/fluid/operators/one_hot_op_npu.cc +++ b/paddle/fluid/operators/one_hot_op_npu.cc @@ -42,19 +42,14 @@ class OneHotNPUKernel : public framework::OpKernel { } out->mutable_data(ctx.GetPlace()); - Tensor on_value, off_value; - on_value.mutable_data(framework::make_ddim({1}), ctx.GetPlace()); - off_value.mutable_data(framework::make_ddim({1}), ctx.GetPlace()); - FillNpuTensorWithConstant(&on_value, 1.0f); - FillNpuTensorWithConstant(&off_value, 0.0f); - + float on_value = 1.0f, off_value = 0.0f; if (in->type() == framework::proto::VarType::INT32) { NpuOpRunner runner; runner.SetType("OneHot") .AddInput(*in) .AddInput(std::vector({static_cast(depth)})) - .AddInput(on_value) - .AddInput(off_value) + .AddInput(std::vector({on_value})) + .AddInput(std::vector({off_value})) .AddAttr("axis", -1) .AddOutput(*out); runner.Run(dev_ctx.stream()); @@ -68,8 +63,8 @@ class OneHotNPUKernel : public framework::OpKernel { runner.SetType("OneHot") .AddInput(transformed_in) .AddInput(std::vector({static_cast(depth)})) - .AddInput(on_value) - .AddInput(off_value) + .AddInput(std::vector({on_value})) + .AddInput(std::vector({off_value})) .AddAttr("axis", -1) .AddOutput(*out); runner.Run(dev_ctx.stream());