
Commit

add topk op
oyjxer committed Mar 16, 2021
1 parent 795b0f9 commit 26e6e6e
Showing 2 changed files with 207 additions and 0 deletions.
98 changes: 98 additions & 0 deletions paddle/fluid/operators/top_k_op_npu.cc
@@ -0,0 +1,98 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_ASCEND_CL
#include <memory>
#include <string>

#include "paddle/fluid/operators/top_k_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"

namespace paddle {
namespace operators {

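// The Ascend TopKD kernel consumes an auxiliary "assist" sequence alongside
// the input tensor. The helper below builds it: the first `dim` entries are
// the indices 0..dim-1 stored as float16, and the next `dim` entries are the
// gap between each index and its float16 representation, so that exact
// indices can still be recovered on the device despite the half-precision
// encoding.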
void topk_assit_help(framework::Tensor* assit_tensor, int64_t dim,
                     const framework::ExecutionContext& ctx) {
  const int64_t UB_SIZE = dim;
  std::vector<paddle::platform::float16> assit;
  assit.resize(2 * UB_SIZE);
  // index sequence 0..dim-1, stored as float16
  for (int64_t i = 0; i < UB_SIZE; i++) {
    assit[i] = static_cast<paddle::platform::float16>(i);
  }

  // rounding gap between each index and its float16 representation
  for (int64_t i = 0; i < UB_SIZE; i++) {
    int64_t idx = static_cast<int64_t>(
        static_cast<float>(static_cast<paddle::platform::float16>(i)));
    int64_t gap = i - idx;
    assit[i + dim] = static_cast<paddle::platform::float16>(gap);
  }
  framework::TensorFromVector(assit, ctx.device_context(), assit_tensor);
}
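// For example, with dim = 4 the helper produces {0, 1, 2, 3, 0, 0, 0, 0}; the
// gap entries stay zero until indices exceed float16's exact-integer range
// (2048), after which they record the rounding error for TopKD to compensate.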


template <typename DeviceContext, typename T>
class TopkNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // read input and outputs
    auto* input = ctx.Input<framework::LoDTensor>("X");
    auto* output = ctx.Output<framework::LoDTensor>("Out");
    auto* indices = ctx.Output<framework::LoDTensor>("Indices");

    const int k = ctx.Attr<int>("k");

    output->mutable_data<paddle::platform::float16>(ctx.GetPlace());
    indices->mutable_data<paddle::platform::float16>(ctx.GetPlace());

    // prepare assist sequence
    auto dim = input->dims().size();
    framework::Tensor assist_seq_tensor;
    assist_seq_tensor.Resize({2 * dim});
    assist_seq_tensor.mutable_data<paddle::platform::float16>(ctx.GetPlace());
    topk_assit_help(&assist_seq_tensor, dim, ctx);

    framework::NPUAttributeMap attr_input = {
        {"sorted", "true"}, {"k", k}, {"dim", -1}, {"largest", true}};

    // run the Ascend TopKD kernel
    auto runner = NpuOpRunner("TopKD", {*input, assist_seq_tensor},
                              {*output, *indices}, attr_input);

    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    runner.Run(stream);
  }
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_NPU_KERNEL(
    top_k,
    ops::TopkNPUKernel<paddle::platform::NPUDeviceContext,
                       paddle::platform::float16>);
#endif
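For reference, the semantics the kernel above should reproduce can be sketched on the host in plain C++ (a standalone illustration, independent of Paddle): given a 1-D input and k, return the k largest values in descending order together with their original indices. With the test input used below (0.01 * i for i < 100) and k = 5, this yields {0.99, 0.98, 0.97, 0.96, 0.95} at indices {99, 98, 97, 96, 95}.

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

// Host-side reference for 1-D top-k: the k largest values in descending
// order, paired with their original indices (assumes k <= x.size()).
std::pair<std::vector<float>, std::vector<int>> TopKReference(
    const std::vector<float>& x, std::size_t k) {
  std::vector<int> idx(x.size());
  std::iota(idx.begin(), idx.end(), 0);
  std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                    [&x](int a, int b) { return x[a] > x[b]; });
  idx.resize(k);
  std::vector<float> values(k);
  for (std::size_t i = 0; i < k; ++i) values[i] = x[idx[i]];
  return {values, idx};
}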
109 changes: 109 additions & 0 deletions paddle/fluid/operators/top_k_op_npu_test.cc
@@ -0,0 +1,109 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifndef _WIN32
#include <unistd.h>
#endif

#include <string>
#include <thread> // NOLINT
#include <vector>

#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"

namespace f = paddle::framework;
namespace p = paddle::platform;
namespace m = paddle::operators::math;

USE_OP(top_k);
USE_OP_DEVICE_KERNEL(top_k, NPU);
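// USE_OP and USE_OP_DEVICE_KERNEL make sure the top_k operator definition and
// its NPU kernel registration are linked into this test binary so that
// OpRegistry::CreateOp below can find them.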

template <typename T>
void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
  // init
  auto x = scope->Var("X");
  auto tensor_x = x->GetMutable<f::LoDTensor>();

  auto k = scope->Var("K");
  auto tensor_k = k->GetMutable<f::LoDTensor>();

  int dim0 = 100;
  int top_num = 5;

  std::vector<T> init;
  for (int64_t i = 0; i < dim0; ++i) {
    init.push_back(static_cast<T>(0.01 * i));
  }
  TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({dim0});
  ctx.Wait();

  std::vector<int> init_k;
  init_k.push_back(top_num);
  TensorFromVector(init_k, ctx, tensor_k);
  tensor_k->Resize({1});
  ctx.Wait();

  auto place = ctx.GetPlace();
  auto out = scope->Var("Out");
  auto tensor_out = out->GetMutable<f::LoDTensor>();

  auto indices = scope->Var("Indices");
  auto tensor_indices = indices->GetMutable<f::LoDTensor>();

  // run
  auto op =
      f::OpRegistry::CreateOp("top_k", {{"X", {"X"}}, {"K", {"K"}}},
                              {{"Out", {"Out"}}, {"Indices", {"Indices"}}}, {});

  op->Run(*scope, place);
  ctx.Wait();

  for (auto i = 0; i < tensor_out->dims().size(); ++i) {
    VLOG(3) << "dim:" << i << " " << tensor_out->dims()[i];
  }

  f::Tensor cpu_tensor;
  TensorCopySync(*tensor_out, p::CPUPlace(), &cpu_tensor);
  auto data = cpu_tensor.data<T>();
  auto vec_data = std::vector<T>(data, data + tensor_out->numel());
  for (int i = 0; i < static_cast<int>(vec_data.size()); ++i) {
    VLOG(3) << "top_k vec_data_out[" << i << "] = " << vec_data[i];
  }

  f::Tensor cpu_tensor1;
  TensorCopySync(*tensor_indices, p::CPUPlace(), &cpu_tensor1);
  auto data1 = cpu_tensor1.data<T>();
  auto vec_data1 = std::vector<T>(data1, data1 + tensor_indices->numel());
  for (int i = 0; i < static_cast<int>(vec_data1.size()); ++i) {
    VLOG(3) << "topk index_out[" << i << "] = " << vec_data1[i];
  }
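  // With the input 0.01 * i for i < 100 and k = 5, the expected result is
  // 0.99, 0.98, 0.97, 0.96, 0.95 at indices 99..95. A stricter check along
  // these lines could be enabled once the kernel's output is confirmed on
  // hardware (assumes indices come back as float16, as allocated by the
  // kernel):
  // for (int i = 0; i < top_num; ++i) {
  //   EXPECT_NEAR(static_cast<float>(vec_data[i]), 0.01f * (99 - i), 1e-3);
  //   EXPECT_EQ(static_cast<int>(static_cast<float>(vec_data1[i])), 99 - i);
  // }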


  ctx.Wait();
}


TEST(top_k, NPU_fp16) {
  f::Scope scope;
  p::NPUDeviceContext ctx(p::NPUPlace(4));
  Compare<p::float16>(&scope, ctx);
}
