
Commit

add topk op
oyjxer committed Mar 16, 2021
1 parent 795b0f9 commit 26e6e6e
Showing 2 changed files with 207 additions and 0 deletions.
98 changes: 98 additions & 0 deletions paddle/fluid/operators/top_k_op_npu.cc
@@ -0,0 +1,98 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_ASCEND_CL
#include <memory>
#include <string>

#include "paddle/fluid/operators/top_k_op.h"
#include "paddle/fluid/operators/npu_op_runner.h"

namespace paddle {
namespace operators {

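// The Ascend TopKD kernel consumes an auxiliary "assist" sequence alongside
// the input tensor. The helper below builds it: the first `dim` entries are
// the indices 0..dim-1 stored as float16, and the next `dim` entries are the
// gap between each index and its float16 representation, so that exact
// indices can still be recovered on the device despite the half-precision
// encoding.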
void topk_assit_help(framework::Tensor* assit_tensor, int64_t dim,
                     const framework::ExecutionContext& ctx) {
  const int64_t UB_SIZE = dim;
  std::vector<paddle::platform::float16> assit;
  assit.resize(2 * UB_SIZE);
  // index sequence 0..dim-1, stored as float16
  for (int64_t i = 0; i < UB_SIZE; i++) {
    assit[i] = static_cast<paddle::platform::float16>(i);
  }

  // rounding gap between each index and its float16 representation
  for (int64_t i = 0; i < UB_SIZE; i++) {
    int64_t idx = static_cast<int64_t>(
        static_cast<float>(static_cast<paddle::platform::float16>(i)));
    int64_t gap = i - idx;
    assit[i + dim] = static_cast<paddle::platform::float16>(gap);
  }
  framework::TensorFromVector(assit, ctx.device_context(), assit_tensor);
}
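// For example, with dim = 4 the helper produces {0, 1, 2, 3, 0, 0, 0, 0}; the
// gap entries stay zero until indices exceed float16's exact-integer range
// (2048), after which they record the rounding error for TopKD to compensate.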


template <typename DeviceContext, typename T>
class TopkNPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // read input and outputs
    auto* input = ctx.Input<framework::LoDTensor>("X");
    auto* output = ctx.Output<framework::LoDTensor>("Out");
    auto* indices = ctx.Output<framework::LoDTensor>("Indices");

    const int k = ctx.Attr<int>("k");

    output->mutable_data<paddle::platform::float16>(ctx.GetPlace());
    indices->mutable_data<paddle::platform::float16>(ctx.GetPlace());

    // prepare assist sequence
    auto dim = input->dims().size();
    framework::Tensor assist_seq_tensor;
    assist_seq_tensor.Resize({2 * dim});
    assist_seq_tensor.mutable_data<paddle::platform::float16>(ctx.GetPlace());
    topk_assit_help(&assist_seq_tensor, dim, ctx);

    framework::NPUAttributeMap attr_input = {
        {"sorted", "true"}, {"k", k}, {"dim", -1}, {"largest", true}};

    // run the Ascend TopKD kernel
    auto runner = NpuOpRunner("TopKD", {*input, assist_seq_tensor},
                              {*output, *indices}, attr_input);

    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    runner.Run(stream);
  }
};

} // namespace operators
} // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_NPU_KERNEL(
    top_k,
    ops::TopkNPUKernel<paddle::platform::NPUDeviceContext,
                       paddle::platform::float16>);
#endif
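For reference, the semantics the kernel above should reproduce can be sketched on the host in plain C++ (a standalone illustration, independent of Paddle): given a 1-D input and k, return the k largest values in descending order together with their original indices. With the test input used below (0.01 * i for i < 100) and k = 5, this yields {0.99, 0.98, 0.97, 0.96, 0.95} at indices {99, 98, 97, 96, 95}.

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <utility>
#include <vector>

// Host-side reference for 1-D top-k: the k largest values in descending
// order, paired with their original indices (assumes k <= x.size()).
std::pair<std::vector<float>, std::vector<int>> TopKReference(
    const std::vector<float>& x, std::size_t k) {
  std::vector<int> idx(x.size());
  std::iota(idx.begin(), idx.end(), 0);
  std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                    [&x](int a, int b) { return x[a] > x[b]; });
  idx.resize(k);
  std::vector<float> values(k);
  for (std::size_t i = 0; i < k; ++i) values[i] = x[idx[i]];
  return {values, idx};
}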
109 changes: 109 additions & 0 deletions paddle/fluid/operators/top_k_op_npu_test.cc
@@ -0,0 +1,109 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifndef _WIN32
#include <unistd.h>
#endif

#include <string>
#include <thread> // NOLINT
#include <vector>

#include "gtest/gtest.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/dropout_op.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/string/printf.h"

namespace f = paddle::framework;
namespace p = paddle::platform;
namespace m = paddle::operators::math;

USE_OP(top_k);
USE_OP_DEVICE_KERNEL(top_k, NPU);
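// USE_OP and USE_OP_DEVICE_KERNEL make sure the top_k operator definition and
// its NPU kernel registration are linked into this test binary so that
// OpRegistry::CreateOp below can find them.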

template <typename T>
void Compare(f::Scope* scope, const p::DeviceContext& ctx) {
  // init
  auto x = scope->Var("X");
  auto tensor_x = x->GetMutable<f::LoDTensor>();

  auto k = scope->Var("K");
  auto tensor_k = k->GetMutable<f::LoDTensor>();

  int dim0 = 100;
  int top_num = 5;

  std::vector<T> init;
  for (int64_t i = 0; i < dim0; ++i) {
    init.push_back(static_cast<T>(0.01 * i));
  }
  TensorFromVector(init, ctx, tensor_x);
  tensor_x->Resize({dim0});
  ctx.Wait();

  std::vector<int> init_k;
  init_k.push_back(top_num);
  TensorFromVector(init_k, ctx, tensor_k);
  tensor_k->Resize({1});
  ctx.Wait();

  auto place = ctx.GetPlace();
  auto out = scope->Var("Out");
  auto tensor_out = out->GetMutable<f::LoDTensor>();

  auto indices = scope->Var("Indices");
  auto tensor_indices = indices->GetMutable<f::LoDTensor>();

  // run
  auto op =
      f::OpRegistry::CreateOp("top_k", {{"X", {"X"}}, {"K", {"K"}}},
                              {{"Out", {"Out"}}, {"Indices", {"Indices"}}}, {});

  op->Run(*scope, place);
  ctx.Wait();

  for (auto i = 0; i < tensor_out->dims().size(); ++i) {
    VLOG(3) << "dim:" << i << " " << tensor_out->dims()[i];
  }

  f::Tensor cpu_tensor;
  TensorCopySync(*tensor_out, p::CPUPlace(), &cpu_tensor);
  auto data = cpu_tensor.data<T>();
  auto vec_data = std::vector<T>(data, data + tensor_out->numel());
  for (int i = 0; i < static_cast<int>(vec_data.size()); ++i) {
    VLOG(3) << "top_k vec_data_out[" << i << "] = " << vec_data[i];
  }

  f::Tensor cpu_tensor1;
  TensorCopySync(*tensor_indices, p::CPUPlace(), &cpu_tensor1);
  auto data1 = cpu_tensor1.data<T>();
  auto vec_data1 = std::vector<T>(data1, data1 + tensor_indices->numel());
  for (int i = 0; i < static_cast<int>(vec_data1.size()); ++i) {
    VLOG(3) << "topk index_out[" << i << "] = " << vec_data1[i];
  }
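  // With the input 0.01 * i for i < 100 and k = 5, the expected result is
  // 0.99, 0.98, 0.97, 0.96, 0.95 at indices 99..95. A stricter check along
  // these lines could be enabled once the kernel's output is confirmed on
  // hardware (assumes indices come back as float16, as allocated by the
  // kernel):
  // for (int i = 0; i < top_num; ++i) {
  //   EXPECT_NEAR(static_cast<float>(vec_data[i]), 0.01f * (99 - i), 1e-3);
  //   EXPECT_EQ(static_cast<int>(static_cast<float>(vec_data1[i])), 99 - i);
  // }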


  ctx.Wait();
}


TEST(top_k, NPU_fp16) {
  f::Scope scope;
  p::NPUDeviceContext ctx(p::NPUPlace(4));
  Compare<p::float16>(&scope, ctx);
}
