Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

【PTen】Add empty and empty_like kernel in pten #38334

Merged
merged 5 commits into from
Dec 23, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions paddle/fluid/operators/empty_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,20 @@ class EmptyOp : public framework::OperatorWithKernel {
framework::proto::VarType::Type(context.Attr<int>("dtype")),
context.GetPlace());
}

// Builds the kernel signature used to dispatch this op to the pten "empty"
// kernel. The signature has no inputs, a single attribute naming where the
// output shape comes from, and the single output "Out".
//
// The shape source is chosen in this order:
//   1. "ShapeTensor"     when that input is present,
//   2. "ShapeTensorList" when that multi-input is non-empty,
//   3. "shape"           otherwise.
framework::KernelSignature GetExpectedPtenKernelArgs(
    const framework::ExecutionContext& ctx) const override {
  // Start from the fallback name and override it only when a tensor-based
  // shape input is available.
  std::string shape_source = "shape";
  if (ctx.HasInput("ShapeTensor")) {
    shape_source = "ShapeTensor";
  } else if (ctx.MultiInput<framework::Tensor>("ShapeTensorList").size() > 0) {
    shape_source = "ShapeTensorList";
  }

  return framework::KernelSignature("empty", {}, {shape_source}, {"Out"});
}
};

class EmptyOpVarTypeInference : public framework::VarTypeInference {
Expand Down
4 changes: 2 additions & 2 deletions paddle/pten/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ add_subdirectory(tests)

# make an unity target for compile deps
set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu)
set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu empty_kernel_cpu)
set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
if(WITH_GPU OR WITH_ROCM)
set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu scale_kernel_gpu full_kernel_gpu)
set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu scale_kernel_gpu full_kernel_gpu empty_kernel_gpu)
endif()
if(WITH_XPU)
set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)
Expand Down
5 changes: 5 additions & 0 deletions paddle/pten/api/include/kernel_signature.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ using dot_kernel = void (*)(const DeviceContext&,
using flatten_kernel =
void (*)(const DeviceContext&, const DenseTensor&, int, int, DenseTensor*);

// Function-pointer type for the `empty` kernel: takes the device context, the
// requested shape as a ScalarArray, and a pointer to the output tensor.
using empty_kernel = void (*)(const DeviceContext&,
const ScalarArray&,
DenseTensor*);

// Function-pointer type for the `empty_like` kernel: takes the device context
// and a pointer to the output tensor.
using empty_like_kernel = void (*)(const DeviceContext&, DenseTensor*);
using full_kernel = void (*)(const DeviceContext&,
const ScalarArray&,
const Scalar&,
Expand Down
6 changes: 4 additions & 2 deletions paddle/pten/api/lib/kernel_declare.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,17 @@ limitations under the License. */
// the kernel declare statement is automatically generated according to the
// file name of the kernel, and this header file will be removed

PT_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(empty, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(dot, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(flatten, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(scale, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(full_like, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(empty, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(flatten, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT);
Expand Down
1 change: 1 addition & 0 deletions paddle/pten/kernels/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_contex
cc_library(scale_kernel_cpu SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
cc_library(full_kernel_cpu SRCS full_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function)
cc_library(conj_kernel_cpu SRCS conj_kernel.cc DEPS dense_tensor kernel_context kernel_factory)
cc_library(empty_kernel_cpu SRCS empty_kernel.cc DEPS dense_tensor kernel_context kernel_factory)
41 changes: 41 additions & 0 deletions paddle/pten/kernels/cpu/empty_kernel.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/pten/kernels/empty_kernel.h"

#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/empty_kernel_impl.h"

// Registers pten::Empty under the kernel name `empty` with the CPU and
// ALL_LAYOUT arguments, listing bool, int, int64_t, float, double and
// paddle::platform::float16 as the element types.
// NOTE(review): the trailing `{}` is presumably the (intentionally empty) body
// the registration macro expects — confirm against kernel_registry.h.
PT_REGISTER_CTX_KERNEL(empty,
CPU,
ALL_LAYOUT,
pten::Empty,
bool,
int,
int64_t,
float,
double,
paddle::platform::float16) {}

// Registers pten::EmptyLike under the kernel name `empty_like` with the CPU
// and ALL_LAYOUT arguments, listing bool, int, int64_t, float, double and
// paddle::platform::float16 as the element types.
// NOTE(review): the trailing `{}` is presumably the (intentionally empty) body
// the registration macro expects — confirm against kernel_registry.h.
PT_REGISTER_CTX_KERNEL(empty_like,
CPU,
ALL_LAYOUT,
pten::EmptyLike,
bool,
int,
int64_t,
float,
double,
paddle::platform::float16) {}
19 changes: 19 additions & 0 deletions paddle/pten/kernels/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Kernel libraries built from the .cu sources in this directory. The same set
# of targets (math, linalg, utils, manipulation, scale, full, conj, empty) is
# declared twice: with nv_library when WITH_GPU is set, and with hip_library
# when WITH_ROCM is set instead.
# NOTE(review): targets here use the `_cuda` suffix, while other build files in
# this change refer to `_gpu` targets (e.g. empty_kernel_gpu) — confirm whether
# this directory is still part of the build.
if(WITH_GPU)
nv_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
nv_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
nv_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
nv_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
# Library for the empty / empty_like kernel registrations.
nv_library(empty_kernel_cuda SRCS empty_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
elseif(WITH_ROCM)
hip_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda)
hip_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
hip_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary)
hip_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
# Library for the empty / empty_like kernel registrations (ROCm build).
hip_library(empty_kernel_cuda SRCS empty_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
endif()
28 changes: 28 additions & 0 deletions paddle/pten/kernels/empty_kernel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"

namespace pten {

// Declaration of the `empty` kernel.
//   T        - element type the kernel is instantiated for.
//   ContextT - device context type.
//   dev_ctx  - device context the kernel is invoked with.
//   shape    - requested shape of the output tensor.
//   out      - output tensor; must not be null.
template <typename T, typename ContextT>
void Empty(const ContextT& dev_ctx, const ScalarArray& shape, DenseTensor* out);

// Declaration of the `empty_like` kernel.
//   T        - element type the kernel is instantiated for.
//   ContextT - device context type.
//   dev_ctx  - device context the kernel is invoked with.
//   out      - output tensor; must not be null.
// NOTE(review): unlike Empty, no shape argument is taken; presumably the
// caller sets the dims of `out` beforehand — confirm against the API layer.
template <typename T, typename ContextT>
void EmptyLike(const ContextT& dev_ctx, DenseTensor* out);

} // namespace pten
1 change: 0 additions & 1 deletion paddle/pten/kernels/full_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

#pragma once

#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
Expand Down
2 changes: 2 additions & 0 deletions paddle/pten/kernels/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ if(WITH_GPU)
nv_library(scale_kernel_gpu SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(full_kernel_gpu SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
nv_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
nv_library(empty_kernel_gpu SRCS empty_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
elseif(WITH_ROCM)
hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
Expand All @@ -14,4 +15,5 @@ elseif(WITH_ROCM)
hip_library(scale_kernel_gpu SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(full_kernel_gpu SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function)
hip_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
hip_library(empty_kernel_gpu SRCS empty_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
endif()
41 changes: 41 additions & 0 deletions paddle/pten/kernels/gpu/empty_kernel.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/pten/kernels/empty_kernel.h"

#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/impl/empty_kernel_impl.h"

// Registers pten::Empty under the kernel name `empty` with the GPU and
// ALL_LAYOUT arguments, listing bool, int, int64_t, float, double and
// paddle::platform::float16 as the element types.
// NOTE(review): the trailing `{}` is presumably the (intentionally empty) body
// the registration macro expects — confirm against kernel_registry.h.
PT_REGISTER_CTX_KERNEL(empty,
GPU,
ALL_LAYOUT,
pten::Empty,
bool,
int,
int64_t,
float,
double,
paddle::platform::float16) {}

// Registers pten::EmptyLike under the kernel name `empty_like` with the GPU
// and ALL_LAYOUT arguments, listing bool, int, int64_t, float, double and
// paddle::platform::float16 as the element types.
// NOTE(review): the trailing `{}` is presumably the (intentionally empty) body
// the registration macro expects — confirm against kernel_registry.h.
PT_REGISTER_CTX_KERNEL(empty_like,
GPU,
ALL_LAYOUT,
pten::EmptyLike,
bool,
int,
int64_t,
float,
double,
paddle::platform::float16) {}
34 changes: 34 additions & 0 deletions paddle/pten/kernels/impl/empty_kernel_impl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"

namespace pten {

template <typename T, typename ContextT>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

empty是不是个设备无关的kernel?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

按理说是的,不过目前框架只提供了cpu和gpu的kernel,这里先按照设备无关来写

// Resizes `out` to the dimensions described by `shape`. `dev_ctx` is accepted
// for signature uniformity and is not used here.
void Empty(const ContextT& dev_ctx,
           const ScalarArray& shape,
           DenseTensor* out) {
  // Turn the ScalarArray payload into a DDim, then apply it to the output.
  const auto requested_dims = paddle::framework::make_ddim(shape.GetData());
  out->Resize(requested_dims);
}

// Calls mutable_data<T>() on `out` and ignores the pointer it returns.
// `dev_ctx` is accepted for signature uniformity and is not used here.
// NOTE(review): presumably the call is made for its allocation side effect and
// `out` already carries its dims — confirm against DenseTensor.
template <typename T, typename ContextT>
void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
  static_cast<void>(out->mutable_data<T>());
}

} // namespace pten
1 change: 1 addition & 0 deletions paddle/pten/tests/api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cc_test(test_framework_place_utils storage SRCS test_place_utils.cc DEPS pten_ap
cc_test(test_mean_api SRCS test_mean_api.cc DEPS pten_tensor pten_api pten_api_utils)
cc_test(test_dot_api SRCS test_dot_api.cc DEPS pten_tensor pten_api pten_api_utils)
cc_test(test_matmul_api SRCS test_matmul_api.cc DEPS pten_tensor pten_api pten_api_utils)
cc_test(test_empty_api SRCS test_empty_api.cc DEPS pten_tensor pten_api pten_api_utils)
cc_test(test_fill_api SRCS test_fill_api.cc DEPS pten_tensor pten_api pten_api_utils)
cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS pten_tensor pten_api pten_api_utils)
cc_test(test_elementwise_api SRCS test_elementwise_api.cc DEPS pten_tensor pten_api pten_api_utils)
Expand Down
127 changes: 127 additions & 0 deletions paddle/pten/tests/api/test_empty_api.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <gtest/gtest.h>
#include <memory>

#include "paddle/pten/api/include/api.h"

#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"

namespace paddle {
namespace tests {

namespace framework = paddle::framework;
using DDim = paddle::framework::DDim;

// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, empty_like) {
  // Arrange: wrap a 3x2 FLOAT32 / NCHW dense tensor, created with a CPU-place
  // allocator, in an API-level tensor.
  const auto cpu_allocator =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace());
  auto source_dense = std::make_shared<pten::DenseTensor>(
      cpu_allocator,
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            framework::make_ddim({3, 2}),
                            pten::DataLayout::NCHW));
  paddle::experimental::Tensor source(source_dense);

  // Act: request a FLOAT32 tensor shaped like the source.
  auto result =
      paddle::experimental::empty_like(source, pten::DataType::FLOAT32);

  // Assert: metadata mirrors the source and the result is initialized.
  ASSERT_EQ(result.dims().size(), 2);
  ASSERT_EQ(result.dims()[0], 3);
  ASSERT_EQ(result.numel(), 6);
  ASSERT_EQ(result.is_cpu(), true);
  ASSERT_EQ(result.type(), pten::DataType::FLOAT32);
  ASSERT_EQ(result.layout(), pten::DataLayout::NCHW);
  ASSERT_EQ(result.initialized(), true);
}

TEST(API, empty1) {
  // Arrange: the requested shape {2, 3} is supplied as a 1-D INT64 tensor.
  const auto cpu_allocator =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace());
  auto shape_dense = std::make_shared<pten::DenseTensor>(
      cpu_allocator,
      pten::DenseTensorMeta(pten::DataType::INT64,
                            framework::make_ddim({2}),
                            pten::DataLayout::NCHW));
  auto* dims_buffer = shape_dense->mutable_data<int64_t>();
  dims_buffer[0] = 2;
  dims_buffer[1] = 3;
  paddle::experimental::Tensor shape_tensor(shape_dense);

  // Act: create a FLOAT32 tensor whose shape is read from the shape tensor.
  auto result =
      paddle::experimental::empty(shape_tensor, pten::DataType::FLOAT32);

  // Assert: a 2x3 FLOAT32 / NCHW CPU tensor that reports itself initialized.
  ASSERT_EQ(result.shape().size(), 2UL);
  ASSERT_EQ(result.shape()[0], 2);
  ASSERT_EQ(result.numel(), 6);
  ASSERT_EQ(result.is_cpu(), true);
  ASSERT_EQ(result.type(), pten::DataType::FLOAT32);
  ASSERT_EQ(result.layout(), pten::DataLayout::NCHW);
  ASSERT_EQ(result.initialized(), true);
}

TEST(API, empty2) {
  // Arrange: the shape is supplied as a list of one-element INT32 tensors.
  // Both list entries wrap the same dense tensor holding the value 2.
  const auto cpu_allocator =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace());
  auto extent_dense = std::make_shared<pten::DenseTensor>(
      cpu_allocator,
      pten::DenseTensorMeta(pten::DataType::INT32,
                            framework::make_ddim({1}),
                            pten::DataLayout::NCHW));
  auto* extent_value = extent_dense->mutable_data<int32_t>();
  extent_value[0] = 2;

  paddle::experimental::Tensor first_extent(extent_dense);
  paddle::experimental::Tensor second_extent(extent_dense);
  std::vector<paddle::experimental::Tensor> shape_list{first_extent,
                                                       second_extent};

  // Act: create a FLOAT32 tensor from the tensor-list shape.
  auto result =
      paddle::experimental::empty(shape_list, pten::DataType::FLOAT32);

  // Assert: a 2x2 FLOAT32 / NCHW CPU tensor that reports itself initialized.
  ASSERT_EQ(result.shape().size(), 2UL);
  ASSERT_EQ(result.shape()[0], 2);
  ASSERT_EQ(result.numel(), 4);
  ASSERT_EQ(result.is_cpu(), true);
  ASSERT_EQ(result.type(), pten::DataType::FLOAT32);
  ASSERT_EQ(result.layout(), pten::DataLayout::NCHW);
  ASSERT_EQ(result.initialized(), true);
}

TEST(API, empty3) {
  // Arrange: the shape is supplied directly as a vector of int64_t extents.
  std::vector<int64_t> requested_shape{2, 3};

  // Act: create an INT32 tensor with that shape.
  auto result =
      paddle::experimental::empty(requested_shape, pten::DataType::INT32);

  // Assert: a 2x3 INT32 / NCHW CPU tensor that reports itself initialized.
  ASSERT_EQ(result.shape().size(), 2UL);
  ASSERT_EQ(result.shape()[0], 2);
  ASSERT_EQ(result.numel(), 6);
  ASSERT_EQ(result.is_cpu(), true);
  ASSERT_EQ(result.type(), pten::DataType::INT32);
  ASSERT_EQ(result.layout(), pten::DataLayout::NCHW);
  ASSERT_EQ(result.initialized(), true);
}

} // namespace tests
} // namespace paddle
Loading