From 3f5f789ed8e2f64c83c672f5ec842332879f1c04 Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Thu, 14 Oct 2021 12:32:29 +0000 Subject: [PATCH] remove mkldnn tensor & polish details --- cmake/generic.cmake | 2 +- cmake/tcmpt.cmake | 9 +- paddle/fluid/framework/eigen.h | 44 ----- ...est_reference_count_pass_last_lived_ops.cc | 2 +- paddle/fluid/framework/operator.cc | 15 -- paddle/fluid/framework/tcmpt_utils.cc | 38 +--- paddle/fluid/framework/type_defs.h | 2 - paddle/fluid/imperative/prepared_operator.cc | 15 -- .../pscore/heter_listen_and_server_test.cc | 2 +- .../operators/pscore/heter_server_test.cc | 2 +- paddle/fluid/operators/scale_op_xpu.cc | 1 - paddle/fluid/operators/sign_op.cc | 3 +- paddle/tcmpt/api/include/core.h | 1 - paddle/tcmpt/core/mkldnn_dense_tensor.h | 56 ------ paddle/tcmpt/cpu/CMakeLists.txt | 1 + paddle/tcmpt/cuda/CMakeLists.txt | 1 + paddle/tcmpt/cuda/linalg.cu | 20 +-- paddle/tcmpt/eigen/common.h | 170 ++++++++++++++++++ paddle/tcmpt/eigen/dot.h | 50 ++++++ paddle/tcmpt/eigen/fill.h | 5 +- paddle/tcmpt/eigen/mean.h | 6 +- paddle/tcmpt/eigen/scale.h | 6 +- paddle/tcmpt/eigen/sign.h | 6 +- 23 files changed, 249 insertions(+), 208 deletions(-) delete mode 100644 paddle/tcmpt/core/mkldnn_dense_tensor.h create mode 100644 paddle/tcmpt/eigen/common.h create mode 100644 paddle/tcmpt/eigen/dot.h diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 7390bd17e386e..12b4530a77a4c 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -117,7 +117,7 @@ function(find_fluid_modules TARGET_NAME) endfunction(find_fluid_modules) set_property(GLOBAL PROPERTY TCMPT_MODULES "") -# find all top modules is used for paddle static library +# find all tcmpt modules is used for paddle static library # for building inference libs function(find_tcmpt_modules TARGET_NAME) get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) diff --git a/cmake/tcmpt.cmake b/cmake/tcmpt.cmake index 3ffc168c6bed0..819cd42287974 100644 --- a/cmake/tcmpt.cmake +++ 
b/cmake/tcmpt.cmake @@ -1,4 +1,10 @@ -# TODO(chenweihang): keep message comment for debuging, remove it if needless +# `kernel_instantiate` function is used to declare the template instantiation of +# the Kernel function generated through code analysis, only for windows +# (because the windows platform msvc compiler cannot automatically instantiate +# the template function through decltype) +# TODO(chenweihang): keep message comment for debugging, it is still useful, +# I will remove it if needless later + function(kernel_instantiate TARGET) set(target_file ${CURRENT_BINARY_DIR}/${TARGET}.tmp CACHE INTERNAL "${CURRENT_BINARY_DIR}/${TARGET} file") set(target_file_final ${CURRENT_BINARY_DIR}/${TARGET}) @@ -36,7 +42,6 @@ function(kernel_instantiate TARGET) endforeach() # message(STATUS "INST CONTENT: ${instantiate_context}") file(APPEND ${target_file} "${instantiate_context}\n") - # copy_if_different(${target_file} ${target_file_final}) string(REPLACE "." "_" cmd_name ${TARGET}) # this is a dummy target for custom command, should always be run firstly to update ${target_file_final} # TODO(chenweihang): nameing rule need to enchance diff --git a/paddle/fluid/framework/eigen.h b/paddle/fluid/framework/eigen.h index 56843b9aa6853..a6abda8a83bc8 100644 --- a/paddle/fluid/framework/eigen.h +++ b/paddle/fluid/framework/eigen.h @@ -19,8 +19,6 @@ limitations under the License. */ #include "paddle/fluid/framework/tensor.h" #include "unsupported/Eigen/CXX11/Tensor" -#include "paddle/tcmpt/core/dense_tensor.h" - namespace paddle { namespace framework { @@ -69,28 +67,6 @@ struct EigenTensor { static ConstType From(const Tensor& tensor) { return From(tensor, tensor.dims_); } - - // for pt::DenseTensor - static Type From(pt::DenseTensor& tensor, DDim dims) { // NOLINT - // why tensor.data() not work?
- // return Type(const_cast(reinterpret_cast(tensor.data())), - // EigenDim::From(dims)); - return Type(const_cast(tensor.data()), EigenDim::From(dims)); - } - - static Type From(pt::DenseTensor& tensor) { // NOLINT - return From(tensor, tensor.dims()); - } // NOLINT - - static ConstType From(const pt::DenseTensor& tensor, DDim dims) { - // return ConstType(reinterpret_cast(tensor.data()), - // EigenDim::From(dims)); - return ConstType(tensor.data(), EigenDim::From(dims)); - } - - static ConstType From(const pt::DenseTensor& tensor) { - return From(tensor, tensor.dims()); - } }; template { const Tensor& tensor) { // NOLINT return EigenVector::From(tensor, {product(tensor.dims_)}); } - - // for pt::DenseTensor - static typename EigenVector::Type Flatten( - pt::DenseTensor& tensor) { // NOLINT - return EigenVector::From(tensor, {product(tensor.dims())}); - } - - static typename EigenVector::ConstType Flatten( - const pt::DenseTensor& tensor) { // NOLINT - return EigenVector::From(tensor, {product(tensor.dims())}); - } }; template ()); } - - // for pt::DenseTensor - static Type From(pt::DenseTensor& tensor) { // NOLINT - return Type(const_cast(tensor.data())); - } - - static ConstType From(const pt::DenseTensor& tensor) { - return ConstType(tensor.data()); - } }; // Define Tensor with 32-bit index. 
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc b/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc index 8cf541637557b..f410171f99896 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/test_reference_count_pass_last_lived_ops.cc @@ -21,7 +21,7 @@ #include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/program_desc.h" -USE_NO_KERNEL_OP(scale); +USE_OP(scale); USE_OP(elementwise_mul); USE_OP(elementwise_add); USE_OP(elementwise_add_grad); diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 1b0cf462479d2..a47089ecba5cd 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1155,7 +1155,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope, // and RCOM backend, the XPU, NPU and MKLDNN will be supported in the second // phase - // VLOG(1) << "Pt KernelFactory: " << pt::KernelFactory::Instance(); if (FLAGS_use_pt_kernel && pt::KernelFactory::Instance().ContainsKernel(type_.c_str())) { if (pt_kernel_key_.get() == nullptr || pt_kernel_.get() == nullptr) { @@ -1263,17 +1262,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } } -static bool ContainSelectedRows(const VariableValueMap& inputs) { - for (auto& var_pair : inputs) { - for (auto* var : var_pair.second) { - if (var->IsType()) { - return true; - } - } - } - return false; -} - // TODO(chenweihang): now only check single var input static bool IsValidVar(const std::string& name, const VariableValueMap& inputs) { @@ -1303,9 +1291,6 @@ static pt::KernelName ConstructPtKernelName(const std::string& op_type, const VariableValueMap& inputs) { std::string overload_name; // TODO(chenweihang): adapt SelectedRows by xiaowei's design - // if (ContainSelectedRows(inputs)) { - // overload_name = 
pt::kContainSelectedRowsSuffix; - // } if (ContainHostTensor(op_proto, inputs)) { if (overload_name != "") { overload_name += "."; diff --git a/paddle/fluid/framework/tcmpt_utils.cc b/paddle/fluid/framework/tcmpt_utils.cc index f83f6b593a60d..71ef2d3450ae9 100644 --- a/paddle/fluid/framework/tcmpt_utils.cc +++ b/paddle/fluid/framework/tcmpt_utils.cc @@ -13,18 +13,15 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/tcmpt_utils.h" + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows.h" - #include "paddle/fluid/framework/variable.h" -#include "paddle/tcmpt/api/include/core.h" -#include "paddle/tcmpt/api/include/symbols.h" namespace paddle { namespace framework { // TODO(chenweihang, shixiaowei): adapt SelectedRows - template <> std::shared_ptr MakeTensorImpl( const LoDTensor& tensor, pt::Backend backend, pt::DataType dtype, @@ -167,38 +164,5 @@ std::shared_ptr OutputVariableToPtTensor( return nullptr; } -/* For MKLDNNDenseTensor (move this part into a single file later) */ -#ifdef PADDLE_WITH_MKLDNN - -template <> -std::shared_ptr MakeTensorImpl( - const Tensor& tensor, const platform::Place& place, - proto::VarType::Type type) { - auto holder = tensor.Holder(); - auto tensor_impl = std::make_shared( - pt::TensorMeta(tensor.dims(), pt::TransToPtBackend(place), - pt::TransToPtDataType(type), - pt::TransToPtLayout(tensor.layout()), tensor.offset()), - pt::TensorStatus()); - - if (holder != nullptr) { - tensor_impl->ShareAllocation(tensor.Holder()); - } else { - VLOG(1) << "Old MKLDNN Tensor holder is nullptr."; - } - - tensor_impl->set_format(tensor.format()); - return tensor_impl; -} - -template <> -void ShareTensorImpl(pt::MKLDNNDenseTensor* tensor_impl, Tensor* out) { - out->ResetHolderWithType(tensor_impl->allocation(), - pt::TransToProtoVarType(tensor_impl->type())); - out->set_format(tensor_impl->format()); -} - -#endif - } // 
namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/type_defs.h b/paddle/fluid/framework/type_defs.h index 9d19d0bce6071..1c5469d02c3ef 100644 --- a/paddle/fluid/framework/type_defs.h +++ b/paddle/fluid/framework/type_defs.h @@ -33,7 +33,6 @@ class BlockDesc; class Variable; class InferNoNeedBufferVarsFN; -// TODO(chenweihang): AttirbuteMap also need to be ordered // TODO(panyx0718): Replace vector with something like gtl::Vector. using VariableNameMap = std::map>; using VariableValueMap = std::map>; @@ -44,7 +43,6 @@ using Attribute = boost::variant< std::vector, bool, std::vector, BlockDesc*, int64_t, std::vector, std::vector, std::vector>; -// TODO(chenweihang): AttirbuteMap also need to be ordered using AttributeMap = std::unordered_map; #ifdef PADDLE_WITH_ASCEND_CL diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index c3cda9e8e992c..f7e57bec1da9e 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -137,18 +137,6 @@ static framework::VariableValueMap BuildInputMap( return inputs; } -template -static bool ContainSelectedRows(const NameVarMap& inputs) { - for (auto& var_pair : inputs) { - for (auto& var : var_pair.second) { - if (var->Var().template IsType()) { - return true; - } - } - } - return false; -} - // TODO(chenweihang): enhance rules, not all dispensable inputs // are host tensor, now only for scale kernel verify template @@ -169,9 +157,6 @@ static pt::KernelName ConstructPtKernelName( const NameVarMap& inputs) { std::string overload_name; // TODO(chenweihang): adapt SelectedRows by xiaowei's design - // if (ContainSelectedRows(inputs)) { - // overload_name = pt::kContainSelectedRowsSuffix; - // } if (ContainHostTensor(op_proto, inputs)) { if (overload_name != "") { overload_name += "."; diff --git a/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc 
b/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc index bbc7f01597900..3b005e10d9b98 100644 --- a/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc +++ b/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc @@ -32,7 +32,7 @@ using MultiVarMsg = ::paddle::distributed::MultiVariableMessage; using VarMsg = ::paddle::distributed::VariableMessage; DECLARE_double(eager_delete_tensor_gb); -USE_NO_KERNEL_OP(scale); +USE_OP(scale); USE_NO_KERNEL_OP(heter_listen_and_serv); framework::BlockDesc* AppendSendAndRecvBlock(framework::ProgramDesc* program) { diff --git a/paddle/fluid/operators/pscore/heter_server_test.cc b/paddle/fluid/operators/pscore/heter_server_test.cc index 3e6897073e129..df2eb70b144e4 100644 --- a/paddle/fluid/operators/pscore/heter_server_test.cc +++ b/paddle/fluid/operators/pscore/heter_server_test.cc @@ -29,7 +29,7 @@ namespace distributed = paddle::distributed; using MultiVarMsg = ::paddle::distributed::MultiVariableMessage; using VarMsg = ::paddle::distributed::VariableMessage; -USE_NO_KERNEL_OP(scale); +USE_OP(scale); std::shared_ptr b_rpc_service; diff --git a/paddle/fluid/operators/scale_op_xpu.cc b/paddle/fluid/operators/scale_op_xpu.cc index c467f3f89d064..e0dfad91570ad 100644 --- a/paddle/fluid/operators/scale_op_xpu.cc +++ b/paddle/fluid/operators/scale_op_xpu.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace operators { - template class ScaleXPUKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/sign_op.cc b/paddle/fluid/operators/sign_op.cc index a491da3931964..6207c33f9d629 100644 --- a/paddle/fluid/operators/sign_op.cc +++ b/paddle/fluid/operators/sign_op.cc @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include - #include "paddle/fluid/operators/sign_op.h" +#include #include "paddle/fluid/platform/float16.h" namespace paddle { diff --git a/paddle/tcmpt/api/include/core.h b/paddle/tcmpt/api/include/core.h index d6b73dcbee66e..fd863186abb30 100644 --- a/paddle/tcmpt/api/include/core.h +++ b/paddle/tcmpt/api/include/core.h @@ -19,5 +19,4 @@ limitations under the License. */ #include "paddle/tcmpt/core/dense_tensor.h" #include "paddle/tcmpt/core/kernel_context.h" #include "paddle/tcmpt/core/kernel_factory.h" -#include "paddle/tcmpt/core/mkldnn_dense_tensor.h" #include "paddle/tcmpt/core/scalar.h" diff --git a/paddle/tcmpt/core/mkldnn_dense_tensor.h b/paddle/tcmpt/core/mkldnn_dense_tensor.h deleted file mode 100644 index 0aea392fce93d..0000000000000 --- a/paddle/tcmpt/core/mkldnn_dense_tensor.h +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#ifdef PADDLE_WITH_MKLDNN - -#include "mkldnn.hpp" - -#include "paddle/tcmpt/core/dense_tensor.h" - -namespace pt { - -class MKLDNNDenseTensor : public DenseTensor { - public: - // Not allowed to initialize a tensor without descriptive metadata - MKLDNNDenseTensor() = delete; - - MKLDNNDenseTensor(const MKLDNNDenseTensor&) = delete; - MKLDNNDenseTensor& operator=(const MKLDNNDenseTensor&) = delete; - MKLDNNDenseTensor(MKLDNNDenseTensor&&) = delete; - MKLDNNDenseTensor& operator=(MKLDNNDenseTensor&&) = delete; - - MKLDNNDenseTensor(const TensorMeta& meta, const TensorStatus& status) - : DenseTensor(meta, status) {} - - mkldnn::memory::format_tag format() const { return format_; } - - void set_format(const mkldnn::memory::format_tag format) { format_ = format; } - - private: - /** - * @brief the detail format of memory block which have layout as kMKLDNN - * - * @note MKLDNN lib support various memory format like nchw, nhwc, nChw8C, - * nChw16c, etc. For a MKLDNN memory block, layout will be set as - * DataLayout::kMKLDNN meanwhile detail memory format will be kept in - * this field. 
- */ - mkldnn::memory::format_tag format_ = mkldnn::memory::format_tag::undef; -}; - -} // namespace pt - -#endif diff --git a/paddle/tcmpt/cpu/CMakeLists.txt b/paddle/tcmpt/cpu/CMakeLists.txt index fbb0a45266003..3480ebba53155 100644 --- a/paddle/tcmpt/cpu/CMakeLists.txt +++ b/paddle/tcmpt/cpu/CMakeLists.txt @@ -1,5 +1,6 @@ if(WIN32) set(CURRENT_BINARY_DIR ${PADDLE_BINARY_DIR}/paddle/tcmpt/cpu) + kernel_instantiate(creation.cc) kernel_instantiate(math.cc) kernel_instantiate(linalg.cc) endif() diff --git a/paddle/tcmpt/cuda/CMakeLists.txt b/paddle/tcmpt/cuda/CMakeLists.txt index 94de051e2e3a4..458d93529f435 100644 --- a/paddle/tcmpt/cuda/CMakeLists.txt +++ b/paddle/tcmpt/cuda/CMakeLists.txt @@ -1,5 +1,6 @@ if(WIN32) set(CURRENT_BINARY_DIR ${PADDLE_BINARY_DIR}/paddle/tcmpt/cuda) + kernel_instantiate(creation.cu) kernel_instantiate(math.cu) kernel_instantiate(linalg.cu) endif() diff --git a/paddle/tcmpt/cuda/linalg.cu b/paddle/tcmpt/cuda/linalg.cu index acfdf59b27441..118d3326e5fb5 100644 --- a/paddle/tcmpt/cuda/linalg.cu +++ b/paddle/tcmpt/cuda/linalg.cu @@ -15,10 +15,9 @@ #include "paddle/tcmpt/cuda/linalg.h" #include "paddle/tcmpt/core/kernel_registry.h" +#include "paddle/tcmpt/eigen/dot.h" // See Note [ Why still include the fluid headers? 
] -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/platform/complex.h" namespace pt { @@ -28,22 +27,7 @@ void Dot(const CUDAContext& dev_ctx, const DenseTensor& x, const DenseTensor& y, DenseTensor* out) { - out->mutable_data(); - if (1 == out->dims().size()) { - auto eigen_out = paddle::framework::EigenScalar::From(*out); - auto eigen_x = paddle::framework::EigenVector::Flatten(x); - auto eigen_y = paddle::framework::EigenVector::Flatten(y); - - auto& dev = *dev_ctx.eigen_device(); - eigen_out.device(dev) = (eigen_x * eigen_y).sum(); - } else { - auto eigen_out = paddle::framework::EigenMatrix::From(*out); - auto eigen_x = paddle::framework::EigenMatrix::From(x); - auto eigen_y = paddle::framework::EigenMatrix::From(y); - - auto& dev = *dev_ctx.eigen_device(); - eigen_out.device(dev) = (eigen_x * eigen_y).sum(Eigen::DSizes(1)); - } + eigen::Dot(dev_ctx, x, y, out); } } // namespace pt diff --git a/paddle/tcmpt/eigen/common.h b/paddle/tcmpt/eigen/common.h new file mode 100644 index 0000000000000..37bed55a7d97a --- /dev/null +++ b/paddle/tcmpt/eigen/common.h @@ -0,0 +1,170 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +#include "paddle/tcmpt/core/dense_tensor.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace pt { + +// EigenDim converts paddle::platform::DDim into Eigen::DSizes. 
+template +struct EigenDim { + using Type = Eigen::DSizes; + + static Type From(const DDim& dims) { + PADDLE_ENFORCE_EQ(arity(dims), + D, + paddle::platform::errors::InvalidArgument( + "Input dimension size should be equal to %d, but " + "received dimension size is %d.", + arity(dims), + D)); + Type ret; + for (int64_t d = 0; d < arity(dims); d++) { + ret[d] = dims[d]; + } + return ret; + } +}; + +// Interpret paddle::platform::Tensor as EigenTensor and EigenConstTensor. +template +struct EigenTensor { + // TODO(qijun) Now, default type in unaligned, and we will make a benchmark on + // the speed of aligned and unaligned version in future. + using Type = Eigen::TensorMap>; + + using ConstType = + Eigen::TensorMap>; + + static Type From(pt::DenseTensor& tensor, DDim dims) { // NOLINT + // why tensor.data() not work? + // return Type(const_cast(reinterpret_cast(tensor.data())), + // EigenDim::From(dims)); + return Type(const_cast(tensor.data()), EigenDim::From(dims)); + } + + static Type From(pt::DenseTensor& tensor) { // NOLINT + return From(tensor, tensor.dims()); + } // NOLINT + + static ConstType From(const pt::DenseTensor& tensor, DDim dims) { + // return ConstType(reinterpret_cast(tensor.data()), + // EigenDim::From(dims)); + return ConstType(tensor.data(), EigenDim::From(dims)); + } + + static ConstType From(const pt::DenseTensor& tensor) { + return From(tensor, tensor.dims()); + } +}; + +template +struct EigenMatrix : public EigenTensor { + static typename EigenMatrix::Type Reshape(pt::DenseTensor& tensor, // NOLINT + int num_col_dims) { + int rank = tensor.dims().size(); + PADDLE_ENFORCE_EQ((num_col_dims > 0 && num_col_dims < rank), + true, + paddle::platform::errors::InvalidArgument( + "Input dimension number(num_col_dims) must be " + "between 0 and %d, but received number is %d.", + rank, + num_col_dims)); + return EigenMatrix::From(tensor, + flatten_to_2d(tensor.dims(), num_col_dims)); + } + + static typename EigenMatrix::ConstType Reshape(const 
pt::DenseTensor& tensor, + int num_col_dims) { + int rank = tensor.dims().size(); + PADDLE_ENFORCE_EQ((num_col_dims > 0 && num_col_dims < rank), + true, + paddle::platform::errors::InvalidArgument( + "Input dimension number(num_col_dims) must be " + "between 0 and %d, but received number is %d.", + rank, + num_col_dims)); + return EigenMatrix::From(tensor, + flatten_to_2d(tensor.dims(), num_col_dims)); + } +}; + +template +struct EigenVector : public EigenTensor { + // Flatten reshapes a Tensor into an EigenVector. + static typename EigenVector::Type Flatten( + pt::DenseTensor& tensor) { // NOLINT + return EigenVector::From(tensor, {product(tensor.dims())}); + } + + static typename EigenVector::ConstType Flatten( + const pt::DenseTensor& tensor) { // NOLINT + return EigenVector::From(tensor, {product(tensor.dims())}); + } +}; + +template +struct EigenScalar { + // Scalar tensor (implemented as a rank-0 tensor) of scalar type T. + using Type = Eigen::TensorMap< + Eigen::TensorFixedSize, MajorType, IndexType>>; + using ConstType = Eigen::TensorMap< + Eigen::TensorFixedSize, MajorType, IndexType>>; + + static Type From(pt::DenseTensor& tensor) { // NOLINT + return Type(const_cast(tensor.data())); + } + + static ConstType From(const pt::DenseTensor& tensor) { + return ConstType(tensor.data()); + } +}; + +// Define Tensor with 32-bit index. 
+template +using Tensor32BitIndex = + Eigen::TensorMap, Eigen::Aligned>; + +template +Eigen::DSizes To32BitDims(const DSizes& in) { + Eigen::DSizes out; + for (int i = 0; i < DSizes::count; ++i) { + out[i] = in[i]; + } + return out; +} + +template +Tensor32BitIndex +To32BitIndex(EigenTensor in) { + using RetType = + Tensor32BitIndex; + return RetType(in.data(), To32BitDims(in.dimensions())); +} + +} // namespace pt diff --git a/paddle/tcmpt/eigen/dot.h b/paddle/tcmpt/eigen/dot.h new file mode 100644 index 0000000000000..5e323e4448409 --- /dev/null +++ b/paddle/tcmpt/eigen/dot.h @@ -0,0 +1,50 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/tcmpt/core/dense_tensor.h" +#include "paddle/tcmpt/eigen/common.h" + +// See Note [ Why still include the fluid headers? 
] +#include "paddle/fluid/operators/eigen/eigen_function.h" + +namespace pt { +namespace eigen { + +template +void Dot(const DevCtx& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + out->mutable_data(); + if (1 == out->dims().size()) { + auto eigen_out = pt::EigenScalar::From(*out); + auto eigen_x = pt::EigenVector::Flatten(x); + auto eigen_y = pt::EigenVector::Flatten(y); + + auto& dev = *dev_ctx.eigen_device(); + eigen_out.device(dev) = (eigen_x * eigen_y).sum(); + } else { + auto eigen_out = pt::EigenMatrix::From(*out); + auto eigen_x = pt::EigenMatrix::From(x); + auto eigen_y = pt::EigenMatrix::From(y); + + auto& dev = *dev_ctx.eigen_device(); + eigen_out.device(dev) = (eigen_x * eigen_y).sum(Eigen::DSizes(1)); + } +} + +} // namespace eigen +} // namespace pt diff --git a/paddle/tcmpt/eigen/fill.h b/paddle/tcmpt/eigen/fill.h index 6a21ca6932cd5..fb56ccdd8e125 100644 --- a/paddle/tcmpt/eigen/fill.h +++ b/paddle/tcmpt/eigen/fill.h @@ -15,8 +15,9 @@ limitations under the License. */ #pragma once #include "paddle/tcmpt/core/dense_tensor.h" +#include "paddle/tcmpt/eigen/common.h" -#include "paddle/fluid/framework/eigen.h" +// See Note [ Why still include the fluid headers? ] #include "paddle/fluid/operators/eigen/eigen_function.h" namespace pt { @@ -50,7 +51,7 @@ void fill(const DeviceContext& context, DenseTensor* tensor, VType val) { static_cast(std::numeric_limits::max()), static_cast(val))); - auto t = paddle::framework::EigenVector::Flatten(*tensor); + auto t = pt::EigenVector::Flatten(*tensor); t.device(*context.eigen_device()) = t.constant(static_cast(val)); } diff --git a/paddle/tcmpt/eigen/mean.h b/paddle/tcmpt/eigen/mean.h index bd2c5ad2bf219..e70870e7954b7 100644 --- a/paddle/tcmpt/eigen/mean.h +++ b/paddle/tcmpt/eigen/mean.h @@ -15,9 +15,9 @@ limitations under the License. */ #pragma once #include "paddle/tcmpt/core/dense_tensor.h" +#include "paddle/tcmpt/eigen/common.h" // See Note [ Why still include the fluid headers? 
] -#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/operators/eigen/eigen_function.h" namespace pt { @@ -30,8 +30,8 @@ void Mean(const DevCtx& dev_ctx, const DenseTensor& x, DenseTensor* out) { // TODO(chenweihang): if we design new tensor, we should support // the low-level calc functor use new tensor as input, // which may be a big project! - auto eigen_x = paddle::framework::EigenVector::Flatten(x); - auto eigen_out = paddle::framework::EigenScalar::From(*out); + auto eigen_x = pt::EigenVector::Flatten(x); + auto eigen_out = pt::EigenScalar::From(*out); auto& dev = *dev_ctx.eigen_device(); eigen_out.device(dev) = eigen_x.mean(); diff --git a/paddle/tcmpt/eigen/scale.h b/paddle/tcmpt/eigen/scale.h index 5bea4fb300af4..152cb61800c8b 100644 --- a/paddle/tcmpt/eigen/scale.h +++ b/paddle/tcmpt/eigen/scale.h @@ -15,9 +15,9 @@ limitations under the License. */ #pragma once #include "paddle/tcmpt/core/dense_tensor.h" +#include "paddle/tcmpt/eigen/common.h" // See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/operators/eigen/eigen_function.h" namespace pt { @@ -32,8 +32,8 @@ void Scale(const DevCtx& dev_ctx, DenseTensor* out) { // calc out->mutable_data(); - auto eigen_out = paddle::framework::EigenVector::Flatten(*out); - auto eigen_x = paddle::framework::EigenVector::Flatten(x); + auto eigen_out = pt::EigenVector::Flatten(*out); + auto eigen_x = pt::EigenVector::Flatten(x); auto& dev = *dev_ctx.eigen_device(); // TODO(chenweihang): now the eigen function here need the dtype of scale, // eigen_x, bias should be same, so here need cast for two scalar arg, diff --git a/paddle/tcmpt/eigen/sign.h b/paddle/tcmpt/eigen/sign.h index b138123e81ee0..d41702576b3a1 100644 --- a/paddle/tcmpt/eigen/sign.h +++ b/paddle/tcmpt/eigen/sign.h @@ -15,9 +15,9 @@ limitations under the License. 
*/ #pragma once #include "paddle/tcmpt/core/dense_tensor.h" +#include "paddle/tcmpt/eigen/common.h" // See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/operators/eigen/eigen_function.h" namespace pt { @@ -33,8 +33,8 @@ void Sign(const DevCtx& dev_ctx, const DenseTensor& x, DenseTensor* out) { // TODO(chenweihang): if we design new tensor, we should support // the low-level calc functor use new tensor as input, // which may be a big project! - auto eigen_out = paddle::framework::EigenVector::Flatten(*out); - auto eigen_x = paddle::framework::EigenVector::Flatten(x); + auto eigen_out = pt::EigenVector::Flatten(*out); + auto eigen_x = pt::EigenVector::Flatten(x); auto& dev = *dev_ctx.eigen_device(); paddle::operators::EigenSign, T>::Eval(