Skip to content

Commit

Permalink
[Pten]Move kernel_primitives lib to Pten directory (PaddlePaddle#39169)
Browse files Browse the repository at this point in the history
* move kernel_primitives

* use pten's errors
  • Loading branch information
YuanRisheng authored Jan 26, 2022
1 parent bd5c962 commit 452bcbe
Show file tree
Hide file tree
Showing 11 changed files with 578 additions and 403 deletions.
235 changes: 2 additions & 233 deletions paddle/fluid/operators/kernel_primitives/functor_primitives.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,241 +13,10 @@
// limitations under the License.

#pragma once

#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/pten/kernels/funcs/eigen/extensions.h"
#include "paddle/pten/kernels/primitive/functor_primitives.h"

namespace paddle {
namespace operators {
namespace kernel_primitives {
namespace details {

static __device__ __forceinline__ platform::float16 Exp(platform::float16 x) {
return ::Eigen::numext::exp(x);
}

static __device__ __forceinline__ float Exp(float x) { return expf(x); }

static __device__ __forceinline__ double Exp(double x) { return exp(x); }

static __device__ __forceinline__ platform::float16 Log(platform::float16 x) {
return ::Eigen::numext::log(x);
}

static __device__ __forceinline__ float Log(float x) { return logf(x); }

static __device__ __forceinline__ double Log(double x) { return log(x); }

} // namespace details

/******************************** Unary Functor *******************************/

/**
* @brief Default unary exp functor
*/
template <typename Tx, typename Ty = Tx>
struct ExpFunctor {
HOSTDEVICE inline ExpFunctor() {}

HOSTDEVICE explicit inline ExpFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(details::Exp(x));
}
};

/**
* @brief Default unary identity functor
*/
template <typename Tx, typename Ty = Tx>
struct IdentityFunctor {
HOSTDEVICE inline IdentityFunctor() {}

HOSTDEVICE explicit inline IdentityFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(x);
}
};

/**
* @brief Default unary div functor. Divide by a constant
*/
template <typename Tx, typename Ty = Tx>
struct DivideFunctor {
private:
using MPType = typename ::paddle::operators::details::MPTypeTrait<Tx>::Type;

public:
HOSTDEVICE inline DivideFunctor() { n_inv = static_cast<MPType>(1.0f); }

HOSTDEVICE explicit inline DivideFunctor(int n) : n_inv((MPType)(1.0 / n)) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(static_cast<MPType>(x) * n_inv);
}

private:
MPType n_inv;
};

/**
* @brief Default inverse functor
*/
template <typename Tx, typename Ty = Tx>
struct InverseFunctor {
HOSTDEVICE inline InverseFunctor() {}

HOSTDEVICE explicit inline InverseFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(-x);
}
};

/**
* @brief Default unary square functor
*/
template <typename Tx, typename Ty = Tx>
struct SquareFunctor {
HOSTDEVICE inline SquareFunctor() {}

HOSTDEVICE explicit inline SquareFunctor(int n) {}

HOSTDEVICE inline Ty operator()(const Tx x) const {
return static_cast<Ty>(x) * static_cast<Ty>(x);
}
};

/****************************** Binary Functor ********************************/

/**
* @brief Default binary min functor
*/
template <typename T>
struct MinFunctor {
inline T initial() { return static_cast<T>(std::numeric_limits<T>::max()); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return (b < a) ? b : a;
}
};

/**
* @brief Default binary max functor
*/
template <typename T>
struct MaxFunctor {
inline T initial() {
return static_cast<T>(std::numeric_limits<T>::lowest());
}

__device__ __forceinline__ T operator()(const T a, const T b) const {
return (b > a) ? b : a;
}
};

/**
* @brief Default binary add functor
*/
template <typename T>
struct AddFunctor {
inline T initial() { return static_cast<T>(0.0f); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b + a;
}
};

/**
* @brief Default binary add functor
*/
template <typename T>
struct MulFunctor {
inline T initial() { return static_cast<T>(1.0f); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b * a;
}
};

/**
* @brief Default binary logic or functor
*/
template <typename T>
struct LogicalOrFunctor {
inline T initial() { return static_cast<T>(false); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b || a;
}
};

/**
* @brief Default binary logic and functor
*/
template <typename T>
struct LogicalAndFunctor {
inline T initial() { return static_cast<T>(true); }

__device__ __forceinline__ T operator()(const T a, const T b) const {
return b && a;
}
};

/**
* @brief Default binary sub functor
*/
template <typename T>
struct SubFunctor {
inline T initial() { return static_cast<T>(0.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const { return a - b; }
};

/**
* @brief Default binary div functor
*/
template <typename T, typename Enable = void>
struct DivFunctor {
inline T initial() { return static_cast<T>(1.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const { return a / b; }
};

template <typename T>
struct DivFunctor<T,
typename std::enable_if<std::is_integral<T>::value>::type> {
inline T initial() { return static_cast<T>(1.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const {
// For int32/int64, need to check whether the divison is zero.
PADDLE_ENFORCE_NE(b, 0,
platform::errors::InvalidArgument(
"Integer division by zero encountered "
"in (floor) divide. Please check the input value."));
return a / b;
}
};

/**
* @brief Default binary floor divide functor
*/
template <typename T>
struct FloorDivFunctor {
inline T initial() { return static_cast<T>(1.0f); }

inline HOSTDEVICE T operator()(const T a, const T b) const {
PADDLE_ENFORCE_NE(b, 0,
platform::errors::InvalidArgument(
"Integer division by zero encountered "
"in (floor) divide. Please check the input value."));
return static_cast<T>(std::trunc(a / b));
}
};

} // namespace kernel_primitives
namespace kernel_primitives = pten::kps;
} // namespace operators
} // namespace paddle
55 changes: 2 additions & 53 deletions paddle/fluid/operators/kernel_primitives/kernel_primitives.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,61 +13,10 @@
// limitations under the License.

#pragma once
#include "paddle/fluid/operators/kernel_primitives/helper_primitives.h"
#ifdef PADDLE_WITH_XPU2
#include "paddle/fluid/operators/kernel_primitives/compute_primitives_xpu2.h"
#include "paddle/fluid/operators/kernel_primitives/datamover_primitives_xpu2.h"
#include "paddle/fluid/operators/kernel_primitives/functor_primitives_xpu2.h"

#define KPStream XPUStream
#define KPDevice paddle::platform::XPUDeviceContext
#define _ptr_ _global_ptr_
#define __forceinline__ __inline__
#define __restrict__

#define THREAD_ID_X core_id()
#define THREAD_ID_Y 0
#define THREAD_ID_Z 0

#define BLOCK_NUM_X core_num()
#define BLOCK_NUM_Y 0
#define BLOCK_NUM_Z 0

#define BLOCK_ID_X cluster_id()
#define BLOCK_ID_Y 0
#define BLOCK_ID_Z 0

#define GRID_NUM_X cluster_num()
#define GRID_NUM_Y 0
#define GRID_NUM_Z 0
#else
#include "paddle/fluid/operators/kernel_primitives/compute_primitives.h"
#include "paddle/fluid/operators/kernel_primitives/datamover_primitives.h"
#include "paddle/fluid/operators/kernel_primitives/functor_primitives.h"

#define KPStream gpuStream_t
#define KPDevice paddle::platform::CUDADeviceContext
#define _ptr_

#define THREAD_ID_X threadIdx.x
#define THREAD_ID_Y threadIdx.y
#define THREAD_ID_Z threadIdx.z

#define BLOCK_NUM_X blockDim.x
#define BLOCK_NUM_Y blockDim.y
#define BLOCK_NUM_Z blockDim.z

#define BLOCK_ID_X blockIdx.x
#define BLOCK_ID_Y blockIdx.y
#define BLOCK_ID_Z blockIdx.z

#define GRID_NUM_X gridDim.x
#define GRID_NUM_Y gridDim.y
#define GRID_NUM_Z gridDim.z
#endif
#include "paddle/pten/kernels/primitive/kernel_primitives.h"

namespace paddle {
namespace operators {
namespace kernel_primitives {}
namespace kernel_primitives = pten::kps;
}
}
4 changes: 2 additions & 2 deletions paddle/pten/kernels/funcs/elementwise_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ limitations under the License. */
#include "paddle/pten/kernels/empty_kernel.h"

#if defined(__NVCC__) || defined(__HIPCC__)
#include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h"
#include "paddle/fluid/platform/aligned_vector.h"
#include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
#include "paddle/fluid/platform/function_traits.h"
#include "paddle/pten/kernels/primitive/kernel_primitives.h"

namespace kps = paddle::operators::kernel_primitives;
namespace kps = pten::kps;

#endif

Expand Down
4 changes: 2 additions & 2 deletions paddle/pten/kernels/gpu/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ namespace cub = hipcub;

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/fast_divmod.h"
#include "paddle/fluid/string/string_helper.h"
#include "paddle/pten/core/array.h"
#include "paddle/pten/core/enforce.h"
#include "paddle/pten/kernels/primitive/kernel_primitives.h"

#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
Expand All @@ -51,7 +51,7 @@ namespace cub = hipcub;
#define REDUCE_SPLIT_BOUNDARY 512
#define REDUCE_VEC_SIZE 4

namespace kps = paddle::operators::kernel_primitives;
namespace kps = pten::kps;

namespace pten {
namespace kernels {
Expand Down
Loading

0 comments on commit 452bcbe

Please sign in to comment.