Commit

Merge branch 'FixEighOP' of https://github.com/Zjq9409/Paddle into FixEighOP
Zjq9409 committed Sep 17, 2021
2 parents da51c6c + 0ce2584 commit 6c46c22
Showing 2 changed files with 95 additions and 81 deletions.
139 changes: 85 additions & 54 deletions paddle/fluid/operators/math/eigen_values_vectors.h
@@ -16,7 +16,6 @@

#include "Eigen/Core"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/operators/math/complex_functors.h"
#include "paddle/fluid/operators/svd_helper.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/cusolver.h"
@@ -28,14 +27,81 @@ namespace math {

template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix =
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
using InputMatrixMap = Eigen::Map<
const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;

template <typename T>
using InputMatrixMap = Eigen::Map<const EigenMatrix<T>>;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using OutputMatrixMap = Eigen::Map<
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;

template <typename ValueType>
inline void ComputeFloatEigenvaluesAndVectors(ValueType *x_data,
ValueType *eigenvalues_data,
ValueType *eigenvectors_data,
int batches, int rows, int cols,
bool has_vectors) {
int stride = rows * cols;
for (int i = 0; i < batches; i++) {
auto m = InputMatrixMap<ValueType>(x_data + i * stride, rows, cols);
auto eigenvalues =
OutputMatrixMap<ValueType>(eigenvalues_data + i * rows, 1, rows);
auto eigenvectors =
OutputMatrixMap<ValueType>(eigenvectors_data + i * stride, rows, cols);

Eigen::SelfAdjointEigenSolver<Eigen::Matrix<
ValueType, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
eigen_solver(m, has_vectors ? Eigen::ComputeEigenvectors
: Eigen::EigenvaluesOnly);
PADDLE_ENFORCE_EQ(
eigen_solver.info(), Eigen::Success,
platform::errors::InvalidArgument(
"Self Adjoint Eigen decomposition is not successful. "
"The %d-th input matrice might not be not be positive definite.",
i));

eigenvalues = eigen_solver.eigenvalues().transpose();
if (has_vectors) {
eigenvectors = eigen_solver.eigenvectors();
}
}
}
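
For readers unfamiliar with the batching pattern used above, the following is a minimal standalone sketch (not part of this commit; the buffer contents and sizes are made up for illustration) of how a flat row-major buffer is viewed slice by slice with Eigen::Map and handed to Eigen::SelfAdjointEigenSolver:

#include <iostream>

#include "Eigen/Dense"

int main() {
  // Two 2x2 symmetric matrices stored back-to-back in one flat row-major
  // buffer, mirroring the batched layout the helper above iterates over.
  double data[] = {4.0, 1.0, 1.0, 3.0,   // batch 0
                   2.0, 0.0, 0.0, 5.0};  // batch 1
  const int batches = 2, rows = 2, cols = 2, stride = rows * cols;

  using Mat =
      Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
  for (int i = 0; i < batches; ++i) {
    // Map the i-th slice of the buffer as a rows x cols matrix (no copy).
    Eigen::Map<const Mat> m(data + i * stride, rows, cols);
    Eigen::SelfAdjointEigenSolver<Mat> solver(m, Eigen::EigenvaluesOnly);
    std::cout << "batch " << i << " eigenvalues: "
              << solver.eigenvalues().transpose() << std::endl;
  }
  return 0;
}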

template <typename T>
using OutputMatrixMap = Eigen::Map<EigenMatrix<T>>;
template <typename T, typename ValueType>
inline void ComputeComplexEigenvaluesAndVectors(T *x_data,
ValueType *eigenvalues_data,
T *eigenvectors_data,
int batches, int rows, int cols,
bool has_vectors) {
using Complex = std::complex<ValueType>;
Complex *input = reinterpret_cast<Complex *>(x_data);
Complex *eigenvectors_data_ = reinterpret_cast<Complex *>(eigenvectors_data);

int stride = rows * cols;
for (int i = 0; i < batches; i++) {
auto m = InputMatrixMap<Complex>(input + i * stride, rows, cols);
auto eigenvalues =
OutputMatrixMap<ValueType>(eigenvalues_data + i * rows, 1, rows);
auto eigenvectors =
OutputMatrixMap<Complex>(eigenvectors_data_ + i * stride, rows, cols);

Eigen::SelfAdjointEigenSolver<
Eigen::Matrix<Complex, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
eigen_solver(m, has_vectors ? Eigen::ComputeEigenvectors
: Eigen::EigenvaluesOnly);
PADDLE_ENFORCE_EQ(
eigen_solver.info(), Eigen::Success,
platform::errors::InvalidArgument(
"Self Adjoint Eigen decomposition is not successful. "
"The %d-th input matrice might not be not be positive definite.",
i));

eigenvalues = eigen_solver.eigenvalues().transpose();
if (has_vectors) {
eigenvectors = eigen_solver.eigenvectors();
}
}
}
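
By way of illustration only (again, not part of the diff; the matrix values are made up), the complex path does the same per-batch work with std::complex scalars. A Hermitian matrix has real eigenvalues, which is why eigenvalues_data is typed ValueType while the input and eigenvectors use the complex type T; the reinterpret_cast above also relies on paddle::platform::complex<ValueType> being layout-compatible with std::complex<ValueType>.

#include <complex>
#include <iostream>

#include "Eigen/Dense"

int main() {
  using Complex = std::complex<double>;
  using Mat =
      Eigen::Matrix<Complex, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

  // A 2x2 Hermitian matrix (equal to its own conjugate transpose).
  Mat m(2, 2);
  m << Complex(2, 0), Complex(0, 1),
       Complex(0, -1), Complex(3, 0);

  Eigen::SelfAdjointEigenSolver<Mat> solver(m, Eigen::ComputeEigenvectors);
  if (solver.info() != Eigen::Success) return 1;

  // Eigenvalues come back as real doubles even though the input is complex;
  // the eigenvectors remain complex.
  std::cout << "eigenvalues:\n" << solver.eigenvalues() << std::endl;
  std::cout << "eigenvectors:\n" << solver.eigenvectors() << std::endl;
  return 0;
}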

inline int64_t GetBatchSize(framework::DDim dims) {
int64_t batch_size = 1;
@@ -82,56 +148,21 @@ struct MatrixEighFunctor<platform::CPUDeviceContext, ValueType, T> {
auto *value_data =
eigen_values->mutable_data<ValueType>(output_value_dim, ctx.GetPlace());

auto *x_data = input_tensor.data<T>();
auto *vector_data = eigen_vectors->mutable_data<T>(dims, ctx.GetPlace());
ComputeEigenvaluesAndVectors(x_data, value_data, vector_data, batch_size,
rows, has_vectors);
if (framework::IsComplexType(input_tensor.type())) {
auto *x_data = input_tensor.data<T>();
auto *vector_data = eigen_vectors->mutable_data<T>(dims, ctx.GetPlace());
ComputeComplexEigenvaluesAndVectors<T, ValueType>(
x_data, value_data, vector_data, batch_size, rows, rows, has_vectors);
} else {
auto *x_data = input_tensor.data<ValueType>();
auto *vector_data =
eigen_vectors->mutable_data<ValueType>(dims, ctx.GetPlace());
ComputeFloatEigenvaluesAndVectors<ValueType>(
x_data, value_data, vector_data, batch_size, rows, rows, has_vectors);
}
}

inline void ComputeEigenvaluesAndVectors(T *x_data, ValueType *value_data,
T *vector_data, int batches,
int rows, bool has_vectors) const;
};

#define EIGEN_WITH_TYPES(m) \
m(float, float, float) m(double, double, double) \
m(float, paddle::platform::complex<float>, std::complex<float>) \
m(double, paddle::platform::complex<double>, std::complex<double>)

#define EIGEN_INSTANCE(ValueType, T, CastType) \
template <> \
inline void MatrixEighFunctor<platform::CPUDeviceContext, ValueType, T>:: \
ComputeEigenvaluesAndVectors(T *x_data, ValueType *value_data, \
T *vector_data, int batches, int rows, \
bool has_vectors) const { \
int stride = rows * rows; \
for (int i = 0; i < batches; i++) { \
CastType *x_data_ = reinterpret_cast<CastType *>(x_data); \
CastType *vector_data_ = reinterpret_cast<CastType *>(vector_data); \
auto eigenvalues = \
OutputMatrixMap<ValueType>(value_data + i * rows, 1, rows); \
auto m = InputMatrixMap<CastType>(x_data_ + i * stride, rows, rows); \
auto eigenvectors = \
OutputMatrixMap<CastType>(vector_data_ + i * stride, rows, rows); \
Eigen::SelfAdjointEigenSolver<EigenMatrix<CastType>> eigen_solver( \
m, \
has_vectors ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly); \
PADDLE_ENFORCE_EQ( \
eigen_solver.info(), Eigen::Success, \
platform::errors::InvalidArgument( \
"Self Adjoint Eigen decomposition is not successful. " \
"The %d-th input matrice might not be not be positive " \
"definite.", \
i)); \
eigenvalues = eigen_solver.eigenvalues().transpose(); \
if (has_vectors) { \
eigenvectors = eigen_solver.eigenvectors(); \
} \
} \
}

EIGEN_WITH_TYPES(EIGEN_INSTANCE);

#ifdef PADDLE_WITH_CUDA

// Calculates the eigenvalues and eigenvectors of Hermitian or real
37 changes: 10 additions & 27 deletions paddle/fluid/operators/svd_helper.h
@@ -340,7 +340,8 @@ struct DeviceIndependenceTensorOperations {
NameInTensorMap inputs({{"X", {&x}}});
return CreateOpRunAndReturnTensor("reduce_max", inputs, attrs, out_dim);
}
template <typename T1 = T>
// Supports subtraction for float and complex types; the default element type is T.
template <typename InT = T>
framework::Tensor Sub(const framework::Tensor& x,
const framework::Tensor& y) {
framework::Tensor ret;
@@ -350,18 +351,18 @@
#if defined(__NVCC__) || defined(__HIPCC__)
// For GPU, there is no need to define XxxInverseFunctor and call
// ElementwiseComputeEx in two branches.
ElementwiseComputeEx<SubFunctor<T1>, DeviceContext, T1>(
context, &x, &y, -1, SubFunctor<T1>(), &ret);
ElementwiseComputeEx<SubFunctor<InT>, DeviceContext, InT>(
context, &x, &y, -1, SubFunctor<InT>(), &ret);
#endif
} else {
if (x.dims().size() >= y.dims().size()) {
ElementwiseComputeEx<SubFunctor<T1>, DeviceContext, T1>(
context, &x, &y, -1, SubFunctor<T1>(), &ret);
ElementwiseComputeEx<SubFunctor<InT>, DeviceContext, InT>(
context, &x, &y, -1, SubFunctor<InT>(), &ret);
} else {
ElementwiseComputeEx<InverseSubFunctor<T1>, DeviceContext, T1>(
// This is copied from elementwise_sub, which means we
// need to reverse when x.rank < y.rank
context, &x, &y, -1, InverseSubFunctor<T1>(), &ret);
// This is copied from elementwise_sub, which means we
// need to reverse when x.rank < y.rank
ElementwiseComputeEx<InverseSubFunctor<InT>, DeviceContext, InT>(
context, &x, &y, -1, InverseSubFunctor<InT>(), &ret);
}
}
return ret;
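
To see why the "inverse" functor is needed when x has lower rank than y, here is a toy standalone sketch (not Paddle's actual broadcasting machinery; BroadcastOp, Sub, and InverseSub are made-up names): if the broadcast routine only walks the higher-rank operand and always passes it to the functor first, flipping the functor's argument order is what keeps the result equal to x - y.

#include <cstddef>
#include <iostream>
#include <vector>

// Toy counterparts of SubFunctor / InverseSubFunctor: the inverse version
// swaps the argument order, returning b - a instead of a - b.
struct Sub {
  double operator()(double a, double b) const { return a - b; }
};
struct InverseSub {
  double operator()(double a, double b) const { return b - a; }
};

// A toy broadcast routine that only supports "larger operand first": it walks
// the vector and broadcasts the scalar against every element.
template <typename F>
std::vector<double> BroadcastOp(const std::vector<double>& big, double small,
                                F f) {
  std::vector<double> out(big.size());
  for (std::size_t i = 0; i < big.size(); ++i) out[i] = f(big[i], small);
  return out;
}

int main() {
  std::vector<double> y = {1.0, 2.0, 3.0};
  double x = 10.0;  // the "lower-rank" operand

  // y - x: the natural argument order works with the plain functor.
  std::vector<double> a = BroadcastOp(y, x, Sub{});
  // x - y: x must still be passed second (it is the smaller operand), so the
  // inverse functor restores the intended subtraction order.
  std::vector<double> b = BroadcastOp(y, x, InverseSub{});

  for (double v : a) std::cout << v << " ";  // prints -9 -8 -7
  std::cout << "\n";
  for (double v : b) std::cout << v << " ";  // prints 9 8 7
  std::cout << std::endl;
  return 0;
}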
@@ -471,24 +472,6 @@ struct DeviceIndependenceTensorOperations {
return out;
}

// framework::Tensor Sub_(const framework::Tensor& x,
// const framework::Tensor& y) {
// framework::Tensor ret;
// std::vector<int> out_shape = GetBroadcastShape({&x, &y});
// ret.Resize(framework::make_ddim(out_shape));
// if (x.dims().size() >= y.dims().size()) {
// ElementwiseComputeEx<SubFunctor<ValueType>, DeviceContext, ValueType>(
// context, &x, &y, -1, SubFunctor<ValueType>(), &ret);
// } else {
// ElementwiseComputeEx<InverseSubFunctor<ValueType>, DeviceContext,
// ValueType>(
// // This is copied from elementwise_sub, which means we
// // need to reverse when x.rank < y.rank
// context, &x, &y, -1, InverseSubFunctor<ValueType>(), &ret);
// }
// return ret;
// }

private:
const framework::ExecutionContext& context;
BlasT<DeviceContext, T> GetBlas() {

1 comment on commit 6c46c22

@paddle-bot-old

Congratulations! Your pull request passed all required CI. You can ask the reviewer(s) to approve and merge. 🎉
