Replace Eigen with LAPACK in the CPU forward calculation; modify linalg exposure rules #35916
@@ -14,8 +14,8 @@
 #pragma once

-#include "Eigen/Core"
 #include "paddle/fluid/memory/memory.h"
+#include "paddle/fluid/operators/math/lapack_function.h"
 #include "paddle/fluid/operators/svd_helper.h"
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/fluid/platform/dynload/cusolver.h"
@@ -25,84 +25,6 @@ namespace paddle {
 namespace operators {
 namespace math {

-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using InputMatrixMap = Eigen::Map<
-    const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;
-
-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using OutputMatrixMap = Eigen::Map<
-    Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;
-
-template <typename ValueType>
-inline void ComputeFloatEigenvaluesAndVectors(ValueType *x_data,
-                                              ValueType *eigenvalues_data,
-                                              ValueType *eigenvectors_data,
-                                              int batches, int rows, int cols,
-                                              bool has_vectors) {
-  int stride = rows * cols;
-  for (int i = 0; i < batches; i++) {
-    auto m = InputMatrixMap<ValueType>(x_data + i * stride, rows, cols);
-    auto eigenvalues =
-        OutputMatrixMap<ValueType>(eigenvalues_data + i * rows, 1, rows);
-    auto eigenvectors =
-        OutputMatrixMap<ValueType>(eigenvectors_data + i * stride, rows, cols);
-
-    Eigen::SelfAdjointEigenSolver<Eigen::Matrix<
-        ValueType, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
-        eigen_solver(m, has_vectors ? Eigen::ComputeEigenvectors
-                                    : Eigen::EigenvaluesOnly);
-    PADDLE_ENFORCE_EQ(
-        eigen_solver.info(), Eigen::Success,
-        platform::errors::InvalidArgument(
-            "Self Adjoint Eigen decomposition is not successful. "
-            "The %d-th input matrice might not be not be positive definite.",
-            i));
-
-    eigenvalues = eigen_solver.eigenvalues().transpose();
-    if (has_vectors) {
-      eigenvectors = eigen_solver.eigenvectors();
-    }
-  }
-}
-
-template <typename T, typename ValueType>
-inline void ComputeComplexEigenvaluesAndVectors(T *x_data,
-                                                ValueType *eigenvalues_data,
-                                                T *eigenvectors_data,
-                                                int batches, int rows, int cols,
-                                                bool has_vectors) {
-  using Complex = std::complex<ValueType>;
-  Complex *input = reinterpret_cast<Complex *>(x_data);
-  Complex *eigenvectors_data_ = reinterpret_cast<Complex *>(eigenvectors_data);
-
-  int stride = rows * cols;
-  for (int i = 0; i < batches; i++) {
-    auto m = InputMatrixMap<Complex>(input + i * stride, rows, cols);
-    auto eigenvalues =
-        OutputMatrixMap<ValueType>(eigenvalues_data + i * rows, 1, rows);
-    auto eigenvectors =
-        OutputMatrixMap<Complex>(eigenvectors_data_ + i * stride, rows, cols);
-
-    Eigen::SelfAdjointEigenSolver<
-        Eigen::Matrix<Complex, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
-        eigen_solver(m, has_vectors ? Eigen::ComputeEigenvectors
-                                    : Eigen::EigenvaluesOnly);
-    PADDLE_ENFORCE_EQ(
-        eigen_solver.info(), Eigen::Success,
-        platform::errors::InvalidArgument(
-            "Self Adjoint Eigen decomposition is not successful. "
-            "The %d-th input matrice might not be not be positive definite.",
-            i));
-
-    eigenvalues = eigen_solver.eigenvalues().transpose();
-    if (has_vectors) {
-      eigenvectors = eigen_solver.eigenvectors();
-    }
-  }
-}
-
 inline int64_t GetBatchSize(framework::DDim dims) {
   int64_t batch_size = 1;
   auto dim_size = dims.size();
@@ -128,37 +50,74 @@ struct MatrixEighFunctor<platform::CPUDeviceContext, ValueType, T> {
   void operator()(const framework::ExecutionContext &ctx, const Tensor &input,
                   Tensor *eigen_values, Tensor *eigen_vectors, bool is_lower,
                   bool has_vectors) {
-    auto dims = input.dims();
-    auto output_value_dim = eigen_values->dims();
-
-    int64_t batch_size = 1;
-    int dim_size = dims.size();
-    for (int64_t i = 0; i < dim_size - 2; i++) {
-      batch_size *= dims[i];
-    }
-    auto dito =
-        DeviceIndependenceTensorOperations<platform::CPUDeviceContext, T>(ctx);
-    Tensor input_tensor;
-    TensorCopy(input, ctx.GetPlace(), &input_tensor);
-    if (!is_lower) {
-      input_tensor = dito.Transpose(input);
-    }
+    auto *out_value = eigen_values->mutable_data<ValueType>(ctx.GetPlace());
+
+    auto dito =
+        math::DeviceIndependenceTensorOperations<platform::CPUDeviceContext, T>(
+            ctx);
+    *eigen_vectors = dito.Transpose(input);
Review comment: If …
+    auto *out_vector = eigen_vectors->mutable_data<T>(ctx.GetPlace());
+
+    auto dims = input.dims();
+    int dim_size = dims.size();
+    int64_t batch_size = GetBatchSize(dims);
+
+    int vector_stride = dims[dim_size - 1] * dims[dim_size - 2];
+    int values_stride = dims[dim_size - 1];
+    char uplo = is_lower ? 'L' : 'U';
+    char jobz = has_vectors ? 'V' : 'N';
+    auto n = dims[dim_size - 1];
+    auto lda = std::max<int64_t>(1, n);
+
+    int lwork = -1;
+    int lrwork = -1;
+    int liwork = -1;
+    int iwork_opt = -1;
+    T lwork_opt = static_cast<T>(-1);
+    ValueType rwork_opt = static_cast<ValueType>(-1);
Review comment: What do these parameters mean? What does -1 stand for? Will the LAPACK routine write to these parameters? Please add comments explaining them.
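Editorial note on the question above: initializing lwork/lrwork/liwork to -1 follows LAPACK's standard workspace-query convention. Calling a ?syevd/?heevd routine with lwork = -1 performs no decomposition; it only writes the optimal workspace sizes into the first elements of work (and rwork/iwork), which the caller then uses to allocate the real buffers. A minimal standalone sketch against the raw Fortran symbol, assuming a LAPACK library is linked:

#include <algorithm>
#include <vector>

// Fortran LAPACK entry point; every argument is passed by pointer.
extern "C" void dsyevd_(char *jobz, char *uplo, int *n, double *a, int *lda,
                        double *w, double *work, int *lwork, int *iwork,
                        int *liwork, int *info);

// Query-then-compute: the first call, with lwork = liwork = -1, only reports
// optimal buffer sizes; the second call does the actual eigendecomposition.
void SymmetricEigh(int n, double *a /* n x n, column-major */, double *w) {
  char jobz = 'V', uplo = 'L';
  int lda = std::max(1, n), info = 0;
  int lwork = -1, liwork = -1;
  double lwork_opt = 0.0;
  int liwork_opt = 0;
  dsyevd_(&jobz, &uplo, &n, a, &lda, w, &lwork_opt, &lwork, &liwork_opt,
          &liwork, &info);  // workspace query only; a and w are untouched

  lwork = std::max(1, static_cast<int>(lwork_opt));
  liwork = std::max(1, liwork_opt);
  std::vector<double> work(lwork);
  std::vector<int> iwork(liwork);
  dsyevd_(&jobz, &uplo, &n, a, &lda, w, work.data(), &lwork, iwork.data(),
          &liwork, &info);  // actual computation; info == 0 on success
}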
+
+    Tensor info_tensor;
Review comment: If only one matrix is computed at a time, there seems to be no need to define a …
+    auto *infos_data = info_tensor.mutable_data<int>(
+        framework::make_ddim({batch_size}), ctx.GetPlace());
+
+    math::lapackEvd<T, ValueType>(jobz, uplo, n, out_vector, lda, out_value,
+                                  &lwork_opt, lwork, &rwork_opt, lrwork,
+                                  &iwork_opt, liwork, infos_data);

Review comment: Add a comment explaining the purpose of this single standalone LAPACK call.

+
+    lwork = std::max<int>(1, static_cast<int>(lwork_opt));
+    liwork = std::max<int>(1, iwork_opt);
+
+    Tensor rwork_tensor;
+    ValueType *rwork_data = nullptr;
+
+    // complex type
+    if (framework::IsComplexType(eigen_vectors->type())) {
+      lrwork = std::max<int>(1, static_cast<int>(rwork_opt));
+      rwork_data = rwork_tensor.mutable_data<ValueType>(
+          framework::make_ddim({lrwork}), ctx.GetPlace());
+    }
-    int rows = dims[dims.size() - 2];
-
-    auto *value_data =
-        eigen_values->mutable_data<ValueType>(output_value_dim, ctx.GetPlace());
+    Tensor iwork_tensor, work_tensor;
+    auto *iwork_data = iwork_tensor.mutable_data<int>(
+        framework::make_ddim({liwork}), ctx.GetPlace());
+    auto *work_data = work_tensor.mutable_data<T>(framework::make_ddim({lwork}),
+                                                  ctx.GetPlace());

-    if (framework::IsComplexType(input_tensor.type())) {
-      auto *x_data = input_tensor.data<T>();
-      auto *vector_data = eigen_vectors->mutable_data<T>(dims, ctx.GetPlace());
-      ComputeComplexEigenvaluesAndVectors<T, ValueType>(
-          x_data, value_data, vector_data, batch_size, rows, rows, has_vectors);
-    } else {
-      auto *x_data = input_tensor.data<ValueType>();
-      auto *vector_data =
-          eigen_vectors->mutable_data<ValueType>(dims, ctx.GetPlace());
-      ComputeFloatEigenvaluesAndVectors<ValueType>(
-          x_data, value_data, vector_data, batch_size, rows, rows, has_vectors);
-    }
+    for (auto i = 0; i < batch_size; i++) {
+      auto *value_data = out_value + i * values_stride;
+      auto *vector_data = out_vector + i * vector_stride;
+      int *info_ptr = &infos_data[i];
+      math::lapackEvd<T, ValueType>(jobz, uplo, n, vector_data, lda, value_data,
+                                    work_data, lwork, rwork_data, lrwork,
+                                    iwork_data, liwork, info_ptr);
+      PADDLE_ENFORCE_EQ(
+          *info_ptr, 0,
+          platform::errors::PreconditionNotMet(
+              "For batch [%d]: the [%d] argument had an illegal value", i,
+              *info_ptr));
+    }

Review comment: This error message doesn't carry much real meaning.
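For context on the comment above: LAPACK's ?evd routines report info = -i when the i-th argument was illegal and info > 0 when the iteration failed to converge, so the current text is only accurate for the negative case. A hypothetical rewording (not part of the PR) that reflects both cases:

// Hypothetical error message following LAPACK's documented info convention:
// info = -i -> the i-th argument had an illegal value (an internal bug);
// info > 0  -> the eigenvalue algorithm itself failed to converge.
PADDLE_ENFORCE_EQ(
    *info_ptr, 0,
    platform::errors::PreconditionNotMet(
        "Batch [%d]: LAPACK evd returned info = %d (negative: argument %d "
        "was illegal; positive: the algorithm failed to converge).",
        i, *info_ptr, -(*info_ptr)));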
+    if (has_vectors) {
+      *eigen_vectors = dito.Transpose(*eigen_vectors);
+    }

Review comment: Please confirm the two … before and after.

   }
 };
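Background for the transpose question (editorial, not from the PR): LAPACK assumes column-major storage while Paddle tensors are row-major, and transposing a matrix is equivalent to reinterpreting its buffer in the other layout. The first Transpose hands LAPACK a column-major view of the input; the final Transpose converts the column-major eigenvector matrix back to row-major. A tiny illustration of the layout duality:

#include <cassert>

// The same 4 values read under both conventions: the buffer of the row-major
// matrix [[1, 2], [3, 4]] is also the buffer of column-major [[1, 3], [2, 4]].
int main() {
  double buf[4] = {1.0, 2.0, 3.0, 4.0};
  const int ld = 2;                // leading dimension
  assert(buf[0 * ld + 1] == 2.0);  // A(0,1) under row-major indexing
  assert(buf[1 * ld + 0] == 3.0);  // A(0,1) under column-major indexing
  return 0;
}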
@@ -175,6 +134,12 @@ struct MatrixEighFunctor<platform::CUDADeviceContext, ValueType, T> {
                   Tensor *eigen_values, Tensor *eigen_vectors, bool is_lower,
                   bool has_vectors) {
     auto *out_value = eigen_values->mutable_data<ValueType>(ctx.GetPlace());
+
+    auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    auto dito =
+        math::DeviceIndependenceTensorOperations<platform::CUDADeviceContext,
+                                                 T>(ctx);
+    *eigen_vectors = dito.Transpose(input);
+    auto *out_vector = eigen_vectors->mutable_data<T>(ctx.GetPlace());

     auto &dims = input.dims();
@@ -191,13 +156,6 @@ struct MatrixEighFunctor<platform::CUDADeviceContext, ValueType, T> {
     auto vector_stride = dims[dim_size - 1] * dims[dim_size - 2];
     auto values_stride = dims[dim_size - 1];

-    auto &dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
-    auto dito =
-        math::DeviceIndependenceTensorOperations<platform::CUDADeviceContext,
-                                                 T>(ctx);
-    Tensor output_v_var_trans = dito.Transpose(input);
-    TensorCopy(output_v_var_trans, ctx.GetPlace(), eigen_vectors);
-
     int lwork = 0;
     auto info = memory::Alloc(dev_ctx, sizeof(int) * batch_size);
     auto *info_ptr = reinterpret_cast<int *>(info->ptr());
@@ -31,6 +31,50 @@ void lapackLu<float>(int m, int n, float *a, int lda, int *ipiv, int *info) {
   platform::dynload::sgetrf_(&m, &n, a, &lda, ipiv, info);
 }

+// eigh
+template <>
+void lapackEvd<float, float>(char jobz, char uplo, int n, float *a, int lda,
+                             float *w, float *work, int lwork, float *rwork,
+                             int lrwork, int *iwork, int liwork, int *info) {
+  (void)rwork;   // unused
+  (void)lrwork;  // unused
+  platform::dynload::ssyevd_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, iwork,
+                             &liwork, info);
+}
+
+template <>
+void lapackEvd<double, double>(char jobz, char uplo, int n, double *a, int lda,
+                               double *w, double *work, int lwork,
+                               double *rwork, int lrwork, int *iwork,
+                               int liwork, int *info) {
+  (void)rwork;   // unused
+  (void)lrwork;  // unused
+  platform::dynload::dsyevd_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, iwork,
+                             &liwork, info);
+}
+
+template <>
+void lapackEvd<paddle::platform::complex<float>, float>(
+    char jobz, char uplo, int n, paddle::platform::complex<float> *a, int lda,
+    float *w, paddle::platform::complex<float> *work, int lwork, float *rwork,
+    int lrwork, int *iwork, int liwork, int *info) {
+  platform::dynload::cheevd_(&jobz, &uplo, &n,
+                             reinterpret_cast<std::complex<float> *>(a), &lda,
+                             w, reinterpret_cast<std::complex<float> *>(work),
+                             &lwork, rwork, &lrwork, iwork, &liwork, info);
+}

Review comment: There is no need to add … here.
+
+template <>
+void lapackEvd<paddle::platform::complex<double>, double>(
+    char jobz, char uplo, int n, paddle::platform::complex<double> *a, int lda,
+    double *w, paddle::platform::complex<double> *work, int lwork,
+    double *rwork, int lrwork, int *iwork, int liwork, int *info) {
+  platform::dynload::zheevd_(&jobz, &uplo, &n,
+                             reinterpret_cast<std::complex<double> *>(a), &lda,
+                             w, reinterpret_cast<std::complex<double> *>(work),
+                             &lwork, rwork, &lrwork, iwork, &liwork, info);
+}
+
 // Eig
 template <>
 void lapackEig<double>(char jobvl, char jobvr, int n, double *a, int lda,
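The primary template these four specializations implement is declared in lapack_function.h, which this diff does not show; the sketch below is an assumed reconstruction of its shape from the specializations, not the actual declaration:

// Assumed primary template (sketch): T is the matrix element type, ValueType
// the real scalar type of the eigenvalues. rwork/lrwork are only consumed by
// the complex paths (cheevd_/zheevd_) and ignored by ssyevd_/dsyevd_.
template <typename T, typename ValueType>
void lapackEvd(char jobz, char uplo, int n, T *a, int lda, ValueType *w,
               T *work, int lwork, ValueType *rwork, int lrwork, int *iwork,
               int liwork, int *info);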
Review comment: The functor no longer needs to fetch runtime information from ctx; it would be better to pass in the DeviceContext directly.
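A sketch of what the reviewer's suggestion could look like; the signature below is hypothetical and not code from this PR:

// Hypothetical refactor: the functor receives the DeviceContext directly,
// so it no longer has to pull runtime state out of the ExecutionContext.
template <typename ValueType, typename T>
struct MatrixEighFunctor<platform::CPUDeviceContext, ValueType, T> {
  void operator()(const platform::CPUDeviceContext &dev_ctx,
                  const Tensor &input, Tensor *eigen_values,
                  Tensor *eigen_vectors, bool is_lower, bool has_vectors);
};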