Skip to content

Commit

Permalink
BUG: core: use blas_ilp64 also for *_matmul, *_dot, and *_vdot
Browse files Browse the repository at this point in the history
Changing these to support ILP64 blas was missed in numpygh-15012
  • Loading branch information
pv committed Dec 14, 2019
1 parent b7f42ea commit de8a10d
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 38 deletions.
16 changes: 8 additions & 8 deletions numpy/core/src/multiarray/arraytypes.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -3535,17 +3535,17 @@ NPY_NO_EXPORT void
npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
int is1b = blas_stride(is1, sizeof(@type@));
int is2b = blas_stride(is2, sizeof(@type@));
CBLAS_INT is1b = blas_stride(is1, sizeof(@type@));
CBLAS_INT is2b = blas_stride(is2, sizeof(@type@));

if (is1b && is2b)
{
double sum = 0.; /* double for stability */

while (n > 0) {
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;

sum += cblas_@prefix@dot(chunk,
sum += CBLAS_FUNC(cblas_@prefix@dot)(chunk,
(@type@ *) ip1, is1b,
(@type@ *) ip2, is2b);
/* use char strides here */
Expand Down Expand Up @@ -3584,17 +3584,17 @@ NPY_NO_EXPORT void
char *op, npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
int is1b = blas_stride(is1, sizeof(@ctype@));
int is2b = blas_stride(is2, sizeof(@ctype@));
CBLAS_INT is1b = blas_stride(is1, sizeof(@ctype@));
CBLAS_INT is2b = blas_stride(is2, sizeof(@ctype@));

if (is1b && is2b) {
double sum[2] = {0., 0.}; /* double for stability */

while (n > 0) {
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
@type@ tmp[2];

cblas_@prefix@dotu_sub((int)n, ip1, is1b, ip2, is2b, tmp);
CBLAS_FUNC(cblas_@prefix@dotu_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
sum[0] += (double)tmp[0];
sum[1] += (double)tmp[1];
/* use char strides here */
Expand Down
10 changes: 9 additions & 1 deletion numpy/core/src/multiarray/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,11 @@ blas_stride(npy_intp stride, unsigned itemsize)
*/
if (stride > 0 && npy_is_aligned((void *)stride, itemsize)) {
stride /= itemsize;
#ifndef HAVE_BLAS_ILP64
if (stride <= INT_MAX) {
#else
if (stride <= NPY_MAX_INT64) {
#endif
return stride;
}
}
Expand All @@ -314,7 +318,11 @@ blas_stride(npy_intp stride, unsigned itemsize)
* Define a chunksize for CBLAS. CBLAS counts in integers.
*/
#if NPY_MAX_INTP > INT_MAX
# define NPY_CBLAS_CHUNK (INT_MAX / 2 + 1)
# ifndef HAVE_BLAS_ILP64
# define NPY_CBLAS_CHUNK (INT_MAX / 2 + 1)
# else
# define NPY_CBLAS_CHUNK (NPY_MAX_INT64 / 2 + 1)
# endif
#else
# define NPY_CBLAS_CHUNK NPY_MAX_INTP
#endif
Expand Down
16 changes: 8 additions & 8 deletions numpy/core/src/multiarray/vdot.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ CFLOAT_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
char *op, npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
int is1b = blas_stride(is1, sizeof(npy_cfloat));
int is2b = blas_stride(is2, sizeof(npy_cfloat));
CBLAS_INT is1b = blas_stride(is1, sizeof(npy_cfloat));
CBLAS_INT is2b = blas_stride(is2, sizeof(npy_cfloat));

if (is1b && is2b) {
double sum[2] = {0., 0.}; /* double for stability */

while (n > 0) {
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
float tmp[2];

cblas_cdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp);
CBLAS_FUNC(cblas_cdotc_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
sum[0] += (double)tmp[0];
sum[1] += (double)tmp[1];
/* use char strides here */
Expand Down Expand Up @@ -66,17 +66,17 @@ CDOUBLE_vdot(char *ip1, npy_intp is1, char *ip2, npy_intp is2,
char *op, npy_intp n, void *NPY_UNUSED(ignore))
{
#if defined(HAVE_CBLAS)
int is1b = blas_stride(is1, sizeof(npy_cdouble));
int is2b = blas_stride(is2, sizeof(npy_cdouble));
CBLAS_INT is1b = blas_stride(is1, sizeof(npy_cdouble));
CBLAS_INT is2b = blas_stride(is2, sizeof(npy_cdouble));

if (is1b && is2b) {
double sum[2] = {0., 0.}; /* double for stability */

while (n > 0) {
int chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
CBLAS_INT chunk = n < NPY_CBLAS_CHUNK ? n : NPY_CBLAS_CHUNK;
double tmp[2];

cblas_zdotc_sub((int)n, ip1, is1b, ip2, is2b, tmp);
CBLAS_FUNC(cblas_zdotc_sub)((CBLAS_INT)n, ip1, is1b, ip2, is2b, tmp);
sum[0] += (double)tmp[0];
sum[1] += (double)tmp[1];
/* use char strides here */
Expand Down
49 changes: 28 additions & 21 deletions numpy/core/src/umath/matmul.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@
* -1 to be conservative, in case blas internally uses a for loop with an
* inclusive upper bound
*/
#ifndef HAVE_BLAS_ILP64
#define BLAS_MAXSIZE (NPY_MAX_INT - 1)
#else
#define BLAS_MAXSIZE (NPY_MAX_INT64 - 1)
#endif

/*
* Determine if a 2d matrix can be used by BLAS
Expand Down Expand Up @@ -84,25 +88,25 @@ NPY_NO_EXPORT void
* op: data in c order, m shape
*/
enum CBLAS_ORDER order;
int M, N, lda;
CBLAS_INT M, N, lda;

assert(m <= BLAS_MAXSIZE && n <= BLAS_MAXSIZE);
assert (is_blasable2d(is2_n, sizeof(@typ@), n, 1, sizeof(@typ@)));
M = (int)m;
N = (int)n;
M = (CBLAS_INT)m;
N = (CBLAS_INT)n;

if (is_blasable2d(is1_m, is1_n, m, n, sizeof(@typ@))) {
order = CblasColMajor;
lda = (int)(is1_m / sizeof(@typ@));
lda = (CBLAS_INT)(is1_m / sizeof(@typ@));
}
else {
/* If not ColMajor, caller should have ensured we are RowMajor */
/* will not assert in release mode */
order = CblasRowMajor;
assert(is_blasable2d(is1_n, is1_m, n, m, sizeof(@typ@)));
lda = (int)(is1_n / sizeof(@typ@));
lda = (CBLAS_INT)(is1_n / sizeof(@typ@));
}
cblas_@prefix@gemv(order, CblasTrans, N, M, @step1@, ip1, lda, ip2,
CBLAS_FUNC(cblas_@prefix@gemv)(order, CblasTrans, N, M, @step1@, ip1, lda, ip2,
is2_n / sizeof(@typ@), @step0@, op, op_m / sizeof(@typ@));
}

Expand All @@ -117,37 +121,37 @@ NPY_NO_EXPORT void
*/
enum CBLAS_ORDER order = CblasRowMajor;
enum CBLAS_TRANSPOSE trans1, trans2;
int M, N, P, lda, ldb, ldc;
CBLAS_INT M, N, P, lda, ldb, ldc;
assert(m <= BLAS_MAXSIZE && n <= BLAS_MAXSIZE && p <= BLAS_MAXSIZE);
M = (int)m;
N = (int)n;
P = (int)p;
M = (CBLAS_INT)m;
N = (CBLAS_INT)n;
P = (CBLAS_INT)p;

assert(is_blasable2d(os_m, os_p, m, p, sizeof(@typ@)));
ldc = (int)(os_m / sizeof(@typ@));
ldc = (CBLAS_INT)(os_m / sizeof(@typ@));

if (is_blasable2d(is1_m, is1_n, m, n, sizeof(@typ@))) {
trans1 = CblasNoTrans;
lda = (int)(is1_m / sizeof(@typ@));
lda = (CBLAS_INT)(is1_m / sizeof(@typ@));
}
else {
/* If not ColMajor, caller should have ensured we are RowMajor */
/* will not assert in release mode */
assert(is_blasable2d(is1_n, is1_m, n, m, sizeof(@typ@)));
trans1 = CblasTrans;
lda = (int)(is1_n / sizeof(@typ@));
lda = (CBLAS_INT)(is1_n / sizeof(@typ@));
}

if (is_blasable2d(is2_n, is2_p, n, p, sizeof(@typ@))) {
trans2 = CblasNoTrans;
ldb = (int)(is2_n / sizeof(@typ@));
ldb = (CBLAS_INT)(is2_n / sizeof(@typ@));
}
else {
/* If not ColMajor, caller should have ensured we are RowMajor */
/* will not assert in release mode */
assert(is_blasable2d(is2_p, is2_n, p, n, sizeof(@typ@)));
trans2 = CblasTrans;
ldb = (int)(is2_p / sizeof(@typ@));
ldb = (CBLAS_INT)(is2_p / sizeof(@typ@));
}
/*
* Use syrk if we have a case of a matrix times its transpose.
Expand All @@ -162,12 +166,14 @@ NPY_NO_EXPORT void
) {
npy_intp i,j;
if (trans1 == CblasNoTrans) {
cblas_@prefix@syrk(order, CblasUpper, trans1, P, N, @step1@,
ip1, lda, @step0@, op, ldc);
CBLAS_FUNC(cblas_@prefix@syrk)(
order, CblasUpper, trans1, P, N, @step1@,
ip1, lda, @step0@, op, ldc);
}
else {
cblas_@prefix@syrk(order, CblasUpper, trans1, P, N, @step1@,
ip1, ldb, @step0@, op, ldc);
CBLAS_FUNC(cblas_@prefix@syrk)(
order, CblasUpper, trans1, P, N, @step1@,
ip1, ldb, @step0@, op, ldc);
}
/* Copy the triangle */
for (i = 0; i < P; i++) {
Expand All @@ -178,8 +184,9 @@ NPY_NO_EXPORT void

}
else {
cblas_@prefix@gemm(order, trans1, trans2, M, P, N, @step1@, ip1, lda,
ip2, ldb, @step0@, op, ldc);
CBLAS_FUNC(cblas_@prefix@gemm)(
order, trans1, trans2, M, P, N, @step1@, ip1, lda,
ip2, ldb, @step0@, op, ldc);
}
}

Expand Down

0 comments on commit de8a10d

Please sign in to comment.