diff --git a/src/blas/backends/cublas/cublas_batch.cpp b/src/blas/backends/cublas/cublas_batch.cpp index bea86b596..85eadbe42 100644 --- a/src/blas/backends/cublas/cublas_batch.cpp +++ b/src/blas/backends/cublas/cublas_batch.cpp @@ -162,10 +162,10 @@ inline void gemm_batch(const char *func_name, Func func, sycl::queue &queue, tra auto b_ = sc.get_mem(b_acc); auto c_ = sc.get_mem(c_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, a_, - lda, stride_a, b_, ldb, stride_b, (cuDataType *)&beta, c_, ldc, - stride_c, batch_size); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), + get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, + a_, lda, stride_a, b_, ldb, stride_b, (cuDataType *)&beta, c_, + ldc, stride_c, batch_size); }); }); } @@ -495,10 +495,10 @@ inline sycl::event gemm_batch(const char *func_name, Func func, sycl::queue &que auto b_ = reinterpret_cast(b); auto c_ = reinterpret_cast(c); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, a_, - lda, stride_a, b_, ldb, stride_b, (cuDataType *)&beta, c_, ldc, - stride_c, batch_size); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), + get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, + a_, lda, stride_a, b_, ldb, stride_b, (cuDataType *)&beta, c_, + ldc, stride_c, batch_size); }); }); return done; @@ -550,11 +550,11 @@ inline sycl::event gemm_batch(const char *func_name, Func func, sycl::queue &que auto **a_ = reinterpret_cast(a); auto **b_ = reinterpret_cast(b); auto **c_ = reinterpret_cast(c); - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_operation(transa[i]), - get_cublas_operation(transb[i]), (int)m[i], (int)n[i], - (int)k[i], (cuDataType *)&alpha[i], a_ + offset, (int)lda[i], - b_ + offset, (int)ldb[i], (cuDataType *)&beta[i], c_ + offset, - (int)ldc[i], (int)group_size[i]); + CUBLAS_ERROR_FUNC_T_SYNC( + func_name, func, err, handle, get_cublas_operation(transa[i]), + get_cublas_operation(transb[i]), (int)m[i], (int)n[i], (int)k[i], + (cuDataType *)&alpha[i], a_ + offset, (int)lda[i], b_ + offset, (int)ldb[i], + (cuDataType *)&beta[i], c_ + offset, (int)ldc[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -632,7 +632,7 @@ inline sycl::event trsm_batch(const char *func_name, Func func, sycl::queue &que for (int64_t i = 0; i < group_count; i++) { auto **a_ = reinterpret_cast(a); auto **b_ = reinterpret_cast(b); - CUBLAS_ERROR_FUNC_T( + CUBLAS_ERROR_FUNC_T_SYNC( func_name, func, err, handle, get_cublas_side_mode(left_right[i]), get_cublas_fill_mode(upper_lower[i]), get_cublas_operation(trans[i]), get_cublas_diag_type(unit_diag[i]), (int)m[i], (int)n[i], diff --git a/src/blas/backends/cublas/cublas_helper.hpp b/src/blas/backends/cublas/cublas_helper.hpp index bf803a7c7..0ee9930e3 100644 --- a/src/blas/backends/cublas/cublas_helper.hpp +++ b/src/blas/backends/cublas/cublas_helper.hpp @@ -180,11 +180,23 @@ class cuda_error : virtual public std::runtime_error { throw cublas_error(std::string(#name) + std::string(" : "), err); \ } -#define CUBLAS_ERROR_FUNC_T(name, func, err, ...) \ - err = func(__VA_ARGS__); \ +#define CUBLAS_ERROR_FUNC_SYNC(name, err, handle, ...) \ + err = name(handle, __VA_ARGS__); \ + if (err != CUBLAS_STATUS_SUCCESS) { \ + throw cublas_error(std::string(#name) + std::string(" : "), err); \ + } \ + cudaStream_t currentStreamId; \ + CUBLAS_ERROR_FUNC(cublasGetStream, err, handle, ¤tStreamId); \ + cuStreamSynchronize(currentStreamId); + +#define CUBLAS_ERROR_FUNC_T_SYNC(name, func, err, handle, ...) \ + err = func(handle, __VA_ARGS__); \ if (err != CUBLAS_STATUS_SUCCESS) { \ throw cublas_error(std::string(name) + std::string(" : "), err); \ - } + } \ + cudaStream_t currentStreamId; \ + CUBLAS_ERROR_FUNC(cublasGetStream, err, handle, ¤tStreamId); \ + cuStreamSynchronize(currentStreamId); inline cublasOperation_t get_cublas_operation(oneapi::mkl::transpose trn) { switch (trn) { diff --git a/src/blas/backends/cublas/cublas_level1.cpp b/src/blas/backends/cublas/cublas_level1.cpp index 7dee2657a..24f18f096 100644 --- a/src/blas/backends/cublas/cublas_level1.cpp +++ b/src/blas/backends/cublas/cublas_level1.cpp @@ -53,7 +53,7 @@ inline void asum(const char *func_name, Func func, sycl::queue &queue, int64_t n auto res_ = sc.get_mem(res_acc); cublasStatus_t err; // ASUM does not support negative index - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, std::abs(incx), res_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, std::abs(incx), res_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST // to be set, therfore we need to reset this to the default value // in order to avoid CUDA_ERROR_ILLEGAL_ADRESS errors @@ -86,8 +86,8 @@ inline void scal(const char *func_name, Func func, sycl::queue &queue, int64_t n auto x_ = sc.get_mem(x_acc); cublasStatus_t err; // SCAL does not support negative incx - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, (cuDataType1 *)&a, x_, - std::abs(incx)); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, (cuDataType1 *)&a, x_, + std::abs(incx)); }); }); } @@ -117,8 +117,8 @@ inline void axpy(const char *func_name, Func func, sycl::queue &queue, int64_t n auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, (cuDataType *)&alpha, x_, incx, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, (cuDataType *)&alpha, x_, + incx, y_, incy); }); }); } @@ -180,7 +180,7 @@ inline void rotg(const char *func_name, Func func, sycl::queue &queue, sycl::buf auto c_ = sc.get_mem(c_acc); auto s_ = sc.get_mem(s_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, a_, b_, c_, s_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, a_, b_, c_, s_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST // to be set, therfore we need to reset this to the default value // in order to avoid CUDA_ERROR_ILLEGAL_ADRESS errors @@ -223,7 +223,7 @@ inline void rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n auto y_ = sc.get_mem(y_acc); auto param_ = sc.get_mem(param_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, y_, incy, param_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, y_, incy, param_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST // to be set, therfore we need to reset this to the default value // in order to avoid CUDA_ERROR_ILLEGAL_ADRESS errors @@ -255,7 +255,7 @@ inline void copy(const char *func_name, Func func, sycl::queue &queue, int64_t n auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, y_, incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, y_, incy); }); }); } @@ -294,7 +294,7 @@ inline void dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, auto y_ = sc.get_mem(y_acc); auto res_ = sc.get_mem(res_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, y_, incy, res_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, y_, incy, res_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST // to be set, therfore we need to reset this to the default value // in order to avoid CUDA_ERROR_ILLEGAL_ADRESS errors @@ -338,8 +338,8 @@ inline void rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, y_, incy, - (cuDataType2 *)&c, (cuDataType3 *)&s); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, y_, incy, + (cuDataType2 *)&c, (cuDataType3 *)&s); }); }); } @@ -376,7 +376,7 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, auto y_ = sc.get_mem(y_acc); auto res_ = sc.get_mem(res_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC(cublasSdot, err, handle, n, x_, incx, y_, incy, res_); + CUBLAS_ERROR_FUNC_SYNC(cublasSdot, err, handle, n, x_, incx, y_, incy, res_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST // to be set, therfore we need to reset this to the default value // in order to avoid CUDA_ERROR_ILLEGAL_ADRESS errors @@ -418,7 +418,7 @@ inline void rotmg(const char *func_name, Func func, sycl::queue &queue, sycl::bu auto y1_ = sc.get_mem(y1_acc); auto param_ = sc.get_mem(param_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, d1_, d2_, x1_, y1_, param_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, d1_, d2_, x1_, y1_, param_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST // to be set, therfore we need to reset this to the default value // in order to avoid CUDA_ERROR_ILLEGAL_ADRESS errors @@ -466,7 +466,7 @@ inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t cublasStatus_t err; // For negative incx, iamax returns 0. This behaviour is similar to that of // reference netlib BLAS. - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, int_res_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, int_res_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST // to be set, therfore we need to reset this to the default value // in order to avoid CUDA_ERROR_ILLEGAL_ADRESS errors @@ -503,7 +503,7 @@ inline void swap(const char *func_name, Func func, sycl::queue &queue, int64_t n auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, y_, incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, y_, incy); }); }); } @@ -549,7 +549,7 @@ inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t cublasStatus_t err; // For negative incx, iamin returns 0. This behaviour is similar to that of // implemented as a reference IAMIN. - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, int_res_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, int_res_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST // to be set, therfore we need to reset this to the default value // in order to avoid CUDA_ERROR_ILLEGAL_ADRESS errors @@ -593,7 +593,7 @@ inline void nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n auto res_ = sc.get_mem(res_acc); cublasStatus_t err; // NRM2 does not support negative index - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, std::abs(incx), res_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, std::abs(incx), res_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST // to be set, therfore we need to reset this to the default value // in order to avoid CUDA_ERROR_ILLEGAL_ADRESS errors @@ -635,7 +635,7 @@ inline sycl::event asum(const char *func_name, Func func, sycl::queue &queue, in auto res_ = reinterpret_cast(result); cublasStatus_t err; // ASUM does not support negative index - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, std::abs(incx), res_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, std::abs(incx), res_); }); }); return done; @@ -668,8 +668,8 @@ inline sycl::event scal(const char *func_name, Func func, sycl::queue &queue, in auto x_ = reinterpret_cast(x); cublasStatus_t err; // SCAL does not support negative incx - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, (cuDataType1 *)&a, x_, - std::abs(incx)); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, (cuDataType1 *)&a, x_, + std::abs(incx)); }); }); return done; @@ -704,8 +704,8 @@ inline sycl::event axpy(const char *func_name, Func func, sycl::queue &queue, in auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, (cuDataType *)&alpha, x_, incx, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, (cuDataType *)&alpha, x_, + incx, y_, incy); }); }); return done; @@ -764,7 +764,7 @@ inline sycl::event rotg(const char *func_name, Func func, sycl::queue &queue, T1 auto c_ = reinterpret_cast(c); auto s_ = reinterpret_cast(s); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, a_, b_, c_, s_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, a_, b_, c_, s_); }); }); return done; @@ -799,7 +799,7 @@ inline sycl::event rotm(const char *func_name, Func func, sycl::queue &queue, in auto y_ = reinterpret_cast(y); auto param_ = reinterpret_cast(param); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, y_, incy, param_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, y_, incy, param_); }); }); return done; @@ -832,7 +832,7 @@ inline sycl::event copy(const char *func_name, Func func, sycl::queue &queue, in auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, y_, incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, y_, incy); }); }); return done; @@ -867,7 +867,7 @@ inline sycl::event dot(const char *func_name, Func func, sycl::queue &queue, int auto y_ = reinterpret_cast(y); auto res_ = reinterpret_cast(result); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, y_, incy, res_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, y_, incy, res_); }); }); return done; @@ -906,8 +906,8 @@ inline sycl::event rot(const char *func_name, Func func, sycl::queue &queue, int auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, y_, incy, - (cuDataType2 *)&c, (cuDataType3 *)&s); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, y_, incy, + (cuDataType2 *)&c, (cuDataType3 *)&s); }); }); return done; @@ -943,7 +943,7 @@ sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int6 auto y_ = reinterpret_cast(y); auto res_ = reinterpret_cast(result); cublasStatus_t err; - CUBLAS_ERROR_FUNC(cublasSdot, err, handle, n, x_, incx, y_, incy, res_); + CUBLAS_ERROR_FUNC_SYNC(cublasSdot, err, handle, n, x_, incx, y_, incy, res_); }); }); done.wait(); @@ -973,7 +973,7 @@ inline sycl::event rotmg(const char *func_name, Func func, sycl::queue &queue, T auto y1_ = reinterpret_cast(&y1); auto param_ = reinterpret_cast(param); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, d1_, d2_, x1_, y1_, param_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, d1_, d2_, x1_, y1_, param_); }); }); return done; @@ -1014,7 +1014,7 @@ inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, i cublasStatus_t err; // For negative incx, iamax returns 0. This behaviour is similar to that of // reference iamax. - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, int_res_p_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, int_res_p_); }); }); done.wait(); @@ -1049,7 +1049,7 @@ inline sycl::event swap(const char *func_name, Func func, sycl::queue &queue, in auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, y_, incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, y_, incy); }); }); return done; @@ -1092,7 +1092,7 @@ inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, i cublasStatus_t err; // For negative incx, iamin returns 0. This behaviour is similar to that of // implemented iamin. - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, incx, int_res_p_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, incx, int_res_p_); }); }); done.wait(); @@ -1130,7 +1130,7 @@ inline sycl::event nrm2(const char *func_name, Func func, sycl::queue &queue, in auto res_ = reinterpret_cast(result); cublasStatus_t err; // NRM2 does not support negative index - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, n, x_, std::abs(incx), res_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, x_, std::abs(incx), res_); }); }); return done; diff --git a/src/blas/backends/cublas/cublas_level2.cpp b/src/blas/backends/cublas/cublas_level2.cpp index 1a3ef88bf..8f711243b 100644 --- a/src/blas/backends/cublas/cublas_level2.cpp +++ b/src/blas/backends/cublas/cublas_level2.cpp @@ -46,9 +46,9 @@ inline void gemv(const char *func_name, Func func, sycl::queue &queue, transpose auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_operation(trans), m, n, - (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), m, + n, (cuDataType *)&alpha, a_, lda, x_, incx, + (cuDataType *)&beta, y_, incy); }); }); } @@ -83,9 +83,9 @@ inline void gbmv(const char *func_name, Func func, sycl::queue &queue, transpose auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_operation(trans), m, n, kl, - ku, (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, - y_, incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), m, + n, kl, ku, (cuDataType *)&alpha, a_, lda, x_, incx, + (cuDataType *)&beta, y_, incy); }); }); } @@ -120,8 +120,8 @@ inline void ger(const char *func_name, Func func, sycl::queue &queue, int64_t m, auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, m, n, (cuDataType *)&alpha, x_, incx, - y_, incy, a_, lda); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, (cuDataType *)&alpha, x_, + incx, y_, incy, a_, lda); }); }); } @@ -157,9 +157,9 @@ inline void hbmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - k, (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, k, (cuDataType *)&alpha, + a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); }); }); } @@ -192,9 +192,9 @@ inline void hemv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, + lda, x_, incx, (cuDataType *)&beta, y_, incy); }); }); } @@ -227,8 +227,9 @@ inline void her(const char *func_name, Func func, sycl::queue &queue, uplo upper auto a_ = sc.get_mem(a_acc); auto x_ = sc.get_mem(x_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuScalarType *)&alpha, x_, incx, a_, lda); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + x_, incx, a_, lda); }); }); } @@ -261,8 +262,9 @@ inline void her2(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, y_, incy, a_, lda); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, y_, incy, a_, lda); }); }); } @@ -296,8 +298,9 @@ inline void hpmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, a_, x_, incx, (cuDataType *)&beta, y_, incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, + x_, incx, (cuDataType *)&beta, y_, incy); }); }); } @@ -330,8 +333,9 @@ inline void hpr(const char *func_name, Func func, sycl::queue &queue, uplo upper auto a_ = sc.get_mem(a_acc); auto x_ = sc.get_mem(x_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuScalarType *)&alpha, x_, incx, a_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + x_, incx, a_); }); }); } @@ -363,8 +367,9 @@ inline void hpr2(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, y_, incy, a_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, y_, incy, a_); }); }); } @@ -397,9 +402,9 @@ inline void sbmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - k, (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, k, (cuDataType *)&alpha, + a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); }); }); } @@ -433,9 +438,9 @@ inline void symv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, + lda, x_, incx, (cuDataType *)&beta, y_, incy); }); }); } @@ -466,8 +471,9 @@ inline void syr(const char *func_name, Func func, sycl::queue &queue, uplo upper auto a_ = sc.get_mem(a_acc); auto x_ = sc.get_mem(x_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, a_, lda); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, a_, lda); }); }); } @@ -501,8 +507,9 @@ inline void syr2(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, y_, incy, a_, lda); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, y_, incy, a_, lda); }); }); } @@ -539,8 +546,9 @@ inline void spmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, a_, x_, incx, (cuDataType *)&beta, y_, incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, + x_, incx, (cuDataType *)&beta, y_, incy); }); }); } @@ -571,8 +579,9 @@ inline void spr(const char *func_name, Func func, sycl::queue &queue, uplo upper auto a_ = sc.get_mem(a_acc); auto x_ = sc.get_mem(x_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, a_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, a_); }); }); } @@ -604,8 +613,9 @@ inline void spr2(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto x_ = sc.get_mem(x_acc); auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, y_, incy, a_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, y_, incy, a_); }); }); } @@ -636,9 +646,9 @@ inline void tbmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto a_ = sc.get_mem(a_acc); auto x_ = sc.get_mem(x_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, k, - a_, lda, x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, k, a_, lda, x_, incx); }); }); } @@ -672,9 +682,9 @@ inline void tbsv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto a_ = sc.get_mem(a_acc); auto x_ = sc.get_mem(x_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, k, - a_, lda, x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, k, a_, lda, x_, incx); }); }); } @@ -708,9 +718,9 @@ inline void tpmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto a_ = sc.get_mem(a_acc); auto x_ = sc.get_mem(x_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, a_, - x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, a_, x_, incx); }); }); } @@ -743,9 +753,9 @@ inline void tpsv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto a_ = sc.get_mem(a_acc); auto x_ = sc.get_mem(x_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, a_, - x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, a_, x_, incx); }); }); } @@ -778,9 +788,9 @@ inline void trmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto a_ = sc.get_mem(a_acc); auto x_ = sc.get_mem(x_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, a_, - lda, x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, a_, lda, x_, incx); }); }); } @@ -813,9 +823,9 @@ inline void trsv(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto a_ = sc.get_mem(a_acc); auto x_ = sc.get_mem(x_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, a_, - lda, x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, a_, lda, x_, incx); }); }); } @@ -854,9 +864,9 @@ inline sycl::event gemv(const char *func_name, Func func, sycl::queue &queue, tr auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_operation(trans), m, n, - (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), m, + n, (cuDataType *)&alpha, a_, lda, x_, incx, + (cuDataType *)&beta, y_, incy); }); }); return done; @@ -894,9 +904,9 @@ inline sycl::event gbmv(const char *func_name, Func func, sycl::queue &queue, tr auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_operation(trans), m, n, kl, - ku, (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, - y_, incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), m, + n, kl, ku, (cuDataType *)&alpha, a_, lda, x_, incx, + (cuDataType *)&beta, y_, incy); }); }); return done; @@ -934,8 +944,8 @@ inline sycl::event ger(const char *func_name, Func func, sycl::queue &queue, int auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, m, n, (cuDataType *)&alpha, x_, incx, - y_, incy, a_, lda); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, (cuDataType *)&alpha, x_, + incx, y_, incy, a_, lda); }); }); return done; @@ -975,9 +985,9 @@ inline sycl::event hbmv(const char *func_name, Func func, sycl::queue &queue, up auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - k, (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, k, (cuDataType *)&alpha, + a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); }); }); return done; @@ -1012,9 +1022,9 @@ inline sycl::event hemv(const char *func_name, Func func, sycl::queue &queue, up auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, + lda, x_, incx, (cuDataType *)&beta, y_, incy); }); }); return done; @@ -1050,8 +1060,9 @@ inline sycl::event her(const char *func_name, Func func, sycl::queue &queue, upl auto a_ = reinterpret_cast(a); auto x_ = reinterpret_cast(x); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuScalarType *)&alpha, x_, incx, a_, lda); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + x_, incx, a_, lda); }); }); return done; @@ -1087,8 +1098,9 @@ inline sycl::event her2(const char *func_name, Func func, sycl::queue &queue, up auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, y_, incy, a_, lda); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, y_, incy, a_, lda); }); }); return done; @@ -1124,8 +1136,9 @@ inline sycl::event hpmv(const char *func_name, Func func, sycl::queue &queue, up auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, a_, x_, incx, (cuDataType *)&beta, y_, incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, + x_, incx, (cuDataType *)&beta, y_, incy); }); }); return done; @@ -1161,8 +1174,9 @@ inline sycl::event hpr(const char *func_name, Func func, sycl::queue &queue, upl auto a_ = reinterpret_cast(a); auto x_ = reinterpret_cast(x); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuScalarType *)&alpha, x_, incx, a_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + x_, incx, a_); }); }); return done; @@ -1198,8 +1212,9 @@ inline sycl::event hpr2(const char *func_name, Func func, sycl::queue &queue, up auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, y_, incy, a_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, y_, incy, a_); }); }); return done; @@ -1236,9 +1251,9 @@ inline sycl::event sbmv(const char *func_name, Func func, sycl::queue &queue, up auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - k, (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, k, (cuDataType *)&alpha, + a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); }); }); return done; @@ -1274,9 +1289,9 @@ inline sycl::event symv(const char *func_name, Func func, sycl::queue &queue, up auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, a_, lda, x_, incx, (cuDataType *)&beta, y_, - incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, + lda, x_, incx, (cuDataType *)&beta, y_, incy); }); }); return done; @@ -1311,8 +1326,9 @@ inline sycl::event syr(const char *func_name, Func func, sycl::queue &queue, upl auto a_ = reinterpret_cast(a); auto x_ = reinterpret_cast(x); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, a_, lda); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, a_, lda); }); }); return done; @@ -1350,8 +1366,9 @@ inline sycl::event syr2(const char *func_name, Func func, sycl::queue &queue, up auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, y_, incy, a_, lda); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, y_, incy, a_, lda); }); }); return done; @@ -1390,8 +1407,9 @@ inline sycl::event spmv(const char *func_name, Func func, sycl::queue &queue, up auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, a_, x_, incx, (cuDataType *)&beta, y_, incy); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, + x_, incx, (cuDataType *)&beta, y_, incy); }); }); return done; @@ -1426,8 +1444,9 @@ inline sycl::event spr(const char *func_name, Func func, sycl::queue &queue, upl auto a_ = reinterpret_cast(a); auto x_ = reinterpret_cast(x); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, a_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, a_); }); }); return done; @@ -1462,8 +1481,9 @@ inline sycl::event spr2(const char *func_name, Func func, sycl::queue &queue, up auto x_ = reinterpret_cast(x); auto y_ = reinterpret_cast(y); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), n, - (cuDataType *)&alpha, x_, incx, y_, incy, a_); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + incx, y_, incy, a_); }); }); return done; @@ -1499,9 +1519,9 @@ inline sycl::event tbmv(const char *func_name, Func func, sycl::queue &queue, up auto a_ = reinterpret_cast(a); auto x_ = reinterpret_cast(x); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, k, - a_, lda, x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, k, a_, lda, x_, incx); }); }); return done; @@ -1539,9 +1559,9 @@ inline sycl::event tbsv(const char *func_name, Func func, sycl::queue &queue, up auto a_ = reinterpret_cast(a); auto x_ = reinterpret_cast(x); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, k, - a_, lda, x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, k, a_, lda, x_, incx); }); }); return done; @@ -1578,9 +1598,9 @@ inline sycl::event tpmv(const char *func_name, Func func, sycl::queue &queue, up auto a_ = reinterpret_cast(a); auto x_ = reinterpret_cast(x); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, a_, - x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, a_, x_, incx); }); }); return done; @@ -1617,9 +1637,9 @@ inline sycl::event tpsv(const char *func_name, Func func, sycl::queue &queue, up auto a_ = reinterpret_cast(a); auto x_ = reinterpret_cast(x); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, a_, - x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, a_, x_, incx); }); }); return done; @@ -1656,9 +1676,9 @@ inline sycl::event trmv(const char *func_name, Func func, sycl::queue &queue, up auto a_ = reinterpret_cast(a); auto x_ = reinterpret_cast(x); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, a_, - lda, x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, a_, lda, x_, incx); }); }); return done; @@ -1695,9 +1715,9 @@ inline sycl::event trsv(const char *func_name, Func func, sycl::queue &queue, up auto a_ = reinterpret_cast(a); auto x_ = reinterpret_cast(x); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), get_cublas_diag_type(unit_diag), n, a_, - lda, x_, incx); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), n, a_, lda, x_, incx); }); }); return done; diff --git a/src/blas/backends/cublas/cublas_level3.cpp b/src/blas/backends/cublas/cublas_level3.cpp index 07e48333c..5ea4e2152 100644 --- a/src/blas/backends/cublas/cublas_level3.cpp +++ b/src/blas/backends/cublas/cublas_level3.cpp @@ -47,9 +47,9 @@ inline void gemm(const char *func_name, Func func, sycl::queue &queue, transpose auto b_ = sc.get_mem(b_acc); auto c_ = sc.get_mem(c_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, a_, - lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), + get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, + a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); }); }); } @@ -94,10 +94,10 @@ inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::que auto b_ = sc.get_mem(b_acc); auto c_ = sc.get_mem(c_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC(cublasGemmEx, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, a_, - DT_A, lda, b_, DT_B, ldb, (cuDataType_C *)&beta, c_, DT_C, ldc, DT_C, - CUBLAS_GEMM_DEFAULT); + CUBLAS_ERROR_FUNC_SYNC(cublasGemmEx, err, handle, get_cublas_operation(transa), + get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, + a_, DT_A, lda, b_, DT_B, ldb, (cuDataType_C *)&beta, c_, DT_C, + ldc, DT_C, CUBLAS_GEMM_DEFAULT); }); }); } @@ -139,9 +139,9 @@ inline void symm(const char *func_name, Func func, sycl::queue &queue, side left auto b_ = sc.get_mem(b_acc); auto c_ = sc.get_mem(c_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, a_, - lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(left_right), + get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, + a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); }); }); } @@ -178,9 +178,9 @@ inline void hemm(const char *func_name, Func func, sycl::queue &queue, side left auto b_ = sc.get_mem(b_acc); auto c_ = sc.get_mem(c_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, a_, - lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(left_right), + get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, + a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); }); }); } @@ -211,9 +211,10 @@ inline void syrk(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto a_ = sc.get_mem(a_acc); auto c_ = sc.get_mem(c_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), n, k, (cuDataType *)&alpha, a_, lda, - (cuDataType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + n, k, (cuDataType *)&alpha, a_, lda, (cuDataType *)&beta, c_, + ldc); }); }); } @@ -249,9 +250,10 @@ inline void herk(const char *func_name, Func func, sycl::queue &queue, uplo uppe auto a_ = sc.get_mem(a_acc); auto c_ = sc.get_mem(c_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), n, k, (cuScalarType *)&alpha, a_, lda, - (cuScalarType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + n, k, (cuScalarType *)&alpha, a_, lda, (cuScalarType *)&beta, + c_, ldc); }); }); } @@ -286,9 +288,10 @@ inline void syr2k(const char *func_name, Func func, sycl::queue &queue, uplo upp auto b_ = sc.get_mem(b_acc); auto c_ = sc.get_mem(c_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), n, k, (cuDataType *)&alpha, a_, lda, - b_, ldb, (cuDataType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, + (cuDataType *)&beta, c_, ldc); }); }); } @@ -325,9 +328,10 @@ inline void her2k(const char *func_name, Func func, sycl::queue &queue, uplo upp auto b_ = sc.get_mem(b_acc); auto c_ = sc.get_mem(c_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), n, k, (cuDataType *)&alpha, a_, lda, - b_, ldb, (cuScalarType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, + (cuScalarType *)&beta, c_, ldc); }); }); } @@ -364,10 +368,10 @@ inline void trmm(const char *func_name, Func func, sycl::queue &queue, side left auto a_ = sc.get_mem(a_acc); auto b_ = sc.get_mem(b_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, a_, - lda, b_, ldb, b_, ldb); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(left_right), + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, + a_, lda, b_, ldb, b_, ldb); }); }); } @@ -400,10 +404,10 @@ inline void trsm(const char *func_name, Func func, sycl::queue &queue, side left auto a_ = sc.get_mem(a_acc); auto b_ = sc.get_mem(b_acc); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, a_, - lda, b_, ldb); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(left_right), + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, + a_, lda, b_, ldb); }); }); } @@ -442,9 +446,9 @@ inline sycl::event gemm(const char *func_name, Func func, sycl::queue &queue, tr auto b_ = reinterpret_cast(b); auto c_ = reinterpret_cast(c); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, a_, - lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), + get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, + a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); }); }); return done; @@ -488,10 +492,10 @@ inline sycl::event gemm_ex_usm(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C auto b_ = reinterpret_cast(b); auto c_ = reinterpret_cast(c); cublasStatus_t err; - CUBLAS_ERROR_FUNC(cublasGemmEx, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, a_, - DT_A, lda, b_, DT_B, ldb, (cuDataType_C *)&beta, c_, DT_C, ldc, DT_C, - CUBLAS_GEMM_DEFAULT); + CUBLAS_ERROR_FUNC_SYNC(cublasGemmEx, err, handle, get_cublas_operation(transa), + get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, + a_, DT_A, lda, b_, DT_B, ldb, (cuDataType_C *)&beta, c_, DT_C, + ldc, DT_C, CUBLAS_GEMM_DEFAULT); }); }); return done; @@ -537,9 +541,9 @@ inline sycl::event symm(const char *func_name, Func func, sycl::queue &queue, si auto b_ = reinterpret_cast(b); auto c_ = reinterpret_cast(c); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, a_, - lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(left_right), + get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, + a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); }); }); return done; @@ -579,9 +583,9 @@ inline sycl::event hemm(const char *func_name, Func func, sycl::queue &queue, si auto b_ = reinterpret_cast(b); auto c_ = reinterpret_cast(c); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, a_, - lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(left_right), + get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, + a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); }); }); return done; @@ -616,9 +620,10 @@ inline sycl::event syrk(const char *func_name, Func func, sycl::queue &queue, up auto a_ = reinterpret_cast(a); auto c_ = reinterpret_cast(c); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), n, k, (cuDataType *)&alpha, a_, lda, - (cuDataType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + n, k, (cuDataType *)&alpha, a_, lda, (cuDataType *)&beta, c_, + ldc); }); }); return done; @@ -657,9 +662,10 @@ inline sycl::event herk(const char *func_name, Func func, sycl::queue &queue, up auto a_ = reinterpret_cast(a); auto c_ = reinterpret_cast(c); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), n, k, (cuScalarType *)&alpha, a_, lda, - (cuScalarType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + n, k, (cuScalarType *)&alpha, a_, lda, (cuScalarType *)&beta, + c_, ldc); }); }); return done; @@ -697,9 +703,10 @@ inline sycl::event syr2k(const char *func_name, Func func, sycl::queue &queue, u auto b_ = reinterpret_cast(b); auto c_ = reinterpret_cast(c); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), n, k, (cuDataType *)&alpha, a_, lda, - b_, ldb, (cuDataType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, + (cuDataType *)&beta, c_, ldc); }); }); return done; @@ -740,9 +747,10 @@ inline sycl::event her2k(const char *func_name, Func func, sycl::queue &queue, u auto b_ = reinterpret_cast(b); auto c_ = reinterpret_cast(c); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), - get_cublas_operation(trans), n, k, (cuDataType *)&alpha, a_, lda, - b_, ldb, (cuScalarType *)&beta, c_, ldc); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, + (cuScalarType *)&beta, c_, ldc); }); }); return done; @@ -783,10 +791,10 @@ inline sycl::event trmm(const char *func_name, Func func, sycl::queue &queue, si auto a_ = reinterpret_cast(a); auto b_ = reinterpret_cast(b); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, a_, - lda, b_, ldb, b_, ldb); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(left_right), + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, + a_, lda, b_, ldb, b_, ldb); }); }); return done; @@ -823,10 +831,10 @@ inline sycl::event trsm(const char *func_name, Func func, sycl::queue &queue, si auto a_ = reinterpret_cast(a); auto b_ = reinterpret_cast(b); cublasStatus_t err; - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, a_, - lda, b_, ldb); + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(left_right), + get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), + get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, + a_, lda, b_, ldb); }); }); return done;