From 658919657bf3eb85da42bd917e8f997752f838e7 Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Sun, 16 Jun 2024 14:54:10 -0700 Subject: [PATCH 1/2] Clean up legacy linalg implementation from the backend --- dpnp/backend/CMakeLists.txt | 1 - dpnp/backend/kernels/dpnp_krnl_common.cpp | 496 +----------- dpnp/backend/kernels/dpnp_krnl_linalg.cpp | 914 ---------------------- dpnp/backend/src/dpnp_fptr.hpp | 1 - dpnp/backend/src/dpnp_iface_fptr.cpp | 1 - 5 files changed, 1 insertion(+), 1412 deletions(-) delete mode 100644 dpnp/backend/kernels/dpnp_krnl_linalg.cpp diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index 2ce0dfd5c04..f1f5b447772 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -30,7 +30,6 @@ set(DPNP_SRC kernels/dpnp_krnl_elemwise.cpp kernels/dpnp_krnl_fft.cpp kernels/dpnp_krnl_indexing.cpp - kernels/dpnp_krnl_linalg.cpp kernels/dpnp_krnl_logic.cpp kernels/dpnp_krnl_manipulation.cpp kernels/dpnp_krnl_mathematical.cpp diff --git a/dpnp/backend/kernels/dpnp_krnl_common.cpp b/dpnp/backend/kernels/dpnp_krnl_common.cpp index 423851e4bfd..b2f4247d8d1 100644 --- a/dpnp/backend/kernels/dpnp_krnl_common.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_common.cpp @@ -38,69 +38,6 @@ namespace mkl_blas_cm = oneapi::mkl::blas::column_major; namespace mkl_blas_rm = oneapi::mkl::blas::row_major; namespace mkl_lapack = oneapi::mkl::lapack; -template -class dpnp_astype_c_kernel; - -template -DPCTLSyclEventRef dpnp_astype_c(DPCTLSyclQueueRef q_ref, - const void *array1_in, - void *result1, - const size_t size, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - sycl::queue q = *(reinterpret_cast(q_ref)); - sycl::event event; - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size); - const _DataType *array_in = input1_ptr.get_ptr(); - _ResultType *result = reinterpret_cast<_ResultType *>(result1); - - if ((array_in == nullptr) || (result == nullptr)) { - return event_ref; - } - - if (size == 0) { - return event_ref; - } - - sycl::range<1> gws(size); - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { - size_t i = global_id[0]; - result[i] = array_in[i]; - }; - - auto kernel_func = [&](sycl::handler &cgh) { - cgh.parallel_for>( - gws, kernel_parallel_for_func); - }; - - event = q.submit(kernel_func); - - event_ref = reinterpret_cast(&event); - - return DPCTLEvent_Copy(event_ref); -} - -template -void dpnp_astype_c(const void *array1_in, void *result1, const size_t size) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_astype_c<_DataType, _ResultType>( - q_ref, array1_in, result1, size, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_astype_default_c)(const void *, void *, const size_t) = - dpnp_astype_c<_DataType, _ResultType>; - template @@ -521,199 +458,9 @@ DPCTLSyclEventRef (*dpnp_dot_ext_c)(DPCTLSyclQueueRef, const DPCTLEventVectorRef) = dpnp_dot_c<_DataType_output, _DataType_input1, _DataType_input2>; -template -DPCTLSyclEventRef dpnp_eig_c(DPCTLSyclQueueRef q_ref, - const void *array_in, - void *result1, - void *result2, - size_t size, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // TODO this kernel works with square 2-D array only - - // Kernel Type for calculation is double type - // because interface requires float type but calculations are expected in - // double type - - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - if (!size) { - return event_ref; - } - sycl::queue q = *(reinterpret_cast(q_ref)); - sycl::event event; - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array_in, size * size, true); - DPNPC_ptr_adapter<_ResultType> result1_ptr(q_ref, result1, size, true, - true); - DPNPC_ptr_adapter<_ResultType> result2_ptr(q_ref, result2, size * size, - true, true); - const _DataType *array = input1_ptr.get_ptr(); - _ResultType *result_val = result1_ptr.get_ptr(); - _ResultType *result_vec = result2_ptr.get_ptr(); - - double *result_val_kern = reinterpret_cast( - sycl::malloc_shared(size * sizeof(double), q)); - double *result_vec_kern = reinterpret_cast( - sycl::malloc_shared(size * size * sizeof(double), q)); - - // type conversion. Also, math library requires copy memory because override - for (size_t it = 0; it < (size * size); ++it) { - result_vec_kern[it] = - array[it]; // TODO use memcpy_c or input1_ptr(array_in, size, true) - } - - const std::int64_t lda = std::max(1UL, size); - - const std::int64_t scratchpad_size = - mkl_lapack::syevd_scratchpad_size( - q, oneapi::mkl::job::vec, oneapi::mkl::uplo::upper, size, lda); - - // https://github.com/IntelPython/dpnp/issues/1005 - // Test tests/test_linalg.py::test_eig_arange raises 2 issues in dpnp_eig_c - // on CPU - // 1. Call of mkl_lapack::syevd_scratchpad_size returns wrong value - // that causes out of memory issue. - // 2. Call of the function oneapi::mkl::lapack::syevd causes segfault. - // Example of the command to reproduce the issues: - // SYCL_DEVICE_FILTER=cpu pytest - // tests/test_linalg.py::test_eig_arange[2-float64] High-level reason of the - // issues is numpy is imported before dpnp in third party tests. Low-level - // reason of the issues could be related to MKL runtime library loaded - // during numpy import. - - double *scratchpad = reinterpret_cast( - sycl::malloc_shared(scratchpad_size * sizeof(double), q)); - - event = mkl_lapack::syevd( - q, // queue - oneapi::mkl::job::vec, // jobz - oneapi::mkl::uplo::upper, // uplo - size, // The order of the matrix A (0 <= n) - result_vec_kern, // will be overwritten with eigenvectors - lda, result_val_kern, scratchpad, scratchpad_size); - event.wait(); - - sycl::free(scratchpad, q); - - for (size_t it1 = 0; it1 < size; ++it1) { - result_val[it1] = - result_val_kern[it1]; // TODO use memcpy_c or dpnpc_transpose_c - for (size_t it2 = 0; it2 < size; ++it2) { - // copy + transpose - result_vec[it2 * size + it1] = result_vec_kern[it1 * size + it2]; - } - } - - sycl::free(result_val_kern, q); - sycl::free(result_vec_kern, q); - - return event_ref; -} - -template -void dpnp_eig_c(const void *array_in, void *result1, void *result2, size_t size) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_eig_c<_DataType, _ResultType>( - q_ref, array_in, result1, result2, size, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_eig_default_c)(const void *, void *, void *, size_t) = - dpnp_eig_c<_DataType, _ResultType>; - -template -DPCTLSyclEventRef dpnp_eigvals_c(DPCTLSyclQueueRef q_ref, - const void *array_in, - void *result1, - size_t size, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // TODO this kernel works with square 2-D array only - - // Kernel Type for calculation is double type - // because interface requires float type but calculations are expected in - // double type - - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - if (!size) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - sycl::event event; - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array_in, size * size, true); - DPNPC_ptr_adapter<_ResultType> result1_ptr(q_ref, result1, size, true, - true); - const _DataType *array = input1_ptr.get_ptr(); - _ResultType *result_val = result1_ptr.get_ptr(); - - double *result_val_kern = reinterpret_cast( - sycl::malloc_shared(size * sizeof(double), q)); - double *result_vec_kern = reinterpret_cast( - sycl::malloc_shared(size * size * sizeof(double), q)); - - // type conversion. Also, math library requires copy memory because override - for (size_t it = 0; it < (size * size); ++it) { - result_vec_kern[it] = array[it]; // TODO same as previous kernel - } - - const std::int64_t lda = std::max(1UL, size); - - const std::int64_t scratchpad_size = - mkl_lapack::syevd_scratchpad_size( - q, oneapi::mkl::job::vec, oneapi::mkl::uplo::upper, size, lda); - - double *scratchpad = reinterpret_cast( - sycl::malloc_shared(scratchpad_size * sizeof(double), q)); - - event = mkl_lapack::syevd(q, // queue - oneapi::mkl::job::vec, // jobz - oneapi::mkl::uplo::upper, // uplo - size, // The order of the matrix A (0 <= n) - result_vec_kern, lda, result_val_kern, scratchpad, - scratchpad_size); - event.wait(); - - sycl::free(scratchpad, q); - - for (size_t it1 = 0; it1 < size; ++it1) { - result_val[it1] = result_val_kern[it1]; - } - - sycl::free(result_val_kern, q); - - return event_ref; -} - -template -void dpnp_eigvals_c(const void *array_in, void *result1, size_t size) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_eigvals_c<_DataType, _ResultType>( - q_ref, array_in, result1, size, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_eigvals_default_c)(const void *, - void *, - size_t) = dpnp_eigvals_c<_DataType, _ResultType>; template class dpnp_initval_c_kernel; @@ -769,226 +516,10 @@ DPCTLSyclEventRef (*dpnp_initval_ext_c)(DPCTLSyclQueueRef, const DPCTLEventVectorRef) = dpnp_initval_c<_DataType>; -template -class dpnp_matmul_c_kernel; -template -DPCTLSyclEventRef dpnp_matmul_c(DPCTLSyclQueueRef q_ref, - void *result_out, - const size_t result_size, - const size_t result_ndim, - const shape_elem_type *result_shape, - const shape_elem_type *result_strides, - const void *input1_in, - const size_t input1_size, - const size_t input1_ndim, - const shape_elem_type *input1_shape, - const shape_elem_type *input1_strides, - const void *input2_in, - const size_t input2_size, - const size_t input2_ndim, - const shape_elem_type *input2_shape, - const shape_elem_type *input2_strides, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - (void)result_size; - (void)result_ndim; - (void)result_shape; - (void)result_strides; - (void)input1_size; - (void)input1_ndim; - (void)input1_strides; - (void)input2_size; - (void)input2_ndim; - (void)input2_strides; - - DPCTLSyclEventRef event_ref = nullptr; - - size_t size_m = input1_shape[0]; - size_t size_n = input2_shape[1]; - size_t size_k = input1_shape[1]; - - if (!size_m || !size_n || !size_k) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - std::vector dep_events = cast_event_vector(dep_event_vec_ref); - sycl::event event; - - _DataType *array_1 = - reinterpret_cast<_DataType *>(const_cast(input1_in)); - _DataType *array_2 = - reinterpret_cast<_DataType *>(const_cast(input2_in)); - _DataType *result = reinterpret_cast<_DataType *>(result_out); - - if constexpr (std::is_same<_DataType, double>::value || - std::is_same<_DataType, float>::value) - { - // using std::max for these ldx variables is required by math library - const std::int64_t ld_array_2 = - std::max(1UL, size_n); // First dimensions of array_2 - const std::int64_t ld_array_1 = - std::max(1UL, size_k); // First dimensions of array_1 - const std::int64_t ld_result = - std::max(1UL, size_n); // Fast dimensions of result - - event = mkl_blas::gemm(q, oneapi::mkl::transpose::nontrans, - oneapi::mkl::transpose::nontrans, size_n, size_m, - size_k, _DataType(1), array_2, ld_array_2, - array_1, ld_array_1, _DataType(0), result, - ld_result, dep_events); - } - else { - // input1: M x K - // input2: K x N - // result: M x N - const size_t dim_m = - size_m; // shape1.front(); // First dimensions of array1 - const size_t dim_n = - size_n; // shape2.back(); // Last dimensions of array2 - const size_t dim_k = - size_k; // shape1.back(); // First dimensions of array2 - - sycl::range<2> gws(dim_m, dim_n); // dimensions are: "i" and "j" - - auto kernel_parallel_for_func = [=](sycl::id<2> global_id) { - size_t i = global_id[0]; // for (size_t i = 0; i < size; ++i) - { - size_t j = global_id[1]; // for (size_t j = 0; j < size; ++j) - { - _DataType acc = _DataType(0); - for (size_t k = 0; k < dim_k; ++k) { - const size_t index_1 = i * dim_k + k; - const size_t index_2 = k * dim_n + j; - acc += array_1[index_1] * array_2[index_2]; - } - const size_t index_result = i * dim_n + j; - result[index_result] = acc; - } - } - }; - - auto kernel_func = [&](sycl::handler &cgh) { - cgh.depends_on(dep_events); - cgh.parallel_for>( - gws, kernel_parallel_for_func); - }; - - event = q.submit(kernel_func); - } - - event_ref = reinterpret_cast(&event); - - return DPCTLEvent_Copy(event_ref); -} - -template -void dpnp_matmul_c(void *result_out, - const size_t result_size, - const size_t result_ndim, - const shape_elem_type *result_shape, - const shape_elem_type *result_strides, - const void *input1_in, - const size_t input1_size, - const size_t input1_ndim, - const shape_elem_type *input1_shape, - const shape_elem_type *input1_strides, - const void *input2_in, - const size_t input2_size, - const size_t input2_ndim, - const shape_elem_type *input2_shape, - const shape_elem_type *input2_strides) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_matmul_c<_DataType>( - q_ref, result_out, result_size, result_ndim, result_shape, - result_strides, input1_in, input1_size, input1_ndim, input1_shape, - input1_strides, input2_in, input2_size, input2_ndim, input2_shape, - input2_strides, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_matmul_default_c)(void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *) = - dpnp_matmul_c<_DataType>; void func_map_init_linalg(func_map_t &fmap) { - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_BLN][eft_BLN] = { - eft_BLN, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_BLN][eft_INT] = { - eft_INT, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_BLN][eft_LNG] = { - eft_LNG, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_BLN][eft_FLT] = { - eft_FLT, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_BLN][eft_DBL] = { - eft_DBL, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_INT][eft_BLN] = { - eft_BLN, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_INT][eft_FLT] = { - eft_FLT, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_LNG][eft_BLN] = { - eft_BLN, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_LNG][eft_INT] = { - eft_INT, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_LNG][eft_FLT] = { - eft_FLT, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_FLT][eft_BLN] = { - eft_BLN, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_FLT][eft_INT] = { - eft_INT, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_FLT][eft_LNG] = { - eft_LNG, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_DBL][eft_BLN] = { - eft_BLN, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_DBL][eft_INT] = { - eft_INT, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_DBL][eft_LNG] = { - eft_LNG, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_DBL][eft_FLT] = { - eft_FLT, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_astype_default_c}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_C64][eft_C64] = { - eft_C64, - (void *) - dpnp_astype_default_c, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_C128][eft_C128] = { - eft_C128, - (void *) - dpnp_astype_default_c, std::complex>}; fmap[DPNPFuncName::DPNP_FN_DOT][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_dot_default_c}; @@ -1057,23 +588,7 @@ void func_map_init_linalg(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_dot_ext_c}; - fmap[DPNPFuncName::DPNP_FN_EIG][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_eig_default_c}; - fmap[DPNPFuncName::DPNP_FN_EIG][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_eig_default_c}; - fmap[DPNPFuncName::DPNP_FN_EIG][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_eig_default_c}; - fmap[DPNPFuncName::DPNP_FN_EIG][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_eig_default_c}; - - fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_eigvals_default_c}; - fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_eigvals_default_c}; - fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_eigvals_default_c}; - fmap[DPNPFuncName::DPNP_FN_EIGVALS][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_eigvals_default_c}; + fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_BLN][eft_BLN] = { eft_BLN, (void *)dpnp_initval_default_c}; @@ -1103,14 +618,5 @@ void func_map_init_linalg(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_INITVAL_EXT][eft_C128][eft_C128] = { eft_C128, (void *)dpnp_initval_ext_c>}; - fmap[DPNPFuncName::DPNP_FN_MATMUL][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_matmul_default_c}; - fmap[DPNPFuncName::DPNP_FN_MATMUL][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_matmul_default_c}; - fmap[DPNPFuncName::DPNP_FN_MATMUL][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_matmul_default_c}; - fmap[DPNPFuncName::DPNP_FN_MATMUL][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_matmul_default_c}; - return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp deleted file mode 100644 index 1dc2783d48c..00000000000 --- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp +++ /dev/null @@ -1,914 +0,0 @@ -//***************************************************************************** -// Copyright (c) 2016-2024, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// - Redistributions of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// - Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -// THE POSSIBILITY OF SUCH DAMAGE. -//***************************************************************************** - -#include -#include - -#include "dpnp_fptr.hpp" -#include "dpnp_utils.hpp" -#include "dpnpc_memory_adapter.hpp" -#include "queue_sycl.hpp" -#include - -namespace mkl_blas = oneapi::mkl::blas::row_major; -namespace mkl_lapack = oneapi::mkl::lapack; - -template -DPCTLSyclEventRef dpnp_cholesky_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - const size_t size, - const size_t data_size, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - if (!data_size) { - return event_ref; - } - sycl::queue q = *(reinterpret_cast(q_ref)); - - sycl::event event; - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size, true); - DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result1, size, true, true); - _DataType *in_array = input1_ptr.get_ptr(); - _DataType *result = result_ptr.get_ptr(); - - size_t iters = size / (data_size * data_size); - - // math lib func overrides input - _DataType *in_a = reinterpret_cast<_DataType *>( - sycl::malloc_shared(data_size * data_size * sizeof(_DataType), q)); - - for (size_t k = 0; k < iters; ++k) { - for (size_t it = 0; it < data_size * data_size; ++it) { - in_a[it] = in_array[k * (data_size * data_size) + it]; - } - - const std::int64_t n = data_size; - - const std::int64_t lda = std::max(1UL, n); - - const std::int64_t scratchpad_size = - mkl_lapack::potrf_scratchpad_size<_DataType>( - q, oneapi::mkl::uplo::upper, n, lda); - - _DataType *scratchpad = reinterpret_cast<_DataType *>( - sycl::malloc_shared(scratchpad_size * sizeof(_DataType), q)); - - event = mkl_lapack::potrf(q, oneapi::mkl::uplo::upper, n, in_a, lda, - scratchpad, scratchpad_size); - - event.wait(); - - for (size_t i = 0; i < data_size; i++) { - bool arg = false; - for (size_t j = 0; j < data_size; j++) { - if (i == j - 1) { - arg = true; - } - if (arg) { - in_a[i * data_size + j] = 0; - } - } - } - - sycl::free(scratchpad, q); - - for (size_t t = 0; t < data_size * data_size; ++t) { - result[k * (data_size * data_size) + t] = in_a[t]; - } - } - - sycl::free(in_a, q); - - return event_ref; -} - -template -void dpnp_cholesky_c(void *array1_in, - void *result1, - const size_t size, - const size_t data_size) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_cholesky_c<_DataType>( - q_ref, array1_in, result1, size, data_size, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_cholesky_default_c)(void *, void *, const size_t, const size_t) = - dpnp_cholesky_c<_DataType>; - -template -DPCTLSyclEventRef dpnp_det_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - shape_elem_type *shape, - size_t ndim, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - const size_t input_size = std::accumulate( - shape, shape + ndim, 1, std::multiplies()); - if (!input_size) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - size_t n = shape[ndim - 1]; - size_t size_out = 1; - if (ndim != 2) { - for (size_t i = 0; i < ndim - 2; i++) { - size_out *= shape[i]; - } - } - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, input_size, true); - DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result1, size_out, true, - true); - _DataType *array_1 = input1_ptr.get_ptr(); - _DataType *result = result_ptr.get_ptr(); - - _DataType *matrix = new _DataType[n * n]; - _DataType *elems = new _DataType[n * n]; - - for (size_t i = 0; i < size_out; i++) { - if (size_out > 1) { - for (size_t j = i * n * n; j < (i + 1) * n * n; j++) { - elems[j - i * n * n] = array_1[j]; - } - - for (size_t j = 0; j < n; j++) { - for (size_t k = 0; k < n; k++) { - matrix[j * n + k] = elems[j * n + k]; - } - } - } - else { - for (size_t j = 0; j < n; j++) { - for (size_t k = 0; k < n; k++) { - matrix[j * n + k] = array_1[j * n + k]; - } - } - } - - _DataType det_val = 1; - for (size_t l = 0; l < n; l++) { - if (matrix[l * n + l] == 0) { - for (size_t j = l; j < n; j++) { - if (matrix[j * n + l] != 0) { - for (size_t k = l; k < n; k++) { - _DataType c = matrix[l * n + k]; - matrix[l * n + k] = -1 * matrix[j * n + k]; - matrix[j * n + k] = c; - } - break; - } - if (j == n - 1 and matrix[j * n + l] == 0) { - det_val = 0; - } - } - } - if (det_val != 0) { - for (size_t j = l + 1; j < n; j++) { - _DataType quotient = - -(matrix[j * n + l] / matrix[l * n + l]); - for (size_t k = l + 1; k < n; k++) { - matrix[j * n + k] += quotient * matrix[l * n + k]; - } - } - } - } - - if (det_val != 0) { - for (size_t l = 0; l < n; l++) { - det_val *= matrix[l * n + l]; - } - } - - result[i] = det_val; - } - - delete[] elems; - delete[] matrix; - return event_ref; -} - -template -void dpnp_det_c(void *array1_in, - void *result1, - shape_elem_type *shape, - size_t ndim) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_det_c<_DataType>( - q_ref, array1_in, result1, shape, ndim, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_det_default_c)(void *, void *, shape_elem_type *, size_t) = - dpnp_det_c<_DataType>; - -template -DPCTLSyclEventRef (*dpnp_det_ext_c)(DPCTLSyclQueueRef, - void *, - void *, - shape_elem_type *, - size_t, - const DPCTLEventVectorRef) = - dpnp_det_c<_DataType>; - -template -DPCTLSyclEventRef dpnp_inv_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - shape_elem_type *shape, - size_t ndim, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)ndim; - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - const size_t input_size = std::accumulate( - shape, shape + ndim, 1, std::multiplies()); - if (!input_size) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, input_size, true); - DPNPC_ptr_adapter<_ResultType> result_ptr(q_ref, result1, input_size, true, - true); - - _DataType *array_1 = input1_ptr.get_ptr(); - _ResultType *result = result_ptr.get_ptr(); - - size_t n = shape[0]; - - _ResultType *a_arr = new _ResultType[n * n]; - _ResultType *e_arr = new _ResultType[n * n]; - - for (size_t i = 0; i < n; ++i) { - for (size_t j = 0; j < n; ++j) { - a_arr[i * n + j] = array_1[i * n + j]; - if (i == j) { - e_arr[i * n + j] = 1; - } - else { - e_arr[i * n + j] = 0; - } - } - } - - for (size_t k = 0; k < n; ++k) { - if (a_arr[k * n + k] == 0) { - for (size_t i = k; i < n; ++i) { - if (a_arr[i * n + k] != 0) { - for (size_t j = 0; j < n; ++j) { - float c = a_arr[k * n + j]; - a_arr[k * n + j] = a_arr[i * n + j]; - a_arr[i * n + j] = c; - float c_e = e_arr[k * n + j]; - e_arr[k * n + j] = e_arr[i * n + j]; - e_arr[i * n + j] = c_e; - } - break; - } - } - } - - float temp = a_arr[k * n + k]; - - for (size_t j = 0; j < n; ++j) { - a_arr[k * n + j] = a_arr[k * n + j] / temp; - e_arr[k * n + j] = e_arr[k * n + j] / temp; - } - - for (size_t i = k + 1; i < n; ++i) { - temp = a_arr[i * n + k]; - for (size_t j = 0; j < n; j++) { - a_arr[i * n + j] = a_arr[i * n + j] - a_arr[k * n + j] * temp; - e_arr[i * n + j] = e_arr[i * n + j] - e_arr[k * n + j] * temp; - } - } - } - - for (size_t k = 0; k < n - 1; ++k) { - size_t ind_k = n - 1 - k; - for (size_t i = 0; i < ind_k; ++i) { - size_t ind_i = ind_k - 1 - i; - - float temp = a_arr[ind_i * n + ind_k]; - for (size_t j = 0; j < n; ++j) { - a_arr[ind_i * n + j] = - a_arr[ind_i * n + j] - a_arr[ind_k * n + j] * temp; - e_arr[ind_i * n + j] = - e_arr[ind_i * n + j] - e_arr[ind_k * n + j] * temp; - } - } - } - - for (size_t i = 0; i < n; ++i) { - for (size_t j = 0; j < n; ++j) { - result[i * n + j] = e_arr[i * n + j]; - } - } - - delete[] a_arr; - delete[] e_arr; - return event_ref; -} - -template -void dpnp_inv_c(void *array1_in, - void *result1, - shape_elem_type *shape, - size_t ndim) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_inv_c<_DataType, _ResultType>( - q_ref, array1_in, result1, shape, ndim, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_inv_default_c)(void *, void *, shape_elem_type *, size_t) = - dpnp_inv_c<_DataType, _ResultType>; - -template -class dpnp_kron_c_kernel; - -template -DPCTLSyclEventRef dpnp_kron_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *array2_in, - void *result1, - shape_elem_type *in1_shape, - shape_elem_type *in2_shape, - shape_elem_type *res_shape, - size_t ndim, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - const size_t input1_size = std::accumulate( - in1_shape, in1_shape + ndim, 1, std::multiplies()); - const size_t input2_size = std::accumulate( - in2_shape, in2_shape + ndim, 1, std::multiplies()); - const size_t result_size = std::accumulate( - res_shape, res_shape + ndim, 1, std::multiplies()); - if (!(result_size && input1_size && input2_size)) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - DPNPC_ptr_adapter<_DataType1> input1_ptr(q_ref, array1_in, input1_size); - DPNPC_ptr_adapter<_DataType2> input2_ptr(q_ref, array2_in, input2_size); - DPNPC_ptr_adapter<_ResultType> result_ptr(q_ref, result1, result_size); - - _DataType1 *array1 = input1_ptr.get_ptr(); - _DataType2 *array2 = input2_ptr.get_ptr(); - _ResultType *result = result_ptr.get_ptr(); - - shape_elem_type *_in1_shape = reinterpret_cast( - sycl::malloc_shared(ndim * sizeof(shape_elem_type), q)); - shape_elem_type *_in2_shape = reinterpret_cast( - sycl::malloc_shared(ndim * sizeof(shape_elem_type), q)); - - q.memcpy(_in1_shape, in1_shape, ndim * sizeof(shape_elem_type)).wait(); - q.memcpy(_in2_shape, in2_shape, ndim * sizeof(shape_elem_type)).wait(); - - shape_elem_type *in1_offsets = reinterpret_cast( - sycl::malloc_shared(ndim * sizeof(shape_elem_type), q)); - shape_elem_type *in2_offsets = reinterpret_cast( - sycl::malloc_shared(ndim * sizeof(shape_elem_type), q)); - shape_elem_type *res_offsets = reinterpret_cast( - sycl::malloc_shared(ndim * sizeof(shape_elem_type), q)); - - get_shape_offsets_inkernel(in1_shape, ndim, in1_offsets); - get_shape_offsets_inkernel(in2_shape, ndim, in2_offsets); - get_shape_offsets_inkernel(res_shape, ndim, res_offsets); - - sycl::range<1> gws(result_size); - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { - const size_t idx = global_id[0]; - - size_t idx1 = 0; - size_t idx2 = 0; - size_t reminder = idx; - for (size_t axis = 0; axis < ndim; ++axis) { - const size_t res_axis = reminder / res_offsets[axis]; - reminder = reminder - res_axis * res_offsets[axis]; - - const size_t in1_axis = res_axis / _in2_shape[axis]; - const size_t in2_axis = res_axis - in1_axis * _in2_shape[axis]; - - idx1 += in1_axis * in1_offsets[axis]; - idx2 += in2_axis * in2_offsets[axis]; - } - - result[idx] = array1[idx1] * array2[idx2]; - }; - - auto kernel_func = [&](sycl::handler &cgh) { - cgh.parallel_for< - class dpnp_kron_c_kernel<_DataType1, _DataType2, _ResultType>>( - gws, kernel_parallel_for_func); - }; - - sycl::event event = q.submit(kernel_func); - - event_ref = reinterpret_cast(&event); - - return DPCTLEvent_Copy(event_ref); -} - -template -void dpnp_kron_c(void *array1_in, - void *array2_in, - void *result1, - shape_elem_type *in1_shape, - shape_elem_type *in2_shape, - shape_elem_type *res_shape, - size_t ndim) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = - dpnp_kron_c<_DataType1, _DataType2, _ResultType>( - q_ref, array1_in, array2_in, result1, in1_shape, in2_shape, - res_shape, ndim, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_kron_default_c)(void *, - void *, - void *, - shape_elem_type *, - shape_elem_type *, - shape_elem_type *, - size_t) = - dpnp_kron_c<_DataType1, _DataType2, _ResultType>; - -template -DPCTLSyclEventRef - dpnp_matrix_rank_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - shape_elem_type *shape, - size_t ndim, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - - const size_t input_size = std::accumulate( - shape, shape + ndim, 1, std::multiplies()); - if (!input_size) { - return event_ref; - } - - sycl::queue q = *(reinterpret_cast(q_ref)); - - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, input_size, true); - DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result1, 1, true, true); - _DataType *array_1 = input1_ptr.get_ptr(); - _DataType *result = result_ptr.get_ptr(); - - shape_elem_type elems = 1; - if (ndim > 1) { - elems = shape[0]; - for (size_t i = 1; i < ndim; i++) { - if (shape[i] < elems) { - elems = shape[i]; - } - } - } - - _DataType acc = 0; - for (size_t i = 0; i < static_cast(elems); i++) { - size_t ind = 0; - for (size_t j = 0; j < ndim; j++) { - ind += (shape[j] - 1) * i; - } - acc += array_1[ind]; - } - result[0] = acc; - - return event_ref; -} - -template -void dpnp_matrix_rank_c(void *array1_in, - void *result1, - shape_elem_type *shape, - size_t ndim) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_matrix_rank_c<_DataType>( - q_ref, array1_in, result1, shape, ndim, dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_matrix_rank_default_c)(void *, void *, shape_elem_type *, size_t) = - dpnp_matrix_rank_c<_DataType>; - -template -DPCTLSyclEventRef dpnp_qr_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - void *result2, - void *result3, - size_t size_m, - size_t size_n, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - if (!size_m || !size_n) { - return event_ref; - } - sycl::queue q = *(reinterpret_cast(q_ref)); - - sycl::event event; - - DPNPC_ptr_adapter<_InputDT> input1_ptr(q_ref, array1_in, size_m * size_n, - true); - _InputDT *in_array = input1_ptr.get_ptr(); - - // math lib func overrides input - _ComputeDT *in_a = reinterpret_cast<_ComputeDT *>( - sycl::malloc_shared(size_m * size_n * sizeof(_ComputeDT), q)); - - for (size_t i = 0; i < size_m; ++i) { - for (size_t j = 0; j < size_n; ++j) { - // TODO transpose? use dpnp_transpose_c() - in_a[j * size_m + i] = in_array[i * size_n + j]; - } - } - - const size_t min_size_m_n = std::min(size_m, size_n); - DPNPC_ptr_adapter<_ComputeDT> result1_ptr( - q_ref, result1, size_m * min_size_m_n, true, true); - DPNPC_ptr_adapter<_ComputeDT> result2_ptr( - q_ref, result2, min_size_m_n * size_n, true, true); - DPNPC_ptr_adapter<_ComputeDT> result3_ptr(q_ref, result3, min_size_m_n, - true, true); - _ComputeDT *res_q = result1_ptr.get_ptr(); - _ComputeDT *res_r = result2_ptr.get_ptr(); - _ComputeDT *tau = result3_ptr.get_ptr(); - - const std::int64_t lda = size_m; - - const std::int64_t geqrf_scratchpad_size = - mkl_lapack::geqrf_scratchpad_size<_ComputeDT>(q, size_m, size_n, lda); - - _ComputeDT *geqrf_scratchpad = reinterpret_cast<_ComputeDT *>( - sycl::malloc_shared(geqrf_scratchpad_size * sizeof(_ComputeDT), q)); - - std::vector depends(1); - set_barrier_event(q, depends); - - event = mkl_lapack::geqrf(q, size_m, size_n, in_a, lda, tau, - geqrf_scratchpad, geqrf_scratchpad_size, depends); - event.wait(); - - if (!depends.empty()) { - verbose_print("oneapi::mkl::lapack::geqrf", depends.front(), event); - } - - sycl::free(geqrf_scratchpad, q); - - // R - size_t mrefl = min_size_m_n; - for (size_t i = 0; i < mrefl; ++i) { - for (size_t j = 0; j < size_n; ++j) { - if (j >= i) { - res_r[i * size_n + j] = in_a[j * size_m + i]; - } - else { - res_r[i * size_n + j] = _ComputeDT(0); - } - } - } - - // Q - const size_t nrefl = min_size_m_n; - const std::int64_t orgqr_scratchpad_size = - mkl_lapack::orgqr_scratchpad_size<_ComputeDT>(q, size_m, nrefl, nrefl, - lda); - - _ComputeDT *orgqr_scratchpad = reinterpret_cast<_ComputeDT *>( - sycl::malloc_shared(orgqr_scratchpad_size * sizeof(_ComputeDT), q)); - - set_barrier_event(q, depends); - - event = mkl_lapack::orgqr(q, size_m, nrefl, nrefl, in_a, lda, tau, - orgqr_scratchpad, orgqr_scratchpad_size, depends); - event.wait(); - - if (!depends.empty()) { - verbose_print("oneapi::mkl::lapack::orgqr", depends.front(), event); - } - - sycl::free(orgqr_scratchpad, q); - - for (size_t i = 0; i < size_m; ++i) { - for (size_t j = 0; j < nrefl; ++j) { - res_q[i * nrefl + j] = in_a[j * size_m + i]; - } - } - - sycl::free(in_a, q); - - return event_ref; -} - -template -void dpnp_qr_c(void *array1_in, - void *result1, - void *result2, - void *result3, - size_t size_m, - size_t size_n) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_qr_c<_InputDT, _ComputeDT>( - q_ref, array1_in, result1, result2, result3, size_m, size_n, - dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_qr_default_c)(void *, void *, void *, void *, size_t, size_t) = - dpnp_qr_c<_InputDT, _ComputeDT>; - -template -DPCTLSyclEventRef dpnp_svd_c(DPCTLSyclQueueRef q_ref, - void *array1_in, - void *result1, - void *result2, - void *result3, - size_t size_m, - size_t size_n, - const DPCTLEventVectorRef dep_event_vec_ref) -{ - // avoid warning unused variable - (void)dep_event_vec_ref; - - DPCTLSyclEventRef event_ref = nullptr; - sycl::queue q = *(reinterpret_cast(q_ref)); - - sycl::event event; - - DPNPC_ptr_adapter<_InputDT> input1_ptr( - q_ref, array1_in, size_m * size_n, - true); // TODO no need this if use dpnp_copy_to() - _InputDT *in_array = input1_ptr.get_ptr(); - - // math lib gesvd func overrides input - _ComputeDT *in_a = reinterpret_cast<_ComputeDT *>( - sycl::malloc_shared(size_m * size_n * sizeof(_ComputeDT), q)); - for (size_t it = 0; it < size_m * size_n; ++it) { - in_a[it] = in_array[it]; // TODO Type conversion. memcpy can not be used - // directly. dpnp_copy_to() ? - } - - DPNPC_ptr_adapter<_ComputeDT> result1_ptr(q_ref, result1, size_m * size_m, - true, true); - DPNPC_ptr_adapter<_SVDT> result2_ptr(q_ref, result2, - std::min(size_m, size_n), true, true); - DPNPC_ptr_adapter<_ComputeDT> result3_ptr(q_ref, result3, size_n * size_n, - true, true); - _ComputeDT *res_u = result1_ptr.get_ptr(); - _SVDT *res_s = result2_ptr.get_ptr(); - _ComputeDT *res_vt = result3_ptr.get_ptr(); - - const std::int64_t m = size_m; - const std::int64_t n = size_n; - - const std::int64_t lda = std::max(1UL, n); - const std::int64_t ldu = std::max(1UL, m); - const std::int64_t ldvt = std::max(1UL, n); - - const std::int64_t scratchpad_size = - mkl_lapack::gesvd_scratchpad_size<_ComputeDT>( - q, oneapi::mkl::jobsvd::vectors, oneapi::mkl::jobsvd::vectors, n, m, - lda, ldvt, ldu); - - _ComputeDT *scratchpad = reinterpret_cast<_ComputeDT *>( - sycl::malloc_shared(scratchpad_size * sizeof(_ComputeDT), q)); - - event = - mkl_lapack::gesvd(q, - oneapi::mkl::jobsvd::vectors, // onemkl::job jobu, - oneapi::mkl::jobsvd::vectors, // onemkl::job jobvt, - n, m, in_a, lda, res_s, res_vt, ldvt, res_u, ldu, - scratchpad, scratchpad_size); - - event.wait(); - - sycl::free(scratchpad, q); - - return event_ref; -} - -template -void dpnp_svd_c(void *array1_in, - void *result1, - void *result2, - void *result3, - size_t size_m, - size_t size_n) -{ - DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); - DPCTLEventVectorRef dep_event_vec_ref = nullptr; - DPCTLSyclEventRef event_ref = dpnp_svd_c<_InputDT, _ComputeDT, _SVDT>( - q_ref, array1_in, result1, result2, result3, size_m, size_n, - dep_event_vec_ref); - DPCTLEvent_WaitAndThrow(event_ref); - DPCTLEvent_Delete(event_ref); -} - -template -void (*dpnp_svd_default_c)(void *, void *, void *, void *, size_t, size_t) = - dpnp_svd_c<_InputDT, _ComputeDT, _SVDT>; - -void func_map_init_linalg_func(func_map_t &fmap) -{ - fmap[DPNPFuncName::DPNP_FN_CHOLESKY][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_cholesky_default_c}; - fmap[DPNPFuncName::DPNP_FN_CHOLESKY][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_cholesky_default_c}; - - fmap[DPNPFuncName::DPNP_FN_DET][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_det_default_c}; - fmap[DPNPFuncName::DPNP_FN_DET][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_det_default_c}; - fmap[DPNPFuncName::DPNP_FN_DET][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_det_default_c}; - fmap[DPNPFuncName::DPNP_FN_DET][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_det_default_c}; - - fmap[DPNPFuncName::DPNP_FN_INV][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_inv_default_c}; - fmap[DPNPFuncName::DPNP_FN_INV][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_inv_default_c}; - fmap[DPNPFuncName::DPNP_FN_INV][eft_FLT][eft_FLT] = { - eft_DBL, (void *)dpnp_inv_default_c}; - fmap[DPNPFuncName::DPNP_FN_INV][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_inv_default_c}; - - fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_FLT] = { - eft_FLT, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_kron_default_c}; - // fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_C128] = { - // eft_C128, (void*)dpnp_kron_default_c, - // std::complex>}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_LNG][eft_INT] = { - eft_LNG, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_LNG][eft_FLT] = { - eft_FLT, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_kron_default_c}; - // fmap[DPNPFuncName::DPNP_FN_KRON][eft_LNG][eft_C128] = { - // eft_C128, (void*)dpnp_kron_default_c, - // std::complex>}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_FLT][eft_INT] = { - eft_FLT, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_FLT][eft_LNG] = { - eft_FLT, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_kron_default_c}; - // fmap[DPNPFuncName::DPNP_FN_KRON][eft_FLT][eft_C128] = { - // eft_C128, (void*)dpnp_kron_default_c, - // std::complex>}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_DBL][eft_INT] = { - eft_DBL, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_DBL][eft_LNG] = { - eft_DBL, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_DBL][eft_FLT] = { - eft_DBL, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_kron_default_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_DBL][eft_C128] = { - eft_C128, (void *)dpnp_kron_default_c, - std::complex>}; - // fmap[DPNPFuncName::DPNP_FN_KRON][eft_C128][eft_INT] = { - // eft_C128, (void*)dpnp_kron_default_c, int32_t, - // std::complex>}; - // fmap[DPNPFuncName::DPNP_FN_KRON][eft_C128][eft_LNG] = { - // eft_C128, (void*)dpnp_kron_default_c, int64_t, - // std::complex>}; - // fmap[DPNPFuncName::DPNP_FN_KRON][eft_C128][eft_FLT] = { - // eft_C128, (void*)dpnp_kron_default_c, float, - // std::complex>}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_C128][eft_DBL] = { - eft_C128, (void *)dpnp_kron_default_c, double, - std::complex>}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_C128][eft_C128] = { - eft_C128, - (void *)dpnp_kron_default_c, std::complex, - std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_matrix_rank_default_c}; - fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_matrix_rank_default_c}; - fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_matrix_rank_default_c}; - fmap[DPNPFuncName::DPNP_FN_MATRIX_RANK][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_matrix_rank_default_c}; - - fmap[DPNPFuncName::DPNP_FN_QR][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_qr_default_c}; - fmap[DPNPFuncName::DPNP_FN_QR][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_qr_default_c}; - fmap[DPNPFuncName::DPNP_FN_QR][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_qr_default_c}; - fmap[DPNPFuncName::DPNP_FN_QR][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_qr_default_c}; - // fmap[DPNPFuncName::DPNP_FN_QR][eft_C128][eft_C128] = { - // eft_C128, (void*)dpnp_qr_c, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_SVD][eft_INT][eft_INT] = { - eft_DBL, (void *)dpnp_svd_default_c}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_LNG][eft_LNG] = { - eft_DBL, (void *)dpnp_svd_default_c}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_svd_default_c}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_svd_default_c}; - fmap[DPNPFuncName::DPNP_FN_SVD][eft_C128][eft_C128] = { - eft_C128, (void *)dpnp_svd_default_c, - std::complex, double>}; - - return; -} diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index 022e844319d..20fc5305e9a 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -331,7 +331,6 @@ void func_map_init_elemwise(func_map_t &fmap); void func_map_init_fft_func(func_map_t &fmap); void func_map_init_indexing_func(func_map_t &fmap); void func_map_init_linalg(func_map_t &fmap); -void func_map_init_linalg_func(func_map_t &fmap); void func_map_init_logic(func_map_t &fmap); void func_map_init_manipulation(func_map_t &fmap); void func_map_init_mathematical(func_map_t &fmap); diff --git a/dpnp/backend/src/dpnp_iface_fptr.cpp b/dpnp/backend/src/dpnp_iface_fptr.cpp index a0683d44a96..460896bfa2d 100644 --- a/dpnp/backend/src/dpnp_iface_fptr.cpp +++ b/dpnp/backend/src/dpnp_iface_fptr.cpp @@ -172,7 +172,6 @@ static func_map_t func_map_init() func_map_init_fft_func(fmap); func_map_init_indexing_func(fmap); func_map_init_linalg(fmap); - func_map_init_linalg_func(fmap); func_map_init_logic(fmap); func_map_init_manipulation(fmap); func_map_init_mathematical(fmap); From 94864fa8db1ec34e77347d446ea7672ee3c6ae92 Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Sun, 16 Jun 2024 15:18:19 -0700 Subject: [PATCH 2/2] fix pre-commit --- dpnp/backend/kernels/dpnp_krnl_common.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/dpnp/backend/kernels/dpnp_krnl_common.cpp b/dpnp/backend/kernels/dpnp_krnl_common.cpp index b2f4247d8d1..b1d864327e6 100644 --- a/dpnp/backend/kernels/dpnp_krnl_common.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_common.cpp @@ -458,10 +458,6 @@ DPCTLSyclEventRef (*dpnp_dot_ext_c)(DPCTLSyclQueueRef, const DPCTLEventVectorRef) = dpnp_dot_c<_DataType_output, _DataType_input1, _DataType_input2>; - - - - template class dpnp_initval_c_kernel; @@ -516,8 +512,6 @@ DPCTLSyclEventRef (*dpnp_initval_ext_c)(DPCTLSyclQueueRef, const DPCTLEventVectorRef) = dpnp_initval_c<_DataType>; - - void func_map_init_linalg(func_map_t &fmap) { @@ -588,8 +582,6 @@ void func_map_init_linalg(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_DOT_EXT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_dot_ext_c}; - - fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_BLN][eft_BLN] = { eft_BLN, (void *)dpnp_initval_default_c}; fmap[DPNPFuncName::DPNP_FN_INITVAL][eft_INT][eft_INT] = {