diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 5645191b7c6..fbfe66ff17b 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -194,10 +194,10 @@ jobs: # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests run: | - python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }} + python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} working-directory: ${{ env.tests-path }} env: - SYCL_QUEUE_THREAD_POOL_SIZE: 16 + SYCL_QUEUE_THREAD_POOL_SIZE: 6 test_windows: name: Test ['windows-latest', python='${{ matrix.python }}'] @@ -333,10 +333,10 @@ jobs: # TODO: run the whole scope once the issues on CPU are resolved - name: Run tests run: | - python -m pytest -q -ra --disable-warnings -vv -s ${{ env.TEST_SCOPE }} + python -m pytest -q -ra --disable-warnings -vv ${{ env.TEST_SCOPE }} working-directory: ${{ env.tests-path }} env: - SYCL_QUEUE_THREAD_POOL_SIZE: 16 + SYCL_QUEUE_THREAD_POOL_SIZE: 6 upload: name: Upload ['${{ matrix.os }}', python='${{ matrix.python }}'] diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 07a4ffae8ab..8f3086ec3a9 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -55,9 +55,7 @@ else() target_compile_options(${python_module_name} PRIVATE -fno-approx-func -fno-finite-math-only - -no-ipo ) - target_link_options(${python_module_name} PRIVATE -no-ipo) endif() target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) @@ -72,17 +70,6 @@ endif() target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) -target_link_libraries(${python_module_name} PUBLIC oneDPL) - -if (UNIX) - # needed for STL headers with GCC < 11 - target_compile_definitions(${python_module_name} PUBLIC _GLIBCXX_USE_TBB_PAR_BACKEND=0) -endif() - -target_compile_definitions(${python_module_name} PUBLIC PSTL_USE_PARALLEL_POLICIES=0) -# work-around for Windows at exit crash with predefined policies -target_compile_definitions(${python_module_name} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0) - install(TARGETS ${python_module_name} DESTINATION "dpnp/backend/extensions/vm" ) diff --git a/dpnp/backend/extensions/vm/div.cpp b/dpnp/backend/extensions/vm/div.cpp index 8a6751a45a4..28fbe1cdf3c 100644 --- a/dpnp/backend/extensions/vm/div.cpp +++ b/dpnp/backend/extensions/vm/div.cpp @@ -64,46 +64,16 @@ static sycl::event div_impl(sycl::queue exec_q, { type_utils::validate_type_for_device(exec_q); - std::cerr << "enter div_impl" << std::endl; + const T* a = reinterpret_cast(in_a); + const T* b = reinterpret_cast(in_b); + T* y = reinterpret_cast(out_y); - const T* _a = reinterpret_cast(in_a); - const T* _b = reinterpret_cast(in_b); - T* _y = reinterpret_cast(out_y); - - std::cerr << "casting is done" << std::endl; - - T* a = sycl::malloc_device(n, exec_q); - T* b = sycl::malloc_device(n, exec_q); - T* y = sycl::malloc_device(n, exec_q); - - std::cerr << "malloc is done" << std::endl; - - exec_q.copy(_a, a, n).wait(); - exec_q.copy(_b, b, n).wait(); - exec_q.copy(_y, y, n).wait(); - - std::cerr << "copy is done" << std::endl; - - sycl::event ev = mkl_vm::div(exec_q, + return mkl_vm::div(exec_q, n, // number of elements to be calculated a, // pointer `a` containing 1st input vector of size n b, // pointer `b` containing 2nd input vector of size n y, // pointer `y` to the output vector of size n depends); - ev.wait(); - - std::cerr << "div is done" << std::endl; - - exec_q.copy(y, _y, n).wait(); - - std::cerr << "copy is done" << std::endl; - - sycl::free(a, exec_q); - sycl::free(b, exec_q); - sycl::free(y, exec_q); - - std::cerr << "leaving div_impl" << std::endl; - return sycl::event(); } std::pair div(sycl::queue exec_q, @@ -205,20 +175,9 @@ std::pair div(sycl::queue exec_q, throw py::value_error("No div implementation defined"); } sycl::event sum_ev = div_fn(exec_q, src_nelems, src1_data, src2_data, dst_data, depends); - // sum_ev.wait(); - - // int* dummy = sycl::malloc_device(1, exec_q); - // sycl::event cleanup_ev = exec_q.submit([&](sycl::handler& cgh) { - // // cgh.depends_on(sum_ev); - // auto ctx = exec_q.get_context(); - // cgh.host_task([dummy, ctx]() { - // // dummy host task to pass into keep_args_alive - // sycl::free(dummy, ctx); - // }); - // }); - - // sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev}); - // return std::make_pair(ht_ev, sum_ev); + + sycl::event ht_ev = dpctl::utils::keep_args_alive(exec_q, {src1, src2, dst}, {sum_ev}); + return std::make_pair(ht_ev, sum_ev); return std::make_pair(sycl::event(), sycl::event()); } @@ -227,6 +186,7 @@ bool can_call_div(sycl::queue exec_q, dpctl::tensor::usm_ndarray src2, dpctl::tensor::usm_ndarray dst) { +#if INTEL_MKL_VERSION >= 20230002 // check type_nums int src1_typenum = src1.get_typenum(); int src2_typenum = src2.get_typenum(); @@ -325,6 +285,16 @@ bool can_call_div(sycl::queue exec_q, return false; } return true; +#else + // In OneMKL 2023.1.0 the call of oneapi::mkl::vm::div() is going to dead lock + // inside ~usm_wrapper_to_host()->{...; q_->wait_and_throw(); ...} + + (void)exec_q; + (void)src1; + (void)src2; + (void)dst; + return false; +#endif // INTEL_MKL_VERSION >= 20230002 } template