From ea89d42d4af42cbb38959d70ea691741c67220eb Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Thu, 2 Apr 2020 16:44:19 +0300 Subject: [PATCH] [SYCL] Enable LIT testing with CUDA BE Signed-off-by: Vladimir Lazarev --- sycl/test/backend/cuda/primary_context.cpp | 116 --------------- sycl/test/basic_tests/boolean.cpp | 2 - .../basic_tests/buffer/buffer_full_copy.cpp | 3 - sycl/test/group-algorithm/all_of.cpp | 3 +- sycl/test/group-algorithm/any_of.cpp | 3 +- sycl/test/group-algorithm/broadcast.cpp | 3 +- sycl/test/group-algorithm/exclusive_scan.cpp | 3 +- sycl/test/group-algorithm/inclusive_scan.cpp | 3 +- sycl/test/group-algorithm/leader.cpp | 2 +- sycl/test/group-algorithm/none_of.cpp | 3 +- sycl/test/group-algorithm/reduce.cpp | 3 +- sycl/test/hier_par/hier_par_basic.cpp | 3 - sycl/test/kernel_from_file/hw.cpp | 3 - sycl/test/lit.cfg.py | 1 + sycl/test/multi_ptr/multi_ptr.cpp | 2 +- sycl/test/program_manager/env_vars.cpp | 3 +- .../regression/private_array_init_test.cpp | 2 +- sycl/test/regression/static-buffer-dtor.cpp | 1 - sycl/test/scheduler/DataMovement.cpp | 6 +- sycl/test/usm/allocator_vector.cpp | 1 + sycl/test/usm/allocator_vector_fail.cpp | 1 + sycl/test/usm/allocatorll.cpp | 1 + sycl/test/usm/badmalloc.cpp | 4 +- sycl/test/usm/depends_on.cpp | 1 + sycl/test/usm/dmemll.cpp | 1 + sycl/test/usm/hmemll.cpp | 1 + sycl/test/usm/math.cpp | 2 - sycl/test/usm/memadvise.cpp | 1 + sycl/test/usm/memcpy.cpp | 3 +- sycl/test/usm/memset.cpp | 3 +- sycl/test/usm/mixed.cpp | 1 + sycl/test/usm/mixed2.cpp | 3 +- sycl/test/usm/mixed2template.cpp | 3 +- sycl/test/usm/mixed_queue.cpp | 3 +- sycl/test/usm/pfor_flatten.cpp | 4 +- sycl/test/usm/queue_wait.cpp | 3 +- sycl/test/usm/smemll.cpp | 1 + sycl/tools/CMakeLists.txt | 25 ++++ sycl/tools/get_device_count_by_type.cpp | 6 +- sycl/unittests/pi/cuda/CMakeLists.txt | 1 + .../pi/cuda/test_primary_context.cpp | 134 ++++++++++++++++++ 41 files changed, 212 insertions(+), 156 deletions(-) delete mode 100644 sycl/test/backend/cuda/primary_context.cpp create mode 100644 sycl/unittests/pi/cuda/test_primary_context.cpp diff --git a/sycl/test/backend/cuda/primary_context.cpp b/sycl/test/backend/cuda/primary_context.cpp deleted file mode 100644 index d02b4bca35f60..0000000000000 --- a/sycl/test/backend/cuda/primary_context.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// REQUIRES: cuda -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I%opencl_include_dir -I%cuda_toolkit_include -o %t.out -lcuda -lsycl -// RUN: env SYCL_DEVICE_TYPE=GPU %t.out -// NOTE: OpenCL is required for the runtime, even when using the CUDA BE. - -//==---------- primary_context.cpp - SYCL cuda primary context test --------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include - -using namespace cl::sycl; - -void check(bool condition, const char *conditionString, const char *filename, - const long line) noexcept { - if (!condition) { - std::cerr << "CHECK failed in " << filename << "#" << line << " " - << conditionString << "\n"; - std::abort(); - } -} - -#define CHECK(CONDITION) check(CONDITION, #CONDITION, __FILE__, __LINE__) - -bool isCudaDevice(const device &dev) { - const platform platform = dev.get_info(); - const std::string platformVersion = - platform.get_info(); - // If using PI_CUDA, don't accept a non-CUDA device - return platformVersion.find("CUDA") != std::string::npos; -} - -class cuda_device_selector : public device_selector { -public: - int operator()(const device &dev) const { - return isCudaDevice(dev) ? 1 : -1; - } -}; - -class other_cuda_device_selector : public device_selector { -public: - other_cuda_device_selector(const device &dev) : excludeDevice{dev} {} - - int operator()(const device &dev) const { - if (!isCudaDevice(dev)) { - return -1; - } - if (dev.get() == excludeDevice.get()) { - // Return only this device if it is the only available - return 0; - } - return 1; - } - -private: - const device &excludeDevice; -}; - -int main() { - try { - context c; - } catch (device_error &e) { - std::cout << "Failed to create device for context" << std::endl; - } - - device DeviceA = cuda_device_selector().select_device(); - device DeviceB = other_cuda_device_selector(DeviceA).select_device(); - - CHECK(isCudaDevice(DeviceA)); - - { - std::cout << "create single context" << std::endl; - context Context(DeviceA, async_handler{}, /*UsePrimaryContext=*/true); - - CUdevice CudaDevice = reinterpret_cast(DeviceA.get())->get(); - CUcontext CudaContext = reinterpret_cast(Context.get())->get(); - - CUcontext PrimaryCudaContext; - cuDevicePrimaryCtxRetain(&PrimaryCudaContext, CudaDevice); - - CHECK(CudaContext == PrimaryCudaContext); - - cuDevicePrimaryCtxRelease(CudaDevice); - } - { - std::cout << "create multiple contexts for one device" << std::endl; - context ContextA(DeviceA, async_handler{}, /*UsePrimaryContext=*/true); - context ContextB(DeviceA, async_handler{}, /*UsePrimaryContext=*/true); - - CUcontext CudaContextA = - reinterpret_cast(ContextA.get())->get(); - CUcontext CudaContextB = - reinterpret_cast(ContextB.get())->get(); - - CHECK(CudaContextA == CudaContextB); - } - if (isCudaDevice(DeviceB) && DeviceA.get() != DeviceB.get()) { - std::cout << "create multiple contexts for multiple devices" << std::endl; - context ContextA(DeviceA, async_handler{}, /*UsePrimaryContext=*/true); - context ContextB(DeviceB, async_handler{}, /*UsePrimaryContext=*/true); - - CUcontext CudaContextA = - reinterpret_cast(ContextA.get())->get(); - CUcontext CudaContextB = - reinterpret_cast(ContextB.get())->get(); - - CHECK(CudaContextA != CudaContextB); - } -} diff --git a/sycl/test/basic_tests/boolean.cpp b/sycl/test/basic_tests/boolean.cpp index 041cf492786d4..cac65ddaa80bd 100644 --- a/sycl/test/basic_tests/boolean.cpp +++ b/sycl/test/basic_tests/boolean.cpp @@ -3,8 +3,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// XFAIL: cuda -// TODO: investigate incorrect results on cuda backend #include #include diff --git a/sycl/test/basic_tests/buffer/buffer_full_copy.cpp b/sycl/test/basic_tests/buffer/buffer_full_copy.cpp index f9c2047510e31..d4d64a7882a32 100644 --- a/sycl/test/basic_tests/buffer/buffer_full_copy.cpp +++ b/sycl/test/basic_tests/buffer/buffer_full_copy.cpp @@ -6,9 +6,6 @@ // RUN: %GPU_RUN_PLACEHOLDER %t2.out // RUN: %ACC_RUN_PLACEHOLDER %t2.out -// TODO: cuda_piEnqueueMemBufferCopy not implemented -// XFAIL: cuda - //==------------- buffer_full_copy.cpp - SYCL buffer basic test ------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. diff --git a/sycl/test/group-algorithm/all_of.cpp b/sycl/test/group-algorithm/all_of.cpp index a8b4fc4bfff2b..5b0eabd8a8e1b 100644 --- a/sycl/test/group-algorithm/all_of.cpp +++ b/sycl/test/group-algorithm/all_of.cpp @@ -1,8 +1,9 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// UNSUPPORTED: cuda #include #include diff --git a/sycl/test/group-algorithm/any_of.cpp b/sycl/test/group-algorithm/any_of.cpp index 4e5391b5b01be..2c117d74121ea 100644 --- a/sycl/test/group-algorithm/any_of.cpp +++ b/sycl/test/group-algorithm/any_of.cpp @@ -1,8 +1,9 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// UNSUPPORTED: cuda #include #include diff --git a/sycl/test/group-algorithm/broadcast.cpp b/sycl/test/group-algorithm/broadcast.cpp index 9fcce3b938673..8315eee1c23f0 100644 --- a/sycl/test/group-algorithm/broadcast.cpp +++ b/sycl/test/group-algorithm/broadcast.cpp @@ -1,8 +1,9 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// UNSUPPORTED: cuda #include #include diff --git a/sycl/test/group-algorithm/exclusive_scan.cpp b/sycl/test/group-algorithm/exclusive_scan.cpp index fad4777a7cec1..69236c2858000 100644 --- a/sycl/test/group-algorithm/exclusive_scan.cpp +++ b/sycl/test/group-algorithm/exclusive_scan.cpp @@ -1,8 +1,9 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// UNSUPPORTED: cuda #include #include diff --git a/sycl/test/group-algorithm/inclusive_scan.cpp b/sycl/test/group-algorithm/inclusive_scan.cpp index 54d79f72e5395..8f4e4f701adea 100644 --- a/sycl/test/group-algorithm/inclusive_scan.cpp +++ b/sycl/test/group-algorithm/inclusive_scan.cpp @@ -1,8 +1,9 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// UNSUPPORTED: cuda #include #include diff --git a/sycl/test/group-algorithm/leader.cpp b/sycl/test/group-algorithm/leader.cpp index 3e0bad4706cfc..86132ce8ed54c 100644 --- a/sycl/test/group-algorithm/leader.cpp +++ b/sycl/test/group-algorithm/leader.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/group-algorithm/none_of.cpp b/sycl/test/group-algorithm/none_of.cpp index d0ef19b8ed3ea..10d94b1019ac4 100644 --- a/sycl/test/group-algorithm/none_of.cpp +++ b/sycl/test/group-algorithm/none_of.cpp @@ -1,8 +1,9 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// UNSUPPORTED: cuda #include #include diff --git a/sycl/test/group-algorithm/reduce.cpp b/sycl/test/group-algorithm/reduce.cpp index 988c40f245ff7..9f6dd05fba6d2 100644 --- a/sycl/test/group-algorithm/reduce.cpp +++ b/sycl/test/group-algorithm/reduce.cpp @@ -1,8 +1,9 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// UNSUPPORTED: cuda #include #include diff --git a/sycl/test/hier_par/hier_par_basic.cpp b/sycl/test/hier_par/hier_par_basic.cpp index 6caf3169f555f..d1a94ea1a7112 100644 --- a/sycl/test/hier_par/hier_par_basic.cpp +++ b/sycl/test/hier_par/hier_par_basic.cpp @@ -12,9 +12,6 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// TODO: ptxas fatal : Unresolved extern function '__spirv_ControlBarrier' -// XFAIL: cuda - // This test checks hierarchical parallelism invocation APIs, but without any // data or code with side-effects between the work group and work item scopes. diff --git a/sycl/test/kernel_from_file/hw.cpp b/sycl/test/kernel_from_file/hw.cpp index a9f2b6031d26d..3e53d6260e8cb 100644 --- a/sycl/test/kernel_from_file/hw.cpp +++ b/sycl/test/kernel_from_file/hw.cpp @@ -6,9 +6,6 @@ // TODO: InvalidTargetTriple: Expects spir-unknown-unknown or spir64-unknown-unknown. Actual target triple is x86_64-unknown-linux-gnu -// XFAIL: cuda -// Currently unsupported on cuda as this test specifically tests a SPV path. - #include #include diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py index ca2947308e75b..252c38a797c4d 100644 --- a/sycl/test/lit.cfg.py +++ b/sycl/test/lit.cfg.py @@ -155,6 +155,7 @@ def getDeviceCount(device_type): config.available_features.add('gpu') if cuda: config.available_features.add('cuda') + gpu_run_substitute += " SYCL_BE=PI_CUDA " if platform.system() == "Linux": gpu_run_on_linux_substitute = "env SYCL_DEVICE_TYPE=GPU " diff --git a/sycl/test/multi_ptr/multi_ptr.cpp b/sycl/test/multi_ptr/multi_ptr.cpp index a6e20a808ac69..c2e44f461e1b7 100644 --- a/sycl/test/multi_ptr/multi_ptr.cpp +++ b/sycl/test/multi_ptr/multi_ptr.cpp @@ -3,7 +3,7 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// RUN: %clangxx -DRESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR -fsycl %s -o %t1.out +// RUN: %clangxx -DRESTRICT_WRITE_ACCESS_TO_CONSTANT_PTR -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out diff --git a/sycl/test/program_manager/env_vars.cpp b/sycl/test/program_manager/env_vars.cpp index e747eab855e12..dcc4bd4a38e3c 100644 --- a/sycl/test/program_manager/env_vars.cpp +++ b/sycl/test/program_manager/env_vars.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -O0 -fsycl %s -o %t.out -lsycl +// RUN: %clangxx -O0 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -lsycl // // Deprecated SYCL_PROGRAM_BUILD_OPTIONS should work as an alias to // SYCL_PROGRAM_COMPILE_OPTIONS: @@ -16,7 +16,6 @@ // RUN: %CPU_RUN_PLACEHOLDER SYCL_PROGRAM_COMPILE_OPTIONS="-enable-link-options -cl-denorms-are-zero" SHOULD_CRASH=1 %t.out // RUN: %CPU_RUN_PLACEHOLDER SYCL_PROGRAM_LINK_OPTIONS="-g" SHOULD_CRASH=1 %t.out - #include #include #include diff --git a/sycl/test/regression/private_array_init_test.cpp b/sycl/test/regression/private_array_init_test.cpp index d85a610794b03..bf10d8e217e55 100644 --- a/sycl/test/regression/private_array_init_test.cpp +++ b/sycl/test/regression/private_array_init_test.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -fsycl %s -o %t.out -lOpenCL +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -lOpenCL // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/sycl/test/regression/static-buffer-dtor.cpp b/sycl/test/regression/static-buffer-dtor.cpp index ab0809034d733..3d8bd98fa3f18 100644 --- a/sycl/test/regression/static-buffer-dtor.cpp +++ b/sycl/test/regression/static-buffer-dtor.cpp @@ -16,7 +16,6 @@ // TODO: terminate called after throwing an instance of 'cl::sycl::runtime_error' // TODO: what(): OpenCL API failed. OpenCL API returns: -999 (Unknown OpenCL error code) -999 (Unknown OpenCL error code) -// XFAIL: cuda #include diff --git a/sycl/test/scheduler/DataMovement.cpp b/sycl/test/scheduler/DataMovement.cpp index f1812bf4dbf29..48282d34f0662 100644 --- a/sycl/test/scheduler/DataMovement.cpp +++ b/sycl/test/scheduler/DataMovement.cpp @@ -1,5 +1,9 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t.out -// RUN: %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// Incorrect event callback processing for host device. +// XFAIL: cuda // //==-------------------------- DataMovement.cpp ----------------------------==// // diff --git a/sycl/test/usm/allocator_vector.cpp b/sycl/test/usm/allocator_vector.cpp index 9cc82d8fac1f5..3ae88da999aa9 100644 --- a/sycl/test/usm/allocator_vector.cpp +++ b/sycl/test/usm/allocator_vector.cpp @@ -2,6 +2,7 @@ // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==---- allocator_vector.cpp - Allocator Container test -------------------==// // diff --git a/sycl/test/usm/allocator_vector_fail.cpp b/sycl/test/usm/allocator_vector_fail.cpp index bb033ef753071..7bb8025f2003a 100644 --- a/sycl/test/usm/allocator_vector_fail.cpp +++ b/sycl/test/usm/allocator_vector_fail.cpp @@ -2,6 +2,7 @@ // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==-- allocator_vector_fail.cpp - Device Memory Allocator fail test -------==// // diff --git a/sycl/test/usm/allocatorll.cpp b/sycl/test/usm/allocatorll.cpp index 1b7796540686f..8fbfbe605f7df 100644 --- a/sycl/test/usm/allocatorll.cpp +++ b/sycl/test/usm/allocatorll.cpp @@ -2,6 +2,7 @@ // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==---- allocatorll.cpp - Device Memory Linked List Allocator test --------==// // diff --git a/sycl/test/usm/badmalloc.cpp b/sycl/test/usm/badmalloc.cpp index fc91b1260d465..b574dd67eea3b 100644 --- a/sycl/test/usm/badmalloc.cpp +++ b/sycl/test/usm/badmalloc.cpp @@ -1,9 +1,9 @@ -// RUN: %clangxx -fsycl %s -o %t1.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out -// UNSUPPORTED: windows +// UNSUPPORTED: windows,cuda //==----------------- badmalloc.cpp - Bad Mallocs test ---------------------==// // diff --git a/sycl/test/usm/depends_on.cpp b/sycl/test/usm/depends_on.cpp index 3943621853836..ea7749809f6f9 100644 --- a/sycl/test/usm/depends_on.cpp +++ b/sycl/test/usm/depends_on.cpp @@ -2,6 +2,7 @@ // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==----------------- depends_on.cpp - depends_on test ---------------------==// // diff --git a/sycl/test/usm/dmemll.cpp b/sycl/test/usm/dmemll.cpp index e5c32b2f20262..de987904c69e1 100644 --- a/sycl/test/usm/dmemll.cpp +++ b/sycl/test/usm/dmemll.cpp @@ -2,6 +2,7 @@ // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==------------------- dmemll.cpp - Device Memory Linked List test --------==// // diff --git a/sycl/test/usm/hmemll.cpp b/sycl/test/usm/hmemll.cpp index 38b578ce948c2..ce9898338d886 100644 --- a/sycl/test/usm/hmemll.cpp +++ b/sycl/test/usm/hmemll.cpp @@ -2,6 +2,7 @@ // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==------------------- hmemll.cpp - Host Memory Linked List test ----------==// // diff --git a/sycl/test/usm/math.cpp b/sycl/test/usm/math.cpp index 4155767e309f7..c2ae25363fc2f 100644 --- a/sycl/test/usm/math.cpp +++ b/sycl/test/usm/math.cpp @@ -3,8 +3,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // REQUIRES: cpu -// TODO: ptxas fatal : Unresolved extern function '_Z20__spirv_ocl_lgamma_rfPi' -// XFAIL: cuda #include diff --git a/sycl/test/usm/memadvise.cpp b/sycl/test/usm/memadvise.cpp index 9b584c045e2e5..213b897f7d3ad 100644 --- a/sycl/test/usm/memadvise.cpp +++ b/sycl/test/usm/memadvise.cpp @@ -2,6 +2,7 @@ // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==---------------- memadvise.cpp - Shared Memory Linked List test --------==// // diff --git a/sycl/test/usm/memcpy.cpp b/sycl/test/usm/memcpy.cpp index 3545cdf5218fd..a6ac08bf21da8 100644 --- a/sycl/test/usm/memcpy.cpp +++ b/sycl/test/usm/memcpy.cpp @@ -5,9 +5,10 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// RUN: %clangxx -fsycl %s -o %t1.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda #include diff --git a/sycl/test/usm/memset.cpp b/sycl/test/usm/memset.cpp index 6fb12eb1fcc4d..9cc3aa8eb883e 100644 --- a/sycl/test/usm/memset.cpp +++ b/sycl/test/usm/memset.cpp @@ -1,6 +1,7 @@ -// RUN: %clangxx -fsycl %s -o %t1.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==---- memset.cpp - USM memset test --------------------------------------==// // diff --git a/sycl/test/usm/mixed.cpp b/sycl/test/usm/mixed.cpp index 5d45182d2a35e..449db850b3791 100644 --- a/sycl/test/usm/mixed.cpp +++ b/sycl/test/usm/mixed.cpp @@ -2,6 +2,7 @@ // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==------------------- mixed.cpp - Mixed Memory test ---------------------==// // diff --git a/sycl/test/usm/mixed2.cpp b/sycl/test/usm/mixed2.cpp index c074e2207b578..db0421f7f63fa 100644 --- a/sycl/test/usm/mixed2.cpp +++ b/sycl/test/usm/mixed2.cpp @@ -1,7 +1,8 @@ -// RUN: %clangxx -fsycl %s -o %t1.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==------------------- mixed2.cpp - Mixed Memory test ---------------------==// // diff --git a/sycl/test/usm/mixed2template.cpp b/sycl/test/usm/mixed2template.cpp index 4261187092d72..4bc544b0505a8 100644 --- a/sycl/test/usm/mixed2template.cpp +++ b/sycl/test/usm/mixed2template.cpp @@ -1,7 +1,8 @@ -// RUN: %clangxx -fsycl %s -o %t1.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==---------- mixed2template.cpp - Mixed Memory with Templatestest --------==// // diff --git a/sycl/test/usm/mixed_queue.cpp b/sycl/test/usm/mixed_queue.cpp index 0585e982179e1..10ea73d702856 100644 --- a/sycl/test/usm/mixed_queue.cpp +++ b/sycl/test/usm/mixed_queue.cpp @@ -1,7 +1,8 @@ -// RUN: %clangxx -fsycl %s -o %t1.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==-------------- mixed_queue.cpp - Mixed Memory test ---------------------==// // diff --git a/sycl/test/usm/pfor_flatten.cpp b/sycl/test/usm/pfor_flatten.cpp index 68496c7b94886..86089ad5a76c8 100644 --- a/sycl/test/usm/pfor_flatten.cpp +++ b/sycl/test/usm/pfor_flatten.cpp @@ -1,8 +1,8 @@ -// RUN: %clangxx -fsycl -fsycl-unnamed-lambda %s -o %t1.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -fsycl-unnamed-lambda %s -o %t1.out // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out -// XFAIL: cuda +// UNSUPPORTED: cuda //==--------------- pfor_flatten.cpp - Kernel Launch Flattening test -------==// // diff --git a/sycl/test/usm/queue_wait.cpp b/sycl/test/usm/queue_wait.cpp index 76bdaaf4c7b92..5d187515c63ab 100644 --- a/sycl/test/usm/queue_wait.cpp +++ b/sycl/test/usm/queue_wait.cpp @@ -1,7 +1,8 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out +// UNSUPPORTED: cuda #include diff --git a/sycl/test/usm/smemll.cpp b/sycl/test/usm/smemll.cpp index 4fb79cb8429d8..7d602fdce9306 100644 --- a/sycl/test/usm/smemll.cpp +++ b/sycl/test/usm/smemll.cpp @@ -2,6 +2,7 @@ // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out // RUN: %CPU_RUN_PLACEHOLDER %t1.out // RUN: %GPU_RUN_PLACEHOLDER %t1.out +// UNSUPPORTED: cuda //==------------------- smemll.cpp - Shared Memory Linked List test --------==// // diff --git a/sycl/tools/CMakeLists.txt b/sycl/tools/CMakeLists.txt index dd921f969c66b..467801433c25c 100644 --- a/sycl/tools/CMakeLists.txt +++ b/sycl/tools/CMakeLists.txt @@ -4,6 +4,31 @@ set(CMAKE_CXX_EXTENSIONS OFF) add_executable(get_device_count_by_type get_device_count_by_type.cpp) add_dependencies(get_device_count_by_type ocl-headers ocl-icd) + +if( SYCL_BUILD_PI_CUDA ) + find_package(CUDA 10.0 REQUIRED) + + add_library(cudadrv SHARED IMPORTED) + + set_target_properties( + cudadrv PROPERTIES + IMPORTED_LOCATION ${CUDA_CUDA_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES ${CUDA_INCLUDE_DIRS} + ) + + target_compile_definitions(get_device_count_by_type + PUBLIC $<$:USE_PI_CUDA> + ) + + target_include_directories( get_device_count_by_type + PUBLIC + ${CUDA_INCLUDE_DIRS} + ) + target_link_libraries(get_device_count_by_type + PUBLIC OpenCL-Headers cudadrv +) +endif() + target_link_libraries(get_device_count_by_type PRIVATE OpenCL::Headers PRIVATE ${OpenCL_LIBRARIES} diff --git a/sycl/tools/get_device_count_by_type.cpp b/sycl/tools/get_device_count_by_type.cpp index 5611685889fac..ca66310958dc2 100644 --- a/sycl/tools/get_device_count_by_type.cpp +++ b/sycl/tools/get_device_count_by_type.cpp @@ -10,7 +10,7 @@ #include #ifdef USE_PI_CUDA -#include +#include #endif // USE_PI_CUDA #include @@ -38,12 +38,12 @@ int main(int argc, char* argv[]) { cl_uint deviceCount = 0; #ifdef USE_PI_CUDA - if (backend == "CUDA") { + if (backend == "PI_CUDA") { std::string msg{""}; int runtime_version = 0; - cudaError_t err = cuDriverGetVersion(&runtime_version); + auto err = cuDriverGetVersion(&runtime_version); if (runtime_version < 9020 || err != CUDA_SUCCESS) { std::cout << deviceCount << " :Unsupported CUDA Runtime " << std::endl; } diff --git a/sycl/unittests/pi/cuda/CMakeLists.txt b/sycl/unittests/pi/cuda/CMakeLists.txt index 0d68616bc5d5d..e0084af403cc3 100644 --- a/sycl/unittests/pi/cuda/CMakeLists.txt +++ b/sycl/unittests/pi/cuda/CMakeLists.txt @@ -5,6 +5,7 @@ add_sycl_unittest(PiCudaTests test_device.cpp test_kernels.cpp test_mem_obj.cpp + test_primary_context.cpp test_queue.cpp test_events.cpp ) diff --git a/sycl/unittests/pi/cuda/test_primary_context.cpp b/sycl/unittests/pi/cuda/test_primary_context.cpp new file mode 100644 index 0000000000000..199765debeeed --- /dev/null +++ b/sycl/unittests/pi/cuda/test_primary_context.cpp @@ -0,0 +1,134 @@ +//==---------- pi_primary_context.cpp - PI unit tests ----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gtest/gtest.h" + +#include + +#include +#include +#include +#include +#include + +#include + +using namespace cl::sycl; + +void check(bool condition, const char *conditionString, const char *filename, + const long line) noexcept { + if (!condition) { + std::cerr << "CHECK failed in " << filename << "#" << line << " " + << conditionString << "\n"; + std::abort(); + } +} + +#define CHECK(CONDITION) check(CONDITION, #CONDITION, __FILE__, __LINE__) + +bool isCudaDevice(const device &dev) { + const platform platform = dev.get_info(); + const std::string platformVersion = + platform.get_info(); + // If using PI_CUDA, don't accept a non-CUDA device + return platformVersion.find("CUDA") != std::string::npos; +} + +class cuda_device_selector : public device_selector { +public: + int operator()(const device &dev) const { return isCudaDevice(dev) ? 1 : -1; } +}; + +class other_cuda_device_selector : public device_selector { +public: + other_cuda_device_selector(const device &dev) : excludeDevice{dev} {} + + int operator()(const device &dev) const { + if (!isCudaDevice(dev)) { + return -1; + } + if (dev.get() == excludeDevice.get()) { + // Return only this device if it is the only available + return 0; + } + return 1; + } + +private: + const device &excludeDevice; +}; + +using namespace cl::sycl; + +struct DISABLED_CudaPrimaryContextTests : public ::testing::Test { + +protected: + std::vector Plugins; + + pi_platform platform_; + device deviceA_; + device deviceB_; + context context_; + + void SetUp() override { + + try { + context context_; + } catch (device_error &e) { + std::cout << "Failed to create device for context" << std::endl; + } + + deviceA_ = cuda_device_selector().select_device(); + deviceB_ = other_cuda_device_selector(deviceA_).select_device(); + + ASSERT_TRUE(isCudaDevice(deviceA_)); + } + + void TearDown() override {} +}; + +TEST_F(DISABLED_CudaPrimaryContextTests, piSingleContext) { + std::cout << "create single context" << std::endl; + context Context(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); + + CUdevice CudaDevice = reinterpret_cast(deviceA_.get())->get(); + CUcontext CudaContext = reinterpret_cast(Context.get())->get(); + + CUcontext PrimaryCudaContext; + cuDevicePrimaryCtxRetain(&PrimaryCudaContext, CudaDevice); + + ASSERT_EQ(CudaContext, PrimaryCudaContext); + + cuDevicePrimaryCtxRelease(CudaDevice); +} + +TEST_F(DISABLED_CudaPrimaryContextTests, piMultiContextSingleDevice) { + std::cout << "create multiple contexts for one device" << std::endl; + context ContextA(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); + context ContextB(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); + + CUcontext CudaContextA = reinterpret_cast(ContextA.get())->get(); + CUcontext CudaContextB = reinterpret_cast(ContextB.get())->get(); + + ASSERT_EQ(CudaContextA, CudaContextB); +} + +TEST_F(DISABLED_CudaPrimaryContextTests, piMultiContextMultiDevice) { + if (isCudaDevice(deviceB_) && deviceA_.get() != deviceB_.get()) { + std::cout << "create multiple contexts for multiple devices" << std::endl; + context ContextA(deviceA_, async_handler{}, /*UsePrimaryContext=*/true); + context ContextB(deviceB_, async_handler{}, /*UsePrimaryContext=*/true); + + CUcontext CudaContextA = + reinterpret_cast(ContextA.get())->get(); + CUcontext CudaContextB = + reinterpret_cast(ContextB.get())->get(); + + ASSERT_NE(CudaContextA, CudaContextB); + } +}