Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ This extension is implemented and fully supported by {dpcpp}.
[NOTE]
====
Although {dpcpp} supports this extension on all backends, it is currently used
only on Level Zero. Other backends ignore the properties defined in this specification.
only on Level Zero and CUDA.
Other backends ignore the properties defined in this specification.
====

== Overview
Expand Down Expand Up @@ -104,4 +105,4 @@ properties are hints and may safely be ignored by an implementation.

It is illegal to specify multiple differrent priority hints for the same queue.
Doing so causes the `queue` constructor to throw a synchronous `exception` with
the `errc::invalid` error code.
the `errc::invalid` error code.
19 changes: 14 additions & 5 deletions sycl/plugins/unified_runtime/ur/adapters/cuda/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ CUstream ur_queue_handle_t_::getNextComputeStream(uint32_t *StreamToken) {
// The second check is done after mutex is locked so other threads can not
// change NumComputeStreams after that
if (NumComputeStreams < ComputeStreams.size()) {
UR_CHECK_ERROR(
cuStreamCreate(&ComputeStreams[NumComputeStreams++], Flags));
UR_CHECK_ERROR(cuStreamCreateWithPriority(
&ComputeStreams[NumComputeStreams++], Flags, Priority));
}
}
Token = ComputeStreamIndex++;
Expand Down Expand Up @@ -101,8 +101,8 @@ CUstream ur_queue_handle_t_::getNextTransferStream() {
// The second check is done after mutex is locked so other threads can not
// change NumTransferStreams after that
if (NumTransferStreams < TransferStreams.size()) {
UR_CHECK_ERROR(
cuStreamCreate(&TransferStreams[NumTransferStreams++], Flags));
UR_CHECK_ERROR(cuStreamCreateWithPriority(
&TransferStreams[NumTransferStreams++], Flags, Priority));
}
}
uint32_t StreamI = TransferStreamIndex++ % TransferStreams.size();
Expand All @@ -128,6 +128,8 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice,

unsigned int Flags = CU_STREAM_NON_BLOCKING;
ur_queue_flags_t URFlags = 0;
// '0' is the default priority, per CUDA Toolkit 12.2 and earlier
int Priority = 0;
bool IsOutOfOrder = false;
if (pProps && pProps->stype == UR_STRUCTURE_TYPE_QUEUE_PROPERTIES) {
URFlags = pProps->flags;
Expand All @@ -140,6 +142,13 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice,
if (URFlags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
IsOutOfOrder = true;
}
if (URFlags & UR_QUEUE_FLAG_PRIORITY_HIGH) {
ScopedContext Active(hContext);
UR_CHECK_ERROR(cuCtxGetStreamPriorityRange(nullptr, &Priority));
} else if (URFlags & UR_QUEUE_FLAG_PRIORITY_LOW) {
ScopedContext Active(hContext);
UR_CHECK_ERROR(cuCtxGetStreamPriorityRange(&Priority, nullptr));
}
}

std::vector<CUstream> ComputeCuStreams(
Expand All @@ -149,7 +158,7 @@ urQueueCreate(ur_context_handle_t hContext, ur_device_handle_t hDevice,

Queue = std::unique_ptr<ur_queue_handle_t_>(new ur_queue_handle_t_{
std::move(ComputeCuStreams), std::move(TransferCuStreams), hContext,
hDevice, Flags, URFlags});
hDevice, Flags, URFlags, Priority});

*phQueue = Queue.release();

Expand Down
5 changes: 3 additions & 2 deletions sycl/plugins/unified_runtime/ur/adapters/cuda/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ struct ur_queue_handle_t_ {
unsigned int LastSyncTransferStreams;
unsigned int Flags;
ur_queue_flags_t URFlags;
int Priority;
// When ComputeStreamSyncMutex and ComputeStreamMutex both need to be
// locked at the same time, ComputeStreamSyncMutex should be locked first
// to avoid deadlocks
Expand All @@ -59,7 +60,7 @@ struct ur_queue_handle_t_ {
ur_queue_handle_t_(std::vector<CUstream> &&ComputeStreams,
std::vector<CUstream> &&TransferStreams,
ur_context_handle_t_ *Context, ur_device_handle_t_ *Device,
unsigned int Flags, ur_queue_flags_t URFlags,
unsigned int Flags, ur_queue_flags_t URFlags, int Priority,
bool BackendOwns = true)
: ComputeStreams{std::move(ComputeStreams)},
TransferStreams{std::move(TransferStreams)},
Expand All @@ -69,7 +70,7 @@ struct ur_queue_handle_t_ {
Device{Device}, RefCount{1}, EventCount{0}, ComputeStreamIndex{0},
TransferStreamIndex{0}, NumComputeStreams{0}, NumTransferStreams{0},
LastSyncComputeStreams{0}, LastSyncTransferStreams{0}, Flags(Flags),
URFlags(URFlags), HasOwnership{BackendOwns} {
URFlags(URFlags), Priority(Priority), HasOwnership{BackendOwns} {
urContextRetain(Context);
urDeviceRetain(Device);
}
Expand Down
47 changes: 47 additions & 0 deletions sycl/test-e2e/Plugin/cuda_queue_priority.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// REQUIRES: gpu, cuda

// RUN: %{build} %cuda_options -o %t.out
// RUN: %{run} %t.out
//
// Check that queue priority is passed to CUDA runtime
#define SYCL_EXT_ONEAPI_BACKEND_CUDA_EXPERIMENTAL 1
#include <sycl/ext/oneapi/experimental/backend/cuda.hpp>
#include <sycl/sycl.hpp>

#include <cuda.h>

#include <assert.h>

int get_real_priority(sycl::context &C, sycl::device &D,
sycl::property_list Props) {
sycl::queue Q(C, D, Props);
CUstream QNative = sycl::get_native<sycl::backend::ext_oneapi_cuda>(Q);
int P;
CUresult Result = cuStreamGetPriority(QNative, &P);
assert(Result == CUDA_SUCCESS && "cuStreamGetPriority call failed");
return P;
}

int main(int Argc, const char *Argv[]) {
sycl::device D;
sycl::context C{D};

int PrioDefault = get_real_priority(C, D, sycl::property_list{});
int PrioNormal = get_real_priority(
C, D, {sycl::ext::oneapi::property::queue::priority_normal{}});
int PrioHigh = get_real_priority(
C, D, {sycl::ext::oneapi::property::queue::priority_high{}});
int PrioLow = get_real_priority(
C, D, {sycl::ext::oneapi::property::queue::priority_low{}});
// Lower value means higher priority
assert(PrioDefault == PrioNormal &&
"priority_normal is not the same as default");
assert(PrioHigh <= PrioNormal &&
"priority_high is lower than priority_normal");
assert(PrioLow >= PrioNormal &&
"priority_low is higher than priority_normal");
assert(PrioLow > PrioHigh && "priority_low is the same as priority_high");

std::cout << "The test passed." << std::endl;
return 0;
}