Skip to content

Commit 4828db5

Browse files
jbrodmanbader
authored andcommitted
[SYCL][USM] Enable mem_advise APIs (#544)
* Implement mem_advise, add tests, update documentation. Signed-off-by: James Brodman <james.brodman@intel.com>
1 parent b4998f2 commit 4828db5

File tree

7 files changed

+141
-44
lines changed

7 files changed

+141
-44
lines changed

sycl/doc/extensions/USM/USM.adoc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -374,22 +374,28 @@ Return value:: none
374374

375375
'''
376376
==== Concurrent USM
377-
Concurrent USM contains all the utility functions of Explicit USM and Restricted USM. It introduces a new function, `sycl::mem_advise`, that allows programmers to provide additional information to the underlying runtime about how different allocations are used.
377+
Concurrent USM contains all the utility functions of Explicit USM and Restricted USM. It introduces a new function, `sycl::queue::mem_advise`, that allows programmers to provide additional information to the underlying runtime about how different allocations are used.
378378

379379
===== Performance Hints
380380
===== prefetch
381381
In Concurrent USM, prefetch commands may be overlapped with kernel execution.
382+
382383
===== mem_advise
383384
[source,cpp]
384385
----
385-
void sycl::mem_advise(void *addr, size_t length, int advice);
386+
class queue {
387+
...
388+
public:
389+
...
390+
event mem_advise(void *addr, size_t length, int advice);
391+
};
386392
----
387393

388394
Parameters::
389395
* `void* addr` - address of allocation
390396
* `size_t length` - number of bytes in the allocation
391397
* `int advice` - device-defined advice for the specified allocation
392-
Return Value:: none
398+
Return Value:: Returns an event representing the operation.
393399

394400
'''
395401
==== General

sycl/include/CL/sycl/detail/queue_impl.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -201,8 +201,9 @@ class queue_impl {
201201
return m_PropList.get_property<propertyT>();
202202
}
203203

204-
event memset(void* ptr, int value, size_t count);
205-
event memcpy(void* dest, const void* src, size_t count);
204+
event memset(void* Ptr, int Value, size_t Count);
205+
event memcpy(void* Dest, const void* Src, size_t Count);
206+
event mem_advise(const void *Ptr, size_t Length, int Advice);
206207

207208
private:
208209
template <typename T>

sycl/include/CL/sycl/detail/usm_dispatch.hpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,11 @@ class USMDispatcher {
4343
cl_mem_migration_flags Flags,
4444
pi_uint32 NumEventsInWaitList,
4545
const pi_event *EventWaitList, pi_event *Event);
46-
pi_result enqueueMemAdvise(pi_queue Queue, void *Ptr, size_t Size,
47-
cl_mem_advice_intel Advice,
48-
pi_uint32 NumEventsInWaitList,
49-
const pi_event *EventWaitList, pi_event *Event);
5046
pi_result getMemAllocInfo(pi_context Context, const void *Ptr,
5147
cl_mem_info_intel ParamName, size_t ParamValueSize,
5248
void *ParamValue, size_t *ParamValueSizeRet);
49+
void memAdvise(pi_queue Queue, const void *Ptr, size_t Length, int Advice,
50+
pi_event *Event);
5351

5452
private:
5553
bool mEmulated = false;

sycl/include/CL/sycl/queue.hpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,12 +104,16 @@ class queue {
104104
return impl->get_property<propertyT>();
105105
}
106106

107-
event memset(void* ptr, int value, size_t count) {
108-
return impl->memset(ptr, value, count);
107+
event memset(void* Ptr, int Value, size_t Count) {
108+
return impl->memset(Ptr, Value, Count);
109109
}
110110

111-
event memcpy(void* dest, const void* src, size_t count) {
112-
return impl->memcpy(dest, src, count);
111+
event memcpy(void* Dest, const void* Src, size_t Count) {
112+
return impl->memcpy(Dest, Src, Count);
113+
}
114+
115+
event mem_advise(const void *Ptr, size_t Length, int Advice) {
116+
return impl->mem_advise(Ptr, Length, Advice);
113117
}
114118

115119
private:

sycl/source/detail/queue_impl.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,18 @@ event queue_impl::memcpy(void *Dest, const void *Src, size_t Count) {
5959

6060
return event(Event, Context);
6161
}
62+
63+
event queue_impl::mem_advise(const void *Ptr, size_t Length, int Advice) {
64+
context Context = get_context();
65+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
66+
getSyclObjImpl(Context)->getUSMDispatch();
67+
cl_event Event;
68+
69+
USMDispatch->memAdvise(getHandleRef(), Ptr, Length, Advice,
70+
reinterpret_cast<pi_event *>(&Event));
71+
72+
return event(Event, Context);
73+
}
6274
} // namespace detail
6375
} // namespace sycl
6476
} // namespace cl

sycl/source/detail/usm/usm_dispatch.cpp

Lines changed: 24 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -278,37 +278,6 @@ pi_result USMDispatcher::enqueueMigrateMem(pi_queue Queue, const void *Ptr,
278278
return RetVal;
279279
}
280280

281-
pi_result USMDispatcher::enqueueMemAdvise(pi_queue Queue, void *Ptr,
282-
size_t Size,
283-
cl_mem_advice_intel Advice,
284-
pi_uint32 NumEventsInWaitList,
285-
const pi_event *EventWaitList,
286-
pi_event *Event) {
287-
pi_result RetVal = PI_INVALID_OPERATION;
288-
289-
if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) {
290-
cl_command_queue CLQueue = pi::cast<cl_command_queue>(Queue);
291-
292-
if (mEmulated) {
293-
// TODO: What should we do here?
294-
// This isn't really supported yet.
295-
// Advice is typically safe to ignore,
296-
// so a NOP will do.
297-
RetVal = pi::cast<pi_result>(clEnqueueMarkerWithWaitList(
298-
CLQueue, NumEventsInWaitList,
299-
reinterpret_cast<const cl_event *>(EventWaitList),
300-
reinterpret_cast<cl_event *>(Event)));
301-
} else {
302-
RetVal = pi::cast<pi_result>(pfn_clEnqueueMemAdviseINTEL(
303-
CLQueue, Ptr, Size, Advice, NumEventsInWaitList,
304-
reinterpret_cast<const cl_event *>(EventWaitList),
305-
reinterpret_cast<cl_event *>(Event)));
306-
}
307-
}
308-
309-
return RetVal;
310-
}
311-
312281
pi_result USMDispatcher::getMemAllocInfo(pi_context Context, const void *Ptr,
313282
cl_mem_info_intel ParamName,
314283
size_t ParamValueSize,
@@ -337,6 +306,30 @@ pi_result USMDispatcher::getMemAllocInfo(pi_context Context, const void *Ptr,
337306
return RetVal;
338307
}
339308

309+
void USMDispatcher::memAdvise(pi_queue Queue, const void *Ptr, size_t Length,
310+
int Advice, pi_event *Event) {
311+
if (pi::useBackend(pi::Backend::SYCL_BE_PI_OPENCL)) {
312+
cl_command_queue CLQueue = pi::cast<cl_command_queue>(Queue);
313+
314+
if (mEmulated) {
315+
// memAdvise does nothing here
316+
PI_CHECK(clEnqueueMarkerWithWaitList(
317+
CLQueue, 0, nullptr, reinterpret_cast<cl_event *>(Event)));
318+
} else {
319+
// Temporary until driver supports
320+
// memAdvise doesn't do anything on an iGPU anyway
321+
PI_CHECK(clEnqueueMarkerWithWaitList(
322+
CLQueue, 0, nullptr, reinterpret_cast<cl_event *>(Event)));
323+
/*
324+
// Enable once this is supported in the driver
325+
auto CLAdvice = *reinterpret_cast<cl_mem_advice_intel *>(&Advice);
326+
PI_CHECK(pfn_clEnqueueMemAdviseINTEL(
327+
CLQueue, Ptr, Length, CLAdvice, 0, nullptr,
328+
reinterpret_cast<cl_event *>(Event)));
329+
*/
330+
}
331+
}
332+
}
340333
} // namespace usm
341334
} // namespace detail
342335
} // namespace sycl

sycl/test/usm/memadvise.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// RUN: %clangxx -fsycl %s -o %t1.out -lOpenCL
2+
// RUN: %CPU_RUN_PLACEHOLDER %t1.out
3+
// TODO: SYCL specific fail - analyze and enable
4+
// XFAIL: windows
5+
6+
//==---------------- memadvise.cpp - Shared Memory Linked List test --------==//
7+
//
8+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9+
// See https://llvm.org/LICENSE.txt for license information.
10+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11+
//
12+
//===----------------------------------------------------------------------===//
13+
14+
#include <CL/sycl.hpp>
15+
16+
using namespace cl::sycl;
17+
18+
int numNodes = 4;
19+
20+
struct Node {
21+
Node() : pNext(nullptr), Num(0xDEADBEEF) {}
22+
23+
Node *pNext;
24+
uint32_t Num;
25+
};
26+
27+
class foo;
28+
int main() {
29+
queue q;
30+
auto dev = q.get_device();
31+
auto ctxt = q.get_context();
32+
Node *s_head = nullptr;
33+
Node *s_cur = nullptr;
34+
35+
s_head = (Node *)malloc_shared(sizeof(Node), dev, ctxt);
36+
if (s_head == nullptr) {
37+
return -1;
38+
}
39+
q.mem_advise(s_head, sizeof(Node), 42);
40+
s_cur = s_head;
41+
42+
for (int i = 0; i < numNodes; i++) {
43+
s_cur->Num = i * 2;
44+
45+
if (i != (numNodes - 1)) {
46+
s_cur->pNext = (Node *)malloc_shared(sizeof(Node), dev, ctxt);
47+
if (s_cur->pNext == nullptr) {
48+
return -1;
49+
}
50+
q.mem_advise(s_cur->pNext, sizeof(Node), 42);
51+
} else {
52+
s_cur->pNext = nullptr;
53+
}
54+
55+
s_cur = s_cur->pNext;
56+
}
57+
58+
auto e1 = q.submit([=](handler &cgh) {
59+
cgh.single_task<class foo>([=]() {
60+
Node *pHead = s_head;
61+
while (pHead) {
62+
pHead->Num = pHead->Num * 2 + 1;
63+
pHead = pHead->pNext;
64+
}
65+
});
66+
});
67+
68+
e1.wait();
69+
70+
s_cur = s_head;
71+
int mismatches = 0;
72+
for (int i = 0; i < numNodes; i++) {
73+
const int want = i * 4 + 1;
74+
if (s_cur->Num != want) {
75+
return -1;
76+
}
77+
Node *old = s_cur;
78+
s_cur = s_cur->pNext;
79+
free(old, ctxt);
80+
}
81+
82+
return 0;
83+
}

0 commit comments

Comments
 (0)