Skip to content

Commit 94268cb

Browse files
Merge pull request #152 from IntelPython/feature/memory-zero-copy
Feature/memory zero copy
2 parents 800e4d0 + 6d7375e commit 94268cb

29 files changed

+1336
-232
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.
44
## [Unreleased]
55
### Added
66
- Device descriptors "max_compute_units", "max_work_item_dimensions", "max_work_item_sizes", "max_work_group_size", "max_num_sub_groups" and "aspects" for int64 atomics inside dpctl C API and inside the dpctl.SyclDevice class.
7+
- MemoryUSM* classes moved to `dpctl.memory` module, added support for aligned allocation, added support for `prefetch` and `mem_advise` (sychronous) methods, implemented `copy_to_host`, `copy_from_host` and `copy_from_device` methods, pickling support, and zero-copy interoperability with Python objects which implement `__sycl_usm_array_inerface__` protocol.
78

89
### Removed
910
- The Legacy OpenCL interface.

backends/include/dppl_sycl_device_interface.h

+11
Original file line numberDiff line numberDiff line change
@@ -203,4 +203,15 @@ DPPLDevice_GetVendorName (__dppl_keep const DPPLSyclDeviceRef DRef);
203203
DPPL_API
204204
bool DPPLDevice_IsHostUnifiedMemory (__dppl_keep const DPPLSyclDeviceRef DRef);
205205

206+
/*!
207+
* @brief Checks if two DPPLSyclDeviceRef objects point to the same
208+
* sycl::device.
209+
*
210+
* @param DevRef1 First opaque pointer to the sycl device.
211+
* @param DevRef2 Second opaque pointer to the sycl device.
212+
* @return True if the underlying sycl::device are same, false otherwise.
213+
*/
214+
DPPL_API
215+
bool DPPLDevice_AreEq (__dppl_keep const DPPLSyclDeviceRef DevRef1,
216+
__dppl_keep const DPPLSyclDeviceRef DevRef2);
206217
DPPL_C_EXTERN_C_END

backends/include/dppl_sycl_queue_interface.h

+26
Original file line numberDiff line numberDiff line change
@@ -199,4 +199,30 @@ DPPL_API
199199
void DPPLQueue_Memcpy (__dppl_keep const DPPLSyclQueueRef QRef,
200200
void *Dest, const void *Src, size_t Count);
201201

202+
/*!
203+
* @brief C-API wrapper for sycl::queue::prefetch, the function waits on an event
204+
* till the prefetch operation completes.
205+
*
206+
* @param QRef An opaque pointer to the sycl queue.
207+
* @param Ptr An USM pointer to memory.
208+
* @param Count A number of bytes to prefetch.
209+
*/
210+
DPPL_API
211+
void DPPLQueue_Prefetch (__dppl_keep DPPLSyclQueueRef QRef,
212+
const void *Ptr, size_t Count);
213+
214+
/*!
215+
* @brief C-API wrapper for sycl::queue::mem_advise, the function waits on an event
216+
* till the operation completes.
217+
*
218+
* @param QRef An opaque pointer to the sycl queue.
219+
* @param Ptr An USM pointer to memory.
220+
* @param Count A number of bytes to prefetch.
221+
* @param Advice Device-defined advice for the specified allocation.
222+
* A value of 0 reverts the advice for Ptr to the default behavior.
223+
*/
224+
DPPL_API
225+
void DPPLQueue_MemAdvise (__dppl_keep DPPLSyclQueueRef QRef,
226+
const void *Ptr, size_t Count, int Advice);
227+
202228
DPPL_C_EXTERN_C_END

backends/include/dppl_sycl_queue_manager.h

+21
Original file line numberDiff line numberDiff line change
@@ -158,4 +158,25 @@ DPPLQueueMgr_PushQueue (DPPLSyclBackendType BETy,
158158
DPPL_API
159159
void DPPLQueueMgr_PopQueue ();
160160

161+
162+
/*!
163+
* @brief Creates a new instance of SYCL queue from SYCL context and
164+
* SYCL device.
165+
*
166+
* The instance is not placed into queue manager. The user assumes
167+
* ownership of the queue reference and should deallocate it using
168+
* DPPLQueue_Delete.
169+
*
170+
* @param CRef Sycl context reference
171+
* @param DRef Sycl device reference
172+
*
173+
* @return A copy of the sycl::queue created from given context and device
174+
* references.
175+
*/
176+
DPPL_API
177+
__dppl_give DPPLSyclQueueRef
178+
DPPLQueueMgr_GetQueueFromContextAndDevice(__dppl_keep DPPLSyclContextRef CRef,
179+
__dppl_keep DPPLSyclDeviceRef DRef);
180+
181+
161182
DPPL_C_EXTERN_C_END

backends/include/dppl_sycl_usm_interface.h

+80-6
Original file line numberDiff line numberDiff line change
@@ -34,35 +34,94 @@
3434
DPPL_C_EXTERN_C_BEGIN
3535

3636
/*!
37-
* @brief Crete USM shared memory.
37+
* @brief Create USM shared memory.
3838
*
39-
* @return The pointer to USM shared memory.
39+
* @param size Number of bytes to allocate
40+
* @param QRef Sycl queue reference to use in allocation
41+
*
42+
* @return The pointer to USM shared memory. On failure, returns nullptr.
4043
*/
4144
DPPL_API
4245
__dppl_give DPPLSyclUSMRef
4346
DPPLmalloc_shared (size_t size, __dppl_keep const DPPLSyclQueueRef QRef);
4447

4548
/*!
46-
* @brief Crete USM host memory.
49+
* @brief Create USM shared memory.
50+
*
51+
* @param alignment Allocation's byte alignment
52+
* @param size Number of bytes to allocate
53+
* @param QRef Sycl queue reference to use in allocation
4754
*
48-
* @return The pointer to USM host memory.
55+
* @return The pointer to USM shared memory with the requested alignment.
56+
* On failure, returns nullptr.
57+
*/
58+
DPPL_API
59+
__dppl_give DPPLSyclUSMRef
60+
DPPLaligned_alloc_shared (size_t alignment, size_t size,
61+
__dppl_keep const DPPLSyclQueueRef QRef);
62+
63+
/*!
64+
* @brief Create USM host memory.
65+
*
66+
* @param size Number of bytes to allocate
67+
* @param QRef Sycl queue reference to use in allocation
68+
*
69+
* @return The pointer to USM host memory. On failure, returns nullptr.
4970
*/
5071
DPPL_API
5172
__dppl_give DPPLSyclUSMRef
5273
DPPLmalloc_host (size_t size, __dppl_keep const DPPLSyclQueueRef QRef);
5374

5475
/*!
55-
* @brief Crete USM device memory.
76+
* @brief Create USM host memory.
77+
*
78+
* @param alignment Allocation's byte alignment
79+
* @param size Number of bytes to allocate
80+
* @param QRef Sycl queue reference to use in allocation
5681
*
57-
* @return The pointer to USM device memory.
82+
* @return The pointer to USM host memory with the requested alignment.
83+
* On failure, returns nullptr.
84+
*/
85+
DPPL_API
86+
__dppl_give DPPLSyclUSMRef
87+
DPPLaligned_alloc_host (size_t alignment, size_t size,
88+
__dppl_keep const DPPLSyclQueueRef QRef);
89+
90+
/*!
91+
* @brief Create USM device memory.
92+
*
93+
* @param size Number of bytes to allocate
94+
* @param QRef Sycl queue reference to use in allocation
95+
*
96+
* @return The pointer to USM device memory. On failure, returns nullptr.
5897
*/
5998
DPPL_API
6099
__dppl_give DPPLSyclUSMRef
61100
DPPLmalloc_device (size_t size, __dppl_keep const DPPLSyclQueueRef QRef);
62101

102+
/*!
103+
* @brief Create USM device memory.
104+
*
105+
* @param alignment Allocation's byte alignment
106+
* @param size Number of bytes to allocate
107+
* @param QRef Sycl queue reference to use in allocation
108+
*
109+
* @return The pointer to USM device memory with requested alignment.
110+
* On failure, returns nullptr.
111+
*/
112+
DPPL_API
113+
__dppl_give DPPLSyclUSMRef
114+
DPPLaligned_alloc_device (size_t alignment, size_t size,
115+
__dppl_keep const DPPLSyclQueueRef QRef);
116+
63117
/*!
64118
* @brief Free USM memory.
65119
*
120+
* @param MRef USM pointer to free
121+
* @param QRef Sycl queue reference to use.
122+
*
123+
* USM pointer must have been allocated using the same context as the one
124+
* used to construct the queue.
66125
*/
67126
DPPL_API
68127
void DPPLfree_with_queue (__dppl_take DPPLSyclUSMRef MRef,
@@ -79,11 +138,26 @@ void DPPLfree_with_context (__dppl_take DPPLSyclUSMRef MRef,
79138
/*!
80139
* @brief Get pointer type.
81140
*
141+
* @param MRef USM Memory
142+
* @param CRef Sycl context reference associated with the pointer
143+
*
82144
* @return "host", "device", "shared" or "unknown"
83145
*/
84146
DPPL_API
85147
const char *
86148
DPPLUSM_GetPointerType (__dppl_keep const DPPLSyclUSMRef MRef,
87149
__dppl_keep const DPPLSyclContextRef CRef);
88150

151+
/*!
152+
* @brief Get the device associated with USM pointer.
153+
*
154+
* @param MRef USM pointer
155+
* @param CRef Sycl context reference associated with the pointer
156+
*
157+
* @return A DPPLSyclDeviceRef pointer to the sycl device.
158+
*/
159+
DPPL_API
160+
DPPLSyclDeviceRef
161+
DPPLUSM_GetPointerDevice (__dppl_keep const DPPLSyclUSMRef MRef,
162+
__dppl_keep const DPPLSyclContextRef CRef);
89163
DPPL_C_EXTERN_C_END

backends/source/dppl_sycl_device_interface.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -261,3 +261,12 @@ bool DPPLDevice_IsHostUnifiedMemory (__dppl_keep const DPPLSyclDeviceRef DRef)
261261
}
262262
return false;
263263
}
264+
265+
bool DPPLDevice_AreEq(__dppl_keep const DPPLSyclDeviceRef DevRef1,
266+
__dppl_keep const DPPLSyclDeviceRef DevRef2)
267+
{
268+
if(!(DevRef1 && DevRef2))
269+
// \todo handle error
270+
return false;
271+
return (*unwrap(DevRef1) == *unwrap(DevRef2));
272+
}

backends/source/dppl_sycl_platform_interface.cpp

+7-7
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ get_set_of_non_hostbackends ()
4141
{
4242
std::set<DPPLSyclBackendType> be_set;
4343
for (auto p : platform::get_platforms()) {
44-
if(p.is_host())
44+
if(p.is_host())
4545
continue;
4646
auto be = p.get_backend();
4747
switch (be)
@@ -155,12 +155,12 @@ void DPPLPlatform_DumpInfo ()
155155
*/
156156
size_t DPPLPlatform_GetNumNonHostPlatforms ()
157157
{
158-
auto nNonHostPlatforms = 0ul;
159-
for (auto &p : platform::get_platforms()) {
160-
if (p.is_host())
161-
continue;
162-
++nNonHostPlatforms;
163-
}
158+
auto nNonHostPlatforms = 0ul;
159+
for (auto &p : platform::get_platforms()) {
160+
if (p.is_host())
161+
continue;
162+
++nNonHostPlatforms;
163+
}
164164
return nNonHostPlatforms;
165165
}
166166

backends/source/dppl_sycl_queue_interface.cpp

+19-1
Original file line numberDiff line numberDiff line change
@@ -290,10 +290,28 @@ DPPLQueue_Wait (__dppl_keep DPPLSyclQueueRef QRef)
290290
SyclQueue->wait();
291291
}
292292

293-
void DPPLQueue_Memcpy (__dppl_take const DPPLSyclQueueRef QRef,
293+
void DPPLQueue_Memcpy (__dppl_keep const DPPLSyclQueueRef QRef,
294294
void *Dest, const void *Src, size_t Count)
295295
{
296296
auto Q = unwrap(QRef);
297297
auto event = Q->memcpy(Dest, Src, Count);
298298
event.wait();
299299
}
300+
301+
void
302+
DPPLQueue_Prefetch (__dppl_keep DPPLSyclQueueRef QRef,
303+
const void *Ptr, size_t Count)
304+
{
305+
auto Q = unwrap(QRef);
306+
auto event = Q->prefetch(Ptr, Count);
307+
event.wait();
308+
}
309+
310+
void
311+
DPPLQueue_MemAdvise (__dppl_keep DPPLSyclQueueRef QRef,
312+
const void *Ptr, size_t Count, int Advice)
313+
{
314+
auto Q = unwrap(QRef);
315+
auto event = Q->mem_advise(Ptr, Count, static_cast<pi_mem_advice>(Advice));
316+
event.wait();
317+
}

backends/source/dppl_sycl_queue_manager.cpp

+17-1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ namespace
4040

4141
// Create wrappers for C Binding types (see CBindingWrapping.h).
4242
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(queue, DPPLSyclQueueRef)
43+
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(device, DPPLSyclDeviceRef)
44+
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(context, DPPLSyclContextRef)
4345

4446
/*!
4547
* @brief A helper class to support the DPPLSyclQueuemanager.
@@ -99,7 +101,7 @@ class QMgrHelper
99101
{
100102
QVec *active_queues;
101103
try {
102-
auto def_device = std::move(default_selector().select_device());
104+
auto def_device { default_selector().select_device() };
103105
auto BE = def_device.get_platform().get_backend();
104106
auto DevTy = def_device.get_info<info::device::device_type>();
105107

@@ -534,3 +536,17 @@ void DPPLQueueMgr_PopQueue ()
534536
{
535537
QMgrHelper::popSyclQueue();
536538
}
539+
540+
/*!
541+
* The function constructs a new SYCL queue instance from SYCL conext and
542+
* SYCL device.
543+
*/
544+
DPPLSyclQueueRef
545+
DPPLQueueMgr_GetQueueFromContextAndDevice (__dppl_keep DPPLSyclContextRef CRef,
546+
__dppl_keep DPPLSyclDeviceRef DRef)
547+
{
548+
auto dev = unwrap(DRef);
549+
auto ctx = unwrap(CRef);
550+
551+
return wrap(new queue(*ctx, *dev));
552+
}

backends/source/dppl_sycl_usm_interface.cpp

+41
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
//===----------------------------------------------------------------------===//
2626

2727
#include "dppl_sycl_usm_interface.h"
28+
#include "dppl_sycl_device_interface.h"
2829
#include "Support/CBindingWrapping.h"
2930

3031
#include <CL/sycl.hpp> /* SYCL headers */
@@ -35,6 +36,7 @@ namespace
3536
{
3637
// Create wrappers for C Binding types (see CBindingWrapping.h).
3738
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(queue, DPPLSyclQueueRef)
39+
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(device, DPPLSyclDeviceRef)
3840
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(context, DPPLSyclContextRef)
3941
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(void, DPPLSyclUSMRef)
4042

@@ -48,6 +50,15 @@ DPPLmalloc_shared (size_t size, __dppl_keep const DPPLSyclQueueRef QRef)
4850
return wrap(Ptr);
4951
}
5052

53+
__dppl_give DPPLSyclUSMRef
54+
DPPLaligned_alloc_shared (size_t alignment, size_t size,
55+
__dppl_keep const DPPLSyclQueueRef QRef)
56+
{
57+
auto Q = unwrap(QRef);
58+
auto Ptr = aligned_alloc_shared(alignment, size, *Q);
59+
return wrap(Ptr);
60+
}
61+
5162
__dppl_give DPPLSyclUSMRef
5263
DPPLmalloc_host (size_t size, __dppl_keep const DPPLSyclQueueRef QRef)
5364
{
@@ -56,6 +67,15 @@ DPPLmalloc_host (size_t size, __dppl_keep const DPPLSyclQueueRef QRef)
5667
return wrap(Ptr);
5768
}
5869

70+
__dppl_give DPPLSyclUSMRef
71+
DPPLaligned_alloc_host (size_t alignment, size_t size,
72+
__dppl_keep const DPPLSyclQueueRef QRef)
73+
{
74+
auto Q = unwrap(QRef);
75+
auto Ptr = aligned_alloc_host(alignment, size, *Q);
76+
return wrap(Ptr);
77+
}
78+
5979
__dppl_give DPPLSyclUSMRef
6080
DPPLmalloc_device (size_t size, __dppl_keep const DPPLSyclQueueRef QRef)
6181
{
@@ -64,6 +84,15 @@ DPPLmalloc_device (size_t size, __dppl_keep const DPPLSyclQueueRef QRef)
6484
return wrap(Ptr);
6585
}
6686

87+
__dppl_give DPPLSyclUSMRef
88+
DPPLaligned_alloc_device (size_t alignment, size_t size,
89+
__dppl_keep const DPPLSyclQueueRef QRef)
90+
{
91+
auto Q = unwrap(QRef);
92+
auto Ptr = aligned_alloc_device(alignment, size, *Q);
93+
return wrap(Ptr);
94+
}
95+
6796
void DPPLfree_with_queue (__dppl_take DPPLSyclUSMRef MRef,
6897
__dppl_keep const DPPLSyclQueueRef QRef)
6998
{
@@ -99,3 +128,15 @@ DPPLUSM_GetPointerType (__dppl_keep const DPPLSyclUSMRef MRef,
99128
return "unknown";
100129
}
101130
}
131+
132+
DPPLSyclDeviceRef
133+
DPPLUSM_GetPointerDevice (__dppl_keep const DPPLSyclUSMRef MRef,
134+
__dppl_keep const DPPLSyclContextRef CRef)
135+
{
136+
auto Ptr = unwrap(MRef);
137+
auto C = unwrap(CRef);
138+
139+
auto Dev = get_pointer_device(Ptr, *C);
140+
141+
return wrap(new device(Dev));
142+
}

0 commit comments

Comments
 (0)