Skip to content

Commit c40bd83

Browse files
author
khaled
committed
WIP for fully enable sycl queue in DPEXRT_MemInfo_alloc
Sycl queue support through DPEXRT_MemInfo_alloc working
1 parent 116bb12 commit c40bd83

File tree

7 files changed

+132
-92
lines changed

7 files changed

+132
-92
lines changed

numba_dpex/_patches.py

Lines changed: 76 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
from numba_dpex.core.runtime import context as dpexrt
2727
from numba_dpex.core.types import DpnpNdArray
28+
from numba_dpex.core.types.dpctl_types import DpctlSyclQueue
2829

2930
# Numpy array constructors
3031

@@ -172,13 +173,73 @@ def _mk_alloc(
172173
return out
173174

174175

176+
def make_queue(context, builder, arrtype):
177+
"""Utility function used for allocating a new queue.
178+
179+
This function will allocates a new queue (e.g. SYCL queue)
180+
during LLVM code generation (lowering). Given a target context,
181+
builder, array type, returns a LLVM value pointing at a numba-dpex
182+
runtime allocated queue.
183+
184+
Args:
185+
context (numba.core.base.BaseContext): Any of the context
186+
derived from Numba's BaseContext
187+
(e.g. `numba.core.cpu.CPUContext`).
188+
builder (llvmlite.ir.builder.IRBuilder): The IR builder
189+
from `llvmlite` for code generation.
190+
arrtype (numba_dpex.core.types.dpnp_ndarray_type.DpnpNdArray):
191+
Any of the array types derived from
192+
`numba.core.types.nptypes.Array`,
193+
e.g. `numba_dpex.core.types.dpnp_ndarray_type.DpnpNdArray`.
194+
Please refer to `numba_dpex.dpnp_iface._intrinsic.alloc_empty_arrayobj()`
195+
function for details on how to construct this argument.
196+
197+
Returns:
198+
tuple: A tuple containing `llvmlite.ir.instructions.ExtractValue`,
199+
`llvmlite.ir.instructions.CastInstr` and `numba.core.pythonapi.PythonAPI`.
200+
"""
201+
202+
pyapi = context.get_python_api(builder)
203+
queue_struct_proxy = cgutils.create_struct_proxy(
204+
DpctlSyclQueue(arrtype.queue)
205+
)(context, builder)
206+
queue_struct_ptr = queue_struct_proxy._getpointer()
207+
queue_struct_voidptr = builder.bitcast(queue_struct_ptr, cgutils.voidptr_t)
208+
209+
address = context.get_constant(types.intp, id(arrtype.queue))
210+
queue_address_ptr = builder.inttoptr(address, cgutils.voidptr_t)
211+
212+
dpexrtCtx = dpexrt.DpexRTContext(context)
213+
dpexrtCtx.queuestruct_from_python(
214+
pyapi, queue_address_ptr, queue_struct_voidptr
215+
)
216+
# errcode = dpexrtCtx.queuestruct_from_python(
217+
# pyapi, queue_address_ptr, queue_struct_voidptr
218+
# )
219+
# is_error = cgutils.is_not_null(builder, errcode)
220+
# # Handle error
221+
# with builder.if_then(is_error, likely=False):
222+
# pyapi.err_set_string(
223+
# "_patches.make_queue(): PyExc_TypeError",
224+
# "can't unbox dpctl.SyclQueue from PyObject into a Numba "
225+
# "native value. The object maybe of a different type",
226+
# )
227+
228+
queue_struct = builder.load(queue_struct_ptr)
229+
queue_ref = builder.extract_value(queue_struct, 1)
230+
231+
return (queue_ref, queue_address_ptr, pyapi)
232+
233+
175234
def _empty_nd_impl(context, builder, arrtype, shapes):
176235
"""Utility function used for allocating a new array during LLVM code
177236
generation (lowering). Given a target context, builder, array
178237
type, and a tuple or list of lowered dimension sizes, returns a
179238
LLVM value pointing at a Numba runtime allocated array.
180239
"""
181240

241+
(queue, queue_ptr, pyapi) = make_queue(context, builder, arrtype)
242+
182243
arycls = make_array(arrtype)
183244
ary = arycls(context, builder)
184245

@@ -231,21 +292,16 @@ def _empty_nd_impl(context, builder, arrtype, shapes):
231292

232293
if isinstance(arrtype, DpnpNdArray):
233294
usm_ty = arrtype.usm_type
234-
usm_ty_val = 0
235-
if usm_ty == "device":
236-
usm_ty_val = 1
237-
elif usm_ty == "shared":
238-
usm_ty_val = 2
239-
elif usm_ty == "host":
240-
usm_ty_val = 3
241-
usm_type = context.get_constant(types.uint64, usm_ty_val)
242-
device = context.insert_const_string(builder.module, arrtype.device)
295+
usm_ty_map = {"device": 1, "shared": 2, "host": 3}
296+
usm_type = context.get_constant(
297+
types.uint64, usm_ty_map[usm_ty] if usm_ty in usm_ty_map else 0
298+
)
243299

244300
args = (
245301
context.get_dummy_value(),
246302
allocsize,
247303
usm_type,
248-
device,
304+
queue,
249305
)
250306
mip = types.MemInfoPointer(types.voidptr)
251307
arytypeclass = types.TypeRef(type(arrtype))
@@ -265,6 +321,7 @@ def _empty_nd_impl(context, builder, arrtype, shapes):
265321
fnop.get_call_type(context.typing_context, sig.args, {})
266322
eqfn = context.get_function(fnop, sig)
267323
meminfo = eqfn(builder, args)
324+
pyapi.decref(queue_ptr)
268325
else:
269326
dtype = arrtype.dtype
270327
align_val = context.get_preferred_array_alignment(dtype)
@@ -298,36 +355,36 @@ def _empty_nd_impl(context, builder, arrtype, shapes):
298355

299356

300357
@overload_classmethod(DpnpNdArray, "_usm_allocate")
301-
def _ol_array_allocate(cls, allocsize, usm_type, device):
358+
def _ol_array_allocate(cls, allocsize, usm_type, queue):
302359
"""Implements an allocator for dpnp.ndarrays."""
303360

304-
def impl(cls, allocsize, usm_type, device):
305-
return intrin_usm_alloc(allocsize, usm_type, device)
361+
def impl(cls, allocsize, usm_type, queue):
362+
return intrin_usm_alloc(allocsize, usm_type, queue)
306363

307364
return impl
308365

309366

310367
numba_config.DISABLE_PERFORMANCE_WARNINGS = 0
311368

312369

313-
def _call_usm_allocator(arrtype, size, usm_type, device):
370+
def _call_usm_allocator(arrtype, size, usm_type, queue):
314371
"""Trampoline to call the intrinsic used for allocation"""
315-
return arrtype._usm_allocate(size, usm_type, device)
372+
return arrtype._usm_allocate(size, usm_type, queue)
316373

317374

318375
numba_config.DISABLE_PERFORMANCE_WARNINGS = 1
319376

320377

321378
@intrinsic
322-
def intrin_usm_alloc(typingctx, allocsize, usm_type, device):
379+
def intrin_usm_alloc(typingctx, allocsize, usm_type, queue):
323380
"""Intrinsic to call into the allocator for Array"""
324381

325382
def codegen(context, builder, signature, args):
326-
[allocsize, usm_type, device] = args
383+
[allocsize, usm_type, queue] = args
327384
dpexrtCtx = dpexrt.DpexRTContext(context)
328-
meminfo = dpexrtCtx.meminfo_alloc(builder, allocsize, usm_type, device)
385+
meminfo = dpexrtCtx.meminfo_alloc(builder, allocsize, usm_type, queue)
329386
return meminfo
330387

331388
mip = types.MemInfoPointer(types.voidptr) # return untyped pointer
332-
sig = signature(mip, allocsize, usm_type, device)
389+
sig = signature(mip, allocsize, usm_type, queue)
333390
return sig, codegen

numba_dpex/core/runtime/_dpexrt_python.c

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj,
4545
npy_intp nitems,
4646
npy_intp itemsize,
4747
DPCTLSyclQueueRef qref);
48+
static NRT_MemInfo *DPEXRT_MemInfo_alloc(npy_intp size,
49+
size_t usm_type,
50+
const DPCTLSyclQueueRef qref);
4851
static void usmndarray_meminfo_dtor(void *ptr, size_t size, void *info);
4952
static PyObject *box_from_arystruct_parent(arystruct_t *arystruct,
5053
int ndim,
@@ -440,33 +443,25 @@ static NRT_MemInfo *NRT_MemInfo_new_from_usmndarray(PyObject *ndarrobj,
440443
* @return {return} A new NRT_MemInfo object, NULL if no NRT_MemInfo
441444
* object could be created.
442445
*/
443-
static NRT_MemInfo *
444-
DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
446+
static NRT_MemInfo *DPEXRT_MemInfo_alloc(npy_intp size,
447+
size_t usm_type,
448+
const DPCTLSyclQueueRef qref)
445449
{
446450
NRT_MemInfo *mi = NULL;
447451
NRT_ExternalAllocator *ext_alloca = NULL;
448452
MemInfoDtorInfo *midtor_info = NULL;
449-
DPCTLSyclQueueRef qref = NULL;
450453

451454
DPEXRT_DEBUG(drt_debug_print(
452455
"DPEXRT-DEBUG: Inside DPEXRT_MemInfo_alloc %s, line %d\n", __FILE__,
453456
__LINE__));
457+
454458
// Allocate a new NRT_MemInfo object
455459
if (!(mi = (NRT_MemInfo *)malloc(sizeof(NRT_MemInfo)))) {
456460
DPEXRT_DEBUG(drt_debug_print(
457461
"DPEXRT-ERROR: Could not allocate a new NRT_MemInfo object.\n"));
458462
goto error;
459463
}
460464

461-
if (!(qref = (DPCTLSyclQueueRef)DPEXRTQueue_CreateFromFilterString(device)))
462-
{
463-
DPEXRT_DEBUG(
464-
drt_debug_print("DPEXRT-ERROR: Could not create a sycl::queue from "
465-
"filter string: %s at %s %d.\n",
466-
device, __FILE__, __LINE__));
467-
goto error;
468-
}
469-
470465
// Allocate a new NRT_ExternalAllocator
471466
if (!(ext_alloca = NRT_ExternalAllocator_new_for_usm(qref, usm_type)))
472467
goto error;
@@ -484,10 +479,10 @@ DPEXRT_MemInfo_alloc(npy_intp size, size_t usm_type, const char *device)
484479

485480
mi->size = size;
486481
mi->external_allocator = ext_alloca;
487-
DPEXRT_DEBUG(drt_debug_print(
488-
"DPEXRT-DEBUG: DPEXRT_MemInfo_alloc mi=%p "
489-
"external_allocator=%p for usm_type %zu on device %s, %s at %d\n",
490-
mi, ext_alloca, usm_type, device, __FILE__, __LINE__));
482+
// DPEXRT_DEBUG(drt_debug_print(
483+
// "DPEXRT-DEBUG: DPEXRT_MemInfo_alloc mi=%p "
484+
// "external_allocator=%p for usm_type %zu on device %s, %s at %d\n",
485+
// mi, ext_alloca, usm_type, device, __FILE__, __LINE__));
491486

492487
return mi;
493488

@@ -1157,6 +1152,11 @@ static int DPEXRT_sycl_queue_from_python(PyObject *obj,
11571152
goto error;
11581153
}
11591154

1155+
DPEXRT_DEBUG(DPCTLSyclDeviceRef device_ref;
1156+
device_ref = DPCTLQueue_GetDevice(queue_ref);
1157+
DPCTLDeviceMgr_PrintDeviceInfo(device_ref);
1158+
DPCTLDevice_Delete(device_ref););
1159+
11601160
queue_struct->parent = obj;
11611161
queue_struct->queue_ref = queue_ref;
11621162

numba_dpex/core/runtime/context.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,20 +20,20 @@ def _check_null_result(func):
2020
@functools.wraps(func)
2121
def wrap(self, builder, *args, **kwargs):
2222
memptr = func(self, builder, *args, **kwargs)
23-
msg = "USM allocation failed. Check the usm_type and filter "
24-
"string values."
23+
msg = "USM allocation failed. Check the usm_type and filter string values."
2524
cgutils.guard_memory_error(self._context, builder, memptr, msg=msg)
2625
return memptr
2726

2827
return wrap
2928

3029
@_check_null_result
31-
def meminfo_alloc(self, builder, size, usm_type, device):
30+
def meminfo_alloc(self, builder, size, usm_type, queue):
3231
"""
3332
Wrapper to call :func:`~context.DpexRTContext.meminfo_alloc_unchecked`
3433
with null checking of the returned value.
3534
"""
36-
return self.meminfo_alloc_unchecked(builder, size, usm_type, device)
35+
36+
return self.meminfo_alloc_unchecked(builder, size, usm_type, queue)
3737

3838
@_check_null_result
3939
def meminfo_fill(
@@ -60,7 +60,7 @@ def meminfo_fill(
6060
device,
6161
)
6262

63-
def meminfo_alloc_unchecked(self, builder, size, usm_type, device):
63+
def meminfo_alloc_unchecked(self, builder, size, usm_type, queue):
6464
"""Allocate a new MemInfo with a data payload of `size` bytes.
6565
6666
The result of the call is checked and if it is NULL, i.e. allocation
@@ -79,6 +79,7 @@ def meminfo_alloc_unchecked(self, builder, size, usm_type, device):
7979
8080
Returns: A pointer to the MemInfo is returned.
8181
"""
82+
8283
mod = builder.module
8384
u64 = llvmir.IntType(64)
8485
fnty = llvmir.FunctionType(
@@ -87,7 +88,7 @@ def meminfo_alloc_unchecked(self, builder, size, usm_type, device):
8788
fn = cgutils.get_or_insert_function(mod, fnty, "DPEXRT_MemInfo_alloc")
8889
fn.return_value.add_attribute("noalias")
8990

90-
ret = builder.call(fn, [size, usm_type, device])
91+
ret = builder.call(fn, [size, usm_type, queue])
9192

9293
return ret
9394

@@ -164,7 +165,6 @@ def queuestruct_from_python(self, pyapi, obj, ptr):
164165
fn.args[1].add_attribute("nocapture")
165166

166167
self.error = pyapi.builder.call(fn, (obj, ptr))
167-
168168
return self.error
169169

170170
def queuestruct_to_python(self, pyapi, val):

numba_dpex/core/types/dpctl_types.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,15 +37,18 @@ def unbox_sycl_queue(typ, obj, c):
3737
"""
3838
Convert a SyclQueue object to a native structure.
3939
"""
40+
4041
qstruct = cgutils.create_struct_proxy(typ)(c.context, c.builder)
4142
qptr = qstruct._getpointer()
4243
ptr = c.builder.bitcast(qptr, c.pyapi.voidptr)
44+
4345
if c.context.enable_nrt:
4446
dpexrtCtx = dpexrt.DpexRTContext(c.context)
4547
errcode = dpexrtCtx.queuestruct_from_python(c.pyapi, obj, ptr)
4648
else:
4749
raise UnreachableError
4850
is_error = cgutils.is_not_null(c.builder, errcode)
51+
4952
# Handle error
5053
with c.builder.if_then(is_error, likely=False):
5154
c.pyapi.err_set_string(

numba_dpex/core/types/usm_ndarray_type.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def __init__(
9393
)
9494
name = (
9595
"%s(dtype=%s, ndim=%s, layout=%s, address_space=%s, "
96-
"usm_type=%s, device=%s, sycl_device=%s)" % name_parts
96+
"usm_type=%s, device=%s, sycl_queue=%s)" % name_parts
9797
)
9898

9999
super().__init__(

numba_dpex/dpnp_iface/_intrinsic.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,17 @@ def fill_arrayobj(context, builder, ary, arrtype, fill_value):
107107

108108

109109
@intrinsic
110-
def intrin_usm_alloc(typingctx, allocsize, usm_type, device):
110+
def intrin_usm_alloc(typingctx, allocsize, usm_type, queue):
111111
"""Intrinsic to call into the allocator for Array"""
112112

113113
def codegen(context, builder, signature, args):
114-
[allocsize, usm_type, device] = args
114+
[allocsize, usm_type, queue] = args
115115
dpexrtCtx = dpexrt.DpexRTContext(context)
116-
meminfo = dpexrtCtx.meminfo_alloc(builder, allocsize, usm_type, device)
116+
meminfo = dpexrtCtx.meminfo_alloc(builder, allocsize, usm_type, queue)
117117
return meminfo
118118

119119
mip = types.MemInfoPointer(types.voidptr) # return untyped pointer
120-
sig = signature(mip, allocsize, usm_type, device)
120+
sig = signature(mip, allocsize, usm_type, queue)
121121
return sig, codegen
122122

123123

0 commit comments

Comments
 (0)