Skip to content

Commit 036e301

Browse files
committed
Initial async kernel support
1 parent 05aa34d commit 036e301

File tree

8 files changed

+341
-12
lines changed

8 files changed

+341
-12
lines changed

numba_dpex/core/runtime/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ python_add_library(${PROJECT_NAME} MODULE ${SOURCES})
109109

110110
# Add SYCL to target, this must come after python_add_library()
111111
# FIXME: sources incompatible with sycl include?
112-
# add_sycl_to_target(TARGET ${PROJECT_NAME} SOURCES ${KERNEL_SOURCES})
112+
add_sycl_to_target(TARGET ${PROJECT_NAME} SOURCES ${KERNEL_SOURCES})
113113

114114
# Link the DPCTLSyclInterface library to target
115115
target_link_libraries(${PROJECT_NAME} PRIVATE DPCTLSyclInterface)

numba_dpex/core/runtime/_dbg_printer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
/* Debugging facilities - enabled at compile-time */
1515
/* #undef NDEBUG */
16-
#if 0
16+
#if 1
1717
#include <stdio.h>
1818
#define DPEXRT_DEBUG(X) \
1919
{ \

numba_dpex/core/runtime/_dpexrt_python.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "_queuestruct.h"
2525
#include "_usmarraystruct.h"
2626

27+
#include "experimental/nrt_reserve_meminfo.h"
2728
#include "numba/core/runtime/nrt_external.h"
2829

2930
// forward declarations
@@ -1490,6 +1491,8 @@ static PyObject *build_c_helpers_dict(void)
14901491
&DPEXRT_sycl_event_from_python);
14911492
_declpointer("DPEXRT_sycl_event_to_python", &DPEXRT_sycl_event_to_python);
14921493
_declpointer("DPEXRT_sycl_event_init", &DPEXRT_sycl_event_init);
1494+
_declpointer("DPEXRT_nrt_acquire_meminfo_and_schedule_release",
1495+
&DPEXRT_nrt_acquire_meminfo_and_schedule_release);
14931496

14941497
#undef _declpointer
14951498
return dct;
@@ -1557,6 +1560,9 @@ MOD_INIT(_dpexrt_python)
15571560
PyLong_FromVoidPtr(&DPEXRT_MemInfo_alloc));
15581561
PyModule_AddObject(m, "DPEXRT_MemInfo_fill",
15591562
PyLong_FromVoidPtr(&DPEXRT_MemInfo_fill));
1563+
PyModule_AddObject(
1564+
m, "DPEXRT_nrt_acquire_meminfo_and_schedule_release",
1565+
PyLong_FromVoidPtr(&DPEXRT_nrt_acquire_meminfo_and_schedule_release));
15601566
PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
15611567
return MOD_SUCCESS_VAL(m);
15621568
}

numba_dpex/core/runtime/context.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,3 +433,41 @@ def submit_ndrange(
433433
)
434434

435435
return ret
436+
437+
def acquire_meminfo_and_schedule_release(
438+
self, builder: llvmir.IRBuilder, args
439+
):
440+
"""Inserts LLVM IR to call nrt_acquire_meminfo_and_schedule_release.
441+
442+
DPCTLSyclEventRef
443+
DPEXRT_nrt_acquire_meminfo_and_schedule_release(
444+
NRT_api_functions *nrt,
445+
DPCTLSyclQueueRef QRef,
446+
NRT_MemInfo **meminfo_array,
447+
size_t meminfo_array_size,
448+
DPCTLSyclEventRef *depERefs,
449+
size_t nDepERefs,
450+
int *status,
451+
);
452+
453+
"""
454+
mod = builder.module
455+
456+
func_ty = llvmir.FunctionType(
457+
cgutils.voidptr_t,
458+
[
459+
cgutils.voidptr_t,
460+
cgutils.voidptr_t,
461+
cgutils.voidptr_t.as_pointer(),
462+
llvmir.IntType(64),
463+
cgutils.voidptr_t,
464+
llvmir.IntType(64),
465+
llvmir.IntType(64).as_pointer(),
466+
],
467+
)
468+
fn = cgutils.get_or_insert_function(
469+
mod, func_ty, "DPEXRT_nrt_acquire_meminfo_and_schedule_release"
470+
)
471+
ret = builder.call(fn, args)
472+
473+
return ret
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// SPDX-FileCopyrightText: 2023 Intel Corporation
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#include "nrt_reserve_meminfo.h"
6+
7+
#include "_dbg_printer.h"
8+
#include "syclinterface/dpctl_sycl_type_casters.hpp"
9+
#include <CL/sycl.hpp>
10+
11+
extern "C"
12+
{
13+
DPCTLSyclEventRef DPEXRT_nrt_acquire_meminfo_and_schedule_release(
14+
NRT_api_functions *nrt,
15+
DPCTLSyclQueueRef QRef,
16+
NRT_MemInfo **meminfo_array,
17+
size_t meminfo_array_size,
18+
// DPCTLSyclEventRef *depERefs,
19+
DPCTLSyclEventRef depERef,
20+
size_t nDepERefs,
21+
int *status)
22+
{
23+
DPEXRT_DEBUG(drt_debug_print(
24+
"DPEXRT-DEBUG: scheduling nrt meminfo release.\n"););
25+
26+
using dpctl::syclinterface::unwrap;
27+
using dpctl::syclinterface::wrap;
28+
29+
sycl::queue *q = unwrap<sycl::queue>(QRef);
30+
31+
std::vector<NRT_MemInfo *> meminfo_vec(
32+
meminfo_array, meminfo_array + meminfo_array_size);
33+
34+
for (size_t i = 0; i < meminfo_array_size; ++i) {
35+
nrt->acquire(meminfo_vec[i]);
36+
}
37+
38+
DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: acquired meminfo.\n"););
39+
40+
try {
41+
sycl::event ht_ev = q->submit([&](sycl::handler &cgh) {
42+
for (size_t ev_id = 0; ev_id < nDepERefs; ++ev_id) {
43+
// cgh.depends_on(*(unwrap<sycl::event>(depERefs[ev_id])));
44+
cgh.depends_on(*(unwrap<sycl::event>(depERef)));
45+
}
46+
cgh.host_task([meminfo_array_size, meminfo_vec, nrt]() {
47+
for (size_t i = 0; i < meminfo_array_size; ++i) {
48+
nrt->release(meminfo_vec[i]);
49+
DPEXRT_DEBUG(
50+
drt_debug_print("DPEXRT-DEBUG: released meminfo "
51+
"from host_task.\n"););
52+
}
53+
});
54+
});
55+
56+
constexpr int result_ok = 0;
57+
58+
*status = result_ok;
59+
auto e_ptr = new sycl::event(ht_ev);
60+
return wrap<sycl::event>(e_ptr);
61+
} catch (const std::exception &e) {
62+
constexpr int result_std_exception = 1;
63+
64+
*status = result_std_exception;
65+
return nullptr;
66+
}
67+
68+
constexpr int result_other_abnormal = 2;
69+
70+
*status = result_other_abnormal;
71+
return nullptr;
72+
}
73+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// SPDX-FileCopyrightText: 2023 Intel Corporation
2+
//
3+
// SPDX-License-Identifier: Apache-2.0
4+
5+
#ifndef _EXPERIMENTAL_H_
6+
#define _EXPERIMENTAL_H_
7+
8+
#include "dpctl_capi.h"
9+
#include "numba/core/runtime/nrt_external.h"
10+
11+
#ifdef __cplusplus
12+
extern "C"
13+
{
14+
#endif
15+
DPCTLSyclEventRef DPEXRT_nrt_acquire_meminfo_and_schedule_release(
16+
NRT_api_functions *nrt,
17+
DPCTLSyclQueueRef QRef,
18+
NRT_MemInfo **meminfo_array,
19+
size_t meminfo_array_size,
20+
// DPCTLSyclEventRef *depERefs,
21+
DPCTLSyclEventRef depERef,
22+
size_t nDepERefs,
23+
int *status);
24+
#ifdef __cplusplus
25+
}
26+
#endif
27+
28+
#endif /* _EXPERIMENTAL_H_ */

0 commit comments

Comments
 (0)