Skip to content

Commit

Permalink
Update/dpctl memcpy async (#529)
Browse files Browse the repository at this point in the history
* Initial update to accommodate async dpctl memcpy
* Update dpnp implementations
* Fix dpctl version
* Use dpctl 0.10*

Co-authored-by: Sergey Pokhodenko <sergey.pokhodenko@intel.com>
  • Loading branch information
reazulhoque and PokhodenkoSA authored Sep 7, 2021
1 parent 01a4a19 commit 16d72c9
Show file tree
Hide file tree
Showing 15 changed files with 295 additions and 73 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/conda-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
integration_channels: -c dppy/label/dev
artifact_name: -c dppy_label_dev
experimental: false # current stable
dependencies: dpctl=0.9.0=*_21 dpnp=0.7.1=*_41
dependencies: dpnp=0.7.1=*_41
continue-on-error: ${{ matrix.experimental }}
env:
# conda-forge: llvm-spirv 11 not on intel channel yet
Expand Down
4 changes: 2 additions & 2 deletions conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ requirements:
- setuptools
- cython
- numba 0.54*
- dpctl >=0.9*
- dpctl >=0.10*
- dpnp >=0.7* # [linux]
- wheel
run:
- python
- numba 0.54*
- dpctl >=0.9*
- dpctl >=0.10*
- spirv-tools
- llvm-spirv 11.*
- dpnp >=0.7* # [linux]
Expand Down
23 changes: 22 additions & 1 deletion numba_dppy/dpctl_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,39 @@ def dpctl_malloc_shared():


# Factory for the numba ExternalFunction descriptor bound to the C symbol
# "DPCTLQueue_Memcpy". The declared signature takes three voidptr arguments
# followed by an int64; the call sites visible in this file pass
# (sycl_queue, destination, source, size_in_bytes) in that order.
#
# NOTE(review): the two consecutive `ret_type` assignments below look like the
# removed and added lines of a diff hunk whose +/- markers were lost. Read as
# plain Python, the second assignment (`types.voidptr`) is the one that takes
# effect, which matches the call sites that store the result in `event` and
# pass it on to event_wait / event_delete.
def dpctl_queue_memcpy():
ret_type = types.void
ret_type = types.voidptr
sig = signature(ret_type, types.voidptr, types.voidptr, types.voidptr, types.int64)
return types.ExternalFunction("DPCTLQueue_Memcpy", sig)


def dpctl_event_wait():
    """Build the numba descriptor for the C symbol ``DPCTLEvent_Wait``.

    The declared signature takes a single ``voidptr`` argument (the event
    value produced by ``queue_memcpy`` at the call sites in this project)
    and returns ``void``.

    The return type was previously declared as ``voidptr``. The dpctl C API
    declares ``DPCTLEvent_Wait`` as returning ``void``, so a pointer return
    type made the generated call read a value the callee never produces.
    Every visible caller discards the result, so declaring ``void`` is
    backward-compatible and matches ``dpctl_event_delete`` and
    ``dpctl_queue_wait``.

    Returns:
        A ``types.ExternalFunction`` describing ``void DPCTLEvent_Wait(void*)``.
    """
    ret_type = types.void
    sig = signature(ret_type, types.voidptr)
    return types.ExternalFunction("DPCTLEvent_Wait", sig)


def dpctl_event_delete():
    """Build the numba descriptor for the C symbol ``DPCTLEvent_Delete``.

    Returns:
        A ``types.ExternalFunction`` whose signature accepts one ``voidptr``
        argument and returns ``void``. Call sites in this project pass the
        event value obtained from ``queue_memcpy`` after waiting on it.
    """
    c_signature = signature(types.void, types.voidptr)
    return types.ExternalFunction("DPCTLEvent_Delete", c_signature)


def dpctl_free_with_queue():
    """Build the numba descriptor for the C symbol ``DPCTLfree_with_queue``.

    Returns:
        A ``types.ExternalFunction`` whose signature accepts two ``voidptr``
        arguments and returns ``void``. Call sites in this project pass a
        pointer obtained from ``malloc_shared`` followed by the queue.
    """
    arg_types = (types.voidptr, types.voidptr)
    return types.ExternalFunction(
        "DPCTLfree_with_queue", signature(types.void, *arg_types)
    )


def dpctl_queue_wait():
    """Build the numba descriptor for the C symbol ``DPCTLQueue_Wait``.

    Returns:
        A ``types.ExternalFunction`` whose signature accepts one ``voidptr``
        argument (presumably the queue handle; confirm against dpctl) and
        returns ``void``.
    """
    return types.ExternalFunction(
        "DPCTLQueue_Wait",
        signature(types.void, types.voidptr),
    )


# Each factory above is called once here; the resulting descriptors are what
# the dpnp glue code references as ``dpctl_functions.<name>``.
get_current_queue = dpctl_get_current_queue()
malloc_shared = dpctl_malloc_shared()
queue_memcpy = dpctl_queue_memcpy()
free_with_queue = dpctl_free_with_queue()
# Applied by the call sites, in this order, to the value queue_memcpy yields.
event_wait = dpctl_event_wait()
event_delete = dpctl_event_delete()
queue_wait = dpctl_queue_wait()
30 changes: 24 additions & 6 deletions numba_dppy/dpnp_glue/dpnp_array_creations_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,21 @@ def common_impl(a, b, out, dpnp_func, PRINT_DEBUG):
sycl_queue = dpctl_functions.get_current_queue()

b_usm = dpctl_functions.malloc_shared(b.size * b.itemsize, sycl_queue)
dpctl_functions.queue_memcpy(sycl_queue, b_usm, b.ctypes, b.size * b.itemsize)
event = dpctl_functions.queue_memcpy(
sycl_queue, b_usm, b.ctypes, b.size * b.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue)

dpnp_func(out_usm, b_usm, a.size)

dpctl_functions.queue_memcpy(
event = dpctl_functions.queue_memcpy(
sycl_queue, out.ctypes, out_usm, out.size * out.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

dpctl_functions.free_with_queue(b_usm, sycl_queue)
dpctl_functions.free_with_queue(out_usm, sycl_queue)
Expand All @@ -58,15 +64,21 @@ def common_shape_impl(a, out, dpnp_func, PRINT_DEBUG):
sycl_queue = dpctl_functions.get_current_queue()

a_usm = dpctl_functions.malloc_shared(a.size * a.itemsize, sycl_queue)
dpctl_functions.queue_memcpy(sycl_queue, a_usm, a.ctypes, a.size * a.itemsize)
event = dpctl_functions.queue_memcpy(
sycl_queue, a_usm, a.ctypes, a.size * a.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue)

dpnp_func(a_usm, out_usm, a.shapeptr, a.ndim)

dpctl_functions.queue_memcpy(
event = dpctl_functions.queue_memcpy(
sycl_queue, out.ctypes, out_usm, out.size * out.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

dpctl_functions.free_with_queue(a_usm, sycl_queue)
dpctl_functions.free_with_queue(out_usm, sycl_queue)
Expand Down Expand Up @@ -203,16 +215,22 @@ def dpnp_impl(a, b):
sycl_queue = dpctl_functions.get_current_queue()

b_usm = dpctl_functions.malloc_shared(b.size * b.itemsize, sycl_queue)
dpctl_functions.queue_memcpy(sycl_queue, b_usm, b.ctypes, b.size * b.itemsize)
event = dpctl_functions.queue_memcpy(
sycl_queue, b_usm, b.ctypes, b.size * b.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

out = np.arange(0, a.size, 1, res_dtype)
out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue)

dpnp_func(b_usm, out_usm, a.size)

dpctl_functions.queue_memcpy(
event = dpctl_functions.queue_memcpy(
sycl_queue, out.ctypes, out_usm, out.size * out.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

dpctl_functions.free_with_queue(b_usm, sycl_queue)
dpctl_functions.free_with_queue(out_usm, sycl_queue)
Expand Down
44 changes: 35 additions & 9 deletions numba_dppy/dpnp_glue/dpnp_array_ops_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,21 @@ def common_impl(a, out, dpnp_func, print_debug):

sycl_queue = dpctl_functions.get_current_queue()
a_usm = dpctl_functions.malloc_shared(a.size * a.itemsize, sycl_queue)
dpctl_functions.queue_memcpy(sycl_queue, a_usm, a.ctypes, a.size * a.itemsize)
event = dpctl_functions.queue_memcpy(
sycl_queue, a_usm, a.ctypes, a.size * a.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

out_usm = dpctl_functions.malloc_shared(a.itemsize, sycl_queue)

dpnp_func(a_usm, out_usm, a.size)

dpctl_functions.queue_memcpy(
event = dpctl_functions.queue_memcpy(
sycl_queue, out.ctypes, out_usm, out.size * out.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

dpctl_functions.free_with_queue(a_usm, sycl_queue)
dpctl_functions.free_with_queue(out_usm, sycl_queue)
Expand Down Expand Up @@ -130,16 +136,22 @@ def dpnp_impl(a):
sycl_queue = dpctl_functions.get_current_queue()

a_usm = dpctl_functions.malloc_shared(a.size * a.itemsize, sycl_queue)
dpctl_functions.queue_memcpy(sycl_queue, a_usm, a.ctypes, a.size * a.itemsize)
event = dpctl_functions.queue_memcpy(
sycl_queue, a_usm, a.ctypes, a.size * a.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

out = np.arange(0, a.size, 1, res_dtype)
out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue)

dpnp_func(a_usm, out_usm, a.size)

dpctl_functions.queue_memcpy(
event = dpctl_functions.queue_memcpy(
sycl_queue, out.ctypes, out_usm, out.size * out.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

dpctl_functions.free_with_queue(a_usm, sycl_queue)
dpctl_functions.free_with_queue(out_usm, sycl_queue)
Expand Down Expand Up @@ -180,16 +192,22 @@ def dpnp_impl(a):
sycl_queue = dpctl_functions.get_current_queue()

a_usm = dpctl_functions.malloc_shared(a.size * a.itemsize, sycl_queue)
dpctl_functions.queue_memcpy(sycl_queue, a_usm, a.ctypes, a.size * a.itemsize)
event = dpctl_functions.queue_memcpy(
sycl_queue, a_usm, a.ctypes, a.size * a.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

out = np.arange(0, a.size, 1, res_dtype)
out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue)

dpnp_func(a_usm, out_usm, a.size)

dpctl_functions.queue_memcpy(
event = dpctl_functions.queue_memcpy(
sycl_queue, out.ctypes, out_usm, out.size * out.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

dpctl_functions.free_with_queue(a_usm, sycl_queue)
dpctl_functions.free_with_queue(out_usm, sycl_queue)
Expand Down Expand Up @@ -228,21 +246,29 @@ def dpnp_impl(a, ind):
sycl_queue = dpctl_functions.get_current_queue()

a_usm = dpctl_functions.malloc_shared(a.size * a.itemsize, sycl_queue)
dpctl_functions.queue_memcpy(sycl_queue, a_usm, a.ctypes, a.size * a.itemsize)
event = dpctl_functions.queue_memcpy(
sycl_queue, a_usm, a.ctypes, a.size * a.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

ind_usm = dpctl_functions.malloc_shared(ind.size * ind.itemsize, sycl_queue)
dpctl_functions.queue_memcpy(
event = dpctl_functions.queue_memcpy(
sycl_queue, ind_usm, ind.ctypes, ind.size * ind.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

out = np.arange(0, ind.size, 1, res_dtype).reshape(ind.shape)
out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue)

dpnp_func(a_usm, ind_usm, out_usm, ind.size)

dpctl_functions.queue_memcpy(
event = dpctl_functions.queue_memcpy(
sycl_queue, out.ctypes, out_usm, out.size * out.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

dpctl_functions.free_with_queue(a_usm, sycl_queue)
dpctl_functions.free_with_queue(ind_usm, sycl_queue)
Expand Down
10 changes: 8 additions & 2 deletions numba_dppy/dpnp_glue/dpnp_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,21 @@ def dpnp_impl(a, offset=0):
sycl_queue = dpctl_functions.get_current_queue()

a_usm = dpctl_functions.malloc_shared(a.size * a.itemsize, sycl_queue)
dpctl_functions.queue_memcpy(sycl_queue, a_usm, a.ctypes, a.size * a.itemsize)
event = dpctl_functions.queue_memcpy(
sycl_queue, a_usm, a.ctypes, a.size * a.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

out_usm = dpctl_functions.malloc_shared(out.size * out.itemsize, sycl_queue)

dpnp_func(a_usm, out_usm, offset, a.shapeptr, out.shapeptr, out.ndim)

dpctl_functions.queue_memcpy(
event = dpctl_functions.queue_memcpy(
sycl_queue, out.ctypes, out_usm, out.size * out.itemsize
)
dpctl_functions.event_wait(event)
dpctl_functions.event_delete(event)

dpctl_functions.free_with_queue(a_usm, sycl_queue)
dpctl_functions.free_with_queue(out_usm, sycl_queue)
Expand Down
Loading

0 comments on commit 16d72c9

Please sign in to comment.