@@ -363,7 +363,7 @@ pi_uint64 _pi_event::get_end_time() const {
363363
364364pi_result _pi_event::record () {
365365
366- if (is_recorded ()) {
366+ if (is_recorded () || ! is_started () ) {
367367 return PI_INVALID_EVENT;
368368 }
369369
@@ -2074,7 +2074,7 @@ pi_result cuda_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer,
20742074 size_t size, void *ptr,
20752075 pi_uint32 num_events_in_wait_list,
20762076 const pi_event *event_wait_list,
2077- pi_event *retEvent ) {
2077+ pi_event *event ) {
20782078
20792079 assert (buffer != nullptr );
20802080 assert (command_queue != nullptr );
@@ -2089,7 +2089,7 @@ pi_result cuda_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer,
20892089 retErr = cuda_piEnqueueEventsWait (command_queue, num_events_in_wait_list,
20902090 event_wait_list, nullptr );
20912091
2092- if (retEvent ) {
2092+ if (event ) {
20932093 retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native (
20942094 PI_COMMAND_TYPE_MEM_BUFFER_READ, command_queue));
20952095 retImplEv->start ();
@@ -2098,16 +2098,16 @@ pi_result cuda_piEnqueueMemBufferRead(pi_queue command_queue, pi_mem buffer,
20982098 retErr =
20992099 PI_CHECK_ERROR (cuMemcpyDtoHAsync (ptr, devPtr + offset, size, cuStream));
21002100
2101- if (retEvent ) {
2101+ if (event ) {
21022102 retErr = retImplEv->record ();
21032103 }
21042104
21052105 if (blocking_read) {
21062106 retErr = PI_CHECK_ERROR (cuStreamSynchronize (cuStream));
21072107 }
21082108
2109- if (retEvent ) {
2110- *retEvent = retImplEv.release ();
2109+ if (event ) {
2110+ *event = retImplEv.release ();
21112111 }
21122112
21132113 } catch (pi_result err) {
@@ -3381,7 +3381,7 @@ pi_result cuda_piEnqueueMemBufferReadRect(
33813381 const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
33823382 size_t host_row_pitch, size_t host_slice_pitch, void *ptr,
33833383 pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
3384- pi_event *retEvent ) {
3384+ pi_event *event ) {
33853385
33863386 assert (buffer != nullptr );
33873387 assert (command_queue != nullptr );
@@ -3397,9 +3397,9 @@ pi_result cuda_piEnqueueMemBufferReadRect(
33973397 retErr = cuda_piEnqueueEventsWait (command_queue, num_events_in_wait_list,
33983398 event_wait_list, nullptr );
33993399
3400- if (retEvent ) {
3400+ if (event ) {
34013401 retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native (
3402- PI_COMMAND_TYPE_MEM_BUFFER_READ , command_queue));
3402+ PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT , command_queue));
34033403 retImplEv->start ();
34043404 }
34053405
@@ -3408,16 +3408,16 @@ pi_result cuda_piEnqueueMemBufferReadRect(
34083408 buffer_row_pitch, buffer_slice_pitch, ptr, CU_MEMORYTYPE_HOST,
34093409 host_offset, host_row_pitch, host_slice_pitch);
34103410
3411- if (retEvent ) {
3411+ if (event ) {
34123412 retErr = retImplEv->record ();
34133413 }
34143414
34153415 if (blocking_read) {
34163416 retErr = PI_CHECK_ERROR (cuStreamSynchronize (cuStream));
34173417 }
34183418
3419- if (retEvent ) {
3420- *retEvent = retImplEv.release ();
3419+ if (event ) {
3420+ *event = retImplEv.release ();
34213421 }
34223422
34233423 } catch (pi_result err) {
@@ -3432,7 +3432,7 @@ pi_result cuda_piEnqueueMemBufferWriteRect(
34323432 const size_t *region, size_t buffer_row_pitch, size_t buffer_slice_pitch,
34333433 size_t host_row_pitch, size_t host_slice_pitch, const void *ptr,
34343434 pi_uint32 num_events_in_wait_list, const pi_event *event_wait_list,
3435- pi_event *retEvent ) {
3435+ pi_event *event ) {
34363436
34373437 assert (buffer != nullptr );
34383438 assert (command_queue != nullptr );
@@ -3448,9 +3448,9 @@ pi_result cuda_piEnqueueMemBufferWriteRect(
34483448 retErr = cuda_piEnqueueEventsWait (command_queue, num_events_in_wait_list,
34493449 event_wait_list, nullptr );
34503450
3451- if (retEvent ) {
3451+ if (event ) {
34523452 retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native (
3453- PI_COMMAND_TYPE_MEM_BUFFER_WRITE , command_queue));
3453+ PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT , command_queue));
34543454 retImplEv->start ();
34553455 }
34563456
@@ -3459,16 +3459,16 @@ pi_result cuda_piEnqueueMemBufferWriteRect(
34593459 host_slice_pitch, &devPtr, CU_MEMORYTYPE_DEVICE, buffer_offset,
34603460 buffer_row_pitch, buffer_slice_pitch);
34613461
3462- if (retEvent ) {
3462+ if (event ) {
34633463 retErr = retImplEv->record ();
34643464 }
34653465
34663466 if (blocking_write) {
34673467 retErr = PI_CHECK_ERROR (cuStreamSynchronize (cuStream));
34683468 }
34693469
3470- if (retEvent ) {
3471- *retEvent = retImplEv.release ();
3470+ if (event ) {
3471+ *event = retImplEv.release ();
34723472 }
34733473
34743474 } catch (pi_result err) {
@@ -3487,6 +3487,8 @@ pi_result cuda_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,
34873487 return PI_INVALID_QUEUE;
34883488 }
34893489
3490+ std::unique_ptr<_pi_event> retImplEv{nullptr };
3491+
34903492 try {
34913493 ScopedContext active (command_queue->get_context ());
34923494
@@ -3497,17 +3499,21 @@ pi_result cuda_piEnqueueMemBufferCopy(pi_queue command_queue, pi_mem src_buffer,
34973499
34983500 pi_result result;
34993501
3502+ if (event) {
3503+ retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native (
3504+ PI_COMMAND_TYPE_MEM_BUFFER_COPY, command_queue));
3505+ result = retImplEv->start ();
3506+ }
3507+
35003508 auto stream = command_queue->get ();
35013509 auto src = src_buffer->mem_ .buffer_mem_ .get () + src_offset;
35023510 auto dst = dst_buffer->mem_ .buffer_mem_ .get () + dst_offset;
35033511
35043512 result = PI_CHECK_ERROR (cuMemcpyDtoDAsync (dst, src, size, stream));
35053513
35063514 if (event) {
3507- auto new_event = _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_COPY,
3508- command_queue);
3509- new_event->record ();
3510- *event = new_event;
3515+ result = retImplEv->record ();
3516+ *event = retImplEv.release ();
35113517 }
35123518
35133519 return result;
@@ -3543,7 +3549,7 @@ pi_result cuda_piEnqueueMemBufferCopyRect(
35433549
35443550 if (event) {
35453551 retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native (
3546- PI_COMMAND_TYPE_MEM_BUFFER_COPY , command_queue));
3552+ PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT , command_queue));
35473553 retImplEv->start ();
35483554 }
35493555
@@ -3586,6 +3592,8 @@ pi_result cuda_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,
35863592 (void )pattern_is_valid;
35873593 (void )pattern_size_is_valid;
35883594
3595+ std::unique_ptr<_pi_event> retImplEv{nullptr };
3596+
35893597 try {
35903598 ScopedContext active (command_queue->get_context ());
35913599
@@ -3596,6 +3604,12 @@ pi_result cuda_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,
35963604
35973605 pi_result result;
35983606
3607+ if (event) {
3608+ retImplEv = std::unique_ptr<_pi_event>(_pi_event::make_native (
3609+ PI_COMMAND_TYPE_MEM_BUFFER_FILL, command_queue));
3610+ result = retImplEv->start ();
3611+ }
3612+
35993613 auto dstDevice = buffer->mem_ .buffer_mem_ .get () + offset;
36003614 auto stream = command_queue->get ();
36013615 auto N = size / pattern_size;
@@ -3646,10 +3660,8 @@ pi_result cuda_piEnqueueMemBufferFill(pi_queue command_queue, pi_mem buffer,
36463660 }
36473661
36483662 if (event) {
3649- auto new_event = _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_FILL,
3650- command_queue);
3651- new_event->record ();
3652- *event = new_event;
3663+ result = retImplEv->record ();
3664+ *event = retImplEv.release ();
36533665 }
36543666
36553667 return result;
@@ -3971,7 +3983,7 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
39713983 size_t size,
39723984 pi_uint32 num_events_in_wait_list,
39733985 const pi_event *event_wait_list,
3974- pi_event *retEvent , void **ret_map) {
3986+ pi_event *event , void **ret_map) {
39753987
39763988 assert (ret_map != nullptr );
39773989 assert (command_queue != nullptr );
@@ -3993,15 +4005,16 @@ pi_result cuda_piEnqueueMemBufferMap(pi_queue command_queue, pi_mem buffer,
39934005 if ((map_flags & CL_MAP_READ) || (map_flags & CL_MAP_WRITE)) {
39944006 ret_err = cuda_piEnqueueMemBufferRead (
39954007 command_queue, buffer, blocking_map, offset, size, hostPtr,
3996- num_events_in_wait_list, event_wait_list, retEvent );
4008+ num_events_in_wait_list, event_wait_list, event );
39974009 } else {
3998- if (retEvent ) {
4010+ if (event ) {
39994011 try {
40004012 ScopedContext active (command_queue->get_context ());
40014013
4002- *retEvent = _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_MAP,
4003- command_queue);
4004- (*retEvent)->record ();
4014+ *event = _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_MAP,
4015+ command_queue);
4016+ (*event)->start ();
4017+ (*event)->record ();
40054018 } catch (pi_result error) {
40064019 ret_err = error;
40074020 }
@@ -4018,7 +4031,7 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
40184031 void *mapped_ptr,
40194032 pi_uint32 num_events_in_wait_list,
40204033 const pi_event *event_wait_list,
4021- pi_event *retEvent ) {
4034+ pi_event *event ) {
40224035 pi_result ret_err = PI_SUCCESS;
40234036
40244037 assert (command_queue != nullptr );
@@ -4034,15 +4047,16 @@ pi_result cuda_piEnqueueMemUnmap(pi_queue command_queue, pi_mem memobj,
40344047 command_queue, memobj, true ,
40354048 memobj->mem_ .buffer_mem_ .get_map_offset (mapped_ptr),
40364049 memobj->mem_ .buffer_mem_ .get_size (), mapped_ptr,
4037- num_events_in_wait_list, event_wait_list, retEvent );
4050+ num_events_in_wait_list, event_wait_list, event );
40384051 } else {
4039- if (retEvent ) {
4052+ if (event ) {
40404053 try {
40414054 ScopedContext active (command_queue->get_context ());
40424055
4043- *retEvent = _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
4044- command_queue);
4045- (*retEvent)->record ();
4056+ *event = _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_UNMAP,
4057+ command_queue);
4058+ (*event)->start ();
4059+ (*event)->record ();
40464060 } catch (pi_result error) {
40474061 ret_err = error;
40484062 }
@@ -4155,7 +4169,7 @@ pi_result cuda_piextUSMEnqueueMemset(pi_queue queue, void *ptr, pi_int32 value,
41554169 events_waitlist, nullptr );
41564170 if (event) {
41574171 event_ptr = std::unique_ptr<_pi_event>(
4158- _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_COPY , queue));
4172+ _pi_event::make_native (PI_COMMAND_TYPE_MEM_BUFFER_FILL , queue));
41594173 event_ptr->start ();
41604174 }
41614175 result = PI_CHECK_ERROR (cuMemsetD8Async (
0 commit comments