From 695a6f07053b3f7da3d320ea766259f32e8542a3 Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Thu, 11 Mar 2021 04:44:20 -0600 Subject: [PATCH 1/2] DAOS-6958 event: proper event handling in case of progress errors (#4911) crt_progress can sometimes return errors that are not timeout. Those are not properly handled in synchronous IO calls and cause the private event to be in an unusable state for following IOs. This PR checks the error from cart and reinits the private event in case an error was returned so it can be reused rather than returning to the user. Signed-off-by: Mohamad Chaarawi --- src/client/api/event.c | 24 +++++++++++++++++++++--- src/client/api/task.c | 6 +++++- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/client/api/event.c b/src/client/api/event.c index a88bccd826e..228f4f1bc83 100644 --- a/src/client/api/event.c +++ b/src/client/api/event.c @@ -1218,6 +1218,7 @@ daos_event_priv_get(daos_event_t **ev) if (evx->evx_status != DAOS_EVS_READY) { D_CRIT("private event is inuse, status=%d\n", evx->evx_status); + return -DER_BUSY; } *ev = &ev_thpriv; return 0; @@ -1243,12 +1244,29 @@ daos_event_priv_wait() /* Wait on the event to complete */ while (evx->evx_status != DAOS_EVS_READY) { + int rc2; + rc = crt_progress_cond(evx->evx_ctx, 0, ev_progress_cb, &epa); - if (rc == 0) + + /** progress succeeded, loop can exit if event completed */ + if (rc == 0) { rc = ev_thpriv.ev_error; + continue; + } - if (rc && rc != -DER_TIMEDOUT) - break; + /** progress timeout, try calling progress again */ + if (rc == -DER_TIMEDOUT) + continue; + + /* + * other progress failure; op should fail with that err. reset + * the private event first so it can be resused. 
+ */ rc2 = daos_event_priv_reset(); + if (rc2) + return rc2; + ev_thpriv_is_init = true; + break; } return rc; } diff --git a/src/client/api/task.c b/src/client/api/task.c index e64073c9f57..d6685080015 100644 --- a/src/client/api/task.c +++ b/src/client/api/task.c @@ -119,7 +119,11 @@ dc_task_schedule(tse_task_t *task, bool instant) out: if (daos_event_is_priv(ev)) { - daos_event_priv_wait(); + int rc2; + + rc2 = daos_event_priv_wait(); + if (rc2) + return rc2; rc = ev->ev_error; } return rc; From 3a64ff36d82f3d32627a3e12054d210ea37b1fcc Mon Sep 17 00:00:00 2001 From: Mohamad Chaarawi Date: Fri, 12 Mar 2021 13:57:05 +0000 Subject: [PATCH 2/2] DAOS-7015 event: proper event handling in case of progress errors - exit loop if event error is reported Signed-off-by: Mohamad Chaarawi --- src/client/api/event.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/client/api/event.c b/src/client/api/event.c index 228f4f1bc83..c001444a3a6 100644 --- a/src/client/api/event.c +++ b/src/client/api/event.c @@ -1251,6 +1251,8 @@ daos_event_priv_wait() /** progress succeeded, loop can exit if event completed */ if (rc == 0) { rc = ev_thpriv.ev_error; + if (rc) + break; continue; } @@ -1263,8 +1265,7 @@ daos_event_priv_wait() * the private event first so it can be resused. */ rc2 = daos_event_priv_reset(); - if (rc2) - return rc2; + D_ASSERT(rc2 == 0); ev_thpriv_is_init = true; break; }