From d2b21da1f19094e8789478bc19679647e5d47522 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 12 Oct 2023 00:31:07 +0200 Subject: [PATCH] Fix a bug in resource monitors yielding a deadlock-caused abort When monitor down handlers were called, the processes table was locked. Yet, a monitor down handler might legitimately need to lock this table, for example to decrement a refc-binary that is itself a resource with a down handler. Signed-off-by: Paul Guyot --- src/libAtomVM/context.c | 48 ++++++++--- tests/test-enif.c | 171 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 208 insertions(+), 11 deletions(-) diff --git a/src/libAtomVM/context.c b/src/libAtomVM/context.c index 0b09192f3..01ea786a6 100644 --- a/src/libAtomVM/context.c +++ b/src/libAtomVM/context.c @@ -42,7 +42,7 @@ #define DEFAULT_STACK_SIZE 8 #define BYTES_PER_TERM (TERM_BITS / 8) -static void context_monitors_handle_terminate(Context *ctx); +static struct ResourceMonitor *context_monitors_handle_terminate(Context *ctx); Context *context_new(GlobalContext *glb) { @@ -123,10 +123,31 @@ void context_destroy(Context *ctx) // When monitor message is sent, process is no longer in the table // and is no longer registered either. - context_monitors_handle_terminate(ctx); + struct ResourceMonitor *resource_monitor = context_monitors_handle_terminate(ctx); synclist_unlock(&ctx->global->processes_table); + // Call resource monitor down handlers after the processes table was unlocked + // The monitors were removed from the list of monitors.
+ if (resource_monitor) { + ErlNifEnv env; + erl_nif_env_partial_init_from_globalcontext(&env, ctx->global); + + struct ListHead monitors; + list_prepend(&resource_monitor->base.monitor_list_head, &monitors); + + struct ListHead *item; + struct ListHead *tmp; + MUTABLE_LIST_FOR_EACH (item, tmp, &monitors) { + struct Monitor *monitor = GET_LIST_ENTRY(item, struct Monitor, monitor_list_head); + resource_monitor = CONTAINER_OF(monitor, struct ResourceMonitor, base); + void *resource = term_to_term_ptr(monitor->monitor_obj); + struct RefcBinary *refc = refc_binary_from_data(resource); + refc->resource_type->down(&env, resource, &ctx->process_id, &monitor->ref_ticks); + free(monitor); + } + } + // Any other process released our mailbox, so we can clear it. mailbox_destroy(&ctx->mailbox, &ctx->heap); @@ -330,23 +351,27 @@ bool context_get_process_info(Context *ctx, term *out, term atom_key) return true; } -static void context_monitors_handle_terminate(Context *ctx) +static struct ResourceMonitor *context_monitors_handle_terminate(Context *ctx) { GlobalContext *glb = ctx->global; struct ListHead *item; struct ListHead *tmp; + struct ResourceMonitor *result = NULL; MUTABLE_LIST_FOR_EACH (item, tmp, &ctx->monitors_head) { struct Monitor *monitor = GET_LIST_ENTRY(item, struct Monitor, monitor_list_head); if (monitor->ref_ticks && term_is_boxed(monitor->monitor_obj)) { // Resource monitor - struct ResourceMonitor *resource_monitor = (struct ResourceMonitor *) monitor; - void *resource = term_to_term_ptr(monitor->monitor_obj); - struct RefcBinary *refc = refc_binary_from_data(resource); - ErlNifEnv env; - erl_nif_env_partial_init_from_globalcontext(&env, glb); - refc->resource_type->down(&env, resource, &ctx->process_id, &monitor->ref_ticks); - + struct ResourceMonitor *resource_monitor = CONTAINER_OF(monitor, struct ResourceMonitor, base); + // remove the monitor from the list of the resource list_remove(&resource_monitor->resource_list_head); +
list_init(&resource_monitor->resource_list_head); + // remove it from the list we are iterating on + if (result == NULL) { + list_init(&resource_monitor->base.monitor_list_head); + result = resource_monitor; + } else { + list_append(&result->base.monitor_list_head, &resource_monitor->base.monitor_list_head); + } } else { int local_process_id = term_to_local_process_id(monitor->monitor_obj); Context *target = globalcontext_get_process_nolock(glb, local_process_id); @@ -400,9 +425,10 @@ static void context_monitors_handle_terminate(Context *ctx) mailbox_send(target, info_tuple); } + free(monitor); } - free(monitor); } + return result; } int context_link(Context *ctx, term link_pid) diff --git a/tests/test-enif.c b/tests/test-enif.c index 32376e6a7..f9c2cba4b 100644 --- a/tests/test-enif.c +++ b/tests/test-enif.c @@ -32,6 +32,12 @@ static uint32_t cb_read_resource = 0; static int32_t down_pid = 0; static ErlNifMonitor down_mon = 0; +static uint32_t cb_read_resource_two = 0; +static int32_t down_pid_two = 0; +static ErlNifMonitor down_mon_two = 0; + +static int32_t lockable_pid = 0; + static void resource_dtor(ErlNifEnv *env, void *resource) { UNUSED(env); @@ -48,6 +54,34 @@ static void resource_down(ErlNifEnv *env, void *resource, ErlNifPid *pid, ErlNif down_mon = *mon; } +static void resource_down_two(ErlNifEnv *env, void *resource, ErlNifPid *pid, ErlNifMonitor *mon) +{ + UNUSED(env); + + cb_read_resource_two = *((uint32_t *) resource); + down_pid_two = *pid; + down_mon_two = *mon; +} + +// down handlers should be able to acquire the processes table lock, e.g. to send +// a message.
+static void resource_down_acquiring_lock(ErlNifEnv *env, void *resource, ErlNifPid *pid, ErlNifMonitor *mon) +{ + UNUSED(env); + UNUSED(resource); + UNUSED(pid); + UNUSED(mon); + + Context *target = globalcontext_get_process_lock(env->global, lockable_pid); + assert(target != NULL); + + cb_read_resource = *((uint32_t *) resource); + down_pid = *pid; + down_mon = *mon; + + globalcontext_get_process_unlock(env->global, target); +} + void test_resource() { GlobalContext *glb = globalcontext_new(); @@ -253,6 +287,141 @@ void test_resource_monitor() assert(cb_read_resource == 0); } +void test_resource_monitor_handler_can_lock() +{ + GlobalContext *glb = globalcontext_new(); + ErlNifEnv env; + erl_nif_env_partial_init_from_globalcontext(&env, glb); + + ErlNifResourceTypeInit init; + init.members = 3; + init.dtor = resource_dtor; + init.stop = NULL; + init.down = resource_down_acquiring_lock; + ErlNifResourceFlags flags; + + ErlNifResourceType *resource_type = enif_init_resource_type(&env, "test_resource", &init, ERL_NIF_RT_CREATE, &flags); + assert(resource_type != NULL); + assert(flags == ERL_NIF_RT_CREATE); + + void *ptr = enif_alloc_resource(resource_type, sizeof(uint32_t)); + uint32_t *resource = (uint32_t *) ptr; + *resource = 42; + + ErlNifMonitor mon; + Context *ctx; + Context *another_ctx; + int32_t pid; + int monitor_result; + + // Monitor called on destroy + cb_read_resource = 0; + down_pid = 0; + down_mon = 0; + ctx = context_new(glb); + another_ctx = context_new(glb); + lockable_pid = another_ctx->process_id; + pid = ctx->process_id; + monitor_result = enif_monitor_process(&env, ptr, &pid, &mon); + assert(monitor_result == 0); + assert(cb_read_resource == 0); + + scheduler_terminate(ctx); + assert(cb_read_resource == 42); + assert(down_pid == pid); + assert(enif_compare_monitors(&mon, &down_mon) == 0); + + scheduler_terminate(another_ctx); + + globalcontext_destroy(glb); +} + +void test_resource_monitor_two_resources_two_processes() +{ + GlobalContext *glb =
globalcontext_new(); + ErlNifEnv env; + erl_nif_env_partial_init_from_globalcontext(&env, glb); + + ErlNifResourceTypeInit init_1; + init_1.members = 3; + init_1.dtor = resource_dtor; + init_1.stop = NULL; + init_1.down = resource_down; + ErlNifResourceFlags flags; + + ErlNifResourceType *resource_type_1 = enif_init_resource_type(&env, "test_resource_1", &init_1, ERL_NIF_RT_CREATE, &flags); + assert(resource_type_1 != NULL); + assert(flags == ERL_NIF_RT_CREATE); + + void *ptr_1 = enif_alloc_resource(resource_type_1, sizeof(uint32_t)); + uint32_t *resource_1 = (uint32_t *) ptr_1; + *resource_1 = 42; + + ErlNifResourceTypeInit init_2; + init_2.members = 3; + init_2.dtor = resource_dtor; + init_2.stop = NULL; + init_2.down = resource_down_two; + + ErlNifResourceType *resource_type_2 = enif_init_resource_type(&env, "test_resource_2", &init_2, ERL_NIF_RT_CREATE, &flags); + assert(resource_type_1 != NULL); + assert(flags == ERL_NIF_RT_CREATE); + + void *ptr_2 = enif_alloc_resource(resource_type_2, sizeof(uint32_t)); + uint32_t *resource_2 = (uint32_t *) ptr_2; + *resource_2 = 43; + + ErlNifMonitor mon_1, mon_2, mon_3; + Context *ctx_1; + Context *ctx_2; + int32_t pid_1; + int32_t pid_2; + int monitor_result; + + cb_read_resource = 0; + down_pid = 0; + down_pid_two = 0; + down_mon = 0; + down_mon_two = 0; + ctx_1 = context_new(glb); + ctx_2 = context_new(glb); + pid_1 = ctx_1->process_id; + pid_2 = ctx_2->process_id; + + // Both resources monitor process 1. + // Resource 1 also monitors process 2. + monitor_result = enif_monitor_process(&env, ptr_1, &pid_1, &mon_1); + assert(monitor_result == 0); + monitor_result = enif_monitor_process(&env, ptr_2, &pid_1, &mon_2); + assert(monitor_result == 0); + monitor_result = enif_monitor_process(&env, ptr_1, &pid_2, &mon_3); + assert(monitor_result == 0); + + // Process #1 terminates, mon_1 & mon_2 are fired.
+ assert(cb_read_resource == 0); + assert(cb_read_resource_two == 0); + scheduler_terminate(ctx_1); + assert(cb_read_resource == 42); + assert(cb_read_resource_two == 43); + assert(down_pid == pid_1); + assert(down_pid_two == pid_1); + assert(enif_compare_monitors(&mon_1, &down_mon) == 0); + assert(enif_compare_monitors(&mon_2, &down_mon_two) == 0); + + cb_read_resource = 0; + cb_read_resource_two = 0; + down_pid = 0; + down_mon = 0; + + // Process #2 terminates, mon_3 is fired. + scheduler_terminate(ctx_2); + assert(cb_read_resource == 42); + assert(cb_read_resource_two == 0); + assert(down_pid == pid_2); + assert(enif_compare_monitors(&mon_3, &down_mon) == 0); + + globalcontext_destroy(glb); +} int main(int argc, char **argv) { UNUSED(argc); @@ -262,6 +431,8 @@ int main(int argc, char **argv) test_resource_destroyed_with_global(); test_resource_keep_release(); test_resource_monitor(); + test_resource_monitor_handler_can_lock(); + test_resource_monitor_two_resources_two_processes(); return EXIT_SUCCESS; }