Skip to content

Commit

Permalink
Fix a bug in resource monitors yielding a deadlock-caused abort
Browse files Browse the repository at this point in the history
When monitors down handlers were called, the processes table was locked. Yet,
monitor down handler might uncarefully need to lock this table, for example
to decrement a refc-binary that would be a resource with a down handler.

Signed-off-by: Paul Guyot <pguyot@kallisys.net>
  • Loading branch information
pguyot committed Oct 11, 2023
1 parent 23d24ce commit 88e0068
Show file tree
Hide file tree
Showing 2 changed files with 207 additions and 10 deletions.
46 changes: 36 additions & 10 deletions src/libAtomVM/context.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
#define DEFAULT_STACK_SIZE 8
#define BYTES_PER_TERM (TERM_BITS / 8)

static void context_monitors_handle_terminate(Context *ctx);
static struct ResourceMonitor *context_monitors_handle_terminate(Context *ctx);

Context *context_new(GlobalContext *glb)
{
Expand Down Expand Up @@ -123,10 +123,31 @@ void context_destroy(Context *ctx)

// When monitor message is sent, process is no longer in the table
// and is no longer registered either.
context_monitors_handle_terminate(ctx);
struct ResourceMonitor *resource_monitor = context_monitors_handle_terminate(ctx);

synclist_unlock(&ctx->global->processes_table);

// Eventually call resource monitors handlers after the processes table was unlocked
// The monitors were removed from the list of monitors.
if (resource_monitor) {
ErlNifEnv env;
erl_nif_env_partial_init_from_globalcontext(&env, ctx->global);

struct ListHead monitors;
list_prepend(&resource_monitor->base.monitor_list_head, &monitors);

struct ListHead *item;
struct ListHead *tmp;
MUTABLE_LIST_FOR_EACH (item, tmp, &monitors) {
struct Monitor *monitor = GET_LIST_ENTRY(item, struct Monitor, monitor_list_head);
resource_monitor = (struct ResourceMonitor *) monitor;
void *resource = term_to_term_ptr(monitor->monitor_obj);
struct RefcBinary *refc = refc_binary_from_data(resource);
refc->resource_type->down(&env, resource, &ctx->process_id, &monitor->ref_ticks);
free(monitor);
}
}

// Any other process released our mailbox, so we can clear it.
mailbox_destroy(&ctx->mailbox, &ctx->heap);

Expand Down Expand Up @@ -330,23 +351,27 @@ bool context_get_process_info(Context *ctx, term *out, term atom_key)
return true;
}

static void context_monitors_handle_terminate(Context *ctx)
static struct ResourceMonitor *context_monitors_handle_terminate(Context *ctx)
{
GlobalContext *glb = ctx->global;
struct ListHead *item;
struct ListHead *tmp;
struct ResourceMonitor *result = NULL;
MUTABLE_LIST_FOR_EACH (item, tmp, &ctx->monitors_head) {
struct Monitor *monitor = GET_LIST_ENTRY(item, struct Monitor, monitor_list_head);
if (monitor->ref_ticks && term_is_boxed(monitor->monitor_obj)) {
// Resource monitor
struct ResourceMonitor *resource_monitor = (struct ResourceMonitor *) monitor;
void *resource = term_to_term_ptr(monitor->monitor_obj);
struct RefcBinary *refc = refc_binary_from_data(resource);
ErlNifEnv env;
erl_nif_env_partial_init_from_globalcontext(&env, glb);
refc->resource_type->down(&env, resource, &ctx->process_id, &monitor->ref_ticks);

// remove the monitor from the list of the resource
list_remove(&resource_monitor->resource_list_head);
list_init(&resource_monitor->resource_list_head);
// remove it from the list we are iterating on
if (result == NULL) {
list_init(&resource_monitor->base.monitor_list_head);
result = resource_monitor;
} else {
list_append(&result->base.monitor_list_head, &resource_monitor->base.monitor_list_head);
}
} else {
int local_process_id = term_to_local_process_id(monitor->monitor_obj);
Context *target = globalcontext_get_process_nolock(glb, local_process_id);
Expand Down Expand Up @@ -400,9 +425,10 @@ static void context_monitors_handle_terminate(Context *ctx)

mailbox_send(target, info_tuple);
}
free(monitor);
}
free(monitor);
}
return result;
}

int context_link(Context *ctx, term link_pid)
Expand Down
171 changes: 171 additions & 0 deletions tests/test-enif.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ static uint32_t cb_read_resource = 0;
static int32_t down_pid = 0;
static ErlNifMonitor down_mon = 0;

static uint32_t cb_read_resource_two = 0;
static int32_t down_pid_two = 0;
static ErlNifMonitor down_mon_two = 0;

static int32_t lockable_pid = 0;

static void resource_dtor(ErlNifEnv *env, void *resource)
{
UNUSED(env);
Expand All @@ -48,6 +54,34 @@ static void resource_down(ErlNifEnv *env, void *resource, ErlNifPid *pid, ErlNif
down_mon = *mon;
}

static void resource_down_two(ErlNifEnv *env, void *resource, ErlNifPid *pid, ErlNifMonitor *mon)
{
UNUSED(env);

cb_read_resource_two = *((uint32_t *) resource);
down_pid_two = *pid;
down_mon_two = *mon;
}

// down handlers should be able to acquire the process tables lock, e.g. to send
// a message.
static void resource_down_acquiring_lock(ErlNifEnv *env, void *resource, ErlNifPid *pid, ErlNifMonitor *mon)
{
UNUSED(env);
UNUSED(resource);
UNUSED(pid);
UNUSED(mon);

Context *target = globalcontext_get_process_lock(env->global, lockable_pid);
assert(target != NULL);

cb_read_resource = *((uint32_t *) resource);
down_pid = *pid;
down_mon = *mon;

globalcontext_get_process_unlock(env->global, target);
}

void test_resource()
{
GlobalContext *glb = globalcontext_new();
Expand Down Expand Up @@ -253,6 +287,141 @@ void test_resource_monitor()
assert(cb_read_resource == 0);
}

void test_resource_monitor_handler_can_lock()
{
GlobalContext *glb = globalcontext_new();
ErlNifEnv env;
erl_nif_env_partial_init_from_globalcontext(&env, glb);

ErlNifResourceTypeInit init;
init.members = 3;
init.dtor = resource_dtor;
init.stop = NULL;
init.down = resource_down_acquiring_lock;
ErlNifResourceFlags flags;

ErlNifResourceType *resource_type = enif_init_resource_type(&env, "test_resource", &init, ERL_NIF_RT_CREATE, &flags);
assert(resource_type != NULL);
assert(flags == ERL_NIF_RT_CREATE);

void *ptr = enif_alloc_resource(resource_type, sizeof(uint32_t));
uint32_t *resource = (uint32_t *) ptr;
*resource = 42;

ErlNifMonitor mon;
Context *ctx;
Context *another_ctx;
int32_t pid;
int monitor_result;

// Monitor called on destroy
cb_read_resource = 0;
down_pid = 0;
down_mon = 0;
ctx = context_new(glb);
another_ctx = context_new(glb);
lockable_pid = another_ctx->process_id;
pid = ctx->process_id;
monitor_result = enif_monitor_process(&env, ptr, &pid, &mon);
assert(monitor_result == 0);
assert(cb_read_resource == 0);

scheduler_terminate(ctx);
assert(cb_read_resource == 42);
assert(down_pid == pid);
assert(enif_compare_monitors(&mon, &down_mon) == 0);

scheduler_terminate(another_ctx);

globalcontext_destroy(glb);
}

void test_resource_monitor_two_resources_two_processes()
{
GlobalContext *glb = globalcontext_new();
ErlNifEnv env;
erl_nif_env_partial_init_from_globalcontext(&env, glb);

ErlNifResourceTypeInit init_1;
init_1.members = 3;
init_1.dtor = resource_dtor;
init_1.stop = NULL;
init_1.down = resource_down;
ErlNifResourceFlags flags;

ErlNifResourceType *resource_type_1 = enif_init_resource_type(&env, "test_resource_1", &init_1, ERL_NIF_RT_CREATE, &flags);
assert(resource_type_1 != NULL);
assert(flags == ERL_NIF_RT_CREATE);

void *ptr_1 = enif_alloc_resource(resource_type_1, sizeof(uint32_t));
uint32_t *resource_1 = (uint32_t *) ptr_1;
*resource_1 = 42;

ErlNifResourceTypeInit init_2;
init_2.members = 3;
init_2.dtor = resource_dtor;
init_2.stop = NULL;
init_2.down = resource_down_two;

ErlNifResourceType *resource_type_2 = enif_init_resource_type(&env, "test_resource_2", &init_2, ERL_NIF_RT_CREATE, &flags);
assert(resource_type_1 != NULL);
assert(flags == ERL_NIF_RT_CREATE);

void *ptr_2 = enif_alloc_resource(resource_type_2, sizeof(uint32_t));
uint32_t *resource_2 = (uint32_t *) ptr_2;
*resource_2 = 43;

ErlNifMonitor mon_1, mon_2, mon_3;
Context *ctx_1;
Context *ctx_2;
int32_t pid_1;
int32_t pid_2;
int monitor_result;

cb_read_resource = 0;
down_pid = 0;
down_pid_two = 0;
down_mon = 0;
down_mon_two = 0;
ctx_1 = context_new(glb);
ctx_2 = context_new(glb);
pid_1 = ctx_1->process_id;
pid_2 = ctx_2->process_id;

// Both resources monitor process 1.
// Resource 1 also monitors process 2.
monitor_result = enif_monitor_process(&env, ptr_1, &pid_1, &mon_1);
assert(monitor_result == 0);
monitor_result = enif_monitor_process(&env, ptr_2, &pid_1, &mon_2);
assert(monitor_result == 0);
monitor_result = enif_monitor_process(&env, ptr_1, &pid_2, &mon_3);
assert(monitor_result == 0);

// Process #1 terminates, mon_1 & mon_2 are fired.
assert(cb_read_resource == 0);
assert(cb_read_resource_two == 0);
scheduler_terminate(ctx_1);
assert(cb_read_resource == 42);
assert(cb_read_resource_two == 43);
assert(down_pid == pid_1);
assert(down_pid_two == pid_1);
assert(enif_compare_monitors(&mon_1, &down_mon) == 0);
assert(enif_compare_monitors(&mon_2, &down_mon_two) == 0);

cb_read_resource = 0;
cb_read_resource_two = 0;
down_pid = 0;
down_mon = 0;

// Process #2 terminates, mon_3 is fired.
scheduler_terminate(ctx_2);
assert(cb_read_resource == 42);
assert(cb_read_resource_two == 0);
assert(down_pid == pid_2);
assert(enif_compare_monitors(&mon_3, &down_mon) == 0);

globalcontext_destroy(glb);
}
int main(int argc, char **argv)
{
UNUSED(argc);
Expand All @@ -262,6 +431,8 @@ int main(int argc, char **argv)
test_resource_destroyed_with_global();
test_resource_keep_release();
test_resource_monitor();
test_resource_monitor_handler_can_lock();
test_resource_monitor_two_resources_two_processes();

return EXIT_SUCCESS;
}

0 comments on commit 88e0068

Please sign in to comment.