Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix a bug in resource monitors yielding a deadlock-caused abort #875

Merged
merged 1 commit into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 37 additions & 11 deletions src/libAtomVM/context.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
#define DEFAULT_STACK_SIZE 8
#define BYTES_PER_TERM (TERM_BITS / 8)

static void context_monitors_handle_terminate(Context *ctx);
static struct ResourceMonitor *context_monitors_handle_terminate(Context *ctx);

Context *context_new(GlobalContext *glb)
{
Expand Down Expand Up @@ -123,10 +123,31 @@ void context_destroy(Context *ctx)

// When monitor message is sent, process is no longer in the table
// and is no longer registered either.
context_monitors_handle_terminate(ctx);
struct ResourceMonitor *resource_monitor = context_monitors_handle_terminate(ctx);

synclist_unlock(&ctx->global->processes_table);

// Eventually call resource monitors handlers after the processes table was unlocked
// The monitors were removed from the list of monitors.
if (resource_monitor) {
ErlNifEnv env;
erl_nif_env_partial_init_from_globalcontext(&env, ctx->global);

struct ListHead monitors;
list_prepend(&resource_monitor->base.monitor_list_head, &monitors);

struct ListHead *item;
struct ListHead *tmp;
MUTABLE_LIST_FOR_EACH (item, tmp, &monitors) {
struct Monitor *monitor = GET_LIST_ENTRY(item, struct Monitor, monitor_list_head);
resource_monitor = CONTAINER_OF(monitor, struct ResourceMonitor, base);
void *resource = term_to_term_ptr(monitor->monitor_obj);
struct RefcBinary *refc = refc_binary_from_data(resource);
refc->resource_type->down(&env, resource, &ctx->process_id, &monitor->ref_ticks);
free(monitor);
}
}

// Any other process released our mailbox, so we can clear it.
mailbox_destroy(&ctx->mailbox, &ctx->heap);

Expand Down Expand Up @@ -330,23 +351,27 @@ bool context_get_process_info(Context *ctx, term *out, term atom_key)
return true;
}

static void context_monitors_handle_terminate(Context *ctx)
static struct ResourceMonitor *context_monitors_handle_terminate(Context *ctx)
{
GlobalContext *glb = ctx->global;
struct ListHead *item;
struct ListHead *tmp;
struct ResourceMonitor *result = NULL;
MUTABLE_LIST_FOR_EACH (item, tmp, &ctx->monitors_head) {
struct Monitor *monitor = GET_LIST_ENTRY(item, struct Monitor, monitor_list_head);
if (monitor->ref_ticks && term_is_boxed(monitor->monitor_obj)) {
// Resource monitor
struct ResourceMonitor *resource_monitor = (struct ResourceMonitor *) monitor;
void *resource = term_to_term_ptr(monitor->monitor_obj);
struct RefcBinary *refc = refc_binary_from_data(resource);
ErlNifEnv env;
erl_nif_env_partial_init_from_globalcontext(&env, glb);
refc->resource_type->down(&env, resource, &ctx->process_id, &monitor->ref_ticks);

struct ResourceMonitor *resource_monitor = CONTAINER_OF(monitor, struct ResourceMonitor, base);
// remove the monitor from the list of the resource
list_remove(&resource_monitor->resource_list_head);
list_init(&resource_monitor->resource_list_head);
// remove it from the list we are iterating on
if (result == NULL) {
list_init(&resource_monitor->base.monitor_list_head);
result = resource_monitor;
} else {
list_append(&result->base.monitor_list_head, &resource_monitor->base.monitor_list_head);
}
} else {
int local_process_id = term_to_local_process_id(monitor->monitor_obj);
Context *target = globalcontext_get_process_nolock(glb, local_process_id);
Expand Down Expand Up @@ -400,9 +425,10 @@ static void context_monitors_handle_terminate(Context *ctx)

mailbox_send(target, info_tuple);
}
free(monitor);
}
free(monitor);
}
return result;
}

int context_link(Context *ctx, term link_pid)
Expand Down
171 changes: 171 additions & 0 deletions tests/test-enif.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ static uint32_t cb_read_resource = 0;
static int32_t down_pid = 0;
static ErlNifMonitor down_mon = 0;

static uint32_t cb_read_resource_two = 0;
static int32_t down_pid_two = 0;
static ErlNifMonitor down_mon_two = 0;

static int32_t lockable_pid = 0;

static void resource_dtor(ErlNifEnv *env, void *resource)
{
UNUSED(env);
Expand All @@ -48,6 +54,34 @@ static void resource_down(ErlNifEnv *env, void *resource, ErlNifPid *pid, ErlNif
down_mon = *mon;
}

static void resource_down_two(ErlNifEnv *env, void *resource, ErlNifPid *pid, ErlNifMonitor *mon)
{
UNUSED(env);

cb_read_resource_two = *((uint32_t *) resource);
down_pid_two = *pid;
down_mon_two = *mon;
}

// down handlers should be able to acquire the process tables lock, e.g. to send
// a message.
static void resource_down_acquiring_lock(ErlNifEnv *env, void *resource, ErlNifPid *pid, ErlNifMonitor *mon)
{
UNUSED(env);
UNUSED(resource);
UNUSED(pid);
UNUSED(mon);

Context *target = globalcontext_get_process_lock(env->global, lockable_pid);
assert(target != NULL);

cb_read_resource = *((uint32_t *) resource);
down_pid = *pid;
down_mon = *mon;

globalcontext_get_process_unlock(env->global, target);
}

void test_resource()
{
GlobalContext *glb = globalcontext_new();
Expand Down Expand Up @@ -253,6 +287,141 @@ void test_resource_monitor()
assert(cb_read_resource == 0);
}

void test_resource_monitor_handler_can_lock()
{
GlobalContext *glb = globalcontext_new();
ErlNifEnv env;
erl_nif_env_partial_init_from_globalcontext(&env, glb);

ErlNifResourceTypeInit init;
init.members = 3;
init.dtor = resource_dtor;
init.stop = NULL;
init.down = resource_down_acquiring_lock;
ErlNifResourceFlags flags;

ErlNifResourceType *resource_type = enif_init_resource_type(&env, "test_resource", &init, ERL_NIF_RT_CREATE, &flags);
assert(resource_type != NULL);
assert(flags == ERL_NIF_RT_CREATE);

void *ptr = enif_alloc_resource(resource_type, sizeof(uint32_t));
uint32_t *resource = (uint32_t *) ptr;
*resource = 42;

ErlNifMonitor mon;
Context *ctx;
Context *another_ctx;
int32_t pid;
int monitor_result;

// Monitor called on destroy
cb_read_resource = 0;
down_pid = 0;
down_mon = 0;
ctx = context_new(glb);
another_ctx = context_new(glb);
lockable_pid = another_ctx->process_id;
pid = ctx->process_id;
monitor_result = enif_monitor_process(&env, ptr, &pid, &mon);
assert(monitor_result == 0);
assert(cb_read_resource == 0);

scheduler_terminate(ctx);
assert(cb_read_resource == 42);
assert(down_pid == pid);
assert(enif_compare_monitors(&mon, &down_mon) == 0);

scheduler_terminate(another_ctx);

globalcontext_destroy(glb);
}

void test_resource_monitor_two_resources_two_processes()
{
GlobalContext *glb = globalcontext_new();
ErlNifEnv env;
erl_nif_env_partial_init_from_globalcontext(&env, glb);

ErlNifResourceTypeInit init_1;
init_1.members = 3;
init_1.dtor = resource_dtor;
init_1.stop = NULL;
init_1.down = resource_down;
ErlNifResourceFlags flags;

ErlNifResourceType *resource_type_1 = enif_init_resource_type(&env, "test_resource_1", &init_1, ERL_NIF_RT_CREATE, &flags);
assert(resource_type_1 != NULL);
assert(flags == ERL_NIF_RT_CREATE);

void *ptr_1 = enif_alloc_resource(resource_type_1, sizeof(uint32_t));
uint32_t *resource_1 = (uint32_t *) ptr_1;
*resource_1 = 42;

ErlNifResourceTypeInit init_2;
init_2.members = 3;
init_2.dtor = resource_dtor;
init_2.stop = NULL;
init_2.down = resource_down_two;

ErlNifResourceType *resource_type_2 = enif_init_resource_type(&env, "test_resource_2", &init_2, ERL_NIF_RT_CREATE, &flags);
assert(resource_type_1 != NULL);
assert(flags == ERL_NIF_RT_CREATE);

void *ptr_2 = enif_alloc_resource(resource_type_2, sizeof(uint32_t));
uint32_t *resource_2 = (uint32_t *) ptr_2;
*resource_2 = 43;

ErlNifMonitor mon_1, mon_2, mon_3;
Context *ctx_1;
Context *ctx_2;
int32_t pid_1;
int32_t pid_2;
int monitor_result;

cb_read_resource = 0;
down_pid = 0;
down_pid_two = 0;
down_mon = 0;
down_mon_two = 0;
ctx_1 = context_new(glb);
ctx_2 = context_new(glb);
pid_1 = ctx_1->process_id;
pid_2 = ctx_2->process_id;

// Both resources monitor process 1.
// Resource 1 also monitors process 2.
monitor_result = enif_monitor_process(&env, ptr_1, &pid_1, &mon_1);
assert(monitor_result == 0);
monitor_result = enif_monitor_process(&env, ptr_2, &pid_1, &mon_2);
assert(monitor_result == 0);
monitor_result = enif_monitor_process(&env, ptr_1, &pid_2, &mon_3);
assert(monitor_result == 0);

// Process #1 terminates, mon_1 & mon_2 are fired.
assert(cb_read_resource == 0);
assert(cb_read_resource_two == 0);
scheduler_terminate(ctx_1);
assert(cb_read_resource == 42);
assert(cb_read_resource_two == 43);
assert(down_pid == pid_1);
assert(down_pid_two == pid_1);
assert(enif_compare_monitors(&mon_1, &down_mon) == 0);
assert(enif_compare_monitors(&mon_2, &down_mon_two) == 0);

cb_read_resource = 0;
cb_read_resource_two = 0;
down_pid = 0;
down_mon = 0;

// Process #2 terminates, mon_3 is fired.
scheduler_terminate(ctx_2);
assert(cb_read_resource == 42);
assert(cb_read_resource_two == 0);
assert(down_pid == pid_2);
assert(enif_compare_monitors(&mon_3, &down_mon) == 0);

globalcontext_destroy(glb);
}
int main(int argc, char **argv)
{
UNUSED(argc);
Expand All @@ -262,6 +431,8 @@ int main(int argc, char **argv)
test_resource_destroyed_with_global();
test_resource_keep_release();
test_resource_monitor();
test_resource_monitor_handler_can_lock();
test_resource_monitor_two_resources_two_processes();

return EXIT_SUCCESS;
}