From 46a4e3814b21a65e63c0500b36d140f1be1cf011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=BD=D0=B0=D0=B1?= Date: Sat, 1 May 2021 16:10:45 +0200 Subject: [PATCH] zed: protect against wait4()/fork() races to the global PID table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be very easily triggered by adding a sleep(1) before the wait4() on a PID-starved system: the reaper thread would wait for a child before its entry appeared, letting old entries accumulate: Invoking "all-debug.sh" eid=3021 pid=391 Finished "(null)" eid=0 pid=391 time=0.002432s exit=0 Invoking "all-syslog.sh" eid=3021 pid=336 Finished "(null)" eid=0 pid=336 time=0.002432s exit=0 Invoking "history_event-zfs-list-cacher.sh" eid=3021 pid=347 Invoking "all-debug.sh" eid=3022 pid=349 Finished "history_event-zfs-list-cacher.sh" eid=3021 pid=347 time=0.001669s exit=0 Finished "(null)" eid=0 pid=349 time=0.002404s exit=0 Invoking "all-syslog.sh" eid=3022 pid=370 Finished "(null)" eid=0 pid=370 time=0.002427s exit=0 Invoking "history_event-zfs-list-cacher.sh" eid=3022 pid=391 avl_find(tree, new_node, &where) == NULL ASSERT at ../../module/avl/avl.c:641:avl_add() Thread 1 "zed" received signal SIGABRT, Aborted. By employing this wider lock, we atomise [wait, remove] and [fork, add]: slowing down the reaper thread now just causes some zombies to accumulate until it can get to them Signed-off-by: Ahelenia ZiemiaƄska Closes #11963 --- cmd/zed/zed_exec.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cmd/zed/zed_exec.c b/cmd/zed/zed_exec.c index 9b0775a7469e..0a6782f0205e 100644 --- a/cmd/zed/zed_exec.c +++ b/cmd/zed/zed_exec.c @@ -142,8 +142,10 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog, prog, eid, strerror(ENAMETOOLONG)); return; } + (void) pthread_mutex_lock(&_launched_processes_lock); pid = fork(); if (pid < 0) { + (void) pthread_mutex_unlock(&_launched_processes_lock); zed_log_msg(LOG_WARNING, "Failed to fork \"%s\" for eid=%llu: %s", prog, eid, strerror(errno)); @@ -166,20 +168,19 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog, /* parent process */ - __atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST); - zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d", - prog, eid, pid); - node = calloc(1, sizeof (*node)); if (node) { node->pid = pid; node->eid = eid; node->name = strdup(prog); - (void) pthread_mutex_lock(&_launched_processes_lock); avl_add(&_launched_processes, node); - (void) pthread_mutex_unlock(&_launched_processes_lock); } + (void) pthread_mutex_unlock(&_launched_processes_lock); + + __atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST); + zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d", + prog, eid, pid); } static void