diff --git a/cmd/zed/agents/zfs_agents.c b/cmd/zed/agents/zfs_agents.c index 67b7951b0e65..35dd818ff80d 100644 --- a/cmd/zed/agents/zfs_agents.c +++ b/cmd/zed/agents/zfs_agents.c @@ -392,6 +392,7 @@ zfs_agent_init(libzfs_handle_t *zfs_hdl) list_destroy(&agent_events); zed_log_die("Failed to initialize agents"); } + pthread_setname_np(g_agents_tid, "agents"); } void diff --git a/cmd/zed/agents/zfs_mod.c b/cmd/zed/agents/zfs_mod.c index 4a58e1f1dbd3..b564f2b12a26 100644 --- a/cmd/zed/agents/zfs_mod.c +++ b/cmd/zed/agents/zfs_mod.c @@ -918,6 +918,7 @@ zfs_slm_init() return (-1); } + pthread_setname_np(g_zfs_tid, "enum-pools"); list_create(&g_device_list, sizeof (struct pendingdev), offsetof(struct pendingdev, pd_node)); diff --git a/cmd/zed/zed.c b/cmd/zed/zed.c index 907b8af0d01f..349e4d01b135 100644 --- a/cmd/zed/zed.c +++ b/cmd/zed/zed.c @@ -60,8 +60,8 @@ _setup_sig_handlers(void) zed_log_die("Failed to initialize sigset"); sa.sa_flags = SA_RESTART; - sa.sa_handler = SIG_IGN; + sa.sa_handler = SIG_IGN; if (sigaction(SIGPIPE, &sa, NULL) < 0) zed_log_die("Failed to ignore SIGPIPE"); @@ -75,6 +75,10 @@ _setup_sig_handlers(void) sa.sa_handler = _hup_handler; if (sigaction(SIGHUP, &sa, NULL) < 0) zed_log_die("Failed to register SIGHUP handler"); + + (void) sigaddset(&sa.sa_mask, SIGCHLD); + if (pthread_sigmask(SIG_BLOCK, &sa.sa_mask, NULL) < 0) + zed_log_die("Failed to block SIGCHLD"); } /* diff --git a/cmd/zed/zed_conf.c b/cmd/zed/zed_conf.c index c15f01fecb41..44ccc2557b2d 100644 --- a/cmd/zed/zed_conf.c +++ b/cmd/zed/zed_conf.c @@ -51,6 +51,7 @@ zed_conf_create(void) zcp->state_fd = -1; /* opened via zed_conf_open_state() */ zcp->zfs_hdl = NULL; /* opened via zed_event_init() */ zcp->zevent_fd = -1; /* opened via zed_event_init() */ + zcp->max_jobs = 16; if (!(zcp->conf_file = strdup(ZED_CONF_FILE))) goto nomem; @@ -172,6 +173,8 @@ _zed_conf_display_help(const char *prog, int got_err) "Write daemon's PID to FILE.", ZED_PID_FILE); fprintf(fp, "%*c%*s %s [%s]\n", w1, 0x20, -w2, "-s FILE", "Write daemon's state to FILE.", ZED_STATE_FILE); + fprintf(fp, "%*c%*s %s [%d]\n", w1, 0x20, -w2, "-j JOBS", + "Start at most JOBS at once.", 16); fprintf(fp, "\n"); exit(got_err ? EXIT_FAILURE : EXIT_SUCCESS); @@ -251,8 +254,9 @@ _zed_conf_parse_path(char **resultp, const char *path) void zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv) { - const char * const opts = ":hLVc:d:p:P:s:vfFMZI"; + const char * const opts = ":hLVc:d:p:P:s:vfFMZIj:"; int opt; + unsigned long raw; if (!zcp || !argv || !argv[0]) zed_log_die("Failed to parse options: Internal error"); @@ -303,6 +307,17 @@ zed_conf_parse_opts(struct zed_conf *zcp, int argc, char **argv) case 'Z': zcp->do_zero = 1; break; + case 'j': + errno = 0; + raw = strtoul(optarg, NULL, 0); + if (errno == ERANGE || raw > INT16_MAX) { + zed_log_die("%lu is too many jobs", raw); + } if (raw == 0) { + zed_log_die("0 jobs makes no sense"); + } else { + zcp->max_jobs = raw; + } + break; case '?': default: if (optopt == '?') diff --git a/cmd/zed/zed_conf.h b/cmd/zed/zed_conf.h index f44d20382968..7599af46b11e 100644 --- a/cmd/zed/zed_conf.h +++ b/cmd/zed/zed_conf.h @@ -39,6 +39,7 @@ struct zed_conf { libzfs_handle_t *zfs_hdl; /* handle to libzfs */ int zevent_fd; /* fd for access to zevents */ char *path; /* custom $PATH for zedlets to use */ + int16_t max_jobs; /* max zedlets to run at one time */ }; struct zed_conf *zed_conf_create(void); diff --git a/cmd/zed/zed_disk_event.c b/cmd/zed/zed_disk_event.c index 174d24523253..6ec566cff3ca 100644 --- a/cmd/zed/zed_disk_event.c +++ b/cmd/zed/zed_disk_event.c @@ -379,6 +379,7 @@ zed_disk_event_init() return (-1); } + pthread_setname_np(g_mon_tid, "udev monitor"); zed_log_msg(LOG_INFO, "zed_disk_event_init"); return (0); diff --git a/cmd/zed/zed_event.c b/cmd/zed/zed_event.c index 8892087d6e62..6d746b7b139a 100644 --- a/cmd/zed/zed_event.c +++ b/cmd/zed/zed_event.c @@ -15,7 +15,7 @@ #include #include #include -#include /* FIXME: Replace with libzfs_core. */ +#include #include #include #include @@ -96,6 +96,8 @@ zed_event_fini(struct zed_conf *zcp) libzfs_fini(zcp->zfs_hdl); zcp->zfs_hdl = NULL; } + + zed_exec_fini(); } /* @@ -953,8 +955,7 @@ zed_event_service(struct zed_conf *zcp) _zed_event_add_time_strings(eid, zsp, etime); - zed_exec_process(eid, class, subclass, - zcp->zedlet_dir, zcp->zedlets, zsp, zcp->zevent_fd); + zed_exec_process(eid, class, subclass, zcp, zsp); zed_conf_write_state(zcp, eid, etime); diff --git a/cmd/zed/zed_exec.c b/cmd/zed/zed_exec.c index e8f510213868..1fbac429683b 100644 --- a/cmd/zed/zed_exec.c +++ b/cmd/zed/zed_exec.c @@ -18,10 +18,13 @@ #include #include #include +#include +#include #include #include #include #include +#include #include "zed_exec.h" #include "zed_file.h" #include "zed_log.h" @@ -29,6 +32,39 @@ #define ZEVENT_FILENO 3 +struct launched_process_node { + avl_node_t node; + pid_t pid; + uint64_t eid; + char *name; +}; + +static int +_launched_process_node_compare(const void *x1, const void *x2) +{ + pid_t p1; + pid_t p2; + + assert(x1 != NULL); + assert(x2 != NULL); + + p1 = ((const struct launched_process_node *) x1)->pid; + p2 = ((const struct launched_process_node *) x2)->pid; + + if (p1 < p2) + return (-1); + else if (p1 == p2) + return (0); + else + return (1); +} + +static pthread_t _reap_children_tid = (pthread_t)-1; +static volatile boolean_t _reap_children_stop; +static avl_tree_t _launched_processes; +static pthread_mutex_t _launched_processes_lock = PTHREAD_MUTEX_INITIALIZER; +static int16_t _launched_processes_limit; + /* * Create an environment string array for passing to execve() using the * NAME=VALUE strings in container [zsp]. @@ -85,14 +121,20 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog, int n; pid_t pid; int fd; - pid_t wpid; - int status; + struct launched_process_node *node; + sigset_t mask; + struct timespec launch_timeout = + { .tv_sec = 0, .tv_nsec = 200 * 1000 * 1000, }; assert(dir != NULL); assert(prog != NULL); assert(env != NULL); assert(zfd >= 0); + while (__atomic_load_n(&_launched_processes_limit, + __ATOMIC_SEQ_CST) <= 0) + (void) nanosleep(&launch_timeout, NULL); + n = snprintf(path, sizeof (path), "%s/%s", dir, prog); if ((n < 0) || (n >= sizeof (path))) { zed_log_msg(LOG_WARNING, @@ -107,6 +149,9 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog, prog, eid, strerror(errno)); return; } else if (pid == 0) { + (void) sigemptyset(&mask); + (void) sigprocmask(SIG_SETMASK, &mask, NULL); + (void) umask(022); if ((fd = open("/dev/null", O_RDWR)) != -1) { (void) dup2(fd, STDIN_FILENO); @@ -121,80 +166,135 @@ _zed_exec_fork_child(uint64_t eid, const char *dir, const char *prog, /* parent process */ + __atomic_sub_fetch(&_launched_processes_limit, 1, __ATOMIC_SEQ_CST); zed_log_msg(LOG_INFO, "Invoking \"%s\" eid=%llu pid=%d", prog, eid, pid); - /* FIXME: Timeout rogue child processes with sigalarm? */ - - /* - * Wait for child process using WNOHANG to limit - * the time spent waiting to 10 seconds (10,000ms). - */ - for (n = 0; n < 1000; n++) { - wpid = waitpid(pid, &status, WNOHANG); - if (wpid == (pid_t)-1) { - if (errno == EINTR) - continue; - zed_log_msg(LOG_WARNING, - "Failed to wait for \"%s\" eid=%llu pid=%d", - prog, eid, pid); - break; - } else if (wpid == 0) { - struct timespec t; - - /* child still running */ - t.tv_sec = 0; - t.tv_nsec = 10000000; /* 10ms */ - (void) nanosleep(&t, NULL); - continue; - } + node = calloc(1, sizeof (*node)); + if (node) { + node->pid = pid; + node->eid = eid; + node->name = strdup(prog); + (void) pthread_mutex_lock(&_launched_processes_lock); + avl_add(&_launched_processes, node); + (void) pthread_mutex_unlock(&_launched_processes_lock); + } +} + +static void +_nop(int sig) +{} + +static void * +_reap_children(void *arg) +{ + struct launched_process_node node, *pnode; + pid_t pid; + int status; + struct sigaction sa = {}; + + (void) sigfillset(&sa.sa_mask); + (void) sigdelset(&sa.sa_mask, SIGCHLD); + (void) pthread_sigmask(SIG_SETMASK, &sa.sa_mask, NULL); + + (void) sigemptyset(&sa.sa_mask); + sa.sa_handler = _nop; + sa.sa_flags = SA_NOCLDSTOP; + (void) sigaction(SIGCHLD, &sa, NULL); - if (WIFEXITED(status)) { - zed_log_msg(LOG_INFO, - "Finished \"%s\" eid=%llu pid=%d exit=%d", - prog, eid, pid, WEXITSTATUS(status)); - } else if (WIFSIGNALED(status)) { - zed_log_msg(LOG_INFO, - "Finished \"%s\" eid=%llu pid=%d sig=%d/%s", - prog, eid, pid, WTERMSIG(status), - strsignal(WTERMSIG(status))); + for (_reap_children_stop = B_FALSE; !_reap_children_stop; ) { + pid = waitpid(0, &status, 0); + + if (pid == (pid_t)-1) { + if (errno == ECHILD) + pause(); + else if (errno != EINTR) + zed_log_msg(LOG_WARNING, + "Failed to wait for children: %s", + strerror(errno)); } else { - zed_log_msg(LOG_INFO, - "Finished \"%s\" eid=%llu pid=%d status=0x%X", - prog, eid, (unsigned int) status); + memset(&node, 0, sizeof (node)); + node.pid = pid; + (void) pthread_mutex_lock(&_launched_processes_lock); + pnode = avl_find(&_launched_processes, &node, NULL); + if (pnode) { + memcpy(&node, pnode, sizeof (node)); + + avl_remove(&_launched_processes, pnode); + free(pnode); + } + (void) pthread_mutex_unlock(&_launched_processes_lock); + __atomic_add_fetch(&_launched_processes_limit, 1, + __ATOMIC_SEQ_CST); + + if (WIFEXITED(status)) { + zed_log_msg(LOG_INFO, + "Finished \"%s\" eid=%llu pid=%d exit=%d", + node.name, node.eid, pid, + WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + zed_log_msg(LOG_INFO, + "Finished \"%s\" eid=%llu pid=%d sig=%d/%s", + node.name, node.eid, pid, WTERMSIG(status), + strsignal(WTERMSIG(status))); + } else { + zed_log_msg(LOG_INFO, + "Finished \"%s\" eid=%llu pid=%d " + "status=0x%X", + node.name, node.eid, (unsigned int) status); + } + + free(node.name); } - break; } - /* - * kill child process after 10 seconds - */ - if (wpid == 0) { - zed_log_msg(LOG_WARNING, "Killing hung \"%s\" pid=%d", - prog, pid); - (void) kill(pid, SIGKILL); - (void) waitpid(pid, &status, 0); + return (NULL); +} + +void +zed_exec_fini(void) +{ + struct launched_process_node *node; + void *ck = NULL; + + if (_reap_children_tid == (pthread_t)-1) + return; + + _reap_children_stop = B_TRUE; + (void) pthread_kill(_reap_children_tid, SIGCHLD); + (void) pthread_join(_reap_children_tid, NULL); + + while ((node = avl_destroy_nodes(&_launched_processes, &ck)) != NULL) { + free(node->name); + free(node); } + avl_destroy(&_launched_processes); + + (void) pthread_mutex_destroy(&_launched_processes_lock); + (void) pthread_mutex_init(&_launched_processes_lock, NULL); + + _reap_children_tid = (pthread_t)-1; } /* * Process the event [eid] by synchronously invoking all zedlets with a * matching class prefix. * - * Each executable in [zedlets] from the directory [dir] is matched against - * the event's [class], [subclass], and the "all" class (which matches - * all events). Every zedlet with a matching class prefix is invoked. + * Each executable in [zcp->zedlets] from the directory [zcp->zedlet_dir] + * is matched against the event's [class], [subclass], and the "all" class + * (which matches all events). + * Every zedlet with a matching class prefix is invoked. * The NAME=VALUE strings in [envs] will be passed to the zedlet as * environment variables. * - * The file descriptor [zfd] is the zevent_fd used to track the + * The file descriptor [zcp->zevent_fd] is the zevent_fd used to track the * current cursor location within the zevent nvlist. * * Return 0 on success, -1 on error. */ int zed_exec_process(uint64_t eid, const char *class, const char *subclass, - const char *dir, zed_strings_t *zedlets, zed_strings_t *envs, int zfd) + struct zed_conf *zcp, zed_strings_t *envs) { const char *class_strings[4]; const char *allclass = "all"; @@ -203,9 +303,22 @@ zed_exec_process(uint64_t eid, const char *class, const char *subclass, char **e; int n; - if (!dir || !zedlets || !envs || zfd < 0) + if (!zcp->zedlet_dir || !zcp->zedlets || !envs || zcp->zevent_fd < 0) return (-1); + if (_reap_children_tid == (pthread_t)-1) { + _launched_processes_limit = zcp->max_jobs; + + if (pthread_create(&_reap_children_tid, NULL, + _reap_children, NULL) != 0) + return (-1); + pthread_setname_np(_reap_children_tid, "reap ZEDLETs"); + + avl_create(&_launched_processes, _launched_process_node_compare, + sizeof (struct launched_process_node), + offsetof(struct launched_process_node, node)); + } + csp = class_strings; if (class) @@ -221,11 +334,13 @@ zed_exec_process(uint64_t eid, const char *class, const char *subclass, e = _zed_exec_create_env(envs); - for (z = zed_strings_first(zedlets); z; z = zed_strings_next(zedlets)) { + for (z = zed_strings_first(zcp->zedlets); z; + z = zed_strings_next(zcp->zedlets)) { for (csp = class_strings; *csp; csp++) { n = strlen(*csp); if ((strncmp(z, *csp, n) == 0) && !isalpha(z[n])) - _zed_exec_fork_child(eid, dir, z, e, zfd); + _zed_exec_fork_child(eid, zcp->zedlet_dir, + z, e, zcp->zevent_fd); } } free(e); diff --git a/cmd/zed/zed_exec.h b/cmd/zed/zed_exec.h index 5eb9170abfe3..2a7f721f0030 100644 --- a/cmd/zed/zed_exec.h +++ b/cmd/zed/zed_exec.h @@ -17,9 +17,11 @@ #include #include "zed_strings.h" +#include "zed_conf.h" + +void zed_exec_fini(void); int zed_exec_process(uint64_t eid, const char *class, const char *subclass, - const char *dir, zed_strings_t *zedlets, zed_strings_t *envs, - int zevent_fd); + struct zed_conf *zcp, zed_strings_t *envs); #endif /* !ZED_EXEC_H */ diff --git a/cmd/zed/zed_file.c b/cmd/zed/zed_file.c index b51b1ca9dcf6..4437892e9058 100644 --- a/cmd/zed/zed_file.c +++ b/cmd/zed/zed_file.c @@ -187,31 +187,3 @@ zed_file_close_from(int lowfd) errno = errno_bak; } - -/* - * Set the CLOEXEC flag on file descriptor [fd] so it will be automatically - * closed upon successful execution of one of the exec functions. - * Return 0 on success, or -1 on error. - * - * FIXME: No longer needed? - */ -int -zed_file_close_on_exec(int fd) -{ - int flags; - - if (fd < 0) { - errno = EBADF; - return (-1); - } - flags = fcntl(fd, F_GETFD); - if (flags == -1) - return (-1); - - flags |= FD_CLOEXEC; - - if (fcntl(fd, F_SETFD, flags) == -1) - return (-1); - - return (0); -} diff --git a/cmd/zed/zed_file.h b/cmd/zed/zed_file.h index 7dcae83810ef..28a5dd0ccac3 100644 --- a/cmd/zed/zed_file.h +++ b/cmd/zed/zed_file.h @@ -30,6 +30,4 @@ pid_t zed_file_is_locked(int fd); void zed_file_close_from(int fd); -int zed_file_close_on_exec(int fd); - #endif /* !ZED_FILE_H */ diff --git a/man/man8/zed.8.in b/man/man8/zed.8.in index e32a89de8a0f..db9f4a816311 100644 --- a/man/man8/zed.8.in +++ b/man/man8/zed.8.in @@ -29,6 +29,7 @@ ZED \- ZFS Event Daemon [\fB\-p\fR \fIpidfile\fR] [\fB\-P\fR \fIpath\fR] [\fB\-s\fR \fIstatefile\fR] +[\fB\-j\fR \fIjobs\fR] [\fB\-v\fR] [\fB\-V\fR] [\fB\-Z\fR] @@ -95,6 +96,11 @@ it in production! .TP .BI \-s\ statefile Write the daemon's state to the specified file. +.TP +.BI \-j\ jobs +Allow at most \fIjobs\fR ZEDLETs to run concurrently, +delaying execution of new ones until they finish. +Defaults to 16. .SH ZEVENTS .PP A zevent is comprised of a list of nvpairs (name/value pairs). Each zevent @@ -231,12 +237,6 @@ Terminate the daemon. .SH BUGS .PP -Events are processed synchronously by a single thread. This can delay the -processing of simultaneous zevents. -.PP -ZEDLETs are killed after a maximum of ten seconds. -This can lead to a violation of a ZEDLET's atomicity assumptions. -.PP The ownership and permissions of the \fIenabled-zedlets\fR directory (along with all parent directories) are not checked. If any of these directories are improperly owned or permissioned, an unprivileged user could insert a diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index bd77bae7d967..c3b1adc93a4e 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -3705,8 +3705,8 @@ function zed_start # run ZED in the background and redirect foreground logging # output to $ZED_LOG. log_must truncate -s 0 $ZED_DEBUG_LOG - log_must eval "zed -vF -d $ZEDLET_DIR -p $ZEDLET_DIR/zed.pid -P $PATH" \ - "-s $ZEDLET_DIR/state 2>$ZED_LOG &" + log_must eval "zed -vF -d $ZEDLET_DIR -P $PATH" \ + "-s $ZEDLET_DIR/state -j 1 2>$ZED_LOG &" fi return 0 @@ -3722,14 +3722,13 @@ function zed_stop fi log_note "Stopping ZED" - if [[ -f ${ZEDLET_DIR}/zed.pid ]]; then - zedpid=$(<${ZEDLET_DIR}/zed.pid) - kill $zedpid - while ps -p $zedpid > /dev/null; do - sleep 1 - done - rm -f ${ZEDLET_DIR}/zed.pid - fi + while true; do + zedpids="$(pgrep -x zed)" + [ "$?" -ne 0 ] && break + + log_must kill $zedpids + sleep 1 + done return 0 } diff --git a/tests/zfs-tests/tests/functional/events/zed_rc_filter.ksh b/tests/zfs-tests/tests/functional/events/zed_rc_filter.ksh index 44652ee4cf12..0bef0ef1f96b 100755 --- a/tests/zfs-tests/tests/functional/events/zed_rc_filter.ksh +++ b/tests/zfs-tests/tests/functional/events/zed_rc_filter.ksh @@ -49,6 +49,7 @@ log_assert "Verify zpool sub-commands generate expected events" log_onexit cleanup log_must zpool events -c +log_must zed_stop log_must zed_start # Backup our zed.rc