Skip to content

Commit

Permalink
Mitigation for spawn FD closing taking forever with a big FD limit (m…
Browse files Browse the repository at this point in the history
…ono/mono#10761)

* Mitigation for spawn FD closing taking forever with a big FD limit

On systems with a large file descriptor limit, Mono takes a very
long time to spawn a new process; with informal testing on the AIX
CI builder, (with a POWER7) it took ~30 minutes. This is obviously
very undesirable, but hand our is forced - libraries the user might
be calling could be creating non-CLOEXEC files we're unaware of. As
such, we started from the FD limit and worked our way down until we
hit stdio. Using APIs such as posix_spawn aren't desirable as we do
some fiddling with the child before we exec; and even then, closing
FDs with posix_spawn relies on non-standard file actions like
Solaris' addclosefrom not present on many systems. (All of this is
unnecessary on Windows, of course.)

This presents an alternative way (currently only implemented on AIX
but with notes how for other platforms) to try to close the child's
loose FDs before exec; by trying to get the highest number FD in
use, then work our way down. In the event we can't, we simply fall
back to the old logic.

See mono/mono#6555 for a discussion and the initial problem being mitigated.

* Use an another strategy of closing only known to be open handles

...on FreeBSD and AIX.

Use the kinfo_getfiles library call on FreeBSD and only close what's
safe to close. On AIX, use the third and fourth arguments to getprocs
to check what's open. However, the array to get all the handles takes
1 MB, so allocate it on the heap; like what kinfo_getfiles does. We
don't need to free these as the child will exec or exit, which blows
it all away.

The previous strategy from previous commit is still used and on AIX,
enhanced.

* Add Linux strategy to fork FD close

Tested on WSL, shows benefits with big FD ulimit.


Commit migrated from mono/mono@0d29dfb
  • Loading branch information
NattyNarwhal authored and luhenry committed Oct 8, 2018
1 parent d072c5f commit 5cd3083
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 4 deletions.
4 changes: 3 additions & 1 deletion src/mono/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ case "$host" in
with_sgen_default_concurrent=yes
;;
*-*-*freebsd*)
dnl For close_my_fds
LDFLAGS="$LDFLAGS -lutil"
if test "x$PTHREAD_CFLAGS" = "x"; then
CPPFLAGS="$CPPFLAGS -DGC_FREEBSD_THREADS"
libmono_cflags=
Expand All @@ -197,7 +199,7 @@ case "$host" in
LDFLAGS="$LDFLAGS $PTHREAD_LIBS -L/usr/local/lib"
libmono_ldflags="$PTHREAD_LIBS"
fi
CPPFLAGS="$CPPFLAGS -DHOST_BSD"
CPPFLAGS="$CPPFLAGS -DHOST_BSD -D_WITH_GETLINE"
need_link_unlink=yes
AC_DEFINE(PTHREAD_POINTER_ID, 1, [pthread is a pointer])
libdl=
Expand Down
135 changes: 132 additions & 3 deletions src/mono/mono/metadata/w32process-unix.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,17 @@
#include <utime.h>
#endif

// For close_my_fds
#if defined (_AIX)
#include <procinfo.h>
#elif defined (__FreeBSD__)
#include <sys/sysctl.h>
#include <sys/user.h>
#include <libutil.h>
#elif defined(__linux__)
#include <dirent.h>
#endif

#include <mono/metadata/object-internals.h>
#include <mono/metadata/w32process.h>
#include <mono/metadata/w32process-internals.h>
Expand Down Expand Up @@ -1594,6 +1605,125 @@ is_managed_binary (const char *filename)
return managed;
}

/**
* Gets the biggest numbered file descriptor for the current process; failing
* that, the system's file descriptor limit. This is called by the fork child
* in close_my_fds.
*/
static inline guint32
max_fd_count (void)
{
#if defined (_AIX)
struct procentry64 pe;
pid_t p;
p = getpid ();
if (getprocs64 (&pe, sizeof (pe), NULL, 0, &p, 1) != -1) {
mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_IO_LAYER_PROCESS,
"%s: maximum returned fd in child is %u",
__func__, pe.pi_maxofile);
return pe.pi_maxofile; // biggest + 1
}
#endif
// fallback to user/system limit if unsupported/error
return eg_getdtablesize ();
}

/**
* Closes all of the process' opened file descriptors, applying a strategy
* appropriate for the target system. This is called by the fork child in
* process_create.
*/
static void
close_my_fds (void)
{
// TODO: Other platforms.
// * On macOS, use proc_pidinfo + PROC_PIDLISTFDS? See:
// http://blog.palominolabs.com/2012/06/19/getting-the-files-being-used-by-a-process-on-mac-os-x/
// (I have no idea how this plays out on i/watch/tvOS.)
// * On the other BSDs, there's likely a sysctl for this.
// * On Solaris, there exists posix_spawn_file_actions_addclosefrom_np,
// but that assumes we're using posix_spawn; we aren't, as we do some
// complex stuff between fork and exec. There's likely a way to get
// the FD list/count though (maybe look at addclosefrom source in
// illumos?) or just walk /proc/pid/fd like Linux?
#if defined (__linux__)
/* Walk the file descriptors in /proc/self/fd/. Linux has no other API,
* as far as I'm aware. Opening a directory won't create an FD. */
struct dirent *dp;
DIR *d;
int fd;
d = opendir ("/proc/self/fd/");
if (d) {
while ((dp = readdir (d)) != NULL) {
if (dp->d_name [0] == '.')
continue;
fd = atoi (dp->d_name);
if (fd > 2)
close (fd);
}
closedir (d);
return;
} else {
mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_IO_LAYER_PROCESS,
"%s: opening fd dir failed, using fallback",
__func__);
}
#elif defined (__FreeBSD__)
/* FreeBSD lets us get a list of FDs. There's a MIB to access them
* directly, but it uses a lot of nasty variable length structures. The
* system library libutil provides a nicer way to get a fixed length
* version instead. */
struct kinfo_file *kif;
int count, i;
/* this is malloced but we won't need to free once we exec/exit */
kif = kinfo_getfile (getpid (), &count);
if (kif) {
for (i = 0; i < count; i++) {
/* negative FDs look to be used by the OS */
if (kif [i].kf_fd > 2) /* no neg + no stdio */
close (kif [i].kf_fd);
}
return;
} else {
mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_IO_LAYER_PROCESS,
"%s: kinfo_getfile failed, using fallback",
__func__);
}
#elif defined (_AIX)
struct procentry64 pe;
/* this array struct is 1 MB, we're NOT putting it on the stack.
* likewise no need to free; getprocs will fail if we use the smalller
* versions if we have a lot of FDs (is it worth it?)
*/
struct fdsinfo_100K *fds;
pid_t p;
p = getpid ();
fds = (struct fdsinfo_100K *) g_malloc0 (sizeof (struct fdsinfo_100K));
if (!fds) {
mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_IO_LAYER_PROCESS,
"%s: fdsinfo alloc failed, using fallback",
__func__);
goto fallback;
}

if (getprocs64 (&pe, sizeof (pe), fds, sizeof (struct fdsinfo_100K), &p, 1) != -1) {
for (int i = 3; i < pe.pi_maxofile; i++) {
if (fds->pi_ufd [i].fp != 0)
close (fds->pi_ufd [i].fp);
}
return;
} else {
mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_IO_LAYER_PROCESS,
"%s: getprocs64 failed, using fallback",
__func__);
}
#endif
fallback:
/* Fallback: Close FDs blindly, according to an FD limit */
for (guint32 i = max_fd_count () - 1; i > 2; i--)
close (i);
}

static gboolean
process_create (const gunichar2 *appname, const gunichar2 *cmdline,
const gunichar2 *cwd, StartupHandles *startup_handles, MonoW32ProcessInfo *process_info)
Expand Down Expand Up @@ -1975,9 +2105,8 @@ process_create (const gunichar2 *appname, const gunichar2 *cmdline,
dup2 (out_fd, 1);
dup2 (err_fd, 2);

/* Close all file descriptors */
for (i = eg_getdtablesize() - 1; i > 2; i--)
close (i);
/* Close this child's file handles. */
close_my_fds ();

#ifdef DEBUG_ENABLED
mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_IO_LAYER_PROCESS, "%s: exec()ing [%s] in dir [%s]", __func__, cmd,
Expand Down

0 comments on commit 5cd3083

Please sign in to comment.