diff --git a/usr/src/cmd/ptools/pfiles/pfiles.c b/usr/src/cmd/ptools/pfiles/pfiles.c index dd5ce4af11fb..3b0ab5defe28 100644 --- a/usr/src/cmd/ptools/pfiles/pfiles.c +++ b/usr/src/cmd/ptools/pfiles/pfiles.c @@ -26,6 +26,7 @@ /* * Copyright (c) 2017 Joyent, Inc. All Rights reserved. * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. + * Copyright 2024 Oxide Computer Company */ #include @@ -318,6 +319,26 @@ show_files(struct ps_prochandle *Pr) (void) Pfdinfo_iter(Pr, show_file, Pr); } +static void +show_fdflags(int fdflags) +{ + if (fdflags <= 0) + return; + + /* + * show_fileflags() already has printed content here. We translate these + * back to the O_ versions for consistency with the flags that were + * already printed. + */ + if ((fdflags & FD_CLOEXEC) != 0) { + (void) printf("|O_CLOEXEC"); + } + + if ((fdflags & FD_CLOFORK) != 0) { + (void) printf("|O_CLOFORK"); + } +} + /* examine open file with fcntl() */ static void dofcntl(struct ps_prochandle *Pr, const prfdinfo_t *info, int mandatory, @@ -333,8 +354,8 @@ dofcntl(struct ps_prochandle *Pr, const prfdinfo_t *info, int mandatory, (void) printf(" "); if (fileflags != -1) show_fileflags(fileflags); - if (fdflags != -1 && (fdflags & FD_CLOEXEC)) - (void) printf(" FD_CLOEXEC"); + if (fdflags != -1) + show_fdflags(fdflags); if (isdoor && (Pstate(Pr) != PS_DEAD)) show_door(Pr, info); (void) fputc('\n', stdout); diff --git a/usr/src/cmd/sgs/elfdump/common/corenote.c b/usr/src/cmd/sgs/elfdump/common/corenote.c index 1164fe3098da..e7c3e549b3b1 100644 --- a/usr/src/cmd/sgs/elfdump/common/corenote.c +++ b/usr/src/cmd/sgs/elfdump/common/corenote.c @@ -1690,7 +1690,7 @@ dump_prfdinfo(note_state_t *state, const char *title) { const sl_prfdinfo_layout_t *layout = state->ns_arch->prfdinfo; char buf[1024]; - uint32_t fileflags, mode; + uint32_t fileflags, mode, fdflags; indent_enter(state, title, &layout->pr_fd); @@ -1718,7 +1718,9 @@ dump_prfdinfo(note_state_t *state, const char *title) print_str(state, 
MSG_ORIG(MSG_CNOTE_T_PR_FILEFLAGS), conv_cnote_fileflags(fileflags, 0, buf, sizeof (buf))); - PRINT_DEC(MSG_ORIG(MSG_CNOTE_T_PR_FDFLAGS), pr_fdflags); + fdflags = extract_as_word(state, &layout->pr_fdflags); + print_str(state, MSG_ORIG(MSG_CNOTE_T_PR_FDFLAGS), + conv_cnote_fdflags(fdflags, 0, buf, sizeof (buf))); PRINT_STRBUF(MSG_ORIG(MSG_CNOTE_T_PR_PATH), pr_path); diff --git a/usr/src/cmd/sgs/include/conv.h b/usr/src/cmd/sgs/include/conv.h index cfd690098ccb..bb362c943be0 100644 --- a/usr/src/cmd/sgs/include/conv.h +++ b/usr/src/cmd/sgs/include/conv.h @@ -27,7 +27,7 @@ * Copyright 2012 DEY Storage Systems, Inc. All rights reserved. * Copyright (c) 2018, Joyent, Inc. * Copyright 2016 RackTop Systems. - * Copyright 2022 Oxide Computer Company + * Copyright 2024 Oxide Computer Company */ #ifndef _CONV_H @@ -869,6 +869,8 @@ extern const char *conv_cnote_syscall(Word, Conv_fmt_flags_t, Conv_inv_buf_t *); extern const char *conv_cnote_sysset(uint32_t *, int, Conv_fmt_flags_t, Conv_cnote_sysset_buf_t *); +extern const char *conv_cnote_fdflags(uint32_t, Conv_fmt_flags_t, + char *, size_t); extern const char *conv_cnote_fileflags(uint32_t, Conv_fmt_flags_t, char *, size_t); extern const char *conv_cnote_filemode(uint32_t, Conv_fmt_flags_t, diff --git a/usr/src/cmd/sgs/libconv/common/corenote.c b/usr/src/cmd/sgs/libconv/common/corenote.c index 296754b7146c..95ea487a46d1 100644 --- a/usr/src/cmd/sgs/libconv/common/corenote.c +++ b/usr/src/cmd/sgs/libconv/common/corenote.c @@ -2605,6 +2605,29 @@ conv_cnote_filemode(uint32_t mode, Conv_fmt_flags_t fmt_flags, return (buf); } +const char * +conv_cnote_fdflags(uint32_t flags, Conv_fmt_flags_t fmt_flags, + char *buf, size_t bufsize) +{ + CONV_EXPN_FIELD_ARG arg = { 0 }; + + static const Val_desc fdflags[] = { + { 0x01, MSG_FD_CLOEXEC }, + { 0x02, MSG_FD_CLOFORK }, + { 0, 0 } + }; + + if (flags == 0) + return (MSG_ORIG(MSG_GBL_ZERO)); + + arg.buf = buf; + arg.bufsize = bufsize; + arg.oflags = flags; + arg.rflags = flags; + + 
(void) conv_expn_field(&arg, fdflags, fmt_flags); + return (buf); +} #define PROCSECFLGSZ CONV_EXPN_FIELD_DEF_PREFIX_SIZE + \ MSG_ASLR_SIZE + CONV_EXPN_FIELD_DEF_SEP_SIZE + \ diff --git a/usr/src/cmd/sgs/libconv/common/corenote.msg b/usr/src/cmd/sgs/libconv/common/corenote.msg index c7ff53f82ddb..e72d90710860 100644 --- a/usr/src/cmd/sgs/libconv/common/corenote.msg +++ b/usr/src/cmd/sgs/libconv/common/corenote.msg @@ -1115,6 +1115,9 @@ @ MSG_PR_O_NOFOLLOW "O_NOFOLLOW" @ MSG_PR_O_NOLINKS "O_NOLINKS" +@ MSG_FD_CLOEXEC "FD_CLOEXEC" +@ MSG_FD_CLOFORK "FD_CLOFORK" + @ MSG_S_IFIFO "S_IFIFO" @ MSG_S_IFCHR "S_IFCHR" @ MSG_S_IFDIR "S_IFDIR" diff --git a/usr/src/cmd/sgs/rtld/common/external.c b/usr/src/cmd/sgs/rtld/common/external.c index 82f6f46ea8cf..47dea75420c1 100644 --- a/usr/src/cmd/sgs/rtld/common/external.c +++ b/usr/src/cmd/sgs/rtld/common/external.c @@ -184,6 +184,7 @@ #include #include #include +#include #include "_elf.h" #include "_rtld.h" @@ -622,13 +623,21 @@ int fcntl(int fildes, int cmd, ...) { extern int __fcntl(int, int, ...); - intptr_t arg; + intptr_t arg, arg1 = 0; va_list ap; va_start(ap, cmd); - arg = va_arg(ap, intptr_t); + switch (cmd) { + case F_DUP3FD: + arg = va_arg(ap, int); + arg1 = va_arg(ap, int); + break; + default: + arg = va_arg(ap, intptr_t); + break; + } va_end(ap); - return (__fcntl(fildes, cmd, arg)); + return (__fcntl(fildes, cmd, arg, arg1)); } int diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c index a1ad4dbcc16f..2b6d761455d3 100644 --- a/usr/src/cmd/truss/codes.c +++ b/usr/src/cmd/truss/codes.c @@ -26,6 +26,7 @@ * Copyright 2020 Joyent, Inc. * Copyright (c) 2014, OmniTI Computer Consulting, Inc. All rights reserved. 
* Copyright 2022 Garrett D'Amore + * Copyright 2024 Oxide Computer Company */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -111,63 +112,58 @@ #include "proto.h" #define FCNTLMIN F_DUPFD -#define FCNTLMAX F_FLOCKW +#define FCNTLMAX F_DUP3FD const char *const FCNTLname[] = { - "F_DUPFD", - "F_GETFD", - "F_SETFD", - "F_GETFL", - "F_SETFL", - "F_O_GETLK", - "F_SETLK", - "F_SETLKW", - "F_CHKFL", - "F_DUP2FD", - "F_ALLOCSP", - "F_FREESP", - NULL, /* 12 */ - NULL, /* 13 */ - "F_GETLK", - NULL, /* 15 */ - NULL, /* 16 */ - NULL, /* 17 */ - NULL, /* 18 */ - NULL, /* 19 */ - NULL, /* 20 */ - NULL, /* 21 */ - NULL, /* 22 */ - "F_GETOWN", - "F_SETOWN", - "F_REVOKE", - "F_HASREMOTELOCKS", - "F_FREESP64", - NULL, /* 28 */ - NULL, /* 29 */ - NULL, /* 30 */ - NULL, /* 31 */ - NULL, /* 32 */ - "F_GETLK64", - "F_SETLK64", - "F_SETLKW64", - "F_DUP2FD_CLOEXEC", - "F_DUPFD_CLOEXEC", - NULL, /* 38 */ - NULL, /* 39 */ - "F_SHARE", - "F_UNSHARE", - "F_SETLK_NBMAND", - "F_SHARE_NBMAND", - "F_SETLK64_NBMAND", - NULL, /* 45 */ - "F_BADFD", - "F_OFD_GETLK", - "F_OFD_SETLK", - "F_OFD_SETLKW", - NULL, /* 50 */ - NULL, /* 51 */ - NULL, /* 52 */ - "F_FLOCK", - "F_FLOCKW" + [0] = "F_DUPFD", + [1] = "F_GETFD", + [2] = "F_SETFD", + [3] = "F_GETFL", + [4] = "F_SETFL", + [5] = "F_O_GETLK", + [6] = "F_SETLK", + [7] = "F_SETLKW", + [8] = "F_CHKFL", + [9] = "F_DUP2FD", + [10] = "F_ALLOCSP", + [11] = "F_FREESP", + [13] = "F_ISSTREAM", + [14] = "F_GETLK", + [15] = "F_PRIV", + [16] = "F_NPRIV", + [17] = "F_QUOTACTL", + [18] = "F_BLOCKS", + [19] = "F_BLKSIZE", + [23] = "F_GETOWN", + [24] = "F_SETOWN", + [25] = "F_REVOKE", + [26] = "F_HASREMOTELOCKS", + [27] = "F_FREESP64", + [28] = "F_ALLOCSP64", + [33] = "F_GETLK64", + [34] = "F_SETLK64", + [35] = "F_SETLKW64", + [36] = "F_DUP2FD_CLOEXEC", + [37] = "F_DUPFD_CLOEXEC", + [40] = "F_SHARE", + [41] = "F_UNSHARE", + [42] = "F_SETLK_NBMAND", + [43] = "F_SHARE_NBMAND", + [44] = "F_SETLK64_NBMAND", + [45] = "F_GETXFL", + [46] = "F_BADFD", + [47] = 
"F_OFD_GETLK", + [48] = "F_OFD_SETLK", + [49] = "F_OFD_SETLKW", + [50] = "F_OFD_GETLK64", + [51] = "F_OFD_SETLK64", + [52] = "F_OFD_SETLKW64", + [53] = "F_FLOCK", + [54] = "F_FLOCKW", + [55] = "F_FLOCK64", + [56] = "F_FLOCKW64", + [57] = "F_DUP2FD_CLOFORK", + [58] = "F_DUPFD_CLOFORK", + [59] = "F_DUP3FD" }; #define SYSFSMIN GETFSIND @@ -2203,7 +2199,7 @@ pathconfname(int code) #define ALL_O_FLAGS \ (O_NDELAY|O_APPEND|O_SYNC|O_DSYNC|O_NONBLOCK|O_CREAT|O_TRUNC\ |O_EXCL|O_NOCTTY|O_LARGEFILE|O_RSYNC|O_XATTR|O_NOFOLLOW|O_NOLINKS\ - |O_CLOEXEC|O_DIRECTORY|O_DIRECT|FXATTRDIROPEN) + |O_CLOEXEC|O_DIRECTORY|O_DIRECT|O_CLOFORK|FXATTRDIROPEN) const char * openarg(private_t *pri, int arg) @@ -2267,6 +2263,8 @@ openarg(private_t *pri, int arg) (void) strlcat(str, "|O_DIRECTORY", sizeof (pri->code_buf)); if (arg & O_DIRECT) (void) strlcat(str, "|O_DIRECT", sizeof (pri->code_buf)); + if (arg & O_CLOFORK) + (void) strlcat(str, "|O_CLOFORK", sizeof (pri->code_buf)); if (arg & FXATTRDIROPEN) (void) strlcat(str, "|FXATTRDIROPEN", sizeof (pri->code_buf)); diff --git a/usr/src/cmd/truss/expound.c b/usr/src/cmd/truss/expound.c index 359c3fcddf9e..ac83217e5f2a 100644 --- a/usr/src/cmd/truss/expound.c +++ b/usr/src/cmd/truss/expound.c @@ -1813,6 +1813,15 @@ show_ffg(private_t *pri) (void) puts(pri->sys_string); } +void +show_ffd(private_t *pri) +{ + (void) putchar('\t'); + (void) putchar('\t'); + prt_ffd(pri, 0, pri->Rval1); + (void) puts(pri->sys_string); +} + /* print values in fcntl() pointed-to structure */ void show_fcntl(private_t *pri) @@ -1824,6 +1833,11 @@ show_fcntl(private_t *pri) return; } + if (pri->sys_nargs >= 2 && pri->sys_args[1] == F_GETFD) { + show_ffd(pri); + return; + } + if (pri->sys_nargs < 3 || (offset = pri->sys_args[2]) == 0) return; diff --git a/usr/src/cmd/truss/print.c b/usr/src/cmd/truss/print.c index 47adc4e40a00..59f54b97cacc 100644 --- a/usr/src/cmd/truss/print.c +++ b/usr/src/cmd/truss/print.c @@ -90,6 +90,7 @@ #include #include #include +#include #include 
"ramdata.h" #include "print.h" #include "proto.h" @@ -379,26 +380,25 @@ prt_ioa(private_t *pri, int raw, long val) /* print ioctl argument */ void prt_pip(private_t *pri, int raw, long val) /* print pipe code */ { - const char *s = NULL; + int first = 1; + long flags = ~(O_CLOEXEC | O_CLOFORK | O_NONBLOCK); - if (!raw) { - switch (val) { - case O_CLOEXEC: - s = "O_CLOEXEC"; - break; - case O_NONBLOCK: - s = "O_NONBLOCK"; - break; - case O_CLOEXEC|O_NONBLOCK: - s = "O_CLOEXEC|O_NONBLOCK"; - break; - } + if (raw != 0 || val == 0 || (val & flags) != 0) { + prt_dex(pri, 0, val); + return; } - if (s == NULL) - prt_dex(pri, 0, val); - else - outstring(pri, s); + if (val & O_CLOEXEC) { + outstring(pri, "|O_CLOEXEC" + first); + first = 0; + } + if (val & O_CLOFORK) { + outstring(pri, "|O_CLOFORK" + first); + first = 0; + } + if (val & O_NONBLOCK) { + outstring(pri, "|O_NONBLOCK" + first); + } } void @@ -1840,6 +1840,15 @@ prt_skt(private_t *pri, int raw, long val) if ((val & SOCK_CLOEXEC) != 0) { outstring(pri, "|SOCK_CLOEXEC"); } + if ((val & SOCK_CLOFORK) != 0) { + outstring(pri, "|SOCK_CLOFORK"); + } + if ((val & SOCK_NDELAY) != 0) { + outstring(pri, "|SOCK_NDELAY"); + } + if ((val & SOCK_NONBLOCK) != 0) { + outstring(pri, "|SOCK_NONBLOCK"); + } } else { prt_dec(pri, 0, val); } @@ -1896,7 +1905,7 @@ prt_acf(private_t *pri, int raw, long val) { int first = 1; if (raw || !val || - (val & ~(SOCK_CLOEXEC|SOCK_NDELAY|SOCK_NONBLOCK))) { + (val & ~(SOCK_CLOEXEC|SOCK_NDELAY|SOCK_NONBLOCK|SOCK_CLOFORK))) { prt_dex(pri, 0, val); return; } @@ -1905,6 +1914,10 @@ prt_acf(private_t *pri, int raw, long val) outstring(pri, "|SOCK_CLOEXEC" + first); first = 0; } + if (val & SOCK_CLOFORK) { + outstring(pri, "|SOCK_CLOFORK" + first); + first = 0; + } if (val & SOCK_NDELAY) { outstring(pri, "|SOCK_NDELAY" + first); first = 0; @@ -2468,7 +2481,6 @@ prt_ffg(private_t *pri, int raw, long val) #define CBSIZE sizeof (pri->code_buf) char *s = pri->code_buf; size_t used = 1; - struct fcntl_flags 
*fp; if (raw) { (void) snprintf(s, CBSIZE, "0x%lx", val); @@ -2481,8 +2493,54 @@ prt_ffg(private_t *pri, int raw, long val) } *s = '\0'; - for (fp = fcntl_flags; - fp < &fcntl_flags[sizeof (fcntl_flags) / sizeof (*fp)]; fp++) { + for (size_t i = 0; i < ARRAY_SIZE(fcntl_flags); i++) { + struct fcntl_flags *fp = &fcntl_flags[i]; + if (val & fp->val) { + used = strlcat(s, fp->name, CBSIZE); + val &= ~fp->val; + } + } + + if (val != 0 && used <= CBSIZE) + used += snprintf(s + used, CBSIZE - used, "|0x%lx", val); + + if (used >= CBSIZE) + (void) snprintf(s + 1, CBSIZE-1, "0x%lx", val); + outstring(pri, s + 1); +#undef CBSIZE +} + +/* + * Print fcntl() F_GETFD/F_SETFD values + */ +static struct fcntl_fdflags { + long val; + const char *name; +} fcntl_fdflags[] = { + { FD_CLOEXEC, "|FD_CLOEXEC" }, + { FD_CLOFORK, "|FD_CLOFORK" } +}; + +void +prt_ffd(private_t *pri, int raw, long val) +{ +#define CBSIZE sizeof (pri->code_buf) + char *s = pri->code_buf; + size_t used = 1; + + if (raw) { + (void) snprintf(s, CBSIZE, "0x%lx", val); + outstring(pri, s); + return; + } + if (val == 0) { + outstring(pri, "(no flags)"); + return; + } + + *s = '\0'; + for (size_t i = 0; i < ARRAY_SIZE(fcntl_fdflags); i++) { + struct fcntl_fdflags *fp = &fcntl_fdflags[i]; if (val & fp->val) { used = strlcat(s, fp->name, CBSIZE); val &= ~fp->val; @@ -3033,6 +3091,64 @@ prt_exc(private_t *pri, int raw, long val) #undef CBSIZE } +/* + * Print recv*(), send*() flags. This includes all the msg_flags data as well as + * they're the same namespace. 
+ */ +static struct sendrecv_flags { + long val; + const char *name; +} sendrecv_flags[] = { + { MSG_OOB, "|MSG_OOB" }, + { MSG_PEEK, "|MSG_PEEK" }, + { MSG_DONTROUTE, "|MSG_DONTROUTE" }, + { MSG_CTRUNC, "|MSG_CTRUNC" }, + { MSG_TRUNC, "|MSG_TRUNC" }, + { MSG_WAITALL, "|MSG_WAITALL" }, + { MSG_DONTWAIT, "|MSG_DONTWAIT" }, + { MSG_NOTIFICATION, "|MSG_NOTIFICATION" }, + { MSG_NOSIGNAL, "|MSG_NOSIGNAL" }, + { MSG_DUPCTRL, "|MSG_DUPCTRL" }, + { MSG_CMSG_CLOEXEC, "|MSG_CMSG_CLOEXEC" }, + { MSG_CMSG_CLOFORK, "|MSG_CMSG_CLOFORK" }, + { MSG_XPG4_2, "|MSG_XPG4_2" } +}; + +void +prt_srf(private_t *pri, int raw, long val) +{ +#define CBSIZE sizeof (pri->code_buf) + char *s = pri->code_buf; + size_t used = 1; + + if (raw) { + (void) snprintf(s, CBSIZE, "0x%lx", val); + outstring(pri, s); + return; + } + if (val == 0) { + outstring(pri, "(no flags)"); + return; + } + + *s = '\0'; + for (size_t i = 0; i < ARRAY_SIZE(sendrecv_flags); i++) { + struct sendrecv_flags *fp = &sendrecv_flags[i]; + if (val & fp->val) { + used = strlcat(s, fp->name, CBSIZE); + val &= ~fp->val; + } + } + + if (val != 0 && used <= CBSIZE) + used += snprintf(s + used, CBSIZE - used, "|0x%lx", val); + + if (used >= CBSIZE) + (void) snprintf(s + 1, CBSIZE-1, "0x%lx", val); + outstring(pri, s + 1); +#undef CBSIZE +} + /* * Array of pointers to print functions, one for each format. */ @@ -3142,5 +3258,7 @@ void (* const Print[])() = { prt_psdelta, /* PSDLT -- print psecflags(2) delta */ prt_psfw, /* PSFW -- print psecflags(2) set */ prt_exc, /* EXC -- print execvex() flags */ + prt_ffd, /* FFD -- print fcntl() F_SETFD flags */ + prt_srf, /* SRF -- print send*()/recv*() flags */ prt_dec, /* HID -- hidden argument, make this the last one */ }; diff --git a/usr/src/cmd/truss/print.h b/usr/src/cmd/truss/print.h index 2b4ce56b85f4..94ebf0e307f9 100644 --- a/usr/src/cmd/truss/print.h +++ b/usr/src/cmd/truss/print.h @@ -22,6 +22,7 @@ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. 
* Copyright (c) 2015, Joyent, Inc. + * Copyright 2024 Oxide Computer Company */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -144,7 +145,9 @@ extern "C" { #define PSDLT 102 /* secflagsdelta_t */ #define PSFW 103 /* psecflagswhich_t */ #define EXC 104 /* execx flags */ -#define HID 105 /* hidden argument, don't print */ +#define FFD 105 /* fcntl F_SETFD flags */ +#define SRF 106 /* send*(), recv() flags */ +#define HID 107 /* hidden argument, don't print */ /* make sure HID is always the last member */ /* diff --git a/usr/src/cmd/truss/proto.h b/usr/src/cmd/truss/proto.h index 670fe95f1bd3..9dbb7605a32e 100644 --- a/usr/src/cmd/truss/proto.h +++ b/usr/src/cmd/truss/proto.h @@ -24,7 +24,7 @@ */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ +/* All Rights Reserved */ #ifndef _PROTO_H #define _PROTO_H @@ -108,7 +108,7 @@ extern int fltlist(char *, fltset_t *, int *); extern int fdlist(char *, fileset_t *); extern int liblist(char *, int); -extern char *fetchstring(private_t *, long, int); +extern char *fetchstring(private_t *, long, int); extern void show_cred(private_t *, int, int); extern void errmsg(const char *, const char *); extern void abend(const char *, const char *); @@ -145,6 +145,7 @@ extern void report_htable_stats(void); extern const char *door_flags(private_t *, long); extern void prt_ffg(private_t *, int, long); +extern void prt_ffd(private_t *, int, long); extern void escape_string(private_t *, const char *); #ifdef __cplusplus diff --git a/usr/src/cmd/truss/systable.c b/usr/src/cmd/truss/systable.c index 08ff794eaa01..67f636285e5c 100644 --- a/usr/src/cmd/truss/systable.c +++ b/usr/src/cmd/truss/systable.c @@ -22,7 +22,7 @@ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2015, Joyent, Inc. All rights reserved. 
- * Copyright 2020 Oxide Computer Company + * Copyright 2024 Oxide Computer Company */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -282,7 +282,7 @@ const struct systable systable[] = { {"execvex", 4, DEC, NOV, STG, HEX, HEX, EXC}, /* 59 */ {"umask", 1, OCT, NOV, OCT}, /* 60 */ {"chroot", 1, DEC, NOV, STG}, /* 61 */ -{"fcntl", 3, DEC, NOV, DEC, FCN, HEX}, /* 62 */ +{"fcntl", 4, DEC, NOV, DEC, FCN, HEX, HEX}, /* 62 */ {"ulimit", 2, DEX, NOV, ULM, DEC}, /* 63 */ {"renameat", 4, DEC, NOV, ATC, STG, ATC, STG}, /* 64 */ {"unlinkat", 3, DEC, NOV, ATC, STG, UAT}, /* 65 */ @@ -457,12 +457,12 @@ const struct systable systable[] = { {"accept", 5, DEC, NOV, DEC, HEX, HEX, SKV, ACF}, /* 234 */ {"connect", 4, DEC, NOV, DEC, HEX, DEC, SKV}, /* 235 */ {"shutdown", 3, DEC, NOV, DEC, SHT, SKV}, /* 236 */ -{"recv", 4, DEC, NOV, DEC, IOB, DEC, DEC}, /* 237 */ -{"recvfrom", 6, DEC, NOV, DEC, IOB, DEC, DEC, HEX, HEX}, /* 238 */ -{"recvmsg", 3, DEC, NOV, DEC, HEX, DEC}, /* 239 */ -{"send", 4, DEC, NOV, DEC, IOB, DEC, DEC}, /* 240 */ -{"sendmsg", 3, DEC, NOV, DEC, HEX, DEC}, /* 241 */ -{"sendto", 6, DEC, NOV, DEC, IOB, DEC, DEC, HEX, DEC}, /* 242 */ +{"recv", 4, DEC, NOV, DEC, IOB, DEC, SRF}, /* 237 */ +{"recvfrom", 6, DEC, NOV, DEC, IOB, DEC, SRF, HEX, HEX}, /* 238 */ +{"recvmsg", 3, DEC, NOV, DEC, HEX, SRF}, /* 239 */ +{"send", 4, DEC, NOV, DEC, IOB, DEC, SRF}, /* 240 */ +{"sendmsg", 3, DEC, NOV, DEC, HEX, SRF}, /* 241 */ +{"sendto", 6, DEC, NOV, DEC, IOB, DEC, SRF, HEX, DEC}, /* 242 */ {"getpeername", 4, DEC, NOV, DEC, HEX, HEX, SKV}, /* 243 */ {"getsockname", 4, DEC, NOV, DEC, HEX, HEX, SKV}, /* 244 */ {"getsockopt", 6, DEC, NOV, DEC, SOL, SON, HEX, HEX, SKV}, /* 245 */ @@ -597,9 +597,11 @@ const struct systable open64table[] = { #define NOPEN64CODE (sizeof (open64table) / sizeof (struct systable)) const struct systable fcntltable[] = { -{"fcntl", 3, DEC, NOV, DEC, FCN, HEX}, /* 0: default */ -{"fcntl", 2, DEC, NOV, DEC, FCN}, /* 1: no arg */ -{"fcntl", 3, DEC, NOV, DEC, 
FCN, FFG}, /* 2: F_SETFL */ +{"fcntl", 3, DEC, NOV, DEC, FCN, HEX}, /* 0: default */ +{"fcntl", 2, DEC, NOV, DEC, FCN}, /* 1: no arg */ +{"fcntl", 3, DEC, NOV, DEC, FCN, FFG}, /* 2: F_SETFL */ +{"fcntl", 3, DEC, NOV, DEC, FCN, FFD}, /* 3: F_SETFD */ +{"fcntl", 4, DEC, NOV, DEC, FCN, DEC, FFD}, /* 4: F_DUP3FD */ }; #define NFCNTLCODE (sizeof (fcntltable) / sizeof (struct systable)) @@ -1506,6 +1508,8 @@ getsubcode(private_t *pri) case F_GETOWN: case F_GETXFL: subcode = 1; break; case F_SETFL: subcode = 2; break; + case F_SETFD: subcode = 3; break; + case F_DUP3FD: subcode = 4; break; } } break; diff --git a/usr/src/lib/libc/port/gen/dup.c b/usr/src/lib/libc/port/gen/dup.c index 4fd562934f56..f5b9d19694dd 100644 --- a/usr/src/lib/libc/port/gen/dup.c +++ b/usr/src/lib/libc/port/gen/dup.c @@ -24,13 +24,14 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2024 Oxide Computer Company */ /* Copyright (c) 1988 AT&T */ -/* All Rights Reserved */ +/* All Rights Reserved */ -#include "lint.h" -#include +#include "lint.h" +#include #include #include @@ -51,10 +52,13 @@ dup2(int fildes, int fildes2) int dup3(int fildes, int fildes2, int flags) { + int dflags = 0; + /* - * The only valid flag is O_CLOEXEC. + * dup3() only supports O_ open flags that translate into file + * descriptor flags in the F_GETFD sense. */ - if (flags & ~O_CLOEXEC) { + if (flags & ~(O_CLOEXEC | O_CLOFORK)) { errno = EINVAL; return (-1); } @@ -68,6 +72,10 @@ dup3(int fildes, int fildes2, int flags) return (-1); } - return (fcntl(fildes, (flags == 0) ? 
F_DUP2FD : F_DUP2FD_CLOEXEC, - fildes2)); + if ((flags & O_CLOEXEC) != 0) + dflags |= FD_CLOEXEC; + if ((flags & O_CLOFORK) != 0) + dflags |= FD_CLOFORK; + + return (fcntl(fildes, F_DUP3FD, fildes2, dflags)); } diff --git a/usr/src/lib/libc/port/sys/fcntl.c b/usr/src/lib/libc/port/sys/fcntl.c index cea7e79dd251..11ba675b2453 100644 --- a/usr/src/lib/libc/port/sys/fcntl.c +++ b/usr/src/lib/libc/port/sys/fcntl.c @@ -22,6 +22,7 @@ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright 2024 Oxide Computer Company */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -70,11 +71,20 @@ __fcntl(int fd, int cmd, ...) { int res; int pid; - intptr_t arg; + intptr_t arg, arg1 = 0; va_list ap; + /* + * The fcntl(2) entry points are responsible for marshalling arguments + * into intptr_t sized objects prior to calling this. The kernel only + * works in terms of intptr_t sized arguments; however, some calls (like + * F_DUP3FD) are in terms of two int sized arguments. + */ va_start(ap, cmd); arg = va_arg(ap, intptr_t); + if (cmd == F_DUP3FD) { + arg1 = va_arg(ap, intptr_t); + } va_end(ap); switch (cmd) { @@ -88,6 +98,6 @@ __fcntl(int fd, int cmd, ...) return (res); default: - return (syscall(SYS_fcntl, fd, cmd, arg)); + return (syscall(SYS_fcntl, fd, cmd, arg, arg1)); } } diff --git a/usr/src/lib/libc/port/threads/scalls.c b/usr/src/lib/libc/port/threads/scalls.c index b45468f77a61..618a1614754c 100644 --- a/usr/src/lib/libc/port/threads/scalls.c +++ b/usr/src/lib/libc/port/threads/scalls.c @@ -741,15 +741,23 @@ int fcntl(int fildes, int cmd, ...) 
{ extern int __fcntl(int, int, ...); - intptr_t arg; + intptr_t arg, arg1 = 0; int rv; va_list ap; va_start(ap, cmd); - arg = va_arg(ap, intptr_t); + switch (cmd) { + case F_DUP3FD: + arg = va_arg(ap, int); + arg1 = va_arg(ap, int); + break; + default: + arg = va_arg(ap, intptr_t); + break; + } va_end(ap); if (cmd != F_SETLKW) - return (__fcntl(fildes, cmd, arg)); + return (__fcntl(fildes, cmd, arg, arg1)); PERFORM(__fcntl(fildes, cmd, arg)) } diff --git a/usr/src/man/man2/exec.2 b/usr/src/man/man2/exec.2 index 3276bc370827..bee13807a929 100644 --- a/usr/src/man/man2/exec.2 +++ b/usr/src/man/man2/exec.2 @@ -47,7 +47,7 @@ .\" Copyright 2015, Joyent, Inc. .\" Copyright 2024 Oxide Computer Company .\" -.Dd February 3, 2024 +.Dd June 21, 2024 .Dt EXEC 2 .Os .Sh NAME @@ -305,7 +305,9 @@ process image, except for those whose close-on-exec flag is set; see .Xr fcntl 2 . For those file descriptors that remain open, all attributes of the open file -description, including file locks, remain unchanged. +description, including file locks and the disposition of the close-on-fork flag +.Dv FD_CLOFORK , +remain unchanged. .Pp The preferred hardware address translation size .Po diff --git a/usr/src/man/man2/fcntl.2 b/usr/src/man/man2/fcntl.2 index 5426f5603e61..fc0ed802d08a 100644 --- a/usr/src/man/man2/fcntl.2 +++ b/usr/src/man/man2/fcntl.2 @@ -45,9 +45,9 @@ .\" Copyright (c) 2007, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. .\" Copyright 2015 Joyent, Inc. 
-.\" Copyright 2022 Oxide Computer Company +.\" Copyright 2024 Oxide Computer Company .\" -.TH FCNTL 2 "Feb 16, 2015" +.TH FCNTL 2 "June 21, 2024" .SH NAME fcntl \- file control .SH SYNOPSIS @@ -78,11 +78,11 @@ The values for \fIcmd\fR are defined in <\fBfcntl.h\fR> and include: .RS 15n Return a new file descriptor which is the lowest numbered available (that is, not already open) file descriptor greater than or equal to the third argument, -\fIarg\fR, taken as an integer of type \fBint\fR. The new file descriptor -refers to the same open file description as the original file descriptor, and -shares any locks. The \fBFD_CLOEXEC\fR flag associated with the new file -descriptor is cleared to keep the file open across calls to one of the -\fBexec\fR(2) functions. +\fIarg\fR, taken as an integer of type \fBint\fR. The new file descriptor refers +to the same open file description as the original file descriptor, and shares +any locks. The \fBFD_CLOEXEC\fR and \fBFD_CLOFORK\fR flags associated with the +new file descriptor are cleared to keep the file open across calls to one of the +\fBexec\fR(2) or \fBfork\fR(2) functions respectively. .RE .sp @@ -93,7 +93,23 @@ descriptor is cleared to keep the file open across calls to one of the .RS 15n Similar to \fBF_DUPFD\fR, but always returns \fIarg\fR. \fBF_DUP2FD\fR closes \fIarg\fR if it is open and not equal to \fIfildes.\fR \fBF_DUP2FD\fR is -equivalent to \fBdup2\fR(\fIfildes\fR, \fIarg\fR). +equivalent to \fBdup2\fR(\fIfildes\fR, \fIarg\fR). The \fBFD_CLOEXEC\fR and +\fBFD_CLOFORK\fR flags are cleared on the new descriptor, regardless of how they +were set on the original descriptor. +.RE + +.sp +.ne 2 +.na +\fB\fBF_DUP3FD\fR\fR +.ad +.RS 15n +Similar to \fBF_DUP2FD\fR; however, an additional argument is used to set the +resulting file descriptor flags, rather than simply clearing them. The file +descriptor flags may be the bitwise-inclusive-OR of \fBFD_CLOEXEC\fR and +\fBFD_CLOFORK\fR. 
This fourth argument is taken as type \fBint\fR. Specifying an +invalid flag in the fourth argument will cause the function to fail and set +\fBerrno\fR to \fBEINVAL\fR. +.RE .sp @@ -102,8 +118,20 @@ equivalent to \fBdup2\fR(\fIfildes\fR, \fIarg\fR). \fB\fBF_DUPFD_CLOEXEC\fR\fR .ad .RS 15n -Similar to \fBF_DUPFD\fR except that instead of clearing \fBFD_CLOEXEC\fR -it is explicitly set on the returned file descriptor. +Similar to \fBF_DUPFD\fR except that the returned file descriptor's flags are +set explicitly to \fBFD_CLOEXEC\fR, regardless of what the original file +descriptor's flags were. +.RE + +.sp +.ne 2 +.na +\fB\fBF_DUPFD_CLOFORK\fR\fR +.ad +.RS 15n +Similar to \fBF_DUPFD\fR except that the returned file descriptor's flags are +set explicitly to \fBFD_CLOFORK\fR, regardless of what the original file +descriptor's flags were. .RE .sp @@ -113,7 +141,18 @@ it is explicitly set on the returned file descriptor. .ad .RS 15n Similar to \fBF_DUP2FD\fR with two exceptions. The \fBFD_CLOEXEC\fR flag is -explicitly set on the returned file descriptor. If \fIfiledes\fR equals +explicitly set on the returned file descriptor. If \fIfildes\fR equals +\fIarg\fR, the call will fail setting \fBerrno\fR to \fBEINVAL\fR. +.RE + +.sp +.ne 2 +.na +\fB\fBF_DUP2FD_CLOFORK\fR\fR +.ad +.RS 15n +Similar to \fBF_DUP2FD\fR with two exceptions. The \fBFD_CLOFORK\fR flag is +explicitly set on the returned file descriptor. If \fIfildes\fR equals \fIarg\fR, the call will fail setting \fBerrno\fR to \fBEINVAL\fR. .RE @@ -226,9 +265,12 @@ file descriptions. .RS 15n Set the file descriptor flags defined in <\fBfcntl.h\fR>, that are associated with \fIfildes\fR, to the third argument, \fIarg\fR, taken as type \fBint\fR. -If the \fBFD_CLOEXEC\fR flag in the third argument is 0, the file will remain -open across the \fBexec()\fR functions; otherwise the file will be closed upon -successful execution of one of the \fBexec()\fR functions. 
+There are two flags that control the behavior of the file descriptor on +subsequent process activity. The \fBFD_CLOEXEC\fR flag, if set, will cause the +file descriptor to be closed upon successful execution of one of the +\fBexec\fR(2) functions. The \fBFD_CLOFORK\fR flag, if set, will cause the file +descriptor to be closed in the child process created by one of the \fBfork\fR(2) +functions; it remains open in the parent. .RE .sp @@ -486,7 +528,8 @@ that file descriptor terminates. POSIX-style locks are not inherited by a child process created using \fBfork\fR(2). An OFD-style lock is scoped to the file description for a file, not the process or open file descriptor. Thus all file descriptors referring to the same description (i.e. those created via the -\fBF_DUPFD\fR, \fBF_DUP2FD\fR, \fBF_DUPFD_CLOEXEC\fR, or \fBF_DUP2FD_CLOEXEC\fR +\fBF_DUPFD\fR, \fBF_DUP2FD\fR, \fBF_DUP3FD\fR, \fBF_DUPFD_CLOEXEC\fR, +\fBF_DUP2FD_CLOEXEC\fR, \fBF_DUPFD_CLOFORK\fR, or \fBF_DUP2FD_CLOFORK\fR commands to the \fBfcntl\fR(2) system call, or those created via the \fBdup\fR(2) system call, or those inherited by a child process created via \fBfork\fR(2)) reference the same lock, but a file descriptor obtained via a @@ -728,7 +771,25 @@ Upon successful completion, the value returned depends on \fIcmd\fR as follows: .sp .ne 2 .na -\fB\fBF_DUPFD\fR\fR +\fB\fBF_DUPFD\fR\fR, \fB\fBF_DUP2FD\fR\fR, \fB\fBF_DUP3FD\fR\fR +.ad +.RS 14n +A new file descriptor. +.RE + +.sp +.ne 2 +.na +\fB\fBF_DUPFD_CLOEXEC\fR\fR, \fB\fBF_DUPFD_CLOFORK\fR\fR +.ad +.RS 14n +A new file descriptor. +.RE + +.sp +.ne 2 +.na +\fB\fBF_DUP2FD_CLOEXEC\fR\fR, \fB\fBF_DUP2FD_CLOFORK\fR\fR .ad .RS 14n A new file descriptor. @@ -983,7 +1044,8 @@ and \fIfildes\fR is not a valid file descriptor open for writing. The \fIcmd\fR argument is \fBF_FREESP\fR and \fIfildes\fR is not a valid file descriptor open for writing. 
.sp -The \fIcmd\fR argument is \fBF_DUP2FD\fR, and \fIarg\fR is negative or is not +The \fIcmd\fR argument is \fBF_DUP2FD\fR, \fBF_DUP2FD_CLOEXEC\fR, +\fBF_DUP2FD_CLOFORK\fR, or \fBF_DUP3FD\fR and \fIarg\fR is negative or is not less than the current resource limit for \fBRLIMIT_NOFILE.\fR .sp The \fIcmd\fR argument is \fBF_SHARE,\fR the \fBf_access\fR share reservation @@ -1036,8 +1098,11 @@ does not support locking. The \fIcmd\fR argument is \fBF_UNSHARE\fR and a reservation with this \fBf_id\fR for this process does not exist. .sp -The \fIcmd\fR argument is \fBF_DUP2FD_CLOEXEC\fR and \fIfildes\fR is equal -to \fBarg\fR. +The \fIcmd\fR argument is \fBF_DUP2FD_CLOEXEC\fR or \fBF_DUP2FD_CLOFORK\fR and +\fIfildes\fR is equal to \fIarg\fR. +.sp +The \fIcmd\fR argument is \fBF_DUP3FD\fR and the fourth flags argument contains +unknown values. .RE .sp diff --git a/usr/src/man/man2/fork.2 b/usr/src/man/man2/fork.2 index 5c0079421ff2..4cdff1411bf1 100644 --- a/usr/src/man/man2/fork.2 +++ b/usr/src/man/man2/fork.2 @@ -43,12 +43,12 @@ .\" Copyright 1989 AT&T .\" Portions Copyright (c) 1994, X/Open Company Limited. All Rights Reserved. .\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright 2024 Oxide Computer Company .\" -.TH FORK 2 "Aug 18, 2019" +.TH FORK 2 "June 21, 2024" .SH NAME fork, fork1, forkall, forkx, forkallx \- create a new process .SH SYNOPSIS -.LP .nf #include #include @@ -79,7 +79,6 @@ fork, fork1, forkall, forkx, forkallx \- create a new process .fi .SH DESCRIPTION -.LP The \fBfork()\fR, \fBfork1()\fR, \fBforkall()\fR, \fBforkx()\fR, and \fBforkallx()\fR functions create a new process. 
The address space of the new process (child process) is an exact copy of the address space of the calling @@ -101,7 +100,7 @@ environment .TP .ie t \(bu .el o -open file descriptors +open file descriptors (except those marked close-on-fork, see discussion below) .RE .RS +4 .TP @@ -270,7 +269,10 @@ The child process has a different parent process \fBID\fR (that is, the process .el o The child process has its own copy of the parent's file descriptors and directory streams. Each of the child's file descriptors shares a common file -pointer with the corresponding file descriptor of the parent. +pointer with the corresponding file descriptor of the parent. In addition, any +file descriptors that were marked with the close-on-fork flag, \fBFD_CLOFORK\fR +(see \fBfcntl\fR(2) and \fBO_CLOFORK\fR in \fBopen\fR(2)), will not be present +in the child process, but remain open in the parent. .RE .RS +4 .TP @@ -353,7 +355,6 @@ descriptor is open in the child. If a descriptor is closed in the parent, attempts to operate on the door descriptor will fail even if it is still open in the child. .SS "Threads" -.LP A call to \fBforkall()\fR or \fBforkallx()\fR replicates in the child process all of the threads (see \fBthr_create\fR(3C) and \fBpthread_create\fR(3C)) in the parent process. A call to \fBfork1()\fR or \fBforkx()\fR replicates only @@ -379,7 +380,6 @@ provides all threading support for both sets of application programming interfaces. Applications that require replicate-all fork semantics must call \fBforkall()\fR or \fBforkallx()\fR. .SS "Fork Extensions" -.LP The \fBforkx()\fR and \fBforkallx()\fR functions accept a \fIflags\fR argument consisting of a bitwise inclusive-OR of zero or more of the following flags, which are defined in the header \fB\fR: @@ -417,7 +417,6 @@ exits. If the \fIflags\fR argument is 0 \fBforkx()\fR is identical to \fBfork()\fR and \fBforkallx()\fR is identical to \fBforkall()\fR. 
.SS "\fBfork()\fR Safety" -.LP If a multithreaded application calls \fBfork()\fR, \fBfork1()\fR, or \fBforkx()\fR, and the child does more than simply call one of the \fBexec\fR(2) functions, there is a possibility of deadlock occurring in the @@ -447,14 +446,12 @@ specification: "To avoid errors, the child process may only execute Async-Signal-Safe operations until such time as one of the \fBexec\fR(2) functions is called." .SH RETURN VALUES -.LP Upon successful completion, \fBfork()\fR, \fBfork1()\fR, \fBforkall()\fR, \fBforkx()\fR, and \fBforkallx()\fR return \fB0\fR to the child process and return the process \fBID\fR of the child process to the parent process. Otherwise, \fB(pid_t)\(mi1\fR is returned to the parent process, no child process is created, and \fBerrno\fR is set to indicate the error. .SH ERRORS -.LP The \fBfork()\fR, \fBfork1()\fR, \fBforkall()\fR, \fBforkx()\fR, and \fBforkallx()\fR functions will fail if: .sp @@ -501,7 +498,6 @@ The \fIflags\fR argument is invalid. .RE .SH ATTRIBUTES -.LP See \fBattributes\fR(7) for descriptions of the following attributes: .sp @@ -523,7 +519,6 @@ Standard See below. .LP For \fBfork()\fR, see \fBstandards\fR(7). .SH SEE ALSO -.LP \fBalarm\fR(2), \fBexec\fR(2), \fBexit\fR(2), \fBfcntl\fR(2), \fBgetitimer\fR(2), \fBgetrlimit\fR(2), \fBmemcntl\fR(2), \fBmmap\fR(2), \fBnice\fR(2), \fBpriocntl\fR(2), \fBsemop\fR(2), \fBshmop\fR(2), @@ -538,7 +533,6 @@ For \fBfork()\fR, see \fBstandards\fR(7). .BR privileges (7), .BR standards (7) .SH NOTES -.LP An application should call \fB_exit()\fR rather than \fBexit\fR(3C) if it cannot \fBexecve()\fR, since \fBexit()\fR will flush and close standard I/O channels and thereby corrupt the parent process's standard I/O data structures. diff --git a/usr/src/man/man2/open.2 b/usr/src/man/man2/open.2 index 3096f4cc9e2d..2213facf0982 100644 --- a/usr/src/man/man2/open.2 +++ b/usr/src/man/man2/open.2 @@ -47,9 +47,9 @@ .\" All Rights Reserved. .\" Copyright 2015 Nexenta Systems, Inc. 
All rights reserved. .\" Copyright 2020 Joyent, Inc. -.\" Copyright 2022 Oxide Computer Company +.\" Copyright 2024 Oxide Computer Company .\" -.Dd February 5, 2022 +.Dd February 5, 2024 .Dt OPEN 2 .Os .Sh NAME @@ -113,9 +113,6 @@ function returns a file descriptor for the named file that is the lowest file descriptor not currently open for that process. The open file description is new, and therefore the file descriptor does not share it with any other process in the system. -The -.Dv FD_CLOEXEC -file descriptor flag associated with the new file descriptor is cleared. .Pp The file offset used to mark the current position within the file is set to the beginning of the file. @@ -311,6 +308,26 @@ to If set, the file descriptor returned will be closed prior to any future .Xr exec 2 calls. +This sets the +.Dv FD_CLOEXEC +flag on the file descriptor. +If not performed at open time, this can later be set with the +.Dv F_SETFD +.Xr fcntl 2 +command. +.It Dv O_CLOFORK +If set, the file descriptor returned will be closed in any child processes +created with the +.Xr fork 2 +family of functions. +The file descriptor will remain open in the parent. +This sets the +.Dv FD_CLOFORK +flag on the file descriptor. +If not performed at open time, this can later be set with the +.Dv F_SETFD +.Xr fcntl 2 +command. .It Dv O_NONBLOCK O_NDELAY These flags can affect subsequent reads and writes .Po diff --git a/usr/src/man/man2/pipe.2 b/usr/src/man/man2/pipe.2 index f78550966b66..3675455aa219 100644 --- a/usr/src/man/man2/pipe.2 +++ b/usr/src/man/man2/pipe.2 @@ -45,12 +45,12 @@ .\" Copyright (c) 2002, Sun Microsystems, Inc. All Rights Reserved. .\" Portions Copyright (c) 2013, OmniTI Computer Consulting, Inc. .\" All Rights Reserved.
+.\" Copyright 2024 Oxide Computer Company .\" -.TH PIPE 2 "Apr 19, 2013" +.TH PIPE 2 "June 21, 2024" .SH NAME pipe \- create an interprocess channel .SH SYNOPSIS -.LP .nf #include @@ -60,15 +60,14 @@ pipe \- create an interprocess channel .fi .SH DESCRIPTION -.sp -.LP The \fBpipe()\fR and pipe2() functions create an I/O mechanism called a pipe and returns two file descriptors, \fIfildes\fR[\fB0\fR] and \fIfildes\fR[\fB1\fR]. The files associated with \fIfildes\fR[\fB0\fR] and \fIfildes\fR[\fB1\fR] are streams and are both opened for reading and writing. The \fBpipe()\fR call will clear the \fBO_NDELAY\fR, -\fBO_NONBLOCK\fR, and \fBFD_CLOEXEC\fR flags on both file descriptors. The -\fBfcntl\fR(2) function can be used to set these flags. +\fBO_NONBLOCK\fR, and the \fBFD_CLOEXEC\fR and \fBFD_CLOFORK\fR flags on +both file descriptors. The \fBfcntl\fR(2) function can be used to set these +flags. .sp .LP The \fBpipe2()\fR call will clear the \fBO_NDELAY\fR on both filedescriptors. @@ -98,6 +97,17 @@ Both file descriptors will be opened with the FD_CLOEXEC flag set. Both file descriptors will be closed prior to any future exec() calls. .RE +.sp +.ne 2 +.na +\fB\fBO_CLOFORK\fR\fR +.ad +.RS 12n +Both file descriptors will be opened with the FD_CLOFORK flag set. Both file +descriptors will be closed in any child processes created with the fork() family +of calls. +.RE + .sp .LP A read from \fIfildes\fR[\fB0\fR] accesses the data written to @@ -109,13 +119,9 @@ on a \fBFIFO\fR basis. Upon successful completion \fBpipe()\fR marks for update the \fBst_atime\fR, \fBst_ctime\fR, and \fBst_mtime\fR fields of the pipe. .SH RETURN VALUES -.sp -.LP Upon successful completion, \fB0\fR is returned. Otherwise, \fB\(mi1\fR is returned and \fBerrno\fR is set to indicate the error. .SH ERRORS -.sp -.LP The \fBpipe()\fR and \fBpipe2()\fR functions will fail if: .sp .ne 2 @@ -155,13 +161,11 @@ The \fBpipe2()\fR function will also fail if: .ad .RS 10n The \fIflags\fR argument is illegal. 
Valid \fIflags\fR are zero or a -bitwise inclusive-OR of \fBO_CLOEXEC\fR and \fBO_NONBLOCK\fR. +bitwise inclusive-OR of \fBO_CLOEXEC\fR, \fBO_CLOFORK\fR, and \fBO_NONBLOCK\fR. .RE .SH ATTRIBUTES -.sp -.LP See \fBattributes\fR(7) for descriptions of the following attributes: .sp @@ -178,8 +182,6 @@ MT-Level Async-Signal-Safe .TE .SH SEE ALSO -.sp -.LP .BR sh (1), .BR fcntl (2), .BR fstat (2), @@ -193,8 +195,6 @@ MT-Level Async-Signal-Safe .BR attributes (7), .BR standards (7) .SH NOTES -.sp -.LP Since a pipe is bi-directional, there are two separate flows of data. Therefore, the size (\fBst_size\fR) returned by a call to \fBfstat\fR(2) with argument \fIfildes\fR[\fB0\fR] or \fIfildes\fR[\fB1\fR] is the number of bytes diff --git a/usr/src/man/man3c/dup2.3c b/usr/src/man/man3c/dup2.3c index 46363c75c782..b199d14dfbcb 100644 --- a/usr/src/man/man3c/dup2.3c +++ b/usr/src/man/man3c/dup2.3c @@ -2,10 +2,11 @@ .\" Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. .\" Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 1989 AT&T +.\" Copyright 2024 Oxide Computer Company .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH DUP2 3C "February 17, 2023" +.TH DUP2 3C "June 21, 2024" .SH NAME dup2, dup3 \- duplicate an open file descriptor .SH SYNOPSIS @@ -32,15 +33,40 @@ to \fIfildes\fR, or if \fIfildes\fR is not a valid open file descriptor, .sp .LP The \fBdup2()\fR function is equivalent to \fBfcntl\fR(\fIfildes\fR, -\fBF_DUP2FD\fR, \fIfildes2\fR). +\fBF_DUP2FD\fR, \fIfildes2\fR). The returned file descriptor will have neither +the close-on-exec (\fBFD_CLOEXEC\fR) nor close-on-fork (\fBFD_CLOFORK\fR) flags +set on it, regardless of what is set on \fIfildes\fR. .sp .LP The \fBdup3()\fR function works similarly to the \fBdup2()\fR function with two exceptions. If \fIfildes\fR and \fIfildes2\fR point to the same file -descriptor, -1 is returned and errno set to \fBEINVAL\fR. If \fIflags\fR -is \fBO_CLOEXEC\fR, then \fIfiledes2\fR will have the \fBFD_CLOEXEC\fR flag -set causing the file descriptor to be closed during any future call of -\fBexec\fR(2). +descriptor, -1 is returned and errno set to \fBEINVAL\fR. The \fIflags\fR +argument allows for control of the returned file descriptor's flags. Valid +values are the bitwise-inclusive-OR of: + +.sp +.ne 2 +.na +\fB\fBO_CLOEXEC\fR\fR +.ad +.RS 12n +The returned file descriptor will have the close-on-exec flag, \fBFD_CLOEXEC\fR +set on it. The file descriptor will be automatically closed when the process +calls the \fBexec\fR(2) family of functions. +.RE + +.sp +.ne 2 +.na +\fB\fBO_CLOFORK\fR\fR +.ad +.RS 12n +The returned file descriptor will have the close-on-fork flag, \fBFD_CLOFORK\fR +set on it. The file descriptor will be automatically closed in any child +processes created with the \fBfork\fR(2) family of functions. It will remain +open in the parent.
+.RE + .SH RETURN VALUES Upon successful completion a non-negative integer representing the file descriptor is returned. Otherwise, \fB\(mi1\fR is returned and \fBerrno\fR is @@ -93,8 +119,9 @@ Additionally, the \fBdup3()\fR function will fail if: \fB\fBEINVAL\fR\fR .ad .RS 10n -\fIflags\fR has a value other than 0 or \fBO_CLOEXEC\fR or \fIfildes\fR and -\fIfildes2\fR point to the same file descriptor. +\fIflags\fR has a value other than 0 or the bitwise-inclusive-OR of +\fBO_CLOEXEC\fR and \fBO_CLOFORK\fR, or \fIfildes\fR and \fIfildes2\fR point to +the same file descriptor. .RE .SH ATTRIBUTES @@ -118,6 +145,7 @@ MT-Level Async-Signal-Safe .BR creat (2), .BR exec (2), .BR fcntl (2), +.BR fork (2), .BR getrlimit (2), .BR open (2), .BR pipe (2), diff --git a/usr/src/man/man3head/fcntl.h.3head b/usr/src/man/man3head/fcntl.h.3head index b525d7994791..449121102307 100644 --- a/usr/src/man/man3head/fcntl.h.3head +++ b/usr/src/man/man3head/fcntl.h.3head @@ -42,18 +42,17 @@ .\" .\" Copyright 1989 AT&T .\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved. +.\" Copyright 2024 Oxide Computer Company .\" -.TH FCNTL.H 3HEAD "January 20, 2020" +.TH FCNTL.H 3HEAD "June 21, 2024" .SH NAME fcntl.h, fcntl \- file control options .SH SYNOPSIS -.LP .nf #include .fi .SH DESCRIPTION -.LP The \fB\fR header defines the following requests and arguments for use by the functions \fBfcntl\fR(2), \fBopen\fR(2), and \fBopenat\fR(2). .sp @@ -65,7 +64,7 @@ Values for \fIcmd\fR used by \fBfcntl()\fR (the following values are unique): \fB\fBF_DUPFD\fR\fR .ad .RS 15n -Duplicate file descriptor. +Duplicate file descriptor. File descriptor flags cleared on duplicate. .RE .sp @@ -74,7 +73,58 @@ Duplicate file descriptor. \fB\fBF_DUP2FD\fR\fR .ad .RS 15n -Similar to \fBF_DUPFD\fR, but always returns \fIarg\fR. +Similar to \fBF_DUPFD\fR, but always returns \fIarg\fR. File descriptor flags +cleared on duplicate.
+.RE + +.sp +.ne 2 +.na +\fB\fBF_DUP3FD\fR\fR +.ad +.RS 15n +Similar to \fBF_DUP2FD\fR, but takes a fourth \fBint\fR argument which specifies +the file descriptor flags (\fBFD_CLOEXEC\fR and \fBFD_CLOFORK\fR). +.RE + +.sp +.ne 2 +.na +\fB\fBF_DUPFD_CLOEXEC\fR\fR +.ad +.RS 15n +Similar to \fBF_DUPFD\fR, but the \fBFD_CLOEXEC\fR flag is set on the returned +descriptor. +.RE + +.sp +.ne 2 +.na +\fB\fBF_DUPFD_CLOFORK\fR\fR +.ad +.RS 15n +Similar to \fBF_DUPFD\fR, but the \fBFD_CLOFORK\fR flag is set on the returned +descriptor. +.RE + +.sp +.ne 2 +.na +\fB\fBF_DUP2FD_CLOEXEC\fR\fR +.ad +.RS 15n +Similar to \fBF_DUP2FD\fR, but the \fBFD_CLOEXEC\fR flag is set on the returned +descriptor. +.RE + +.sp +.ne 2 +.na +\fB\fBF_DUP2FD_CLOFORK\fR\fR +.ad +.RS 15n +Similar to \fBF_DUP2FD\fR, but the \fBFD_CLOFORK\fR flag is set on the returned +descriptor. .RE .sp @@ -247,6 +297,16 @@ Close the file descriptor upon execution of an \fBexec\fR function (see \fBexec\fR(2)). .RE +.sp +.ne 2 +.na +\fB\fBFD_CLOFORK\fR\fR +.ad +.RS 14n +Close the file descriptor in any child process created with the \fBfork\fR(2) +family functions. +.RE + .sp .LP Values for \fBl_type\fR used for record locking with \fBfcntl()\fR (the @@ -441,6 +501,16 @@ Set append mode. The file should be closed on any calls to \fBexec\fR(2). .RE +.sp +.ne 2 +.na +.B O_CLOFORK +.ad +.RS 12n +The file will be closed in any child processes created with calls to +\fBfork\fR(2). +.RE + .sp .ne 2 .na @@ -605,7 +675,6 @@ long f_id; /* Process unique identifier */ .in -2 .SH ATTRIBUTES -.LP See \fBattributes\fR(7) for descriptions of the following attributes: .sp @@ -622,10 +691,10 @@ Standard See \fBstandards\fR(7). .TE .SH SEE ALSO -.LP .BR creat (2), .BR exec (2), .BR fcntl (2), +.BR fork (2), .BR open (2), .BR fdatasync (3C), .BR fsync (3C), @@ -633,7 +702,6 @@ Standard See \fBstandards\fR(7). 
.BR fsattr (7), .BR standards (7) .SH NOTES -.LP Data is successfully transferred for a write operation to a regular file when the system ensures that all data written is readable on any subsequent open of the file (even one that follows a system or power failure) in the absence of a diff --git a/usr/src/man/man3head/socket.h.3head b/usr/src/man/man3head/socket.h.3head index 5a9dee8c2ca5..85797f76001c 100644 --- a/usr/src/man/man3head/socket.h.3head +++ b/usr/src/man/man3head/socket.h.3head @@ -43,8 +43,9 @@ .\" Copyright (c) 1992, X/Open Company Limited All Rights Reserved. .\" Portions Copyright (c) 2009, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 2020 OmniOS Community Edition (OmniOSce) Association. +.\" Copyright 2024 Oxide Computer Company .\" -.TH SOCKET.H 3HEAD "Sep 18, 2020" +.TH SOCKET.H 3HEAD "June 21, 2024" .SH NAME socket.h, socket, CMSG_DATA, CMSG_FIRSTHDR, CMSG_LEN, CMSG_NXTHDR, CMSG_SPACE \- Internet Protocol family .SH SYNOPSIS @@ -120,7 +121,8 @@ header defines the following macros for use as the \fBcmsg_type\fR values when .ad .RS 14n Indicates that the data array contains the access rights (set of open file -descriptors) to be sent or received. +descriptors) to be sent or received. Each file descriptor requires one +\fBint\fR to send or receive. 
.RE .sp @@ -386,7 +388,8 @@ int l_linger /* linger time, in seconds */ .sp .LP -The \fB\fR header defines the following macros: +The \fB\fR header defines the following macros which indicate +types of sockets: .sp .ne 2 .na @@ -405,6 +408,24 @@ Datagram socket Byte-stream socket .RE +.sp +.ne 2 +.na +\fBSOCK_RAW\fR +.ad +.RS 18n +Raw protocol interface +.RE + +.sp +.ne 2 +.na +\fBSOCK_RDM\fR +.ad +.RS 18n +Reliably delivered message +.RE + .sp .ne 2 .na @@ -414,6 +435,53 @@ Byte-stream socket Sequenced-packet socket .RE +.sp +.LP +In some cases, the above types are bitwise-inclusive-ORed with zero or more of +the following macros which modify the socket's default behavior: + +.sp +.ne 2 +.na +\fBSOCK_CLOEXEC\fR +.ad +.RS 18n +The socket should have the close-on-exec, \fBFD_CLOEXEC\fR file descriptor flag +set on it. The socket will be closed when the process calls any of the +\fBexec\fR(2) family of functions. +.RE + +.sp +.ne 2 +.na +\fBSOCK_CLOFORK\fR +.ad +.RS 18n +The socket should have the close-on-fork, \fBFD_CLOFORK\fR file descriptor flag +set on it. The socket will be closed in any child process created with the +\fBfork\fR(2) family of functions. +.RE + +.sp +.ne 2 +.na +\fBSOCK_NDELAY\fR +.ad +.RS 18n +The socket should have the \fBO_NDELAY\fR flag set. See \fBopen\fR(2) for a +discussion of the specific non-blocking behavior this implies. +.RE + +.sp +.ne 2 +.na +\fBSOCK_NONBLOCK\fR +.ad +.RS 18n +The socket should have the \fBO_NONBLOCK\fR flag set. See \fBopen\fR(2) for a +discussion of the specific non-blocking behavior this implies. +.RE + .sp .LP The \fB\fR header defines the following macros for use as the @@ -637,6 +705,32 @@ Wait for complete message. Do not generate \fBSIGPIPE\fR signal. .RE +.sp +.ne 2 +.na +\fBMSG_CMSG_CLOEXEC\fR +.ad +.RS 15n +When receiving a message with the \fBSCM_RIGHTS\fR ancillary data present, all +file descriptors should have the close-on-exec, \fBFD_CLOEXEC\fR flag set on +them. 
They will be closed when the process successfully calls any of the +\fBexec\fR(2) family of functions. This has no effect when sending +\fBSCM_RIGHTS\fR ancillary data. +.RE + +.sp +.ne 2 +.na +\fBMSG_CMSG_CLOFORK\fR +.ad +.RS 15n +When receiving a message with the \fBSCM_RIGHTS\fR ancillary data present, all +file descriptors should have the close-on-fork, \fBFD_CLOFORK\fR flag set on +them. They will be closed in any child processes created with the \fBfork\fR(2) +family of functions. This has no effect when sending \fBSCM_RIGHTS\fR ancillary +data. +.RE + .sp .LP The \fB\fR header defines the following macros: diff --git a/usr/src/man/man3socket/accept.3socket b/usr/src/man/man3socket/accept.3socket index 2a66a51ab167..3feabb802197 100644 --- a/usr/src/man/man3socket/accept.3socket +++ b/usr/src/man/man3socket/accept.3socket @@ -2,14 +2,14 @@ .\" Copyright 1989 AT&T .\" Copyright (C) 2002, Sun Microsystems, Inc. All Rights Reserved .\" Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. +.\" Copyright 2024 Oxide Computer Company .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH ACCEPT 3SOCKET "Apr 19, 2013" +.TH ACCEPT 3SOCKET "June 21, 2024" .SH NAME accept \- accept a connection on a socket .SH SYNOPSIS -.LP .nf \fBcc\fR [ \fIflag\fR ... ] \fIfile\fR ... 
\fB-lsocket\fR \fB -lnsl \fR [ \fIlibrary\fR ... ] #include @@ -22,7 +22,6 @@ accept \- accept a connection on a socket .fi .SH DESCRIPTION -.LP The argument \fIs\fR is a socket that has been created with \fBsocket\fR(3SOCKET) and bound to an address with \fBbind\fR(3SOCKET), and that is listening for connections after a call to \fBlisten\fR(3SOCKET). The @@ -60,7 +59,7 @@ The \fBaccept4()\fR function allows flags that control the behavior of a successfully accepted socket. If \fIflags\fR is 0, \fBaccept4()\fR acts identically to \fBaccept()\fR. Values for \fIflags\fR are constructed by a bitwise-inclusive-OR of flags from the following list, defined in -. +. .sp .ne 2 .na @@ -73,6 +72,18 @@ caller thus avoiding the race condition between \fBaccept()\fR and \fBfcntl()\fR. See, \fBO_CLOEXEC\fR in \fBopen(2)\fR for more details. .RE +.sp +.ne 2 +.na +\fB\fBSOCK_CLOFORK\fR\fR +.ad +.RS 12n +The accepted socket will have the FD_CLOFORK flag set as if \fBfcntl()\fR +was called on it. This flag is set before the socket is passed to the +caller thus avoiding the race condition between \fBaccept()\fR and +\fBfcntl()\fR. See, \fBO_CLOFORK\fR in \fBopen(2)\fR for more details. +.RE + .sp .ne 2 .na @@ -102,11 +113,9 @@ an \fBaccept()\fR by selecting or polling it for a read. However, this will only indicate when a connect indication is pending; it is still necessary to call \fBaccept()\fR. .SH RETURN VALUES -.LP The \fBaccept()\fR function returns \fB\(mi1\fR on error. If it succeeds, it returns a non-negative integer that is a descriptor for the accepted socket. .SH ERRORS -.LP \fBaccept()\fR and \fBaccept4()\fR will fail if: .sp .ne 2 @@ -231,12 +240,11 @@ Additionally, \fBaccept4()\fR will fail if: .ad .RS 16n The \fIflags\fR value is invalid. The \fIflags\fR argument can only be the -bitwise inclusive-OR of \fBSOCK_CLOEXEC\fR, \fBSOCK_NONBLOCK\fR, and -\fBSOCK_NDELAY\fR. 
+bitwise inclusive-OR of \fBSOCK_CLOEXEC\fR, \fBSOCK_CLOFORK\fR, +\fBSOCK_NONBLOCK\fR, and \fBSOCK_NDELAY\fR. .RE .SH ATTRIBUTES -.LP See \fBattributes\fR(7) for descriptions of the following attributes: .sp @@ -251,7 +259,6 @@ MT-Level Safe .TE .SH SEE ALSO -.LP \fBpoll\fR(2), \fBbind\fR(3SOCKET), \fBconnect\fR(3SOCKET), \fBlisten\fR(3SOCKET), \fBsockaddr\fR(3SOCKET), \fBselect\fR(3C), \fBsocket.h\fR(3HEAD), \fBsocket\fR(3SOCKET), \fBnetconfig\fR(5), diff --git a/usr/src/man/man3socket/recv.3socket b/usr/src/man/man3socket/recv.3socket index 9d95155967f2..028100c535af 100644 --- a/usr/src/man/man3socket/recv.3socket +++ b/usr/src/man/man3socket/recv.3socket @@ -2,14 +2,14 @@ .\" Copyright 1989 AT&T .\" Copyright (C) 2006, Sun Microsystems, Inc. All Rights Reserved .\" Copyright (c) 2018, Joyent, Inc. +.\" Copyright 2024 Oxide Computer Company .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH RECV 3SOCKET "September 10, 2018" +.TH RECV 3SOCKET "June 21, 2024" .SH NAME recv, recvfrom, recvmsg \- receive a message from a socket .SH SYNOPSIS -.LP .nf \fBcc\fR [ \fIflag\fR... ] \fIfile\fR... \fB-lsocket\fR \fB -lnsl \fR [ \fIlibrary\fR... 
] #include @@ -31,7 +31,6 @@ recv, recvfrom, recvmsg \- receive a message from a socket .fi .SH DESCRIPTION -.LP The \fBrecv()\fR, \fBrecvfrom()\fR, and \fBrecvmsg()\fR functions are used to receive messages from another socket. The \fIs\fR socket is created with \fBsocket\fR(3SOCKET). @@ -112,17 +111,39 @@ specifying \fBO_NONBLOCK\fR on the file descriptor of a socket, except that write requests are unaffected. .RE +.sp +.ne 2 +.na +\fB\fBMSG_CMSG_CLOEXEC\fR\fR +.ad +.RS 16n +When receiving the \fBSCM_RIGHTS\fR ancillary data, all such file descriptors +should be marked with the close-on-exec, \fBFD_CLOEXEC\fR flag. These file +descriptors will be closed on successful execution of the \fBexec\fR(2) family +of functions. +.RE + +.sp +.ne 2 +.na +\fB\fBMSG_CMSG_CLOFORK\fR\fR +.ad +.RS 16n +When receiving the \fBSCM_RIGHTS\fR ancillary data, all such file descriptors +should be marked with the close-on-fork, \fBFD_CLOFORK\fR flag. These file +descriptors will be closed in any children created with the \fBfork\fR(2) family +of functions. +.RE + .sp .LP The \fBrecvmsg()\fR function call uses a \fBmsghdr\fR structure defined in <\fBsys/socket.h\fR> to minimize the number of directly supplied parameters. .SH RETURN VALUES -.LP Upon successful completion, these functions return the number of bytes received. Otherwise, they return \fB-1\fR and set \fBerrno\fR to indicate the error. .SH ERRORS -.LP In addition to the errors documented below, an asynchronous error generated by the underlying socket protocol may be returned. For the full list of errors, please see the corresponding socket protocol manual page. 
For example, for a @@ -275,7 +296,6 @@ One of the \fIiov_len\fR values in the \fBmsg_iov\fR array member of the .RE .SH ATTRIBUTES -.LP See \fBattributes\fR(7) for descriptions of the following attributes: .sp @@ -292,7 +312,6 @@ MT-Level Safe .TE .SH SEE ALSO -.LP \fBfcntl\fR(2), \fBioctl\fR(2), \fBpoll\fR(2), \fBread\fR(2), \fBconnect\fR(3SOCKET), \fBgetsockopt\fR(3SOCKET), \fBlibxnet\fR(3LIB), \fBport_get(3C)\fR, \fBselect\fR(3C), diff --git a/usr/src/man/man3socket/socket.3socket b/usr/src/man/man3socket/socket.3socket index cfb72c264652..3148a03c8818 100644 --- a/usr/src/man/man3socket/socket.3socket +++ b/usr/src/man/man3socket/socket.3socket @@ -2,11 +2,12 @@ .\" Copyright (C) 2009, Sun Microsystems, Inc. All Rights Reserved. .\" Copyright 1989 AT&T .\" Copyright (c) 2013, OmniTI Computer Consulting, Inc. All Rights Reserved. +.\" Copyright 2024 Oxide Computer Company .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] .\" Copyright 2022 Garrett D'Amore -.TH SOCKET 3SOCKET "July 6, 2022" +.TH SOCKET 3SOCKET "June 21, 2024" .SH NAME socket \- create an endpoint for communication .SH SYNOPSIS @@ -82,9 +83,22 @@ following list, defined in . 
\fB\fBSOCK_CLOEXEC\fR\fR .ad .RS 12n -Creates the socket with the \fBFD_CLOEXEC\fR flag set, causing the underlying -file descriptor to be closed prior to any future calls to \fBexec\fR(2). This -is similar in purpose to the \fBO_CLOEXEC\fR flag to \fBopen\fR(2). +Creates the socket with the close-on-exec flag, \fBFD_CLOEXEC\fR set. This +is similar in purpose to the \fBO_CLOEXEC\fR flag to \fBopen\fR(2). The file +descriptor will be closed on any successful calls to the \fBexec\fR(2) family of +functions. +.RE + +.sp +.ne 2 +.na +\fB\fBSOCK_CLOFORK\fR\fR +.ad +.RS 12n +Creates the socket with the close-on-fork flag, \fBFD_CLOFORK\fR, set. This +is similar in purpose to the \fBO_CLOFORK\fR flag to \fBopen\fR(2). The file +descriptor will be closed in any children created with the \fBfork\fR(2) family +of functions. .RE .sp diff --git a/usr/src/man/man3socket/socketpair.3socket b/usr/src/man/man3socket/socketpair.3socket index 00fbe418b81a..132811ec866c 100644 --- a/usr/src/man/man3socket/socketpair.3socket +++ b/usr/src/man/man3socket/socketpair.3socket @@ -4,11 +4,10 @@ .\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License"). You may not use this file except in compliance with the License. .\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing. See the License for the specific language governing permissions and limitations under the License. .\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner] -.TH SOCKETPAIR 3SOCKET "Jan 10, 2001" +.TH SOCKETPAIR 3SOCKET "June 21, 2024" .SH NAME socketpair \- create a pair of connected sockets .SH SYNOPSIS -.LP .nf \fBcc\fR [ \fIflag\fR ... ] \fIfile\fR ... \fB-lsocket\fR \fB -lnsl \fR [ \fIlibrary\fR ... ] #include @@ -18,20 +17,20 @@ socketpair \- create a pair of connected sockets .fi .SH DESCRIPTION -.sp -.LP The \fBsocketpair()\fR library call creates an unnamed pair of connected sockets in the specified address family \fIdomain\fR, of the specified \fItype\fR, that uses the optionally specified \fIprotocol\fR. The descriptors that are used in referencing the new sockets are returned in \fIsv\fR[0] and \fIsv\fR[1]. The two sockets are indistinguishable. -.SH RETURN VALUES .sp .LP +The \fItype\fR argument supports the various flags that modify the returned +socket's behavior that are discussed in \fBsocket\fR(3SOCKET): +\fBSOCK_CLOEXEC\fR, \fBSOCK_CLOFORK\fR, \fBSOCK_NDELAY\fR, and +\fBSOCK_NONBLOCK\fR. +.SH RETURN VALUES \fBsocketpair()\fR returns \fB\(mi1\fR on failure and \fB0\fR on success. .SH ERRORS -.sp -.LP The call succeeds unless: .sp .ne 2 @@ -97,8 +96,6 @@ The process does not have appropriate privileges. .RE .SH ATTRIBUTES -.sp -.LP See \fBattributes\fR(7) for descriptions of the following attributes: .sp @@ -113,14 +110,10 @@ MT-Level Safe .TE .SH SEE ALSO -.sp -.LP .BR pipe (2), .BR read (2), .BR write (2), .BR socket.h (3HEAD), .BR attributes (7) .SH NOTES -.sp -.LP This call is currently implemented only for the \fBAF_UNIX\fR address family.
diff --git a/usr/src/pkg/manifests/system-test-ostest.p5m b/usr/src/pkg/manifests/system-test-ostest.p5m index 1855defb862c..ea0422b39e0a 100644 --- a/usr/src/pkg/manifests/system-test-ostest.p5m +++ b/usr/src/pkg/manifests/system-test-ostest.p5m @@ -106,6 +106,10 @@ file path=opt/os-tests/tests/libtopo/digraph-test-in.xml mode=0444 dir path=opt/os-tests/tests/minttl file path=opt/os-tests/tests/minttl/minttl mode=0555 file path=opt/os-tests/tests/minttl/minttl_err mode=0555 +dir path=opt/os-tests/tests/oclo +file path=opt/os-tests/tests/oclo/oclo mode=0555 +file path=opt/os-tests/tests/oclo/oclo_errors mode=0555 +file path=opt/os-tests/tests/oclo/ocloexec_verify mode=0555 file path=opt/os-tests/tests/odirectory.32 mode=0555 file path=opt/os-tests/tests/odirectory.64 mode=0555 dir path=opt/os-tests/tests/pf_key diff --git a/usr/src/test/os-tests/runfiles/default.run b/usr/src/test/os-tests/runfiles/default.run index ea3036218c13..c587f8f1c168 100644 --- a/usr/src/test/os-tests/runfiles/default.run +++ b/usr/src/test/os-tests/runfiles/default.run @@ -203,3 +203,6 @@ arch = i86pc [/opt/os-tests/tests/saveargs/testmatch/testmatch] arch = i86pc + +[/opt/os-tests/tests/oclo] +tests = ['oclo', 'oclo_errors'] diff --git a/usr/src/test/os-tests/tests/Makefile b/usr/src/test/os-tests/tests/Makefile index af0cd751227d..ef77d6117447 100644 --- a/usr/src/test/os-tests/tests/Makefile +++ b/usr/src/test/os-tests/tests/Makefile @@ -35,6 +35,7 @@ SUBDIRS = \ ksid \ libtopo \ minttl \ + oclo \ pf_key \ poll \ portfs \ diff --git a/usr/src/test/os-tests/tests/oclo/Makefile b/usr/src/test/os-tests/tests/oclo/Makefile new file mode 100644 index 000000000000..014727d4ab4d --- /dev/null +++ b/usr/src/test/os-tests/tests/oclo/Makefile @@ -0,0 +1,59 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. 
+# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2024 Oxide Computer Company +# + +PROGS = \ + oclo \ + oclo_errors \ + ocloexec_verify + +ROOTOPTDIR = $(ROOT)/opt/os-tests/tests +ROOTOPTOCLO = $(ROOTOPTDIR)/oclo +ROOTOPTPROGS = $(PROGS:%=$(ROOTOPTOCLO)/%) + +include $(SRC)/cmd/Makefile.cmd +include $(SRC)/cmd/Makefile.cmd.64 +include $(SRC)/cmd/Makefile.ctf + +CSTD = $(GNU_C99) +CTF_MODE = link +CPPFLAGS += -D_XOPEN_SOURCE=800 -D__EXTENSIONS__ + +oclo := LDLIBS += -lsocket +oclo_errors := LDLIBS += -lsocket + +.KEEP_STATE: + +all: $(PROGS) + +install: $(ROOTOPTPROGS) + +clean: + $(RM) *.o + +$(ROOTOPTPROGS): $(PROGS) $(ROOTOPTOCLO) + +$(ROOTOPTDIR): + $(INS.dir) + +$(ROOTOPTOCLO): $(ROOTOPTDIR) + $(INS.dir) + +$(ROOTOPTOCLO)/%: % + $(INS.file) + +clobber: clean + $(RM) $(PROGS) + +FRC: diff --git a/usr/src/test/os-tests/tests/oclo/oclo.c b/usr/src/test/os-tests/tests/oclo/oclo.c new file mode 100644 index 000000000000..eb8e825e255f --- /dev/null +++ b/usr/src/test/os-tests/tests/oclo/oclo.c @@ -0,0 +1,1302 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2024 Oxide Computer Company + */ + +/* + * Verify the behavior of the various O_CLOFORK and O_CLOEXEC variants. 
In + * particular getting this via: + * + * - open(2): O_CLOFORK/O_CLOEXEC + * - fcntl(2): F_SETFD FD_CLOFORK/FD_CLOEXEC + * - fcntl(2): F_DUPFD_CLOFORK/F_DUPFD_CLOEXEC + * - fcntl(2): F_DUP2FD_CLOFORK/F_DUP2FD_CLOEXEC + * - dup2(3C) + * - dup3(3C): argument translation + * - pipe2(2) + * - socket(2): SOCK_CLOEXEC/SOCK_CLOFORK + * - accept(2): flags on the listen socket aren't inherited on accept + * - socketpair(3SOCKET) + * - accept4(2): SOCK_CLOEXEC/SOCK_CLOFORK + * - recvmsg(2): SCM_RIGHTS MSG_CMSG_CLOFORK/MSG_CMSG_CLOEXEC + * + * The test is designed such that we have an array of functions that are used to + * create file descriptors with different rules. This is found in the + * oclo_create array. Each file descriptor that is created is then registered + * with information about what is expected about it. A given creation function + * can create more than one file descriptor; however, our expectation is that + * every file descriptor is accounted for (ignoring stdin, stdout, and stderr). + * + * We pass a record of each file descriptor that was recorded to a verification + * program that will verify everything is correctly honored after an exec. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Verification program name. + */ +#define OCLO_VERIFY "ocloexec_verify" + +/* + * This structure represents a table of ways we expect to create file + * descriptors that should have the resulting flags set when done. The table is + * ordered and subsequent iterations are allowed to assume that the ones that + * have gone ahead of them have run and are therefore allowed to access them. + * The create function does not return anything; it is expected to register + * every fd that it creates through oclo_record(). + */ +typedef struct clo_create clo_create_t; +struct clo_create { + const char *clo_desc; + int clo_flags; + void (*clo_func)(const clo_create_t *); +}; + +/* + * This is our run-time data.
We expect all file descriptors to be registered by
+ * our calling functions through oclo_record().
+ */
+typedef struct clo_rtdata {
+	const clo_create_t *crt_data;
+	size_t crt_idx;
+	int crt_fd;
+	int crt_flags;
+	const char *crt_desc;
+} clo_rtdata_t;
+
+/*
+ * The record array, its allocated capacity, the number of records in use, and
+ * the fd number that the next call to oclo_record() must be handed.
+ */
+static clo_rtdata_t *oclo_rtdata;
+static size_t oclo_rtdata_nents = 0;
+static size_t oclo_rtdata_next = 0;
+static int oclo_nextfd = STDERR_FILENO + 1;
+
+/*
+ * Compare the fd flags that fcntl(F_GETFD) reports for a recorded fd against
+ * the flags that were registered for it. When 'child' is true we are checking
+ * after a fork: an fd that was registered with FD_CLOFORK must then be closed,
+ * which we detect by F_GETFD failing with EBADF. A TEST PASSED or TEST FAILED
+ * line is printed in every case; the return value is true on a pass.
+ */
+static bool
+oclo_flags_match(const clo_rtdata_t *rt, bool child)
+{
+	const char *pass = child ? "post-fork" : "pre-fork";
+	bool fail = child && (rt->crt_flags & FD_CLOFORK) != 0;
+	int flags = fcntl(rt->crt_fd, F_GETFD, NULL);
+
+	if (flags < 0) {
+		/* Save errno before any stdio call has a chance to alter it. */
+		int e = errno;
+
+		if (fail) {
+			if (e == EBADF) {
+				(void) printf("TEST PASSED: %s (%s): fd %d: "
+				    "correctly closed\n",
+				    rt->crt_data->clo_desc, pass, rt->crt_fd);
+				return (true);
+			}
+
+			warn("TEST FAILED: %s (%s): fd %d: expected fcntl to "
+			    "fail with EBADF, but found %s",
+			    rt->crt_data->clo_desc, pass, rt->crt_fd,
+			    strerrorname_np(e));
+			return (false);
+		}
+
+		warnx("TEST FAILED: %s (%s): fd %d: fcntl(F_GETFD) "
+		    "unexpectedly failed", rt->crt_data->clo_desc, pass,
+		    rt->crt_fd);
+		return (false);
+	}
+
+	/*
+	 * The fd is still open. That is a failure if it should have been
+	 * closed by the fork. Report the flags that were registered for the
+	 * fd, not the fd number, as the expectation.
+	 */
+	if (fail) {
+		warnx("TEST FAILED: %s (%s): fd %d: received flags %d, but "
+		    "expected to fail based on flags %d",
+		    rt->crt_data->clo_desc, pass, rt->crt_fd, flags,
+		    rt->crt_flags);
+		return (false);
+	}
+
+	if (flags != rt->crt_flags) {
+		warnx("TEST FAILED: %s (%s): fd %d: discovered flags 0x%x do "
+		    "not match expected flags 0x%x", rt->crt_data->clo_desc,
+		    pass, rt->crt_fd, flags, rt->crt_flags);
+		return (false);
+	}
+
+	(void) printf("TEST PASSED: %s (%s): fd %d discovered flags match "
+	    "(0x%x)\n", rt->crt_data->clo_desc, pass, rt->crt_fd, flags);
+	return (true);
+}
+
+/*
+ * Register a newly created fd along with the fd flags we expect it to have.
+ * The record array is grown eight entries at a time. fds must be registered in
+ * strictly increasing order starting just past stderr; anything else is
+ * treated as a fatal error in the test itself. The flags are verified
+ * immediately and a mismatch is fatal.
+ */
+static void
+oclo_record(const clo_create_t *c, int fd, int exp_flags, const char *desc)
+{
+	if (oclo_rtdata_next == oclo_rtdata_nents) {
+		size_t newrt = oclo_rtdata_nents + 8;
+		clo_rtdata_t *rt;
+		rt = recallocarray(oclo_rtdata, oclo_rtdata_nents, newrt,
+		    sizeof (clo_rtdata_t));
+		if (rt == NULL) {
+			err(EXIT_FAILURE, "TEST FAILED: internal error "
+			    "expanding fd records to %zu entries", newrt);
+		}
+
+		oclo_rtdata_nents = newrt;
+		oclo_rtdata = rt;
+	}
+
+	if (fd != oclo_nextfd) {
+		errx(EXIT_FAILURE, "TEST FAILED: internal test error: expected "
+		    "to record next fd %d, given %d", oclo_nextfd, fd);
+	}
+
+	oclo_rtdata[oclo_rtdata_next].crt_data = c;
+	oclo_rtdata[oclo_rtdata_next].crt_fd = fd;
+	oclo_rtdata[oclo_rtdata_next].crt_flags = exp_flags;
+	oclo_rtdata[oclo_rtdata_next].crt_desc = desc;
+
+	/*
+	 * Matching errors at this phase are fatal as it means we screwed up the
+	 * program pretty badly.
+	 */
+	if (!oclo_flags_match(&oclo_rtdata[oclo_rtdata_next], false)) {
+		exit(EXIT_FAILURE);
+	}
+
+	oclo_rtdata_next++;
+	oclo_nextfd++;
+}
+
+/*
+ * Open /dev/null read-write, translating the FD_CLOEXEC and FD_CLOFORK bits
+ * of c->clo_flags into the corresponding O_CLOEXEC and O_CLOFORK open flags.
+ * Returns the new fd; failure to open is fatal. The caller is responsible for
+ * recording the fd.
+ */
+static int
+oclo_file(const clo_create_t *c)
+{
+	int flags = O_RDWR, fd;
+
+	if ((c->clo_flags & FD_CLOEXEC) != 0)
+		flags |= O_CLOEXEC;
+	if ((c->clo_flags & FD_CLOFORK) != 0)
+		flags |= O_CLOFORK;
+	fd = open("/dev/null", flags);
+	if (fd < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to open /dev/null",
+		    c->clo_desc);
+	}
+
+	return (fd);
+}
+
+/* open(2): the fd is expected to carry exactly the flags it was opened with. */
+static void
+oclo_open(const clo_create_t *c)
+{
+	oclo_record(c, oclo_file(c), c->clo_flags, NULL);
+}
+
+/*
+ * Open an fd using c->clo_flags and then replace its fd flags wholesale with
+ * targ_flags via F_SETFD. The fd is recorded as expecting targ_flags.
+ */
+static void
+oclo_setfd_common(const clo_create_t *c, int targ_flags)
+{
+	int fd = oclo_file(c);
+	if (fcntl(fd, F_SETFD, targ_flags) < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: F_SETFD failed to set "
+		    "flags to %d", c->clo_desc, targ_flags);
+	}
+
+	oclo_record(c, fd, targ_flags, NULL);
+}
+
+static void
+oclo_setfd_none(const clo_create_t *c)
+{
+	oclo_setfd_common(c, 0);
+}
+
+static void
+oclo_setfd_exec(const clo_create_t *c)
+{
+	oclo_setfd_common(c, FD_CLOEXEC);
+}
+
+static void
+oclo_setfd_fork(const clo_create_t *c)
+{
+	oclo_setfd_common(c, FD_CLOFORK);
+}
+
+static void
+oclo_setfd_both(const clo_create_t *c)
+{
+	oclo_setfd_common(c, FD_CLOFORK | FD_CLOEXEC);
+}
+
+/*
+ * Open
an fd with flags in a certain form and then use one of the F_DUPFD or
+ * F_DUP2FD variants and ensure that flags are properly propagated as expected.
+ * The base fd is opened with c->clo_flags and recorded with those flags. The
+ * duplicate is requested at (or at the lowest free slot from) the next fd
+ * number and is recorded as expecting targ_flags. For F_DUP3FD, targ_flags is
+ * also passed to fcntl() as the flags argument.
+ */
+static void
+oclo_fdup_common(const clo_create_t *c, int targ_flags, int cmd)
+{
+	int dup, fd;
+
+	fd = oclo_file(c);
+	oclo_record(c, fd, c->clo_flags, "base");
+	switch (cmd) {
+	case F_DUPFD:
+	case F_DUPFD_CLOEXEC:
+	case F_DUPFD_CLOFORK:
+		dup = fcntl(fd, cmd, fd);
+		break;
+	case F_DUP2FD:
+	case F_DUP2FD_CLOEXEC:
+	case F_DUP2FD_CLOFORK:
+		dup = fcntl(fd, cmd, fd + 1);
+		break;
+	case F_DUP3FD:
+		dup = fcntl(fd, cmd, fd + 1, targ_flags);
+		break;
+	default:
+		errx(EXIT_FAILURE, "TEST FAILED: %s: internal error: "
+		    "unexpected fcntl cmd: 0x%x", c->clo_desc, cmd);
+	}
+
+	if (dup < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to dup fd with "
+		    "fcntl command 0x%x", c->clo_desc, cmd);
+	}
+
+	oclo_record(c, dup, targ_flags, "dup");
+}
+
+/* F_DUPFD: the duplicate is expected to carry no fd flags. */
+static void
+oclo_fdupfd(const clo_create_t *c)
+{
+	oclo_fdup_common(c, 0, F_DUPFD);
+}
+
+static void
+oclo_fdupfd_fork(const clo_create_t *c)
+{
+	oclo_fdup_common(c, FD_CLOFORK, F_DUPFD_CLOFORK);
+}
+
+static void
+oclo_fdupfd_exec(const clo_create_t *c)
+{
+	oclo_fdup_common(c, FD_CLOEXEC, F_DUPFD_CLOEXEC);
+}
+
+/* F_DUP2FD: the duplicate is expected to carry no fd flags. */
+static void
+oclo_fdup2fd(const clo_create_t *c)
+{
+	oclo_fdup_common(c, 0, F_DUP2FD);
+}
+
+static void
+oclo_fdup2fd_fork(const clo_create_t *c)
+{
+	oclo_fdup_common(c, FD_CLOFORK, F_DUP2FD_CLOFORK);
+}
+
+static void
+oclo_fdup2fd_exec(const clo_create_t *c)
+{
+	oclo_fdup_common(c, FD_CLOEXEC, F_DUP2FD_CLOEXEC);
+}
+
+/* F_DUP3FD: the expected flags are exactly the ones handed to fcntl(). */
+static void
+oclo_fdup3fd_none(const clo_create_t *c)
+{
+	oclo_fdup_common(c, 0, F_DUP3FD);
+}
+
+static void
+oclo_fdup3fd_exec(const clo_create_t *c)
+{
+	oclo_fdup_common(c, FD_CLOEXEC, F_DUP3FD);
+}
+
+static void
+oclo_fdup3fd_fork(const clo_create_t *c)
+{
+	oclo_fdup_common(c, FD_CLOFORK, F_DUP3FD);
+}
+
+static void
+oclo_fdup3fd_both(const clo_create_t *c)
+{
+	oclo_fdup_common(c, FD_CLOEXEC | FD_CLOFORK, F_DUP3FD);
+}
+
+/*
+ * Open a base fd using c->clo_flags, then duplicate it onto the next fd
+ * number with either dup2() or, when v3 is true, dup3(). For dup3() the
+ * FD_* bits in targ_flags are translated to their O_* equivalents. The
+ * duplicate is recorded as expecting targ_flags.
+ */
+static void
+oclo_dup_common(const clo_create_t *c, int targ_flags, bool v3)
+{
+	int dup, fd;
+	fd = oclo_file(c);
+	oclo_record(c, fd, c->clo_flags, "base");
+	if (v3) {
+		int dflags = 0;
+		if ((targ_flags & FD_CLOEXEC) != 0)
+			dflags |= O_CLOEXEC;
+		if ((targ_flags & FD_CLOFORK) != 0)
+			dflags |= O_CLOFORK;
+		dup = dup3(fd, fd + 1, dflags);
+	} else {
+		dup = dup2(fd, fd + 1);
+	}
+
+	/* Report the real cause rather than a confusing fd mismatch of -1. */
+	if (dup < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to dup fd with %s",
+		    c->clo_desc, v3 ? "dup3()" : "dup2()");
+	}
+
+	oclo_record(c, dup, targ_flags, "dup");
+}
+
+static void
+oclo_dup2(const clo_create_t *c)
+{
+	oclo_dup_common(c, 0, false);
+}
+
+static void
+oclo_dup3_none(const clo_create_t *c)
+{
+	oclo_dup_common(c, 0, true);
+}
+
+static void
+oclo_dup3_exec(const clo_create_t *c)
+{
+	oclo_dup_common(c, FD_CLOEXEC, true);
+}
+
+static void
+oclo_dup3_fork(const clo_create_t *c)
+{
+	oclo_dup_common(c, FD_CLOFORK, true);
+}
+
+static void
+oclo_dup3_both(const clo_create_t *c)
+{
+	oclo_dup_common(c, FD_CLOEXEC | FD_CLOFORK, true);
+}
+
+/*
+ * pipe2(): both ends of the pipe are expected to carry the requested flags.
+ */
+static void
+oclo_pipe(const clo_create_t *c)
+{
+	int flags = 0, fds[2];
+
+	if ((c->clo_flags & FD_CLOEXEC) != 0)
+		flags |= O_CLOEXEC;
+	if ((c->clo_flags & FD_CLOFORK) != 0)
+		flags |= O_CLOFORK;
+
+	if (pipe2(fds, flags) < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: pipe2() with flags %d "
+		    "failed", c->clo_desc, flags);
+	}
+
+	oclo_record(c, fds[0], c->clo_flags, "pipe[0]");
+	oclo_record(c, fds[1], c->clo_flags, "pipe[1]");
+}
+
+/*
+ * socket(): the FD_* bits in c->clo_flags are translated to SOCK_CLOEXEC and
+ * SOCK_CLOFORK and or'd into the socket type.
+ */
+static void
+oclo_socket(const clo_create_t *c)
+{
+	int type = SOCK_DGRAM, fd;
+
+	if ((c->clo_flags & FD_CLOEXEC) != 0)
+		type |= SOCK_CLOEXEC;
+	if ((c->clo_flags & FD_CLOFORK) != 0)
+		type |= SOCK_CLOFORK;
+	fd = socket(PF_INET, type, 0);
+	if (fd < 0) {
+		/* No trailing newline: err() appends the error string itself. */
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to create socket "
+		    "with flags: 0x%x", c->clo_desc, c->clo_flags);
+	}
+
+	oclo_record(c, fd, c->clo_flags, NULL);
+}
+
+/*
+ * Create a loopback TCP listen socket using c->clo_flags, connect a plain
+ * client socket to it, and accept the connection with either accept() or,
+ * when a4 is true, accept4() using targ_flags. Three fds are recorded, in
+ * creation order: the listen socket (c->clo_flags), the client socket (no
+ * flags), and the accepted socket (targ_flags). The accepted socket must not
+ * inherit the listen socket's flags.
+ */
+static void
+oclo_accept_common(const clo_create_t *c, int targ_flags, bool a4)
+{
+	int lsock, csock, asock;
+	int ltype = SOCK_STREAM, atype = 0;
+	struct sockaddr_in in;
+	socklen_t slen;
+
+	if ((c->clo_flags & FD_CLOEXEC) != 0)
+		ltype |= SOCK_CLOEXEC;
+	if ((c->clo_flags & FD_CLOFORK) != 0)
+		ltype |= SOCK_CLOFORK;
+
+	if ((targ_flags & FD_CLOEXEC) != 0)
+		atype |= SOCK_CLOEXEC;
+	if ((targ_flags & FD_CLOFORK) != 0)
+		atype |= SOCK_CLOFORK;
+
+	lsock = socket(PF_INET, ltype, 0);
+	if (lsock < 0) {
+		/* No trailing newline: err() appends the error string itself. */
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to create listen "
+		    "socket with flags: 0x%x", c->clo_desc, c->clo_flags);
+	}
+
+	oclo_record(c, lsock, c->clo_flags, "listen");
+	(void) memset(&in, 0, sizeof (in));
+	in.sin_family = AF_INET;
+	in.sin_port = 0;
+	in.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+	if (bind(lsock, (struct sockaddr *)&in, sizeof (in)) != 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to bind socket",
+		    c->clo_desc);
+	}
+
+	/* We bound to port 0; find out which port we were actually given. */
+	slen = sizeof (struct sockaddr_in);
+	if (getsockname(lsock, (struct sockaddr *)&in, &slen) != 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to discover bound "
+		    "socket address", c->clo_desc);
+	}
+
+	if (listen(lsock, 5) < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to listen on socket",
+		    c->clo_desc);
+	}
+
+	csock = socket(PF_INET, SOCK_STREAM, 0);
+	if (csock < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to create client "
+		    "socket", c->clo_desc);
+	}
+	oclo_record(c, csock, 0, "connect");
+
+	if (connect(csock, (struct sockaddr *)&in, sizeof (in)) != 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to connect to "
+		    "server socket", c->clo_desc);
+	}
+
+	if (a4) {
+		asock = accept4(lsock, NULL, NULL, atype);
+	} else {
+		asock = accept(lsock, NULL, NULL);
+	}
+	if (asock < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to accept client "
+		    "connection", c->clo_desc);
+	}
+	oclo_record(c, asock, targ_flags, "accept");
+}
+
+static void
+oclo_accept(const clo_create_t *c)
+{
+	oclo_accept_common(c, 0, false);
+}
+
+static void
+oclo_accept4_none(const clo_create_t *c)
+{
+	oclo_accept_common(c, 0, true);
+}
+
+static void
+oclo_accept4_fork(const clo_create_t *c)
+{
+	oclo_accept_common(c, FD_CLOFORK, true);
+}
+
+static void
+oclo_accept4_exec(const clo_create_t *c)
+{
+	oclo_accept_common(c, FD_CLOEXEC, true);
+}
+
+static void
+oclo_accept4_both(const clo_create_t *c)
+{
+	oclo_accept_common(c, FD_CLOEXEC | FD_CLOFORK, true);
+}
+
+/*
+ * Go through the process of sending ourselves a file descriptor. The fd to
+ * send and both ends of a UNIX datagram socketpair are created with
+ * c->clo_flags and recorded in that order. The fd is then passed across the
+ * pair as SCM_RIGHTS and received with the MSG_CMSG_* equivalents of
+ * targ_flags; the received fd is recorded as expecting targ_flags.
+ */
+static void
+oclo_rights_common(const clo_create_t *c, int targ_flags)
+{
+	int pair[2], type = SOCK_DGRAM, sflags = 0;
+	int tosend = oclo_file(c), recvfd;
+	uint32_t data = 0x7777;
+	struct iovec iov;
+	struct msghdr msg;
+	struct cmsghdr *cm;
+
+	if ((c->clo_flags & FD_CLOEXEC) != 0)
+		type |= SOCK_CLOEXEC;
+	if ((c->clo_flags & FD_CLOFORK) != 0)
+		type |= SOCK_CLOFORK;
+
+	if (socketpair(PF_UNIX, type, 0, pair) < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to create socket "
+		    "pair", c->clo_desc);
+	}
+
+	oclo_record(c, tosend, c->clo_flags, "send fd");
+	oclo_record(c, pair[0], c->clo_flags, "pair[0]");
+	oclo_record(c, pair[1], c->clo_flags, "pair[1]");
+
+	iov.iov_base = (void *)&data;
+	iov.iov_len = sizeof (data);
+
+	(void) memset(&msg, 0, sizeof (msg));
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+	msg.msg_controllen = CMSG_SPACE(sizeof (int));
+
+	msg.msg_control = calloc(1, msg.msg_controllen);
+	if (msg.msg_control == NULL) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to allocate %u "
+		    "bytes for SCM_RIGHTS control message", c->clo_desc,
+		    msg.msg_controllen);
+	}
+
+	cm = CMSG_FIRSTHDR(&msg);
+	cm->cmsg_len = CMSG_LEN(sizeof (int));
+	cm->cmsg_level = SOL_SOCKET;
+	cm->cmsg_type = SCM_RIGHTS;
+	(void) memcpy(CMSG_DATA(cm), &tosend, sizeof (tosend));
+
+	if ((targ_flags & FD_CLOEXEC) != 0)
+		sflags |= MSG_CMSG_CLOEXEC;
+	if ((targ_flags & FD_CLOFORK) != 0)
+		sflags |= MSG_CMSG_CLOFORK;
+
+	if (sendmsg(pair[0], &msg, 0) < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to send fd",
+		    c->clo_desc);
+	}
+
+	/* Clear the payload so we can tell that recvmsg() really filled it. */
+	data = 0;
+	if (recvmsg(pair[1], &msg, sflags) < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: %s: failed to get fd",
+		    c->clo_desc);
+	}
+
+	if (data != 0x7777) {
+		errx(EXIT_FAILURE, "TEST FAILED: %s: did not receive correct "
+		    "data: expected 0x7777, found 0x%x", c->clo_desc, data);
+	}
+
+	if (msg.msg_controllen < CMSG_SPACE(sizeof (int))) {
+		errx(EXIT_FAILURE, "TEST FAILED: %s: found insufficient "
+		    "message control length: expected at least 0x%x, found "
+		    "0x%x", c->clo_desc, CMSG_SPACE(sizeof (int)),
+		    msg.msg_controllen);
+	}
+
+	cm = CMSG_FIRSTHDR(&msg);
+	if (cm->cmsg_level != SOL_SOCKET || cm->cmsg_type != SCM_RIGHTS) {
+		errx(EXIT_FAILURE, "TEST FAILED: %s: found surprising cmsg "
+		    "0x%x/0x%x, expected 0x%x/0x%x", c->clo_desc,
+		    cm->cmsg_level, cm->cmsg_type, SOL_SOCKET, SCM_RIGHTS);
+	}
+
+	if (cm->cmsg_len != CMSG_LEN(sizeof (int))) {
+		errx(EXIT_FAILURE, "TEST FAILED: %s: found unexpected "
+		    "SCM_RIGHTS length 0x%x: expected 0x%zx", c->clo_desc,
+		    cm->cmsg_len, CMSG_LEN(sizeof (int)));
+	}
+
+	(void) memcpy(&recvfd, CMSG_DATA(cm), sizeof (recvfd));
+	oclo_record(c, recvfd, targ_flags, "SCM_RIGHTS");
+
+	/* The control buffer is only needed for this one exchange. */
+	free(msg.msg_control);
+}
+
+static void
+oclo_rights_none(const clo_create_t *c)
+{
+	oclo_rights_common(c, 0);
+}
+
+static void
+oclo_rights_exec(const clo_create_t *c)
+{
+	oclo_rights_common(c, FD_CLOEXEC);
+}
+
+static void
+oclo_rights_fork(const clo_create_t *c)
+{
+	oclo_rights_common(c, FD_CLOFORK);
+}
+
+static void
+oclo_rights_both(const clo_create_t *c)
+{
+	oclo_rights_common(c, FD_CLOEXEC | FD_CLOFORK);
+}
+
+/*
+ * The table of fd creation scenarios, run in order by main(). clo_flags is
+ * always expressed in terms of the FD_* fd flags, even where the description
+ * names the O_*, SOCK_* or MSG_CMSG_* spelling: each creation function tests
+ * the FD_* bits and translates them to whatever the interface in question
+ * takes.
+ */
+static const clo_create_t oclo_create[] = { {
+	.clo_desc = "open(2), no flags",
+	.clo_flags = 0,
+	.clo_func = oclo_open
+}, {
+	.clo_desc = "open(2), O_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_open
+}, {
+	.clo_desc = "open(2), O_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_open
+}, {
+	.clo_desc = "open(2), O_CLOEXEC|O_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_open
+}, {
+	.clo_desc = "fcntl(F_SETFD) no flags->no flags",
+	.clo_flags = 0,
+	.clo_func = oclo_setfd_none
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOFORK|O_CLOEXEC->no flags",
+	.clo_flags = FD_CLOFORK | FD_CLOEXEC,
+	.clo_func = oclo_setfd_none
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOEXEC->no flags",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_setfd_none
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOFORK->no flags",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_setfd_none
+}, {
+	.clo_desc = "fcntl(F_SETFD) no flags->O_CLOEXEC",
+	.clo_flags = 0,
+	.clo_func = oclo_setfd_exec
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOFORK|O_CLOEXEC->O_CLOEXEC",
+	.clo_flags = FD_CLOFORK | FD_CLOEXEC,
+	.clo_func = oclo_setfd_exec
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOEXEC->O_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_setfd_exec
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOFORK->O_CLOEXEC",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_setfd_exec
+}, {
+	.clo_desc = "fcntl(F_SETFD) no flags->O_CLOFORK",
+	.clo_flags = 0,
+	.clo_func = oclo_setfd_fork
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOFORK|O_CLOEXEC->O_CLOFORK",
+	.clo_flags = FD_CLOFORK | FD_CLOEXEC,
+	.clo_func = oclo_setfd_fork
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOEXEC->O_CLOFORK",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_setfd_fork
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOFORK->O_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_setfd_fork
+}, {
+	.clo_desc = "fcntl(F_SETFD) no flags->O_CLOFORK|O_CLOEXEC",
+	.clo_flags = 0,
+	.clo_func = oclo_setfd_both
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOFORK|O_CLOEXEC->O_CLOFORK|O_CLOEXEC",
+	.clo_flags = FD_CLOFORK | FD_CLOEXEC,
+	.clo_func = oclo_setfd_both
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOEXEC->O_CLOFORK|O_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_setfd_both
+}, {
+	.clo_desc = "fcntl(F_SETFD) O_CLOFORK->O_CLOFORK|O_CLOEXEC",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_setfd_both
+}, {
+	.clo_desc = "fcntl(F_DUPFD) none->none",
+	.clo_flags = 0,
+	.clo_func = oclo_fdupfd
+}, {
+	.clo_desc = "fcntl(F_DUPFD) FD_CLOEXEC->none",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_fdupfd
+}, {
+	.clo_desc = "fcntl(F_DUPFD) FD_CLOFORK->none",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_fdupfd
+}, {
+	.clo_desc = "fcntl(F_DUPFD) FD_CLOEXEC|FD_CLOFORK->none",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_fdupfd
+}, {
+	.clo_desc = "fcntl(F_DUPFD_CLOFORK) none",
+	.clo_flags = 0,
+	.clo_func = oclo_fdupfd_fork
+}, {
+	.clo_desc = "fcntl(F_DUPFD_CLOFORK) FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_fdupfd_fork
+}, {
+	.clo_desc = "fcntl(F_DUPFD_CLOFORK) FD_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_fdupfd_fork
+}, {
+	.clo_desc = "fcntl(F_DUPFD_CLOFORK) FD_CLOEXEC|FD_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_fdupfd_fork
+}, {
+	.clo_desc = "fcntl(F_DUPFD_CLOEXEC) none",
+	.clo_flags = 0,
+	.clo_func = oclo_fdupfd_exec
+}, {
+	.clo_desc = "fcntl(F_DUPFD_CLOEXEC) FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_fdupfd_exec
+}, {
+	.clo_desc = "fcntl(F_DUPFD_CLOEXEC) FD_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_fdupfd_exec
+}, {
+	.clo_desc = "fcntl(F_DUPFD_CLOEXEC) FD_CLOEXEC|FD_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_fdupfd_exec
+}, {
+	.clo_desc = "fcntl(F_DUP2FD) none->none",
+	.clo_flags = 0,
+	.clo_func = oclo_fdup2fd
+}, {
+	.clo_desc = "fcntl(F_DUP2FD) FD_CLOEXEC->none",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_fdup2fd
+}, {
+	.clo_desc = "fcntl(F_DUP2FD) FD_CLOFORK->none",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_fdup2fd
+}, {
+	.clo_desc = "fcntl(F_DUP2FD) FD_CLOEXEC|FD_CLOFORK->none",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_fdup2fd
+}, {
+	.clo_desc = "fcntl(F_DUP2FD_CLOFORK) none",
+	.clo_flags = 0,
+	.clo_func = oclo_fdup2fd_fork
+}, {
+	.clo_desc = "fcntl(F_DUP2FD_CLOFORK) FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_fdup2fd_fork
+}, {
+	.clo_desc = "fcntl(F_DUP2FD_CLOFORK) FD_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_fdup2fd_fork
+}, {
+	.clo_desc = "fcntl(F_DUP2FD_CLOFORK) FD_CLOEXEC|FD_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_fdup2fd_fork
+}, {
+	.clo_desc = "fcntl(F_DUP2FD_CLOEXEC) none",
+	.clo_flags = 0,
+	.clo_func = oclo_fdup2fd_exec
+}, {
+	.clo_desc = "fcntl(F_DUP2FD_CLOEXEC) FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_fdup2fd_exec
+}, {
+	.clo_desc = "fcntl(F_DUP2FD_CLOEXEC) FD_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_fdup2fd_exec
+}, {
+	.clo_desc = "fcntl(F_DUP2FD_CLOEXEC) FD_CLOEXEC|FD_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_fdup2fd_exec
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) none->none",
+	.clo_flags = 0,
+	.clo_func = oclo_fdup3fd_none
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC->none",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_fdup3fd_none
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOFORK->none",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_fdup3fd_none
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC|FD_CLOFORK->none",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_fdup3fd_none
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) none->FD_CLOEXEC",
+	.clo_flags = 0,
+	.clo_func = oclo_fdup3fd_exec
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC->FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_fdup3fd_exec
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOFORK->FD_CLOEXEC",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_fdup3fd_exec
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC|FD_CLOFORK->FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_fdup3fd_exec
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) none->FD_CLOFORK|FD_CLOEXEC",
+	.clo_flags = 0,
+	.clo_func = oclo_fdup3fd_both
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC->FD_CLOFORK|FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_fdup3fd_both
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOFORK->FD_CLOFORK|FD_CLOEXEC",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_fdup3fd_both
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC|FD_CLOFORK->"
+	    "FD_CLOFORK|FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_fdup3fd_both
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) none->FD_CLOFORK",
+	.clo_flags = 0,
+	.clo_func = oclo_fdup3fd_fork
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC->FD_CLOFORK",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_fdup3fd_fork
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOFORK->FD_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_fdup3fd_fork
+}, {
+	.clo_desc = "fcntl(F_DUP3FD) FD_CLOEXEC|FD_CLOFORK->FD_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_fdup3fd_fork
+}, {
+	.clo_desc = "dup2() none->none",
+	.clo_flags = 0,
+	.clo_func = oclo_dup2
+}, {
+	.clo_desc = "dup2() FD_CLOEXEC->none",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_dup2
+}, {
+	.clo_desc = "dup2() FD_CLOFORK->none",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_dup2
+}, {
+	.clo_desc = "dup2() FD_CLOEXEC|FD_CLOFORK->none",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_dup2
+}, {
+	.clo_desc = "dup3() none->none",
+	.clo_flags = 0,
+	.clo_func = oclo_dup3_none
+}, {
+	.clo_desc = "dup3() FD_CLOEXEC->none",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_dup3_none
+}, {
+	.clo_desc = "dup3() FD_CLOFORK->none",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_dup3_none
+}, {
+	.clo_desc = "dup3() FD_CLOEXEC|FD_CLOFORK->none",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_dup3_none
+}, {
+	.clo_desc = "dup3() none->FD_CLOEXEC",
+	.clo_flags = 0,
+	.clo_func = oclo_dup3_exec
+}, {
+	.clo_desc = "dup3() FD_CLOEXEC->FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_dup3_exec
+}, {
+	.clo_desc = "dup3() FD_CLOFORK->FD_CLOEXEC",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_dup3_exec
+}, {
+	.clo_desc = "dup3() FD_CLOEXEC|FD_CLOFORK->FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_dup3_exec
+}, {
+	.clo_desc = "dup3() none->FD_CLOFORK|FD_CLOEXEC",
+	.clo_flags = 0,
+	.clo_func = oclo_dup3_both
+}, {
+	.clo_desc = "dup3() FD_CLOEXEC->FD_CLOFORK|FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_dup3_both
+}, {
+	.clo_desc = "dup3() FD_CLOFORK->FD_CLOFORK|FD_CLOEXEC",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_dup3_both
+}, {
+	.clo_desc = "dup3() FD_CLOEXEC|FD_CLOFORK->FD_CLOFORK|FD_CLOEXEC",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_dup3_both
+}, {
+	.clo_desc = "dup3() none->FD_CLOFORK",
+	.clo_flags = 0,
+	.clo_func = oclo_dup3_fork
+}, {
+	.clo_desc = "dup3() FD_CLOEXEC->FD_CLOFORK",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_dup3_fork
+}, {
+	.clo_desc = "dup3() FD_CLOFORK->FD_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_dup3_fork
+}, {
+	.clo_desc = "dup3() FD_CLOEXEC|FD_CLOFORK->FD_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_dup3_fork
+}, {
+	.clo_desc = "pipe(2), no flags",
+	.clo_flags = 0,
+	.clo_func = oclo_pipe
+}, {
+	.clo_desc = "pipe(2), O_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_pipe
+}, {
+	.clo_desc = "pipe(2), O_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_pipe
+}, {
+	.clo_desc = "pipe(2), O_CLOEXEC|O_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_pipe
+}, {
+	.clo_desc = "socket(2), no flags",
+	.clo_flags = 0,
+	.clo_func = oclo_socket
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_socket
+}, {
+	.clo_desc = "socket(2), O_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_socket
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_socket
+}, {
+	.clo_desc = "socket(2), no flags->accept() none",
+	.clo_flags = 0,
+	.clo_func = oclo_accept
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC->accept() none",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_accept
+}, {
+	.clo_desc = "socket(2), O_CLOFORK->accept() none",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_accept
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK->accept() none",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_accept
+}, {
+	.clo_desc = "socket(2), no flags->accept4() none",
+	.clo_flags = 0,
+	.clo_func = oclo_accept4_none
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC->accept4() none",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_accept4_none
+}, {
+	.clo_desc = "socket(2), O_CLOFORK->accept4() none",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_accept4_none
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK->accept4() none",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_accept4_none
+}, {
+	.clo_desc = "socket(2), no flags->accept4() SOCK_CLOFORK|SOCK_CLOEXEC",
+	.clo_flags = 0,
+	.clo_func = oclo_accept4_both
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC->accept4() SOCK_CLOFORK|SOCK_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_accept4_both
+}, {
+	.clo_desc = "socket(2), O_CLOFORK->accept4() SOCK_CLOFORK|SOCK_CLOEXEC",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_accept4_both
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK->accept4() "
+	    "SOCK_CLOFORK|SOCK_CLOEXEC",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_accept4_both
+}, {
+	.clo_desc = "socket(2), no flags->accept4() SOCK_CLOFORK",
+	.clo_flags = 0,
+	.clo_func = oclo_accept4_fork
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC->accept4() SOCK_CLOFORK",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_accept4_fork
+}, {
+	.clo_desc = "socket(2), O_CLOFORK->accept4() SOCK_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_accept4_fork
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK->accept4() SOCK_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_accept4_fork
+}, {
+	.clo_desc = "socket(2), no flags->accept4() SOCK_CLOEXEC",
+	.clo_flags = 0,
+	.clo_func = oclo_accept4_exec
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC->accept4() SOCK_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_accept4_exec
+}, {
+	.clo_desc = "socket(2), O_CLOFORK->accept4() SOCK_CLOEXEC",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_accept4_exec
+}, {
+	.clo_desc = "socket(2), O_CLOEXEC|O_CLOFORK->accept4() SOCK_CLOEXEC",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_accept4_exec
+}, {
+	.clo_desc = "SCM_RIGHTS none->none",
+	.clo_flags = 0,
+	.clo_func = oclo_rights_none
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOFORK->none",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_rights_none
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOEXEC->none",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_rights_none
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOEXEC|FD_CLOFORK->none",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_rights_none
+}, {
+	.clo_desc = "SCM_RIGHTS none->MSG_CMSG_CLOEXEC",
+	.clo_flags = 0,
+	.clo_func = oclo_rights_exec
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOFORK->MSG_CMSG_CLOEXEC",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_rights_exec
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOEXEC->MSG_CMSG_CLOEXEC",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_rights_exec
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOEXEC|FD_CLOFORK->MSG_CMSG_CLOEXEC",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_rights_exec
+}, {
+	.clo_desc = "SCM_RIGHTS none->MSG_CMSG_CLOFORK",
+	.clo_flags = 0,
+	.clo_func = oclo_rights_fork
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOFORK->MSG_CMSG_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_rights_fork
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOEXEC->MSG_CMSG_CLOFORK",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_rights_fork
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOEXEC|FD_CLOFORK->MSG_CMSG_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_rights_fork
+}, {
+	.clo_desc = "SCM_RIGHTS none->MSG_CMSG_CLOEXEC|MSG_CMSG_CLOFORK",
+	.clo_flags = 0,
+	.clo_func = oclo_rights_both
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOFORK->MSG_CMSG_CLOEXEC|MSG_CMSG_CLOFORK",
+	.clo_flags = FD_CLOFORK,
+	.clo_func = oclo_rights_both
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOEXEC->MSG_CMSG_CLOEXEC|MSG_CMSG_CLOFORK",
+	.clo_flags = FD_CLOEXEC,
+	.clo_func = oclo_rights_both
+}, {
+	.clo_desc = "SCM_RIGHTS FD_CLOEXEC|FD_CLOFORK->"
+	    "MSG_CMSG_CLOEXEC|MSG_CMSG_CLOFORK",
+	.clo_flags = FD_CLOEXEC | FD_CLOFORK,
+	.clo_func = oclo_rights_both
+} };
+
+/*
+ * Run the post-fork check against every recorded fd. Every fd is checked even
+ * after a failure so that all problems are reported; returns false if any
+ * check failed.
+ */
+static bool
+oclo_verify_fork(void)
+{
+	bool ret = true;
+
+	for (size_t i = 0; i < oclo_rtdata_next; i++) {
+		if (!oclo_flags_match(&oclo_rtdata[i], true)) {
+			ret = false;
+		}
+	}
+
+	return (ret);
+}
+
+/*
+ * Here we proceed to re-open any fd that was closed due to O_CLOFORK again to
+ * make sure it makes it to our child verifier. This also serves as a test to
+ * make sure that our opening of the lowest fd is correct. While it might be
+ * ideal to re-create each fd with the same method that was originally used,
+ * simply opening /dev/zero with the matching flags should get us most of the
+ * way there.
+ */
+static void
+oclo_child_reopen(void)
+{
+	for (size_t i = 0; i < oclo_rtdata_next; i++) {
+		int fd;
+		int flags = O_RDWR | O_CLOFORK;
+
+		if ((oclo_rtdata[i].crt_flags & FD_CLOFORK) == 0)
+			continue;
+
+		if ((oclo_rtdata[i].crt_flags & FD_CLOEXEC) != 0)
+			flags |= O_CLOEXEC;
+
+		fd = open("/dev/zero", flags);
+		if (fd < 0) {
+			err(EXIT_FAILURE, "TEST FAILED: failed to re-open fd "
+			    "%d with flags %d", oclo_rtdata[i].crt_fd, flags);
+		}
+
+		/*
+		 * Records are walked in increasing fd order, so the lowest
+		 * free fd must be exactly the one that the fork closed.
+		 */
+		if (fd != oclo_rtdata[i].crt_fd) {
+			errx(EXIT_FAILURE, "TEST FAILED: re-opening fd %d "
+			    "returned fd %d: test design issue or lowest fd "
+			    "algorithm is broken", oclo_rtdata[i].crt_fd, fd);
+		}
+	}
+
+	(void) printf("TEST PASSED: successfully reopened fds post-fork\n");
+}
+
+/*
+ * Look for the verification program in the same directory that this program is
+ * found in. Note, that isn't the same thing as the current working directory.
+ */
+static void
+oclo_exec(void)
+{
+	ssize_t ret;
+	char dir[PATH_MAX], file[PATH_MAX];
+	char **argv;
+
+	ret = readlink("/proc/self/path/a.out", dir, sizeof (dir));
+	if (ret < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: failed to read our a.out path "
+		    "from /proc");
+	} else if (ret == 0) {
+		errx(EXIT_FAILURE, "TEST FAILED: reading /proc/self/path/a.out "
+		    "returned 0 bytes");
+	} else if (ret == sizeof (dir)) {
+		errx(EXIT_FAILURE, "TEST FAILED: Using /proc/self/path/a.out "
+		    "requires truncation");
+	}
+
+	/*
+	 * readlink() does not NUL-terminate what it writes and dir is an
+	 * uninitialized stack buffer, so terminate it ourselves before handing
+	 * it to dirname(). The checks above guarantee 0 < ret < sizeof (dir).
+	 */
+	dir[ret] = '\0';
+
+	if (snprintf(file, sizeof (file), "%s/%s", dirname(dir), OCLO_VERIFY) >=
+	    sizeof (file)) {
+		errx(EXIT_FAILURE, "TEST FAILED: cannot assemble exec path "
+		    "name: internal buffer overflow");
+	}
+
+	/* We need an extra for both the NULL terminator and the program name */
+	argv = calloc(oclo_rtdata_next + 2, sizeof (char *));
+	if (argv == NULL) {
+		err(EXIT_FAILURE, "TEST FAILED: failed to allocate exec "
+		    "argument array");
+	}
+
+	/*
+	 * Argument i + 1 is the expected flags, in hex, of the i'th recorded
+	 * fd.
+	 */
+	argv[0] = file;
+	for (size_t i = 0; i < oclo_rtdata_next; i++) {
+		if (asprintf(&argv[i + 1], "0x%x", oclo_rtdata[i].crt_flags) ==
+		    -1) {
+			err(EXIT_FAILURE, "TEST FAILED: failed to assemble "
+			    "exec argument %zu", i + 1);
+		}
+	}
+
+	/* execv() only returns on failure; the caller treats that as fatal. */
+	(void) execv(file, argv);
+	warn("TEST FAILED: failed to exec verifier %s", file);
+}
+
+/*
+ * Create every fd described by oclo_create, fork, and in the child verify the
+ * close-on-fork behavior before exec'ing the verifier to check the
+ * close-on-exec behavior. The parent's exit status reflects the child's.
+ */
+int
+main(void)
+{
+	int ret = EXIT_SUCCESS;
+	siginfo_t cret;
+
+	/*
+	 * Before we do anything else close all FDs that aren't standard. We
+	 * don't want anything the test suite environment may have left behind.
+	 */
+	(void) closefrom(STDERR_FILENO + 1);
+
+	/*
+	 * Treat failure during this set up phase as a hard failure. There's no
+	 * reason to continue if we can't successfully create the FDs we expect.
+	 */
+	for (size_t i = 0; i < ARRAY_SIZE(oclo_create); i++) {
+		oclo_create[i].clo_func(&oclo_create[i]);
+	}
+
+	pid_t child = forkx(FORK_NOSIGCHLD | FORK_WAITPID);
+	if (child < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: internal test failure: failed "
+		    "to fork child");
+	}
+
+	if (child == 0) {
+		/*
+		 * A successful exec replaces this process, and with it our
+		 * exit status, so a post-fork failure has to be reported now
+		 * or it would be silently lost.
+		 */
+		if (!oclo_verify_fork()) {
+			_exit(EXIT_FAILURE);
+		}
+
+		oclo_child_reopen();
+
+		/* We only get past oclo_exec() if the exec itself failed. */
+		oclo_exec();
+		_exit(EXIT_FAILURE);
+	}
+
+	if (waitid(P_PID, child, &cret, WEXITED) < 0) {
+		err(EXIT_FAILURE, "TEST FAILED: internal test failure waiting "
+		    "for forked child to report");
+	}
+
+	if (cret.si_code != CLD_EXITED) {
+		warnx("TEST FAILED: child process did not successfully exit: "
+		    "found si_code: %d", cret.si_code);
+		ret = EXIT_FAILURE;
+	} else if (cret.si_status != 0) {
+		warnx("TEST FAILED: child process did not exit with code 0: "
+		    "found %d", cret.si_status);
+		ret = EXIT_FAILURE;
+	}
+
+	if (ret == EXIT_SUCCESS) {
+		(void) printf("All tests passed successfully\n");
+	}
+
+	return (ret);
+}
diff --git a/usr/src/test/os-tests/tests/oclo/oclo_errors.c b/usr/src/test/os-tests/tests/oclo/oclo_errors.c
new file mode 100644
index 000000000000..9d9841271856
--- /dev/null
+++ b/usr/src/test/os-tests/tests/oclo/oclo_errors.c
@@ -0,0 +1,193 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2024 Oxide Computer Company
+ */
+
+/*
+ * Verify that unsupported flags will properly generate errors across the
+ * functions that we know perform strict error checking. This includes:
+ *
+ * o fcntl(..., F_DUP3FD, ...)
+ *    o dup3()
+ *    o pipe2()
+ *    o socket()
+ *    o accept4()
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Common result check: every call made by this program is expected to fail
+ * with EINVAL. 'ret' is the value the call returned and 'e' is the errno value
+ * the caller captured right after it. We deliberately test 'e' rather than the
+ * global errno, because a caller may have made other calls (such as close())
+ * between the call under test and this check. 'act' names what would have
+ * happened to the fd had the call wrongly succeeded.
+ */
+static bool
+oclo_check(const char *desc, const char *act, int ret, int e)
+{
+	if (ret >= 0) {
+		warnx("TEST FAILED: %s: fd was %s!", desc, act);
+		return (false);
+	} else if (e != EINVAL) {
+		warnx("TEST FAILED: %s: failed with %s, expected "
+		    "EINVAL", desc, strerrorname_np(e));
+		return (false);
+	}
+
+	(void) printf("TEST PASSED: %s: correctly failed with EINVAL\n",
+	    desc);
+	return (true);
+}
+
+/* dup3() of stderr onto fd 23 with the given (invalid) flags. */
+static bool
+oclo_dup3(const char *desc, int flags)
+{
+	int fd = dup3(STDERR_FILENO, 23, flags);
+	return (oclo_check(desc, "duplicated", fd, errno));
+}
+
+/* The same request as oclo_dup3(), issued directly via fcntl(F_DUP3FD). */
+static bool
+oclo_dup3fd(const char *desc, int flags)
+{
+	int fd = fcntl(STDERR_FILENO, F_DUP3FD, 23, flags);
+	return (oclo_check(desc, "duplicated", fd, errno));
+}
+
+/* pipe2() with the given (invalid) flags. */
+static bool
+oclo_pipe2(const char *desc, int flags)
+{
+	int fds[2], ret;
+
+	ret = pipe2(fds, flags);
+	return (oclo_check(desc, "piped", ret, errno));
+}
+
+/* socket() with the given (invalid) bits or'd into the socket type. */
+static bool
+oclo_socket(const char *desc, int type)
+{
+	int fd = socket(PF_UNIX, SOCK_STREAM | type, 0);
+	return (oclo_check(desc, "created", fd, errno));
+}
+
+/*
+ * Set up a non-blocking TCP listener on the loopback address with a
+ * kernel-chosen port and call accept4() on it with the given (invalid) flags.
+ * errno is captured before close() gets a chance to change it.
+ */
+static bool
+oclo_accept(const char *desc, int flags)
+{
+	int sock, fd, e;
+	struct sockaddr_in in;
+
+	sock = socket(PF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
+	if (sock < 0) {
+		warn("TEST FAILED: %s: failed to create listen socket", desc);
+		return (false);
+	}
+
+	(void) memset(&in, 0, sizeof (in));
+	in.sin_family = AF_INET;
+	in.sin_port = 0;
+	in.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+	if (bind(sock, (struct sockaddr *)&in, sizeof (in)) != 0) {
+		warn("TEST FAILED: %s: failed to bind socket", desc);
+		(void) close(sock);
+		return (false);
+	}
+
+	if (listen(sock, 5) < 0) {
+		warn("TEST FAILED: %s: failed to listen on socket", desc);
+		(void) close(sock);
+		return (false);
+	}
+
+	fd = accept4(sock, NULL, NULL, flags);
+	e = errno;
+	(void) close(sock);
+	return (oclo_check(desc, "accepted", fd, e));
+}
+
+/*
+ * Run every invalid-flag case. A failing case does not stop the run; the exit
+ * status is EXIT_FAILURE if any case failed.
+ */
+int
+main(void)
+{
+	int ret = EXIT_SUCCESS;
+
+	(void) closefrom(STDERR_FILENO + 1);
+
+	if (!oclo_dup3("dup3(): O_RDWR", O_RDWR)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_dup3("dup3(): O_NONBLOCK|O_CLOEXEC",
+	    O_NONBLOCK | O_CLOEXEC)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_dup3("dup3(): O_CLOFORK|O_WRONLY", O_CLOFORK | O_WRONLY)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_dup3fd("fcntl(F_DUP3FD): 0x7777", 0x7777)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_dup3fd("fcntl(F_DUP3FD): FD_CLOEXEC|FD_CLOFORK + 1",
+	    (FD_CLOEXEC | FD_CLOFORK) + 1)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_dup3fd("fcntl(F_DUP3FD): INT_MAX", INT_MAX)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_pipe2("pipe2(): O_RDWR", O_RDWR)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_pipe2("pipe2(): O_SYNC|O_CLOEXEC", O_SYNC | O_CLOEXEC)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_pipe2("pipe2(): O_CLOFORK|O_WRONLY", O_CLOFORK | O_WRONLY)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_pipe2("pipe2(): INT32_MAX", INT32_MAX)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_socket("socket(): INT32_MAX", INT32_MAX)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_socket("socket(): 3 << 25", 3 << 25)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_accept("accept4(): INT32_MAX", INT32_MAX)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (!oclo_accept("accept4(): 3 << 25", 3 << 25)) {
+		ret = EXIT_FAILURE;
+	}
+
+	if (ret == EXIT_SUCCESS) {
+		(void) printf("All tests completed successfully\n");
+	}
+
+	return (ret);
+}
diff --git a/usr/src/test/os-tests/tests/oclo/ocloexec_verify.c b/usr/src/test/os-tests/tests/oclo/ocloexec_verify.c
new file mode 100644
index 000000000000..57bd843a801d
--- /dev/null
+++ b/usr/src/test/os-tests/tests/oclo/ocloexec_verify.c
@@ -0,0 +1,122 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2024 Oxide Computer Company
+ */
+
+/*
+ * Verify that our file descriptors starting after stderr are correct based upon
+ * the series of passed in arguments from the 'oclo' program. Arguments are
+ * passed as a string that represents the flags that were originally verified
+ * pre-fork/exec via fcntl(F_GETFD). In addition, anything that was originally
+ * closed because it had FD_CLOFORK set was reopened with the same flags so we
+ * can verify that things with only FD_CLOFORK survive exec.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * fdwalk() callback: remember the highest fd seen in the int that 'arg' points
+ * to. We keep a true maximum rather than the last fd visited so that we do not
+ * depend on the order in which fdwalk() visits fds. Returning 0 continues the
+ * walk.
+ */
+static int
+verify_fdwalk_cb(void *arg, int fd)
+{
+	int *max = arg;
+
+	if (fd > *max)
+		*max = fd;
+	return (0);
+}
+
+/*
+ * Check a single fd after exec. An fd whose expected flags include FD_CLOEXEC
+ * must be gone, i.e. fcntl(F_GETFD) must fail with EBADF. Any other fd must
+ * still be open and report exactly exp_flags. Returns true when the fd is in
+ * the expected state.
+ */
+static bool
+verify_flags(int fd, int exp_flags)
+{
+	bool fail = (exp_flags & FD_CLOEXEC) != 0;
+	int flags = fcntl(fd, F_GETFD, NULL);
+
+	if (flags < 0) {
+		int e = errno;
+
+		if (fail) {
+			if (e == EBADF) {
+				(void) printf("TEST PASSED: post-exec fd %d: "
+				    "flags 0x%x: correctly closed\n", fd,
+				    exp_flags);
+				return (true);
+			}
+
+			/*
+			 * The error name is already part of the message, so
+			 * use warnx() here like the other failure paths.
+			 */
+			warnx("TEST FAILED: post-exec fd %d: expected fcntl to "
+			    "fail with EBADF, but found %s", fd,
+			    strerrorname_np(e));
+			return (false);
+		}
+
+		warnx("TEST FAILED: post-exec fd %d: fcntl(F_GETFD) "
+		    "unexpectedly failed with %s, expected flags %d", fd,
+		    strerrorname_np(e), exp_flags);
+		return (false);
+	}
+
+	if (fail) {
+		warnx("TEST FAILED: post-exec fd %d: received flags %d, but "
+		    "expected to fail based on flags %d", fd, flags, exp_flags);
+		return (false);
+	}
+
+	if (flags != exp_flags) {
+		warnx("TEST FAILED: post-exec fd %d: discovered flags 0x%x do "
+		    "not match expected flags 0x%x", fd, flags, exp_flags);
+		return (false);
+	}
+
+	(void) printf("TEST PASSED: post-exec fd %d: flags 0x%x: successfully "
+	    "matched\n", fd, exp_flags);
+	return (true);
+}
+
+/*
+ * argv[i] holds the expected flags for fd (STDERR_FILENO + i). Every argument
+ * is checked even after a failure; the exit status is EXIT_FAILURE if any fd
+ * was not in its expected state.
+ */
+int
+main(int argc, char *argv[])
+{
+	int maxfd = STDIN_FILENO;
+	int ret = EXIT_SUCCESS;
+
+	/*
+	 * We should have one argument for each fd we found, ignoring stdin,
+	 * stdout, and stderr. argc will also have an additional entry for our
+	 * program name, which we want to skip. Note, the last fd may not exist
+	 * because it was marked for close, hence the use of '>' below.
+	 */
+	(void) fdwalk(verify_fdwalk_cb, &maxfd);
+	if (maxfd - 3 > argc - 1) {
+		errx(EXIT_FAILURE, "TEST FAILED: found more fds %d than "
+		    "arguments %d", maxfd - 3, argc - 1);
+	}
+
+	for (int i = 1; i < argc; i++) {
+		const char *errstr;
+		int targ_fd = i + STDERR_FILENO;
+		long long targ_flags = strtonumx(argv[i], 0,
+		    FD_CLOEXEC | FD_CLOFORK, &errstr, 0);
+
+		if (errstr != NULL) {
+			errx(EXIT_FAILURE, "TEST FAILED: failed to parse "
+			    "argument %d: %s is %s", i, argv[i], errstr);
+		}
+
+		if (!verify_flags(targ_fd, (int)targ_flags))
+			ret = EXIT_FAILURE;
+	}
+
+	return (ret);
+}
diff --git a/usr/src/uts/common/fs/doorfs/door_sys.c b/usr/src/uts/common/fs/doorfs/door_sys.c index a2d3812938a9..dd9591d58271 100644 --- a/usr/src/uts/common/fs/doorfs/door_sys.c +++ b/usr/src/uts/common/fs/doorfs/door_sys.c @@ -402,7 +402,7 @@ door_create(void (*pc_cookie)(), void *data_cookie, uint_t attributes) &fd, NULL)) != 0) return (set_errno(err)); - f_setfd(fd, FD_CLOEXEC); + f_setfd_or(fd, FD_CLOEXEC); return (fd); } diff --git a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c index fefc832caa44..270cd2713695 100644 --- a/usr/src/uts/common/fs/sockfs/sockcommon_sops.c +++ b/usr/src/uts/common/fs/sockfs/sockcommon_sops.c @@ -1811,8 +1811,8 @@ so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, */ control = kmem_zalloc(controllen, KM_SLEEP); - error = so_opt2cmsg(mctlp, opt, optlen, - !(flags & MSG_XPG4_2), control, controllen); + error =
so_opt2cmsg(mctlp, opt, optlen, flags, control, + controllen); if (error) { freemsg(mctlp); if (msg->msg_namelen != 0) @@ -1870,8 +1870,8 @@ so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, */ control = kmem_zalloc(controllen, KM_SLEEP); - error = so_opt2cmsg(mctlp, opt, optlen, - !(flags & MSG_XPG4_2), control, controllen); + error = so_opt2cmsg(mctlp, opt, optlen, flags, control, + controllen); if (error) { freemsg(mctlp); kmem_free(control, controllen); diff --git a/usr/src/uts/common/fs/sockfs/socksubr.c b/usr/src/uts/common/fs/sockfs/socksubr.c index 89ed3e80987c..213f7d9d4696 100644 --- a/usr/src/uts/common/fs/sockfs/socksubr.c +++ b/usr/src/uts/common/fs/sockfs/socksubr.c @@ -25,6 +25,7 @@ * Copyright 2015, Joyent, Inc. All rights reserved. * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. * Copyright 2022 Garrett D'Amore + * Copyright 2024 Oxide Computer Company */ #include @@ -719,7 +720,7 @@ fdbuf_allocmsg(int size, struct fdbuf *fdbuf) */ /*ARGSUSED*/ static int -fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen) +fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen, int msg_flags) { int i, fd; int *rp; @@ -753,6 +754,12 @@ fdbuf_extract(struct fdbuf *fdbuf, void *rights, int rightslen) fp->f_count++; mutex_exit(&fp->f_tlock); setf(fd, fp); + if ((msg_flags & MSG_CMSG_CLOEXEC) != 0) { + f_setfd_or(fd, FD_CLOEXEC); + } + if ((msg_flags & MSG_CMSG_CLOFORK) != 0) { + f_setfd_or(fd, FD_CLOFORK); + } *rp++ = fd; if (AU_AUDITING()) audit_fdrecv(fd, fp); @@ -1209,7 +1216,7 @@ so_cmsglen(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg) * also be checked for any possible impacts. 
*/ int -so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg, +so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int msg_flags, void *control, t_uscalar_t controllen) { struct T_opthdr *tohp; @@ -1217,6 +1224,7 @@ so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg, struct fdbuf *fdbuf; int fdbuflen; int error; + int oldflg = (msg_flags & MSG_XPG4_2) == 0; #if defined(DEBUG) || defined(__lint) struct cmsghdr *cend = (struct cmsghdr *) (((uint8_t *)control) + ROUNDUP_cmsglen(controllen)); @@ -1245,7 +1253,7 @@ so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg, return (EPROTO); if (oldflg) { error = fdbuf_extract(fdbuf, control, - (int)controllen); + (int)controllen, msg_flags); if (error != 0) return (error); continue; @@ -1261,7 +1269,7 @@ so_opt2cmsg(mblk_t *mp, void *opt, t_uscalar_t optlen, int oldflg, sizeof (struct cmsghdr)); error = fdbuf_extract(fdbuf, - CMSG_CONTENT(cmsg), fdlen); + CMSG_CONTENT(cmsg), fdlen, msg_flags); if (error != 0) return (error); } diff --git a/usr/src/uts/common/fs/sockfs/socksyscalls.c b/usr/src/uts/common/fs/sockfs/socksyscalls.c index 12e4ccfb7ccb..f15690c7a682 100644 --- a/usr/src/uts/common/fs/sockfs/socksyscalls.c +++ b/usr/src/uts/common/fs/sockfs/socksyscalls.c @@ -26,6 +26,7 @@ * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. * Copyright 2022 Garrett D'Amore + * Copyright 2024 Oxide Computer Company */ #include @@ -82,6 +83,13 @@ int do_useracc = 1; /* Controlled by setting SO_DEBUG to 4 */ extern int xnet_truncate_print; +/* + * This constitutes the known flags that are allowed to be passed in the upper + * bits of a socket type either for socket() or accept4(). + */ +#define SOCK_KNOWN_FLAGS (SOCK_CLOEXEC | SOCK_NDELAY | SOCK_NONBLOCK | \ + SOCK_CLOFORK) + /* * Kernel component of socket creation. 
* @@ -104,7 +112,7 @@ so_socket(int family, int type_w_flags, int protocol, char *devpath, type = type_w_flags & SOCK_TYPE_MASK; type_w_flags &= ~SOCK_TYPE_MASK; - if (type_w_flags & ~(SOCK_CLOEXEC|SOCK_NDELAY|SOCK_NONBLOCK)) + if (type_w_flags & ~SOCK_KNOWN_FLAGS) return (set_errno(EINVAL)); if (devpath != NULL) { @@ -150,7 +158,10 @@ so_socket(int family, int type_w_flags, int protocol, char *devpath, mutex_exit(&fp->f_tlock); setf(fd, fp); if ((type_w_flags & SOCK_CLOEXEC) != 0) { - f_setfd(fd, FD_CLOEXEC); + f_setfd_or(fd, FD_CLOEXEC); + } + if ((type_w_flags & SOCK_CLOFORK) != 0) { + f_setfd_or(fd, FD_CLOFORK); } return (fd); @@ -518,11 +529,11 @@ so_socketpair(int sv[2]) releasef(svs[1]); /* - * If FD_CLOEXEC was set on the filedescriptor we're - * swapping out, we should set it on the new one too. + * If FD_CLOEXEC or FD_CLOFORK was set on the file descriptor + * we're swapping out, we should set it on the new one too. */ - if (orig_flags & FD_CLOEXEC) { - f_setfd(nfd, FD_CLOEXEC); + if (orig_flags & (FD_CLOEXEC | FD_CLOFORK)) { + f_setfd_or(nfd, orig_flags & (FD_CLOEXEC | FD_CLOFORK)); } /* @@ -637,7 +648,7 @@ accept(int sock, struct sockaddr *name, socklen_t *namelenp, int version, dprint(1, ("accept(%d, %p, %p)\n", sock, (void *)name, (void *)namelenp)); - if (flags & ~(SOCK_CLOEXEC|SOCK_NONBLOCK|SOCK_NDELAY)) { + if (flags & ~SOCK_KNOWN_FLAGS) { return (set_errno(EINVAL)); } @@ -727,10 +738,14 @@ accept(int sock, struct sockaddr *name, socklen_t *namelenp, int version, setf(nfd, nfp); /* - * Act on SOCK_CLOEXEC from flags + * Act on SOCK_CLOEXEC and SOCK_CLOFORK from flags */ if (flags & SOCK_CLOEXEC) { - f_setfd(nfd, FD_CLOEXEC); + f_setfd_or(nfd, FD_CLOEXEC); + } + + if (flags & SOCK_CLOFORK) { + f_setfd_or(nfd, FD_CLOFORK); } /* @@ -849,7 +864,7 @@ recvit(int sock, struct nmsghdr *msg, struct uio *uiop, int flags, controllen = msg->msg_controllen; msg->msg_flags = flags & (MSG_OOB | MSG_PEEK | MSG_WAITALL | - MSG_DONTWAIT | MSG_XPG4_2); + MSG_DONTWAIT 
| MSG_XPG4_2 | MSG_CMSG_CLOEXEC | MSG_CMSG_CLOFORK); error = socket_recvmsg(so, msg, uiop, CRED()); if (error) { @@ -868,9 +883,12 @@ recvit(int sock, struct nmsghdr *msg, struct uio *uiop, int flags, if (flagsp != NULL) { /* - * Clear internal flag. + * Clear internal flag. We also clear the CMSG flags out of + * paranoia, though they should have been cleared by our + * sop_recvmsg. */ - msg->msg_flags &= ~MSG_XPG4_2; + msg->msg_flags &= ~(MSG_XPG4_2 | MSG_CMSG_CLOEXEC | + MSG_CMSG_CLOFORK); /* * Determine MSG_CTRUNC. sorecvmsg sets MSG_CTRUNC only diff --git a/usr/src/uts/common/fs/sockfs/socktpi.c b/usr/src/uts/common/fs/sockfs/socktpi.c index 38407013eb6f..f0dc0e3704c9 100644 --- a/usr/src/uts/common/fs/sockfs/socktpi.c +++ b/usr/src/uts/common/fs/sockfs/socktpi.c @@ -3219,9 +3219,8 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, */ control = kmem_zalloc(controllen, KM_SLEEP); - error = so_opt2cmsg(mp, opt, optlen, - !(flags & MSG_XPG4_2), - control, controllen); + error = so_opt2cmsg(mp, opt, optlen, flags, control, + controllen); if (error) { freemsg(mp); if (msg->msg_namelen != 0) @@ -3286,9 +3285,8 @@ sotpi_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, */ control = kmem_zalloc(controllen, KM_SLEEP); - error = so_opt2cmsg(mp, opt, optlen, - !(flags & MSG_XPG4_2), - control, controllen); + error = so_opt2cmsg(mp, opt, optlen, flags, control, + controllen); if (error) { freemsg(mp); kmem_free(control, controllen); diff --git a/usr/src/uts/common/os/fio.c b/usr/src/uts/common/os/fio.c index c25564d85ffd..80f5be22dcb6 100644 --- a/usr/src/uts/common/os/fio.c +++ b/usr/src/uts/common/os/fio.c @@ -22,6 +22,7 @@ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2015, Joyent Inc. 
+ * Copyright 2024 Oxide Computer Company */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -57,6 +58,7 @@ #include #include #include +#include #include #include @@ -867,23 +869,50 @@ flist_fork(uf_info_t *pfip, uf_info_t *cfip) for (fd = 0, pufp = pfip->fi_list, cufp = cfip->fi_list; fd < nfiles; fd++, pufp++, cufp++) { - cufp->uf_file = pufp->uf_file; - cufp->uf_alloc = pufp->uf_alloc; - cufp->uf_flag = pufp->uf_flag; + boolean_t unreserve = B_FALSE; + + /* + * Check to see if FD_CLOFORK is set. In this case we 'close' + * the file descriptor by simply not duplicating it and leaving + * this entry as an empty descriptor. While we don't need to + * close the underlying file_t, we do need to make sure we take + * care of cleaning up our reservation. We do not reset the + * generation either, simulating a setf here. + */ + if ((pufp->uf_flag & FD_CLOFORK) == 0) { + cufp->uf_file = pufp->uf_file; + cufp->uf_flag = pufp->uf_flag; + } cufp->uf_busy = pufp->uf_busy; + cufp->uf_alloc = pufp->uf_alloc; cufp->uf_gen = pufp->uf_gen; + + /* + * We may have to clean up our allocation tracking. This happens + * either because we have no file due to the fact that we're + * busy or because we had a file and FD_CLOFORK is set. If there + * is no file and we're not busy, then the unreserve was already + * taken care of. 
+ */ if (pufp->uf_file == NULL) { - ASSERT(pufp->uf_flag == 0); + ASSERT3U(pufp->uf_flag, ==, 0); if (pufp->uf_busy) { - /* - * Grab locks to appease ASSERTs in fd_reserve - */ - mutex_enter(&cfip->fi_lock); - mutex_enter(&cufp->uf_lock); - fd_reserve(cfip, fd, -1); - mutex_exit(&cufp->uf_lock); - mutex_exit(&cfip->fi_lock); + unreserve = B_TRUE; } + } else if ((pufp->uf_flag & FD_CLOFORK) != 0) { + ASSERT3P(pufp->uf_file, !=, NULL); + unreserve = B_TRUE; + } + + if (unreserve) { + /* + * Grab locks to appease ASSERTs in fd_reserve + */ + mutex_enter(&cfip->fi_lock); + mutex_enter(&cufp->uf_lock); + fd_reserve(cfip, fd, -1); + mutex_exit(&cufp->uf_lock); + mutex_exit(&cfip->fi_lock); } } } @@ -1262,9 +1291,9 @@ f_getfd_error(int fd, int *flagp) error = EBADF; else { UF_ENTER(ufp, fip, fd); - if ((fp = ufp->uf_file) == NULL) + if ((fp = ufp->uf_file) == NULL) { error = EBADF; - else { + } else { flag = ufp->uf_flag; if ((fp->f_flag & FWRITE) && pr_isself(fp->f_vnode)) flag |= FD_CLOEXEC; @@ -1290,24 +1319,29 @@ f_getfd(int fd) /* * Given a file descriptor and file flags, set the user's file flags. - * At present, the only valid flag is FD_CLOEXEC. + * At present, the only valid flags are FD_CLOEXEC and FD_CLOFORK. * getf() may or may not have been called before calling f_setfd_error(). 
*/ -int -f_setfd_error(int fd, int flags) +static int +f_setfd_int(int fd, int flags, bool or) { uf_info_t *fip = P_FINFO(curproc); uf_entry_t *ufp; int error; - if ((uint_t)fd >= fip->fi_nfiles) + if ((uint_t)fd >= fip->fi_nfiles) { error = EBADF; - else { + } else { UF_ENTER(ufp, fip, fd); - if (ufp->uf_file == NULL) + if (ufp->uf_file == NULL) { error = EBADF; - else { - ufp->uf_flag = flags & FD_CLOEXEC; + } else { + flags &= (FD_CLOEXEC | FD_CLOFORK); + if (or) { + ufp->uf_flag |= flags; + } else { + ufp->uf_flag = flags; + } error = 0; } UF_EXIT(ufp); @@ -1315,10 +1349,16 @@ f_setfd_error(int fd, int flags) return (error); } +int +f_setfd_error(int fd, int flags) +{ + return (f_setfd_int(fd, flags, false)); +} + void -f_setfd(int fd, char flags) +f_setfd_or(int fd, short flags) { - (void) f_setfd_error(fd, flags); + (void) f_setfd_int(fd, flags, true); } #define BADFD_MIN 3 diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c index ffea94748301..d4ebbfee3a73 100644 --- a/usr/src/uts/common/os/sysent.c +++ b/usr/src/uts/common/os/sysent.c @@ -24,7 +24,7 @@ * Copyright 2012 Milan Jurik. All rights reserved. * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. * Copyright (c) 2018, Joyent, Inc. 
- * Copyright 2020 Oxide Computer Company + * Copyright 2024 Oxide Computer Company */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ @@ -75,7 +75,7 @@ int exece(const char *, const char **, const char **, int); int faccessat(int, char *, int, int); int fchmodat(int, char *, int, int); int fchownat(int, char *, uid_t, gid_t, int); -int fcntl(int, int, intptr_t); +int fcntl(int, int, intptr_t, intptr_t); int64_t vfork(); int64_t forksys(int, int); int fstat(int, struct stat *); @@ -508,7 +508,7 @@ struct sysent sysent[NSYSCALL] = /* 59 */ SYSENT_CI("exece", exece, 4), /* 60 */ SYSENT_CI("umask", umask, 1), /* 61 */ SYSENT_CI("chroot", chroot, 1), - /* 62 */ SYSENT_CI("fcntl", fcntl, 3), + /* 62 */ SYSENT_CI("fcntl", fcntl, 4), /* 63 */ SYSENT_CI("ulimit", ulimit, 2), /* 64 */ SYSENT_CI("renameat", renameat, 4), /* 65 */ SYSENT_CI("unlinkat", unlinkat, 3), @@ -881,7 +881,7 @@ struct sysent sysent32[NSYSCALL] = /* 59 */ SYSENT_CI("exece", exece, 4), /* 60 */ SYSENT_CI("umask", umask, 1), /* 61 */ SYSENT_CI("chroot", chroot, 1), - /* 62 */ SYSENT_CI("fcntl", fcntl, 3), + /* 62 */ SYSENT_CI("fcntl", fcntl, 4), /* 63 */ SYSENT_CI("ulimit", ulimit32, 2), /* 64 */ SYSENT_CI("renameat", renameat, 4), /* 65 */ SYSENT_CI("unlinkat", unlinkat, 3), diff --git a/usr/src/uts/common/sys/fcntl.h b/usr/src/uts/common/sys/fcntl.h index 4af23583fd1d..6f227a89aff2 100644 --- a/usr/src/uts/common/sys/fcntl.h +++ b/usr/src/uts/common/sys/fcntl.h @@ -92,6 +92,9 @@ extern "C" { #if !defined(_STRICT_SYMBOLS) #define O_DIRECT 0x2000000 /* direct disk access hint */ #endif +#if !defined(_STRICT_SYMBOLS) || defined(_XPG8) +#define O_CLOFORK 0x4000000 /* set the close-on-fork flag */ +#endif /* * fcntl(2) requests @@ -130,6 +133,9 @@ extern "C" { #define F_DUP2FD_CLOEXEC 36 /* Like F_DUP2FD with O_CLOEXEC set */ /* EINVAL is fildes matches arg1 */ #define F_DUPFD_CLOEXEC 37 /* Like F_DUPFD with O_CLOEXEC set */ +#define F_DUP2FD_CLOFORK 57 /* This time with FD_CLOFORK */ +#define 
F_DUPFD_CLOFORK 58 /* Again, but with FD_CLOFORK */ +#define F_DUP3FD 59 /* Duplicate fildes, with flags */ #define F_ISSTREAM 13 /* Is the file desc. a stream ? */ #define F_PRIV 15 /* Turn on private access to file */ @@ -342,7 +348,8 @@ typedef struct o_flock { /* Mask for file access modes */ #define O_ACCMODE (O_SEARCH | O_EXEC | 0x3) -#define FD_CLOEXEC 1 /* close on exec flag */ +#define FD_CLOEXEC 1 /* close-on-exec flag */ +#define FD_CLOFORK 2 /* close-on-fork flag */ /* * DIRECTIO diff --git a/usr/src/uts/common/sys/file.h b/usr/src/uts/common/sys/file.h index d300b940e262..d642cf6c56ac 100644 --- a/usr/src/uts/common/sys/file.h +++ b/usr/src/uts/common/sys/file.h @@ -120,6 +120,7 @@ typedef struct fpollinfo { #define FCLOEXEC 0x800000 /* O_CLOEXEC = 0x800000 */ #define FDIRECTORY 0x1000000 /* O_DIRECTORY = 0x1000000 */ #define FDIRECT 0x2000000 /* O_DIRECT = 0x2000000 */ +#define FCLOFORK 0x4000000 /* O_CLOFORK = 0x4000000 */ #if defined(_KERNEL) || defined(_FAKE_KERNEL) @@ -216,7 +217,7 @@ extern void setf(int, file_t *); extern int f_getfd_error(int, int *); extern char f_getfd(int); extern int f_setfd_error(int, int); -extern void f_setfd(int, char); +extern void f_setfd_or(int, short); extern int f_getfl(int, int *); extern int f_badfd(int, int *, int); extern int fassign(struct vnode **, int, int *); diff --git a/usr/src/uts/common/sys/socket.h b/usr/src/uts/common/sys/socket.h index 9e61bc7bb0e6..819e8715bff6 100644 --- a/usr/src/uts/common/sys/socket.h +++ b/usr/src/uts/common/sys/socket.h @@ -124,6 +124,7 @@ typedef void *_RESTRICT_KYWD Psocklen_t; #define SOCK_CLOEXEC 0x080000 /* like open(2) O_CLOEXEC for socket */ #define SOCK_NONBLOCK 0x100000 /* like O_NONBLOCK */ #define SOCK_NDELAY 0x200000 /* like O_NDELAY */ +#define SOCK_CLOFORK 0x400000 /* set FD_CLOFORK on the socket */ /* * Option flags per-socket. 
@@ -428,6 +429,8 @@ struct msghdr32 { #define MSG_NOSIGNAL 0x200 /* Don't generate SIGPIPE */ #define MSG_DUPCTRL 0x800 /* Save control message for use with */ /* with left over data */ +#define MSG_CMSG_CLOEXEC 0x1000 /* FD_CLOEXEC w/ SCM_RIGHTS */ +#define MSG_CMSG_CLOFORK 0x2000 /* FD_CLOFORK w/ SCM_RIGHTS */ #define MSG_XPG4_2 0x8000 /* Private: XPG4.2 flag */ /* Obsolete but kept for compilation compatibility. Use IOV_MAX. */ @@ -545,7 +548,7 @@ extern int connect(int, const struct sockaddr *, socklen_t); extern int getpeername(int, struct sockaddr *_RESTRICT_KYWD, Psocklen_t); extern int getsockname(int, struct sockaddr *_RESTRICT_KYWD, Psocklen_t); extern int getsockopt(int, int, int, void *_RESTRICT_KYWD, Psocklen_t); -extern int listen(int, int); /* XXX - fixme??? where do I go */ +extern int listen(int, int); extern int socketpair(int, int, int, int *); extern ssize_t recv(int, void *, size_t, int); extern ssize_t recvfrom(int, void *_RESTRICT_KYWD, size_t, int, diff --git a/usr/src/uts/common/syscall/fcntl.c b/usr/src/uts/common/syscall/fcntl.c index 7b787a4acbb2..2965c1000e05 100644 --- a/usr/src/uts/common/syscall/fcntl.c +++ b/usr/src/uts/common/syscall/fcntl.c @@ -23,10 +23,11 @@ * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. * Copyright 2018, Joyent, Inc. + * Copyright 2024 Oxide Computer Company */ /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ +/* All Rights Reserved */ /* * Portions of this source code were derived from Berkeley 4.3 BSD @@ -62,7 +63,7 @@ static void fd_too_big(proc_t *); * File control. 
*/ int -fcntl(int fdes, int cmd, intptr_t arg) +fcntl(int fdes, int cmd, intptr_t arg, intptr_t arg1) { int iarg; int error = 0; @@ -95,6 +96,27 @@ fcntl(int fdes, int cmd, intptr_t arg) ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64)); #endif + /* + * Most fcntl() calls take either 2 or 3 arguments. The introduction of + * F_DUP3FD added a version that takes a 4th argument (referred to as + * arg1). While fcntl() traditionally has had loose validation, we + * strictly validate this new arg. + */ + switch (cmd) { + case F_DUP3FD: + if ((arg1 & ~(FD_CLOEXEC | FD_CLOFORK)) != 0) { + error = EINVAL; + goto out; + } + break; + default: + if (arg1 != 0) { + error = EINVAL; + goto out; + } + break; + } + /* * First, for speed, deal with the subset of cases * that do not require getf() / releasef(). @@ -147,6 +169,7 @@ fcntl(int fdes, int cmd, intptr_t arg) switch (cmd) { case F_DUPFD: case F_DUPFD_CLOEXEC: + case F_DUPFD_CLOFORK: p = curproc; if ((uint_t)iarg >= p->p_fno_ctl) { if (iarg >= 0) @@ -176,12 +199,17 @@ fcntl(int fdes, int cmd, intptr_t arg) error = EMFILE; } else { if (cmd == F_DUPFD_CLOEXEC) { - f_setfd(retval, FD_CLOEXEC); + f_setfd_or(retval, FD_CLOEXEC); + } + + if (cmd == F_DUPFD_CLOFORK) { + f_setfd_or(retval, FD_CLOFORK); } } goto done; case F_DUP2FD_CLOEXEC: + case F_DUP2FD_CLOFORK: if (fdes == iarg) { error = EINVAL; goto done; @@ -190,6 +218,7 @@ fcntl(int fdes, int cmd, intptr_t arg) /*FALLTHROUGH*/ case F_DUP2FD: + case F_DUP3FD: p = curproc; if (fdes == iarg) { retval = iarg; @@ -217,7 +246,11 @@ fcntl(int fdes, int cmd, intptr_t arg) releasef(fdes); if ((error = closeandsetf(iarg, fp)) == 0) { if (cmd == F_DUP2FD_CLOEXEC) { - f_setfd(iarg, FD_CLOEXEC); + f_setfd_or(iarg, FD_CLOEXEC); + } else if (cmd == F_DUP2FD_CLOFORK) { + f_setfd_or(iarg, FD_CLOFORK); + } else if (cmd == F_DUP3FD) { + f_setfd_or(iarg, (int)arg1); } retval = iarg; } else { diff --git a/usr/src/uts/common/syscall/open.c b/usr/src/uts/common/syscall/open.c index 
40d9717a5b1e..d872b16745df 100644 --- a/usr/src/uts/common/syscall/open.c +++ b/usr/src/uts/common/syscall/open.c @@ -231,7 +231,11 @@ copen(int startfd, char *fname, int filemode, int createmode) */ setf(fd, fp); if ((filemode & FCLOEXEC) != 0) { - f_setfd(fd, FD_CLOEXEC); + f_setfd_or(fd, FD_CLOEXEC); + } + + if ((filemode & FCLOFORK) != 0) { + f_setfd_or(fd, FD_CLOFORK); } return (fd); } else { @@ -260,7 +264,11 @@ copen(int startfd, char *fname, int filemode, int createmode) mutex_exit(&fp->f_tlock); setf(fd, fp); if ((filemode & FCLOEXEC) != 0) { - f_setfd(fd, FD_CLOEXEC); + f_setfd_or(fd, FD_CLOEXEC); + } + + if ((filemode & FCLOFORK) != 0) { + f_setfd_or(fd, FD_CLOFORK); } releasef(dupfd); } diff --git a/usr/src/uts/common/syscall/pipe.c b/usr/src/uts/common/syscall/pipe.c index 15b49536de9d..4719e4e32785 100644 --- a/usr/src/uts/common/syscall/pipe.c +++ b/usr/src/uts/common/syscall/pipe.c @@ -26,7 +26,7 @@ */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ +/* All Rights Reserved */ #include @@ -116,7 +116,7 @@ pipe(intptr_t arg, int flags) /* * Validate allowed flags. */ - if ((flags & ~(FCLOEXEC|FNONBLOCK)) != 0) { + if ((flags & ~(FCLOEXEC|FNONBLOCK|FCLOFORK)) != 0) { return (set_errno(EINVAL)); } /* @@ -193,11 +193,16 @@ pipe(intptr_t arg, int flags) setf(fd2, fp2); /* - * Optionally set the FCLOEXEC flag + * Optionally set the FCLOEXEC and FCLOFORK flags */ if ((flags & FCLOEXEC) != 0) { - f_setfd(fd1, FD_CLOEXEC); - f_setfd(fd2, FD_CLOEXEC); + f_setfd_or(fd1, FD_CLOEXEC); + f_setfd_or(fd2, FD_CLOEXEC); + } + + if ((flags & FCLOFORK) != 0) { + f_setfd_or(fd1, FD_CLOFORK); + f_setfd_or(fd2, FD_CLOFORK); } return (0);