diff --git a/CMakeLists.txt b/CMakeLists.txt index 690e2e35b1b6..bc2d4961f429 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -475,7 +475,6 @@ set(ZIG_STAGE2_SOURCES lib/std/os/linux.zig lib/std/os/linux.zig lib/std/os/linux/IoUring.zig - lib/std/os/linux/io_uring_sqe.zig lib/std/os/linux/x86_64.zig lib/std/os/linux/x86_64.zig lib/std/os/windows.zig diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 6bfc41cd792b..f7f6e056265b 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -224,7 +224,7 @@ pub fn cwd() Dir { } else if (native_os == .wasi) { return .{ .fd = std.options.wasiCwd() }; } else { - return .{ .fd = posix.AT.FDCWD }; + return .{ .fd = posix.AT.fdcwd }; } } diff --git a/lib/std/fs/Dir.zig b/lib/std/fs/Dir.zig index 67f0c0d724b8..326b36b45553 100644 --- a/lib/std/fs/Dir.zig +++ b/lib/std/fs/Dir.zig @@ -2812,8 +2812,14 @@ pub fn statFile(self: Dir, sub_path: []const u8) StatFileError!Stat { const rc = linux.statx( self.fd, &sub_path_c, - linux.AT.NO_AUTOMOUNT, - linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME, + .{ .no_automount = true }, + .{ + .type = true, + .mode = true, + .atime = true, + .mtime = true, + .ctime = true, + }, &stx, ); diff --git a/lib/std/fs/File.zig b/lib/std/fs/File.zig index 169f3f5222bf..1491ee0ea327 100644 --- a/lib/std/fs/File.zig +++ b/lib/std/fs/File.zig @@ -564,8 +564,14 @@ pub fn stat(self: File) StatError!Stat { const rc = linux.statx( self.handle, "", - linux.AT.EMPTY_PATH, - linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_ATIME | linux.STATX_MTIME | linux.STATX_CTIME, + .{ .empty_path = true }, + .{ + .type = true, + .mode = true, + .atime = true, + .mtime = true, + .ctime = true, + }, &stx, ); diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 5eed2a4dd3df..b3c5dda65308 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -24,6 +24,7 @@ const iovec = std.posix.iovec; const iovec_const = std.posix.iovec_const; const winsize = std.posix.winsize; const ACCMODE = std.posix.ACCMODE; +pub const IoUring = @import("linux/IoUring.zig"); test { if (builtin.os.tag == .linux) { @@ -923,7 +924,7 @@ pub fn readlink(noalias path: [*:0]const u8, noalias buf_ptr: [*]u8, buf_len: us if (@hasField(SYS, "readlink")) { return syscall3(.readlink, @intFromPtr(path), @intFromPtr(buf_ptr), buf_len); } else { - return syscall4(.readlinkat, @as(usize, @bitCast(@as(isize, AT.FDCWD))), @intFromPtr(path), @intFromPtr(buf_ptr), buf_len); + return syscall4(.readlinkat, @as(usize, @bitCast(@as(isize, At.fdcwd))), @intFromPtr(path), @intFromPtr(buf_ptr), buf_len); } } @@ -935,7 +936,7 @@ pub fn mkdir(path: [*:0]const u8, mode: mode_t) usize { if (@hasField(SYS, "mkdir")) { return syscall2(.mkdir, @intFromPtr(path), mode); } else { - return syscall3(.mkdirat, @as(usize, @bitCast(@as(isize, AT.FDCWD))), @intFromPtr(path), mode); + return syscall3(.mkdirat, @as(usize, @bitCast(@as(isize, At.fdcwd))), @intFromPtr(path), mode); } } @@ -947,7 +948,7 @@ pub fn mknod(path: [*:0]const u8, mode: u32, dev: u32) usize { if (@hasField(SYS, "mknod")) { return syscall3(.mknod, @intFromPtr(path), mode, dev); } else { - return mknodat(AT.FDCWD, path, mode, dev); + return mknodat(At.fdcwd, path, mode, dev); } } @@ -1176,7 +1177,7 @@ pub fn rmdir(path: [*:0]const u8) usize { if (@hasField(SYS, "rmdir")) { return syscall1(.rmdir, @intFromPtr(path)); } else { - return syscall3(.unlinkat, @as(usize, @bitCast(@as(isize, AT.FDCWD))), @intFromPtr(path), AT.REMOVEDIR); + return syscall3(.unlinkat, @as(usize, @bitCast(@as(isize, 
At.fdcwd))), @intFromPtr(path), @as(u32, @bitCast(At{ .removedir = true }))); } } @@ -1184,7 +1185,7 @@ pub fn symlink(existing: [*:0]const u8, new: [*:0]const u8) usize { if (@hasField(SYS, "symlink")) { return syscall2(.symlink, @intFromPtr(existing), @intFromPtr(new)); } else { - return syscall3(.symlinkat, @intFromPtr(existing), @as(usize, @bitCast(@as(isize, AT.FDCWD))), @intFromPtr(new)); + return syscall3(.symlinkat, @intFromPtr(existing), @as(usize, @bitCast(@as(isize, At.fdcwd))), @intFromPtr(new)); } } @@ -1235,7 +1236,7 @@ pub fn access(path: [*:0]const u8, mode: u32) usize { if (@hasField(SYS, "access")) { return syscall2(.access, @intFromPtr(path), mode); } else { - return faccessat(AT.FDCWD, path, mode, 0); + return faccessat(At.fdcwd, path, mode, 0); } } @@ -1336,9 +1337,9 @@ pub fn rename(old: [*:0]const u8, new: [*:0]const u8) usize { if (@hasField(SYS, "rename")) { return syscall2(.rename, @intFromPtr(old), @intFromPtr(new)); } else if (@hasField(SYS, "renameat")) { - return syscall4(.renameat, @as(usize, @bitCast(@as(isize, AT.FDCWD))), @intFromPtr(old), @as(usize, @bitCast(@as(isize, AT.FDCWD))), @intFromPtr(new)); + return syscall4(.renameat, @as(usize, @bitCast(@as(isize, At.fdcwd))), @intFromPtr(old), @as(usize, @bitCast(@as(isize, At.fdcwd))), @intFromPtr(new)); } else { - return syscall5(.renameat2, @as(usize, @bitCast(@as(isize, AT.FDCWD))), @intFromPtr(old), @as(usize, @bitCast(@as(isize, AT.FDCWD))), @intFromPtr(new), 0); + return syscall5(.renameat2, @as(usize, @bitCast(@as(isize, At.fdcwd))), @intFromPtr(old), @as(usize, @bitCast(@as(isize, At.fdcwd))), @intFromPtr(new), 0); } } @@ -1380,7 +1381,7 @@ pub fn open(path: [*:0]const u8, flags: O, perm: mode_t) usize { } else { return syscall4( .openat, - @bitCast(@as(isize, AT.FDCWD)), + @bitCast(@as(isize, At.fdcwd)), @intFromPtr(path), @as(u32, @bitCast(flags)), perm, @@ -1393,7 +1394,7 @@ pub fn create(path: [*:0]const u8, perm: mode_t) usize { } pub fn openat(dirfd: i32, path: [*:0]const u8, flags: O, mode: mode_t) usize { - // dirfd could be negative, for example AT.FDCWD is -100 + // dirfd could be negative, for example At.fdcwd is -100 return syscall4(.openat, @bitCast(@as(isize, dirfd)), @intFromPtr(path), @as(u32, @bitCast(flags)), mode); } @@ -1419,7 +1420,7 @@ pub fn chmod(path: [*:0]const u8, mode: mode_t) usize { if (@hasField(SYS, "chmod")) { return syscall2(.chmod, @intFromPtr(path), mode); } else { - return fchmodat(AT.FDCWD, path, mode, 0); + return fchmodat(At.fdcwd, path, mode, 0); } } @@ -1551,9 +1552,9 @@ pub fn link(oldpath: [*:0]const u8, newpath: [*:0]const u8) usize { } else { return syscall5( .linkat, - @as(usize, @bitCast(@as(isize, AT.FDCWD))), + @as(usize, @bitCast(@as(isize, At.fdcwd))), @intFromPtr(oldpath), - @as(usize, @bitCast(@as(isize, AT.FDCWD))), + @as(usize, @bitCast(@as(isize, At.fdcwd))), @intFromPtr(newpath), 0, ); @@ -1575,7 +1576,7 @@ pub fn unlink(path: [*:0]const u8) usize { if (@hasField(SYS, "unlink")) { return syscall1(.unlink, @intFromPtr(path)); } else { - return syscall3(.unlinkat, @as(usize, @bitCast(@as(isize, AT.FDCWD))), @intFromPtr(path), 0); + return syscall3(.unlinkat, @as(usize, @bitCast(@as(isize, At.fdcwd))), @intFromPtr(path), 0); } } @@ -2272,6 +2273,7 @@ pub fn lstat(pathname: [*:0]const u8, statbuf: *Stat) usize { } } +// TODO: flags is At Flags pub fn fstatat(dirfd: i32, path: [*:0]const u8, stat_buf: *Stat, flags: u32) usize { if (native_arch == .riscv32 or native_arch.isLoongArch()) { // riscv32 and loongarch have made the interesting decision to 
not implement some of @@ -2284,13 +2286,13 @@ pub fn fstatat(dirfd: i32, path: [*:0]const u8, stat_buf: *Stat, flags: u32) usi } } -pub fn statx(dirfd: i32, path: [*:0]const u8, flags: u32, mask: u32, statx_buf: *Statx) usize { +pub fn statx(dirfd: i32, path: [*:0]const u8, flags: At, mask: Statx.Mask, statx_buf: *Statx) usize { return syscall5( .statx, @as(usize, @bitCast(@as(isize, dirfd))), @intFromPtr(path), - flags, - mask, + @intCast(@as(u32, @bitCast(flags))), + @intCast(@as(u32, @bitCast(mask))), @intFromPtr(statx_buf), ); } @@ -2540,15 +2542,15 @@ pub fn uname(uts: *utsname) usize { return syscall1(.uname, @intFromPtr(uts)); } -pub fn io_uring_setup(entries: u32, p: *io_uring_params) usize { +pub fn io_uring_setup(entries: u32, p: *IoUring.Params) usize { return syscall2(.io_uring_setup, entries, @intFromPtr(p)); } -pub fn io_uring_enter(fd: i32, to_submit: u32, min_complete: u32, flags: u32, sig: ?*sigset_t) usize { - return syscall6(.io_uring_enter, @as(usize, @bitCast(@as(isize, fd))), to_submit, min_complete, flags, @intFromPtr(sig), NSIG / 8); +pub fn io_uring_enter(fd: i32, to_submit: u32, min_complete: u32, flags: IoUring.uflags.Enter, sig: ?*sigset_t) usize { + return syscall6(.io_uring_enter, @as(usize, @bitCast(@as(isize, fd))), to_submit, min_complete, @intCast(@as(u32, @bitCast(flags))), @intFromPtr(sig), NSIG / 8); } -pub fn io_uring_register(fd: i32, opcode: IORING_REGISTER, arg: ?*const anyopaque, nr_args: u32) usize { +pub fn io_uring_register(fd: i32, opcode: IoUring.RegisterOp, arg: ?*const anyopaque, nr_args: u32) usize { return syscall4(.io_uring_register, @as(usize, @bitCast(@as(isize, fd))), @intFromEnum(opcode), @intFromPtr(arg), nr_args); } @@ -3513,41 +3515,72 @@ pub const STDIN_FILENO = 0; pub const STDOUT_FILENO = 1; pub const STDERR_FILENO = 2; -pub const AT = struct { - /// Special value used to indicate openat should use the current working directory - pub const FDCWD = -100; - +/// Deprecated alias to At +pub const AT = At; +/// matches AT_* and AT_STATX_* +pub const At = packed struct(u32) { + _u1: u8 = 0, /// Do not follow symbolic links - pub const SYMLINK_NOFOLLOW = 0x100; - + symlink_nofollow: bool = false, /// Remove directory instead of unlinking file - pub const REMOVEDIR = 0x200; - + removedir: bool = false, /// Follow symbolic links. 
- pub const SYMLINK_FOLLOW = 0x400; - + symlink_follow: bool = false, /// Suppress terminal automount traversal - pub const NO_AUTOMOUNT = 0x800; - + no_automount: bool = false, /// Allow empty relative pathname - pub const EMPTY_PATH = 0x1000; + empty_path: bool = false, + /// Force the attributes to be sync'd with the server + statx_force_sync: bool = false, + /// Don't sync attributes with the server + statx_dont_sync: bool = false, + /// Apply to the entire subtree + recursive: bool = false, + _17: u16 = 0, + /// File handle is needed to compare object identity and may not be usable + /// with open_by_handle_at(2) + pub const handle_fid: At = .{ .removedir = true }; + + /// Special value used to indicate openat should use the current working directory + pub const fdcwd = -100; + + // https://github.com/torvalds/linux/blob/d3479214c05dbd07bc56f8823e7bd8719fcd39a9/tools/perf/trace/beauty/fs_at_flags.sh#L15 + /// AT_STATX_SYNC_TYPE is not a bit, it's a mask of + /// AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC and AT_STATX_DONT_SYNC /// Type of synchronisation required from statx() - pub const STATX_SYNC_TYPE = 0x6000; + pub const statx_sync_type: u32 = 0x6000; - /// - Do whatever stat() does - pub const STATX_SYNC_AS_STAT = 0x0000; + /// Do whatever stat() does + /// This is the default and is very much filesystem-specific + pub const statx_sync_as_stat: At = .{}; + // DEPRECATED ALIASES + // + + /// Special value used to indicate openat should use the current working directory + pub const FDCWD = fdcwd; + /// Do not follow symbolic links + pub const SYMLINK_NOFOLLOW: u32 = @bitCast(At{ .symlink_nofollow = true }); + /// Remove directory instead of unlinking file + pub const REMOVEDIR: u32 = @bitCast(At{ .removedir = true }); + pub const HANDLE_FID: u32 = @bitCast(handle_fid); + /// Follow symbolic links. 
+ pub const SYMLINK_FOLLOW: u32 = @bitCast(At{ .symlink_follow = true }); + /// Suppress terminal automount traversal + pub const NO_AUTOMOUNT: u32 = @bitCast(At{ .no_automount = true }); + /// Allow empty relative pathname + pub const EMPTY_PATH: u32 = @bitCast(At{ .empty_path = true }); + /// Type of synchronisation required from statx() + pub const STATX_SYNC_TYPE: u32 = statx_sync_type; + /// - Do whatever stat() does + pub const STATX_SYNC_AS_STAT: u32 = @bitCast(statx_sync_as_stat); /// - Force the attributes to be sync'd with the server - pub const STATX_FORCE_SYNC = 0x2000; - + pub const STATX_FORCE_SYNC: u32 = @bitCast(At{ .statx_force_sync = true }); /// - Don't sync attributes with the server - pub const STATX_DONT_SYNC = 0x4000; - + pub const STATX_DONT_SYNC: u32 = @bitCast(At{ .statx_dont_sync = true }); /// Apply to the entire subtree - pub const RECURSIVE = 0x8000; - - pub const HANDLE_FID = REMOVEDIR; + pub const RECURSIVE: u32 = @bitCast(At{ .recursive = true }); }; pub const FALLOC = struct { @@ -3693,31 +3726,87 @@ pub const X_OK = 1; pub const W_OK = 2; pub const R_OK = 4; -pub const W = struct { - pub const NOHANG = 1; - pub const UNTRACED = 2; - pub const STOPPED = 2; - pub const EXITED = 4; - pub const CONTINUED = 8; - pub const NOWAIT = 0x1000000; +pub const W = packed struct(u32) { + nohang: bool = false, + stopped: bool = false, + exited: bool = false, + continued: bool = false, + _5: u20 = 0, + nowait: bool = false, + _26: u7 = 0, + /// alias to stopped + pub const untraced: W = .{ .stopped = true }; + + fn toInt(s: W) u32 { + return @bitCast(s); + } + + /// matches EXITSTATUS in C + pub fn exitStatus(s: W) u8 { + return @intCast((s.toInt() & 0xff00) >> 8); + } + + /// matches TERMSIG in C + pub fn termSig(s: W) u32 { + return s.toInt() & 0x7f; + } + + /// matches STOPSIG in C + pub fn stopSig(s: W) u32 { + return exitStatus(s); + } + + /// matches IFEXITED in C + pub fn ifExited(s: W) bool { + return termSig(s) == 0; + } + + /// matches IFSTOPPED in C + pub fn ifStopped(s: W) bool { + return @as(u16, @truncate(((s.toInt() & 0xffff) *% 0x10001) >> 8)) > 0x7f00; + } + /// matches IFSIGNALED in C + pub fn ifSignaled(s: W) bool { + return (s.toInt() & 0xffff) -% 1 < 0xff; + } + + // Deprecated constants + pub const NOHANG: u32 = @bitCast(W{ .nohang = true }); + pub const STOPPED: u32 = @bitCast(W{ .stopped = true }); + pub const UNTRACED: u32 = @bitCast(untraced); + pub const EXITED: u32 = @bitCast(W{ .exited = true }); + pub const CONTINUED: u32 = @bitCast(W{ .continued = true }); + pub const NOWAIT: u32 = @bitCast(W{ .nowait = true }); + + /// DEPRECATED alias to exitStatus pub fn EXITSTATUS(s: u32) u8 { - return @as(u8, @intCast((s & 0xff00) >> 8)); + return exitStatus(@bitCast(s)); } + + /// DEPRECATED alias to termSig pub fn TERMSIG(s: u32) u32 { - return s & 0x7f; + return termSig(@bitCast(s)); } + + /// DEPRECATED alias to stopSig pub fn STOPSIG(s: u32) u32 { - return EXITSTATUS(s); + return stopSig(@bitCast(s)); } + + /// DEPRECATED alias to ifExited pub fn IFEXITED(s: u32) bool { - return TERMSIG(s) == 0; + return ifExited(@bitCast(s)); } + + /// DEPRECATED alias to ifStopped pub fn IFSTOPPED(s: u32) bool { - return @as(u16, @truncate(((s & 0xffff) *% 0x10001) >> 8)) > 0x7f00; + return ifStopped(@bitCast(s)); } + + /// DEPRECATED alias to ifSignaled pub fn IFSIGNALED(s: u32) bool { - return (s & 0xffff) -% 1 < 0xff; + return ifSignaled(@bitCast(s)); } }; @@ -3914,22 +4003,83 @@ pub const SEEK = struct { pub const END = 2; };
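+// A layout sanity check, illustrative only (a sketch, not one of the hunks
+// above): the packed structs are bit-compatible with the old integer
+// constants, so a @bitCast round-trip is enough to validate a call-site
+// migration. Assumes it lives in this file, where `std`, `At`, and `W` are in scope.
+test "At and W keep the old C ABI bit values" {
+    // each bool field occupies exactly the bit of the AT_* constant it replaces
+    try std.testing.expectEqual(@as(u32, 0x100), @as(u32, @bitCast(At{ .symlink_nofollow = true })));
+    try std.testing.expectEqual(@as(u32, 0x1000), @as(u32, @bitCast(At{ .empty_path = true })));
+    // a wait(2) status for a clean exit with code 7 is 0x0700
+    const status: W = @bitCast(@as(u32, 0x0700));
+    try std.testing.expect(status.ifExited());
+    try std.testing.expectEqual(@as(u8, 7), status.exitStatus());
+}
-pub const SHUT = 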
struct { - pub const RD = 0; - pub const WR = 1; - pub const RDWR = 2; +/// Deprecated alias to Shut +pub const SHUT = Shut; +/// enum sock_shutdown_cmd - Shutdown types +/// matches SHUT_* in the kernel +pub const Shut = enum(u32) { + /// SHUT_RD: shutdown receptions + rd = 0, + /// SHUT_WR: shutdown transmissions + wr = 1, + /// SHUT_RDWR: shutdown receptions/transmissions + rdwr = 2, + + _, + + // deprecated constants of the fields + pub const RD: u32 = @intFromEnum(Shut.rd); + pub const WR: u32 = @intFromEnum(Shut.wr); + pub const RDWR: u32 = @intFromEnum(Shut.rdwr); }; -pub const SOCK = struct { - pub const STREAM = if (is_mips) 2 else 1; - pub const DGRAM = if (is_mips) 1 else 2; - pub const RAW = 3; - pub const RDM = 4; - pub const SEQPACKET = 5; - pub const DCCP = 6; - pub const PACKET = 10; - pub const CLOEXEC = if (is_sparc) 0o20000000 else 0o2000000; - pub const NONBLOCK = if (is_mips) 0o200 else if (is_sparc) 0o40000 else 0o4000; +/// SYNC_FILE_RANGE_* flags +pub const SyncFileRange = packed struct(u32) { + _: u32 = 0, // TODO: fill out +}; + +/// Deprecated alias to Sock +pub const SOCK = Sock; +/// SOCK_* Socket type and flags +pub const Sock = packed struct(u32) { + type: Type = .default, + flags: Flags = .{}, + + /// matches sock_type in kernel + pub const Type = enum(u7) { + default = 0, + stream = if (is_mips) 2 else 1, + dgram = if (is_mips) 1 else 2, + raw = 3, + rdm = 4, + seqpacket = 5, + dccp = 6, + packet = 10, + + _, + }; + + // Flags occupies bits 7-31 of the u32 (Type occupies bits 0-6) + /// Flags for socket, socketpair, accept4 + pub const Flags = if (is_sparc) packed struct(u25) { + _8: u7 = 0, // start from u7 since Type comes before Flags + nonblock: bool = false, + _16: u7 = 0, + cloexec: bool = false, + _24: u9 = 0, + } else if (is_mips) packed struct(u25) { + nonblock: bool = false, + _9: u11 = 0, + cloexec: bool = false, + _21: u12 = 0, + } else packed struct(u25) { + _8: u4 = 0, + nonblock: bool = false, + _13: u7 = 0, + cloexec: bool = false, + _21: u12 = 0, + }; + + // Deprecated aliases for SOCK type and flags + pub const STREAM: u32 = @intFromEnum(Type.stream); + pub const DGRAM: u32 = @intFromEnum(Type.dgram); + pub const RAW: u32 = @intFromEnum(Type.raw); + pub const RDM: u32 = @intFromEnum(Type.rdm); + pub const SEQPACKET: u32 = @intFromEnum(Type.seqpacket); + pub const DCCP: u32 = @intFromEnum(Type.dccp); + pub const PACKET: u32 = @intFromEnum(Type.packet); + pub const CLOEXEC: u32 = (@as(u25, @bitCast(Flags{ .cloexec = true })) << 7); + pub const NONBLOCK: u32 = (@as(u25, @bitCast(Flags{ .nonblock = true })) << 7); }; pub const TCP = struct { @@ -4032,386 +4182,481 @@ pub const UDP_ENCAP = struct { pub const RXRPC = 6; }; -pub const PF = struct { - pub const UNSPEC = 0; - pub const LOCAL = 1; - pub const UNIX = LOCAL; - pub const FILE = LOCAL; - pub const INET = 2; - pub const AX25 = 3; - pub const IPX = 4; - pub const APPLETALK = 5; - pub const NETROM = 6; - pub const BRIDGE = 7; - pub const ATMPVC = 8; - pub const X25 = 9; - pub const INET6 = 10; - pub const ROSE = 11; - pub const DECnet = 12; - pub const NETBEUI = 13; - pub const SECURITY = 14; - pub const KEY = 15; - pub const NETLINK = 16; - pub const ROUTE = PF.NETLINK; - pub const PACKET = 17; - pub const ASH = 18; - pub const ECONET = 19; - pub const ATMSVC = 20; - pub const RDS = 21; - pub const SNA = 22; - pub const IRDA = 23; - pub const PPPOX = 24; - pub const WANPIPE = 25; - pub const LLC = 26; - pub const IB = 27; - pub const MPLS = 28; - pub const CAN = 29; - pub const TIPC = 30; - pub const BLUETOOTH = 31; - 
pub const IUCV = 32; - pub const RXRPC = 33; - pub const ISDN = 34; - pub const PHONET = 35; - pub const IEEE802154 = 36; - pub const CAIF = 37; - pub const ALG = 38; - pub const NFC = 39; - pub const VSOCK = 40; - pub const KCM = 41; - pub const QIPCRTR = 42; - pub const SMC = 43; - pub const XDP = 44; - pub const MAX = 45; -}; - -pub const AF = struct { - pub const UNSPEC = PF.UNSPEC; - pub const LOCAL = PF.LOCAL; - pub const UNIX = AF.LOCAL; - pub const FILE = AF.LOCAL; - pub const INET = PF.INET; - pub const AX25 = PF.AX25; - pub const IPX = PF.IPX; - pub const APPLETALK = PF.APPLETALK; - pub const NETROM = PF.NETROM; - pub const BRIDGE = PF.BRIDGE; - pub const ATMPVC = PF.ATMPVC; - pub const X25 = PF.X25; - pub const INET6 = PF.INET6; - pub const ROSE = PF.ROSE; - pub const DECnet = PF.DECnet; - pub const NETBEUI = PF.NETBEUI; - pub const SECURITY = PF.SECURITY; - pub const KEY = PF.KEY; - pub const NETLINK = PF.NETLINK; - pub const ROUTE = PF.ROUTE; - pub const PACKET = PF.PACKET; - pub const ASH = PF.ASH; - pub const ECONET = PF.ECONET; - pub const ATMSVC = PF.ATMSVC; - pub const RDS = PF.RDS; - pub const SNA = PF.SNA; - pub const IRDA = PF.IRDA; - pub const PPPOX = PF.PPPOX; - pub const WANPIPE = PF.WANPIPE; - pub const LLC = PF.LLC; - pub const IB = PF.IB; - pub const MPLS = PF.MPLS; - pub const CAN = PF.CAN; - pub const TIPC = PF.TIPC; - pub const BLUETOOTH = PF.BLUETOOTH; - pub const IUCV = PF.IUCV; - pub const RXRPC = PF.RXRPC; - pub const ISDN = PF.ISDN; - pub const PHONET = PF.PHONET; - pub const IEEE802154 = PF.IEEE802154; - pub const CAIF = PF.CAIF; - pub const ALG = PF.ALG; - pub const NFC = PF.NFC; - pub const VSOCK = PF.VSOCK; - pub const KCM = PF.KCM; - pub const QIPCRTR = PF.QIPCRTR; - pub const SMC = PF.SMC; - pub const XDP = PF.XDP; - pub const MAX = PF.MAX; -}; - -pub const SO = if (is_mips) struct { - pub const DEBUG = 1; - pub const REUSEADDR = 0x0004; - pub const KEEPALIVE = 0x0008; - pub const DONTROUTE = 0x0010; - pub const BROADCAST = 0x0020; - pub const LINGER = 0x0080; - pub const OOBINLINE = 0x0100; - pub const REUSEPORT = 0x0200; - pub const SNDBUF = 0x1001; - pub const RCVBUF = 0x1002; - pub const SNDLOWAT = 0x1003; - pub const RCVLOWAT = 0x1004; - pub const RCVTIMEO = 0x1006; - pub const SNDTIMEO = 0x1005; - pub const ERROR = 0x1007; - pub const TYPE = 0x1008; - pub const ACCEPTCONN = 0x1009; - pub const PROTOCOL = 0x1028; - pub const DOMAIN = 0x1029; - pub const NO_CHECK = 11; - pub const PRIORITY = 12; - pub const BSDCOMPAT = 14; - pub const PASSCRED = 17; - pub const PEERCRED = 18; - pub const PEERSEC = 30; - pub const SNDBUFFORCE = 31; - pub const RCVBUFFORCE = 33; - pub const SECURITY_AUTHENTICATION = 22; - pub const SECURITY_ENCRYPTION_TRANSPORT = 23; - pub const SECURITY_ENCRYPTION_NETWORK = 24; - pub const BINDTODEVICE = 25; - pub const ATTACH_FILTER = 26; - pub const DETACH_FILTER = 27; - pub const GET_FILTER = ATTACH_FILTER; - pub const PEERNAME = 28; - pub const TIMESTAMP_OLD = 29; - pub const PASSSEC = 34; - pub const TIMESTAMPNS_OLD = 35; - pub const MARK = 36; - pub const TIMESTAMPING_OLD = 37; - pub const RXQ_OVFL = 40; - pub const WIFI_STATUS = 41; - pub const PEEK_OFF = 42; - pub const NOFCS = 43; - pub const LOCK_FILTER = 44; - pub const SELECT_ERR_QUEUE = 45; - pub const BUSY_POLL = 46; - pub const MAX_PACING_RATE = 47; - pub const BPF_EXTENSIONS = 48; - pub const INCOMING_CPU = 49; - pub const ATTACH_BPF = 50; - pub const DETACH_BPF = DETACH_FILTER; - pub const ATTACH_REUSEPORT_CBPF = 51; - pub const ATTACH_REUSEPORT_EBPF = 52; - pub 
const CNX_ADVICE = 53; - pub const MEMINFO = 55; - pub const INCOMING_NAPI_ID = 56; - pub const COOKIE = 57; - pub const PEERGROUPS = 59; - pub const ZEROCOPY = 60; - pub const TXTIME = 61; - pub const BINDTOIFINDEX = 62; - pub const TIMESTAMP_NEW = 63; - pub const TIMESTAMPNS_NEW = 64; - pub const TIMESTAMPING_NEW = 65; - pub const RCVTIMEO_NEW = 66; - pub const SNDTIMEO_NEW = 67; - pub const DETACH_REUSEPORT_BPF = 68; -} else if (is_ppc) struct { - pub const DEBUG = 1; - pub const REUSEADDR = 2; - pub const TYPE = 3; - pub const ERROR = 4; - pub const DONTROUTE = 5; - pub const BROADCAST = 6; - pub const SNDBUF = 7; - pub const RCVBUF = 8; - pub const KEEPALIVE = 9; - pub const OOBINLINE = 10; - pub const NO_CHECK = 11; - pub const PRIORITY = 12; - pub const LINGER = 13; - pub const BSDCOMPAT = 14; - pub const REUSEPORT = 15; - pub const RCVLOWAT = 16; - pub const SNDLOWAT = 17; - pub const RCVTIMEO = 18; - pub const SNDTIMEO = 19; - pub const PASSCRED = 20; - pub const PEERCRED = 21; - pub const ACCEPTCONN = 30; - pub const PEERSEC = 31; - pub const SNDBUFFORCE = 32; - pub const RCVBUFFORCE = 33; - pub const PROTOCOL = 38; - pub const DOMAIN = 39; - pub const SECURITY_AUTHENTICATION = 22; - pub const SECURITY_ENCRYPTION_TRANSPORT = 23; - pub const SECURITY_ENCRYPTION_NETWORK = 24; - pub const BINDTODEVICE = 25; - pub const ATTACH_FILTER = 26; - pub const DETACH_FILTER = 27; - pub const GET_FILTER = ATTACH_FILTER; - pub const PEERNAME = 28; - pub const TIMESTAMP_OLD = 29; - pub const PASSSEC = 34; - pub const TIMESTAMPNS_OLD = 35; - pub const MARK = 36; - pub const TIMESTAMPING_OLD = 37; - pub const RXQ_OVFL = 40; - pub const WIFI_STATUS = 41; - pub const PEEK_OFF = 42; - pub const NOFCS = 43; - pub const LOCK_FILTER = 44; - pub const SELECT_ERR_QUEUE = 45; - pub const BUSY_POLL = 46; - pub const MAX_PACING_RATE = 47; - pub const BPF_EXTENSIONS = 48; - pub const INCOMING_CPU = 49; - pub const ATTACH_BPF = 50; - pub const DETACH_BPF = DETACH_FILTER; - pub const ATTACH_REUSEPORT_CBPF = 51; - pub const ATTACH_REUSEPORT_EBPF = 52; - pub const CNX_ADVICE = 53; - pub const MEMINFO = 55; - pub const INCOMING_NAPI_ID = 56; - pub const COOKIE = 57; - pub const PEERGROUPS = 59; - pub const ZEROCOPY = 60; - pub const TXTIME = 61; - pub const BINDTOIFINDEX = 62; - pub const TIMESTAMP_NEW = 63; - pub const TIMESTAMPNS_NEW = 64; - pub const TIMESTAMPING_NEW = 65; - pub const RCVTIMEO_NEW = 66; - pub const SNDTIMEO_NEW = 67; - pub const DETACH_REUSEPORT_BPF = 68; -} else if (is_sparc) struct { - pub const DEBUG = 1; - pub const REUSEADDR = 4; - pub const TYPE = 4104; - pub const ERROR = 4103; - pub const DONTROUTE = 16; - pub const BROADCAST = 32; - pub const SNDBUF = 4097; - pub const RCVBUF = 4098; - pub const KEEPALIVE = 8; - pub const OOBINLINE = 256; - pub const NO_CHECK = 11; - pub const PRIORITY = 12; - pub const LINGER = 128; - pub const BSDCOMPAT = 1024; - pub const REUSEPORT = 512; - pub const PASSCRED = 2; - pub const PEERCRED = 64; - pub const RCVLOWAT = 2048; - pub const SNDLOWAT = 4096; - pub const RCVTIMEO = 8192; - pub const SNDTIMEO = 16384; - pub const ACCEPTCONN = 32768; - pub const PEERSEC = 30; - pub const SNDBUFFORCE = 4106; - pub const RCVBUFFORCE = 4107; - pub const PROTOCOL = 4136; - pub const DOMAIN = 4137; - pub const SECURITY_AUTHENTICATION = 20481; - pub const SECURITY_ENCRYPTION_TRANSPORT = 20482; - pub const SECURITY_ENCRYPTION_NETWORK = 20484; - pub const BINDTODEVICE = 13; - pub const ATTACH_FILTER = 26; - pub const DETACH_FILTER = 27; - pub const GET_FILTER = 26; - pub 
const PEERNAME = 28; - pub const TIMESTAMP_OLD = 29; - pub const PASSSEC = 31; - pub const TIMESTAMPNS_OLD = 33; - pub const MARK = 34; - pub const TIMESTAMPING_OLD = 35; - pub const RXQ_OVFL = 36; - pub const WIFI_STATUS = 37; - pub const PEEK_OFF = 38; - pub const NOFCS = 39; - pub const LOCK_FILTER = 40; - pub const SELECT_ERR_QUEUE = 41; - pub const BUSY_POLL = 48; - pub const MAX_PACING_RATE = 49; - pub const BPF_EXTENSIONS = 50; - pub const INCOMING_CPU = 51; - pub const ATTACH_BPF = 52; - pub const DETACH_BPF = 27; - pub const ATTACH_REUSEPORT_CBPF = 53; - pub const ATTACH_REUSEPORT_EBPF = 54; - pub const CNX_ADVICE = 55; - pub const MEMINFO = 57; - pub const INCOMING_NAPI_ID = 58; - pub const COOKIE = 59; - pub const PEERGROUPS = 61; - pub const ZEROCOPY = 62; - pub const TXTIME = 63; - pub const BINDTOIFINDEX = 65; - pub const TIMESTAMP_NEW = 70; - pub const TIMESTAMPNS_NEW = 66; - pub const TIMESTAMPING_NEW = 67; - pub const RCVTIMEO_NEW = 68; - pub const SNDTIMEO_NEW = 69; - pub const DETACH_REUSEPORT_BPF = 71; -} else struct { - pub const DEBUG = 1; - pub const REUSEADDR = 2; - pub const TYPE = 3; - pub const ERROR = 4; - pub const DONTROUTE = 5; - pub const BROADCAST = 6; - pub const SNDBUF = 7; - pub const RCVBUF = 8; - pub const KEEPALIVE = 9; - pub const OOBINLINE = 10; - pub const NO_CHECK = 11; - pub const PRIORITY = 12; - pub const LINGER = 13; - pub const BSDCOMPAT = 14; - pub const REUSEPORT = 15; - pub const PASSCRED = 16; - pub const PEERCRED = 17; - pub const RCVLOWAT = 18; - pub const SNDLOWAT = 19; - pub const RCVTIMEO = 20; - pub const SNDTIMEO = 21; - pub const ACCEPTCONN = 30; - pub const PEERSEC = 31; - pub const SNDBUFFORCE = 32; - pub const RCVBUFFORCE = 33; - pub const PROTOCOL = 38; - pub const DOMAIN = 39; - pub const SECURITY_AUTHENTICATION = 22; - pub const SECURITY_ENCRYPTION_TRANSPORT = 23; - pub const SECURITY_ENCRYPTION_NETWORK = 24; - pub const BINDTODEVICE = 25; - pub const ATTACH_FILTER = 26; - pub const DETACH_FILTER = 27; - pub const GET_FILTER = ATTACH_FILTER; - pub const PEERNAME = 28; - pub const TIMESTAMP_OLD = 29; - pub const PASSSEC = 34; - pub const TIMESTAMPNS_OLD = 35; - pub const MARK = 36; - pub const TIMESTAMPING_OLD = 37; - pub const RXQ_OVFL = 40; - pub const WIFI_STATUS = 41; - pub const PEEK_OFF = 42; - pub const NOFCS = 43; - pub const LOCK_FILTER = 44; - pub const SELECT_ERR_QUEUE = 45; - pub const BUSY_POLL = 46; - pub const MAX_PACING_RATE = 47; - pub const BPF_EXTENSIONS = 48; - pub const INCOMING_CPU = 49; - pub const ATTACH_BPF = 50; - pub const DETACH_BPF = DETACH_FILTER; - pub const ATTACH_REUSEPORT_CBPF = 51; - pub const ATTACH_REUSEPORT_EBPF = 52; - pub const CNX_ADVICE = 53; - pub const MEMINFO = 55; - pub const INCOMING_NAPI_ID = 56; - pub const COOKIE = 57; - pub const PEERGROUPS = 59; - pub const ZEROCOPY = 60; - pub const TXTIME = 61; - pub const BINDTOIFINDEX = 62; - pub const TIMESTAMP_NEW = 63; - pub const TIMESTAMPNS_NEW = 64; - pub const TIMESTAMPING_NEW = 65; - pub const RCVTIMEO_NEW = 66; - pub const SNDTIMEO_NEW = 67; - pub const DETACH_REUSEPORT_BPF = 68; +// Deprecated Alias +pub const AF = Af; +pub const PF = Af; +/// Protocol Family (same values as Protocol Family) +pub const Pf = Af; +/// Address Family +pub const Af = enum(u16) { + unspec = 0, + unix = 1, + inet = 2, + ax25 = 3, + ipx = 4, + appletalk = 5, + netrom = 6, + bridge = 7, + atmpvc = 8, + x25 = 9, + inet6 = 10, + rose = 11, + decnet = 12, + netbeui = 13, + security = 14, + key = 15, + route = 16, + packet = 17, + ash = 18, + econet = 19, 
+ atmsvc = 20, + rds = 21, + sna = 22, + irda = 23, + pppox = 24, + wanpipe = 25, + llc = 26, + ib = 27, + mpls = 28, + can = 29, + tipc = 30, + bluetooth = 31, + iucv = 32, + rxrpc = 33, + isdn = 34, + phonet = 35, + ieee802154 = 36, + caif = 37, + alg = 38, + nfc = 39, + vsock = 40, + kcm = 41, + qipcrtr = 42, + smc = 43, + xdp = 44, + max = 45, + _, + + // Aliases + pub const local = Af.unix; + pub const file = Af.unix; + pub const netlink = Af.route; + + // Deprecated constants for backward compatibility + pub const UNSPEC: u16 = @intFromEnum(Af.unspec); + pub const UNIX: u16 = @intFromEnum(Af.unix); + pub const LOCAL: u16 = @intFromEnum(local); + pub const FILE: u16 = @intFromEnum(file); + pub const INET: u16 = @intFromEnum(Af.inet); + pub const AX25: u16 = @intFromEnum(Af.ax25); + pub const IPX: u16 = @intFromEnum(Af.ipx); + pub const APPLETALK: u16 = @intFromEnum(Af.appletalk); + pub const NETROM: u16 = @intFromEnum(Af.netrom); + pub const BRIDGE: u16 = @intFromEnum(Af.bridge); + pub const ATMPVC: u16 = @intFromEnum(Af.atmpvc); + pub const X25: u16 = @intFromEnum(Af.x25); + pub const INET6: u16 = @intFromEnum(Af.inet6); + pub const ROSE: u16 = @intFromEnum(Af.rose); + pub const DECnet: u16 = @intFromEnum(Af.decnet); + pub const NETBEUI: u16 = @intFromEnum(Af.netbeui); + pub const SECURITY: u16 = @intFromEnum(Af.security); + pub const KEY: u16 = @intFromEnum(Af.key); + pub const ROUTE: u16 = @intFromEnum(Af.route); + pub const NETLINK: u16 = @intFromEnum(netlink); + pub const PACKET: u16 = @intFromEnum(Af.packet); + pub const ASH: u16 = @intFromEnum(Af.ash); + pub const ECONET: u16 = @intFromEnum(Af.econet); + pub const ATMSVC: u16 = @intFromEnum(Af.atmsvc); + pub const RDS: u16 = @intFromEnum(Af.rds); + pub const SNA: u16 = @intFromEnum(Af.sna); + pub const IRDA: u16 = @intFromEnum(Af.irda); + pub const PPPOX: u16 = @intFromEnum(Af.pppox); + pub const WANPIPE: u16 = @intFromEnum(Af.wanpipe); + pub const LLC: u16 = @intFromEnum(Af.llc); + pub const IB: u16 = @intFromEnum(Af.ib); + pub const MPLS: u16 = @intFromEnum(Af.mpls); + pub const CAN: u16 = @intFromEnum(Af.can); + pub const TIPC: u16 = @intFromEnum(Af.tipc); + pub const BLUETOOTH: u16 = @intFromEnum(Af.bluetooth); + pub const IUCV: u16 = @intFromEnum(Af.iucv); + pub const RXRPC: u16 = @intFromEnum(Af.rxrpc); + pub const ISDN: u16 = @intFromEnum(Af.isdn); + pub const PHONET: u16 = @intFromEnum(Af.phonet); + pub const IEEE802154: u16 = @intFromEnum(Af.ieee802154); + pub const CAIF: u16 = @intFromEnum(Af.caif); + pub const ALG: u16 = @intFromEnum(Af.alg); + pub const NFC: u16 = @intFromEnum(Af.nfc); + pub const VSOCK: u16 = @intFromEnum(Af.vsock); + pub const KCM: u16 = @intFromEnum(Af.kcm); + pub const QIPCRTR: u16 = @intFromEnum(Af.qipcrtr); + pub const SMC: u16 = @intFromEnum(Af.smc); + pub const XDP: u16 = @intFromEnum(Af.xdp); + pub const MAX: u16 = @intFromEnum(Af.max); +}; + +// COMMIT: add new Typed So enum +/// SO_* type +pub const So = if (is_mips) enum(u16) { + debug = 1, + reuseaddr = 0x0004, + keepalive = 0x0008, + dontroute = 0x0010, + broadcast = 0x0020, + linger = 0x0080, + oobinline = 0x0100, + reuseport = 0x0200, + sndbuf = 0x1001, + rcvbuf = 0x1002, + sndlowat = 0x1003, + rcvlowat = 0x1004, + sndtimeo = 0x1005, + rcvtimeo = 0x1006, + @"error" = 0x1007, + type = 0x1008, + acceptconn = 0x1009, + protocol = 0x1028, + domain = 0x1029, + no_check = 11, + priority = 12, + bsdcompat = 14, + passcred = 17, + peercred = 18, + peersec = 30, + sndbufforce = 31, + rcvbufforce = 33, + security_authentication = 22, + 
security_encryption_transport = 23, + security_encryption_network = 24, + bindtodevice = 25, + attach_filter = 26, + detach_filter = 27, + peername = 28, + timestamp_old = 29, + passsec = 34, + timestampns_old = 35, + mark = 36, + timestamping_old = 37, + rxq_ovfl = 40, + wifi_status = 41, + peek_off = 42, + nofcs = 43, + lock_filter = 44, + select_err_queue = 45, + busy_poll = 46, + max_pacing_rate = 47, + bpf_extensions = 48, + incoming_cpu = 49, + attach_bpf = 50, + attach_reuseport_cbpf = 51, + attach_reuseport_ebpf = 52, + cnx_advice = 53, + meminfo = 55, + incoming_napi_id = 56, + cookie = 57, + peergroups = 59, + zerocopy = 60, + txtime = 61, + bindtoifindex = 62, + timestamp_new = 63, + timestampns_new = 64, + timestamping_new = 65, + rcvtimeo_new = 66, + sndtimeo_new = 67, + detach_reuseport_bpf = 68, + _, + + // aliases + pub const get_filter: So = .attach_filter; + pub const detach_bpf: So = .detach_filter; +} else if (is_ppc) enum(u16) { + debug = 1, + reuseaddr = 2, + type = 3, + @"error" = 4, + dontroute = 5, + broadcast = 6, + sndbuf = 7, + rcvbuf = 8, + keepalive = 9, + oobinline = 10, + no_check = 11, + priority = 12, + linger = 13, + bsdcompat = 14, + reuseport = 15, + rcvlowat = 16, + sndlowat = 17, + rcvtimeo = 18, + sndtimeo = 19, + passcred = 20, + peercred = 21, + acceptconn = 30, + peersec = 31, + sndbufforce = 32, + rcvbufforce = 33, + protocol = 38, + domain = 39, + security_authentication = 22, + security_encryption_transport = 23, + security_encryption_network = 24, + bindtodevice = 25, + attach_filter = 26, + detach_filter = 27, + peername = 28, + timestamp_old = 29, + passsec = 34, + timestampns_old = 35, + mark = 36, + timestamping_old = 37, + rxq_ovfl = 40, + wifi_status = 41, + peek_off = 42, + nofcs = 43, + lock_filter = 44, + select_err_queue = 45, + busy_poll = 46, + max_pacing_rate = 47, + bpf_extensions = 48, + incoming_cpu = 49, + attach_bpf = 50, + attach_reuseport_cbpf = 51, + attach_reuseport_ebpf = 52, + cnx_advice = 53, + meminfo = 55, + incoming_napi_id = 56, + cookie = 57, + peergroups = 59, + zerocopy = 60, + txtime = 61, + bindtoifindex = 62, + timestamp_new = 63, + timestampns_new = 64, + timestamping_new = 65, + rcvtimeo_new = 66, + sndtimeo_new = 67, + detach_reuseport_bpf = 68, + _, + + // aliases + pub const get_filter: So = .attach_filter; + pub const detach_bpf: So = .detach_filter; +} else if (is_sparc) enum(u16) { + debug = 1, + reuseaddr = 4, + type = 4104, + @"error" = 4103, + dontroute = 16, + broadcast = 32, + sndbuf = 4097, + rcvbuf = 4098, + keepalive = 8, + oobinline = 256, + no_check = 11, + priority = 12, + linger = 128, + bsdcompat = 1024, + reuseport = 512, + passcred = 2, + peercred = 64, + rcvlowat = 2048, + sndlowat = 4096, + rcvtimeo = 8192, + sndtimeo = 16384, + acceptconn = 32768, + peersec = 30, + sndbufforce = 4106, + rcvbufforce = 4107, + protocol = 4136, + domain = 4137, + security_authentication = 20481, + security_encryption_transport = 20482, + security_encryption_network = 20484, + bindtodevice = 13, + attach_filter = 26, + detach_filter = 27, + peername = 28, + timestamp_old = 29, + passsec = 31, + timestampns_old = 33, + mark = 34, + timestamping_old = 35, + rxq_ovfl = 36, + wifi_status = 37, + peek_off = 38, + nofcs = 39, + lock_filter = 40, + select_err_queue = 41, + busy_poll = 48, + max_pacing_rate = 49, + bpf_extensions = 50, + incoming_cpu = 51, + attach_bpf = 52, + attach_reuseport_cbpf = 53, + attach_reuseport_ebpf = 54, + cnx_advice = 55, + meminfo = 57, + incoming_napi_id = 58, + cookie = 59, + 
peergroups = 61, + zerocopy = 62, + txtime = 63, + bindtoifindex = 65, + timestamp_new = 70, + timestampns_new = 66, + timestamping_new = 67, + rcvtimeo_new = 68, + sndtimeo_new = 69, + detach_reuseport_bpf = 71, + _, + + // aliases + pub const get_filter: So = .attach_filter; + pub const detach_bpf: So = .detach_filter; +} else enum(u16) { + debug = 1, + reuseaddr = 2, + type = 3, + @"error" = 4, + dontroute = 5, + broadcast = 6, + sndbuf = 7, + rcvbuf = 8, + keepalive = 9, + oobinline = 10, + no_check = 11, + priority = 12, + linger = 13, + bsdcompat = 14, + reuseport = 15, + passcred = 16, + peercred = 17, + rcvlowat = 18, + sndlowat = 19, + rcvtimeo = 20, + sndtimeo = 21, + acceptconn = 30, + peersec = 31, + sndbufforce = 32, + rcvbufforce = 33, + passsec = 34, + timestampns_old = 35, + mark = 36, + timestamping_old = 37, + protocol = 38, + domain = 39, + rxq_ovfl = 40, + wifi_status = 41, + peek_off = 42, + nofcs = 43, + lock_filter = 44, + select_err_queue = 45, + busy_poll = 46, + max_pacing_rate = 47, + bpf_extensions = 48, + incoming_cpu = 49, + attach_bpf = 50, + attach_reuseport_cbpf = 51, + attach_reuseport_ebpf = 52, + cnx_advice = 53, + meminfo = 55, + incoming_napi_id = 56, + cookie = 57, + peergroups = 59, + zerocopy = 60, + txtime = 61, + bindtoifindex = 62, + timestamp_new = 63, + timestampns_new = 64, + timestamping_new = 65, + rcvtimeo_new = 66, + sndtimeo_new = 67, + detach_reuseport_bpf = 68, + security_authentication = 22, + security_encryption_transport = 23, + security_encryption_network = 24, + bindtodevice = 25, + attach_filter = 26, + detach_filter = 27, + peername = 28, + timestamp_old = 29, + _, + + // aliases + pub const get_filter: So = .attach_filter; + pub const detach_bpf: So = .detach_filter; +}; + +// COMMIT: add SO constants +/// Backwards-compatible SO_* constants +pub const SO = struct { + pub const DEBUG: u16 = @intFromEnum(So.debug); + pub const REUSEADDR: u16 = @intFromEnum(So.reuseaddr); + pub const KEEPALIVE: u16 = @intFromEnum(So.keepalive); + pub const DONTROUTE: u16 = @intFromEnum(So.dontroute); + pub const BROADCAST: u16 = @intFromEnum(So.broadcast); + pub const LINGER: u16 = @intFromEnum(So.linger); + pub const OOBINLINE: u16 = @intFromEnum(So.oobinline); + pub const REUSEPORT: u16 = @intFromEnum(So.reuseport); + pub const SNDBUF: u16 = @intFromEnum(So.sndbuf); + pub const RCVBUF: u16 = @intFromEnum(So.rcvbuf); + pub const SNDLOWAT: u16 = @intFromEnum(So.sndlowat); + pub const RCVLOWAT: u16 = @intFromEnum(So.rcvlowat); + pub const RCVTIMEO: u16 = @intFromEnum(So.rcvtimeo); + pub const SNDTIMEO: u16 = @intFromEnum(So.sndtimeo); + pub const ERROR: u16 = @intFromEnum(So.@"error"); + pub const TYPE: u16 = @intFromEnum(So.type); + pub const ACCEPTCONN: u16 = @intFromEnum(So.acceptconn); + pub const PROTOCOL: u16 = @intFromEnum(So.protocol); + pub const DOMAIN: u16 = @intFromEnum(So.domain); + pub const NO_CHECK: u16 = @intFromEnum(So.no_check); + pub const PRIORITY: u16 = @intFromEnum(So.priority); + pub const BSDCOMPAT: u16 = @intFromEnum(So.bsdcompat); + pub const PASSCRED: u16 = @intFromEnum(So.passcred); + pub const PEERCRED: u16 = @intFromEnum(So.peercred); + pub const PEERSEC: u16 = @intFromEnum(So.peersec); + pub const SNDBUFFORCE: u16 = @intFromEnum(So.sndbufforce); + pub const RCVBUFFORCE: u16 = @intFromEnum(So.rcvbufforce); + pub const SECURITY_AUTHENTICATION: u16 = @intFromEnum(So.security_authentication); + pub const SECURITY_ENCRYPTION_TRANSPORT: u16 = @intFromEnum(So.security_encryption_transport); + pub const 
SECURITY_ENCRYPTION_NETWORK: u16 = @intFromEnum(So.security_encryption_network); + pub const BINDTODEVICE: u16 = @intFromEnum(So.bindtodevice); + pub const ATTACH_FILTER: u16 = @intFromEnum(So.attach_filter); + pub const DETACH_FILTER: u16 = @intFromEnum(So.detach_filter); + pub const GET_FILTER: u16 = ATTACH_FILTER; // alias + pub const PEERNAME: u16 = @intFromEnum(So.peername); + pub const TIMESTAMP_OLD: u16 = @intFromEnum(So.timestamp_old); + pub const PASSSEC: u16 = @intFromEnum(So.passsec); + pub const TIMESTAMPNS_OLD: u16 = @intFromEnum(So.timestampns_old); + pub const MARK: u16 = @intFromEnum(So.mark); + pub const TIMESTAMPING_OLD: u16 = @intFromEnum(So.timestamping_old); + pub const RXQ_OVFL: u16 = @intFromEnum(So.rxq_ovfl); + pub const WIFI_STATUS: u16 = @intFromEnum(So.wifi_status); + pub const PEEK_OFF: u16 = @intFromEnum(So.peek_off); + pub const NOFCS: u16 = @intFromEnum(So.nofcs); + pub const LOCK_FILTER: u16 = @intFromEnum(So.lock_filter); + pub const SELECT_ERR_QUEUE: u16 = @intFromEnum(So.select_err_queue); + pub const BUSY_POLL: u16 = @intFromEnum(So.busy_poll); + pub const MAX_PACING_RATE: u16 = @intFromEnum(So.max_pacing_rate); + pub const BPF_EXTENSIONS: u16 = @intFromEnum(So.bpf_extensions); + pub const INCOMING_CPU: u16 = @intFromEnum(So.incoming_cpu); + pub const ATTACH_BPF: u16 = @intFromEnum(So.attach_bpf); + pub const DETACH_BPF: u16 = DETACH_FILTER; // alias in original + pub const ATTACH_REUSEPORT_CBPF: u16 = @intFromEnum(So.attach_reuseport_cbpf); + pub const ATTACH_REUSEPORT_EBPF: u16 = @intFromEnum(So.attach_reuseport_ebpf); + pub const CNX_ADVICE: u16 = @intFromEnum(So.cnx_advice); + pub const MEMINFO: u16 = @intFromEnum(So.meminfo); + pub const INCOMING_NAPI_ID: u16 = @intFromEnum(So.incoming_napi_id); + pub const COOKIE: u16 = @intFromEnum(So.cookie); + pub const PEERGROUPS: u16 = @intFromEnum(So.peergroups); + pub const ZEROCOPY: u16 = @intFromEnum(So.zerocopy); + pub const TXTIME: u16 = @intFromEnum(So.txtime); + pub const BINDTOIFINDEX: u16 = @intFromEnum(So.bindtoifindex); + pub const TIMESTAMP_NEW: u16 = @intFromEnum(So.timestamp_new); + pub const TIMESTAMPNS_NEW: u16 = @intFromEnum(So.timestampns_new); + pub const TIMESTAMPING_NEW: u16 = @intFromEnum(So.timestamping_new); + pub const RCVTIMEO_NEW: u16 = @intFromEnum(So.rcvtimeo_new); + pub const SNDTIMEO_NEW: u16 = @intFromEnum(So.sndtimeo_new); + pub const DETACH_REUSEPORT_BPF: u16 = @intFromEnum(So.detach_reuseport_bpf); }; pub const SCM = struct { @@ -4427,37 +4672,100 @@ pub const SCM = struct { pub const TXTIME = SO.TXTIME; }; -pub const SOL = struct { - pub const SOCKET = if (is_mips or is_sparc) 65535 else 1; - - pub const IP = 0; - pub const IPV6 = 41; - pub const ICMPV6 = 58; - - pub const RAW = 255; - pub const DECNET = 261; - pub const X25 = 262; - pub const PACKET = 263; - pub const ATM = 264; - pub const AAL = 265; - pub const IRDA = 266; - pub const NETBEUI = 267; - pub const LLC = 268; - pub const DCCP = 269; - pub const NETLINK = 270; - pub const TIPC = 271; - pub const RXRPC = 272; - pub const PPPOL2TP = 273; - pub const BLUETOOTH = 274; - pub const PNPIPE = 275; - pub const RDS = 276; - pub const IUCV = 277; - pub const CAIF = 278; - pub const ALG = 279; - pub const NFC = 280; - pub const KCM = 281; - pub const TLS = 282; - pub const XDP = 283; +/// Deprecated in favor of Sol +pub const SOL = Sol; +// https://github.com/torvalds/linux/blob/0d97f2067c166eb495771fede9f7b73999c67f66/include/linux/socket.h#L347C1-L388C22 +/// Socket option level for setsockopt(2)/getsockopt(2) +pub 
const Sol = enum(u16) { + ip = 0, + socket = if (is_mips or is_sparc) 65535 else 1, + tcp = 6, + udp = 17, + ipv6 = 41, + icmpv6 = 58, + sctp = 132, + /// UDP-Lite (RFC 3828) + udplite = 136, + raw = 255, + ipx = 256, + ax25 = 257, + atalk = 258, + netrom = 259, + rose = 260, + decnet = 261, + x25 = 262, + packet = 263, + /// ATM layer (cell level) + atm = 264, + /// ATM Adaption Layer (packet level) + aal = 265, + irda = 266, + netbeui = 267, + llc = 268, + dccp = 269, + netlink = 270, + tipc = 271, + rxrpc = 272, + pppol2tp = 273, + bluetooth = 274, + pnpipe = 275, + rds = 276, + iucv = 277, + caif = 278, + alg = 279, + nfc = 280, + kcm = 281, + tls = 282, + xdp = 283, + mptcp = 284, + mctp = 285, + smc = 286, + vsock = 287, + _, + + /// Deprecated constants kept for backward compatibility + pub const IP: u16 = @intFromEnum(Sol.ip); + pub const SOCKET: u16 = @intFromEnum(Sol.socket); + pub const TCP: u16 = @intFromEnum(Sol.tcp); + pub const UDP: u16 = @intFromEnum(Sol.udp); + pub const IPV6: u16 = @intFromEnum(Sol.ipv6); + pub const ICMPV6: u16 = @intFromEnum(Sol.icmpv6); + pub const SCTP: u16 = @intFromEnum(Sol.sctp); + pub const UDPLITE: u16 = @intFromEnum(Sol.udplite); + + pub const RAW: u16 = @intFromEnum(Sol.raw); + pub const IPX: u16 = @intFromEnum(Sol.ipx); + pub const AX25: u16 = @intFromEnum(Sol.ax25); + pub const ATALK: u16 = @intFromEnum(Sol.atalk); + pub const NETROM: u16 = @intFromEnum(Sol.netrom); + pub const ROSE: u16 = @intFromEnum(Sol.rose); + pub const DECNET: u16 = @intFromEnum(Sol.decnet); + pub const X25: u16 = @intFromEnum(Sol.x25); + pub const PACKET: u16 = @intFromEnum(Sol.packet); + pub const ATM: u16 = @intFromEnum(Sol.atm); + pub const AAL: u16 = @intFromEnum(Sol.aal); + pub const IRDA: u16 = @intFromEnum(Sol.irda); + pub const NETBEUI: u16 = @intFromEnum(Sol.netbeui); + pub const LLC: u16 = @intFromEnum(Sol.llc); + pub const DCCP: u16 = @intFromEnum(Sol.dccp); + pub const NETLINK: u16 = @intFromEnum(Sol.netlink); + pub const TIPC: u16 = @intFromEnum(Sol.tipc); + pub const RXRPC: u16 = @intFromEnum(Sol.rxrpc); + pub const PPPOL2TP: u16 = @intFromEnum(Sol.pppol2tp); + pub const BLUETOOTH: u16 = @intFromEnum(Sol.bluetooth); + pub const PNPIPE: u16 = @intFromEnum(Sol.pnpipe); + pub const RDS: u16 = @intFromEnum(Sol.rds); + pub const IUCV: u16 = @intFromEnum(Sol.iucv); + pub const CAIF: u16 = @intFromEnum(Sol.caif); + pub const ALG: u16 = @intFromEnum(Sol.alg); + pub const NFC: u16 = @intFromEnum(Sol.nfc); + pub const KCM: u16 = @intFromEnum(Sol.kcm); + pub const TLS: u16 = @intFromEnum(Sol.tls); + pub const XDP: u16 = @intFromEnum(Sol.xdp); + pub const MPTCP: u16 = @intFromEnum(Sol.mptcp); + pub const MCTP: u16 = @intFromEnum(Sol.mctp); + pub const SMC: u16 = @intFromEnum(Sol.smc); + pub const VSOCK: u16 = @intFromEnum(Sol.vsock); }; pub const SOMAXCONN = 128; @@ -4883,28 +5191,88 @@ pub const ETH = struct { }; }; -pub const MSG = struct { - pub const OOB = 0x0001; - pub const PEEK = 0x0002; - pub const DONTROUTE = 0x0004; - pub const CTRUNC = 0x0008; - pub const PROXY = 0x0010; - pub const TRUNC = 0x0020; - pub const DONTWAIT = 0x0040; - pub const EOR = 0x0080; - pub const WAITALL = 0x0100; - pub const FIN = 0x0200; - pub const SYN = 0x0400; - pub const CONFIRM = 0x0800; - pub const RST = 0x1000; - pub const ERRQUEUE = 0x2000; - pub const NOSIGNAL = 0x4000; - pub const MORE = 0x8000; - pub const WAITFORONE = 0x10000; - pub const BATCH = 0x40000; - pub const ZEROCOPY = 0x4000000; - pub const FASTOPEN = 0x20000000; - pub const CMSG_CLOEXEC = 0x40000000;
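+// Illustrative call-site migration (a sketch, not part of this patch): the
+// typed socket constants above lower to the exact integers the kernel ABI
+// expects, and the deprecated SOL_*/SO_*/SOCK_* names keep old call sites
+// compiling. Assumes `std`, `Sol`, `So`, and `Sock` are in scope as in this file.
+test "typed socket constants lower to the old ABI values" {
+    // SOL_SOCKET / SO_REUSEADDR spelled through the new enums:
+    try std.testing.expectEqual(SOL.SOCKET, @intFromEnum(Sol.socket));
+    try std.testing.expectEqual(SO.REUSEADDR, @intFromEnum(So.reuseaddr));
+    // a socket(2) type-plus-flags argument built from the packed Sock struct:
+    const st: u32 = @bitCast(Sock{ .type = .stream, .flags = .{ .cloexec = true } });
+    try std.testing.expectEqual(SOCK.STREAM | SOCK.CLOEXEC, st);
+}
+// 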
Deprecated alias for Msg +pub const MSG = Msg; +pub const Msg = packed struct(u32) { + /// Process out-of-band data + oob: bool = false, + /// Peek at incoming message + peek: bool = false, + /// Send without using routing tables + dontroute: bool = false, + /// Control data truncated + ctrunc: bool = false, + /// Do not send. Only probe path (e.g. for MTU) + probe: bool = false, + /// Normal data truncated + trunc: bool = false, + /// Nonblocking I/O + dontwait: bool = false, + /// End of record + eor: bool = false, + /// Wait for a full request + waitall: bool = false, + /// FIN flag + fin: bool = false, + /// SYN flag + syn: bool = false, + /// Confirm path validity + confirm: bool = false, + /// RST flag + rst: bool = false, + /// Fetch message from error queue + errqueue: bool = false, + /// Do not generate SIGPIPE + nosignal: bool = false, + /// Sender will send more + more: bool = false, + /// recvmmsg(): block until 1+ packets available + waitforone: bool = false, + _18: u1 = 0, + /// sendmmsg(): more messages coming + batch: bool = false, + /// sendpage() internal: page frags are not shared + no_shared_frags: bool = false, + /// sendpage() internal: page may carry plain text and require encryption + sendpage_decrypted: bool = false, + _22: u4 = 0, + // COMMIT: new flags + /// Receive devmem skbs as cmsg + sock_devmem: bool = false, + /// Use user data in kernel path + zerocopy: bool = false, + /// Splice the pages from the iterator in sendmsg() + splice_pages: bool = false, + _29: u1 = 0, + /// Send data in TCP SYN + fastopen: bool = false, + /// Set close_on_exec for file descriptor received through SCM_RIGHTS + cmsg_cloexec: bool = false, + _: u1 = 0, + + // DEPRECATED CONSTANTS + pub const OOB: u32 = @bitCast(Msg{ .oob = true }); + pub const PEEK: u32 = @bitCast(Msg{ .peek = true }); + pub const DONTROUTE: u32 = @bitCast(Msg{ .dontroute = true }); + pub const CTRUNC: u32 = @bitCast(Msg{ .ctrunc = true }); + // fix typo PROBE not PROXY + pub const PROBE: u32 = @bitCast(Msg{ .probe = true }); + pub const TRUNC: u32 = @bitCast(Msg{ .trunc = true }); + pub const DONTWAIT: u32 = @bitCast(Msg{ .dontwait = true }); + pub const EOR: u32 = @bitCast(Msg{ .eor = true }); + pub const WAITALL: u32 = @bitCast(Msg{ .waitall = true }); + pub const FIN: u32 = @bitCast(Msg{ .fin = true }); + pub const SYN: u32 = @bitCast(Msg{ .syn = true }); + pub const CONFIRM: u32 = @bitCast(Msg{ .confirm = true }); + pub const RST: u32 = @bitCast(Msg{ .rst = true }); + pub const ERRQUEUE: u32 = @bitCast(Msg{ .errqueue = true }); + pub const NOSIGNAL: u32 = @bitCast(Msg{ .nosignal = true }); + pub const MORE: u32 = @bitCast(Msg{ .more = true }); + pub const WAITFORONE: u32 = @bitCast(Msg{ .waitforone = true }); + pub const BATCH: u32 = @bitCast(Msg{ .batch = true }); + pub const ZEROCOPY: u32 = @bitCast(Msg{ .zerocopy = true }); + pub const FASTOPEN: u32 = @bitCast(Msg{ .fastopen = true }); + pub const CMSG_CLOEXEC: u32 = @bitCast(Msg{ .cmsg_cloexec = true }); }; pub const DT = struct { @@ -5368,28 +5736,181 @@ pub const SER = struct { }; }; -pub const EPOLL = struct { +/// Valid opcodes to issue to sys_epoll_ctl() +pub const EpollOp = enum(u32) { + ctl_add = 1, + ctl_del = 2, + ctl_mod = 3, + _, + + // Deprecated Constants + pub const CTL_ADD: u32 = @intFromEnum(EpollOp.ctl_add); + pub const CTL_DEL: u32 = @intFromEnum(EpollOp.ctl_del); + pub const CTL_MOD: u32 = @intFromEnum(EpollOp.ctl_mod); +}; + +/// Deprecated alias for Epoll +pub const EPOLL = Epoll; +/// Epoll event masks +// 
https://github.com/torvalds/linux/blob/18a7e218cfcdca6666e1f7356533e4c988780b57/include/uapi/linux/eventpoll.h#L30 +pub const Epoll = if (is_mips) packed struct(u32) { + // EPOLL event types (lower 16 bits) + // + /// The associated file is available for read(2) operations + in: bool = false, + /// There is an exceptional condition on the file descriptor + pri: bool = false, + /// The associated file is available for write(2) operations + out: bool = false, + /// Error condition happened on the associated file descriptor + err: bool = false, + /// Hang up happened on the associated file descriptor + hup: bool = false, + /// Invalid request: fd not open + nval: bool = false, + /// Normal data may be read + rdnorm: bool = false, + /// Priority data may be read + rdband: bool = false, + /// Priority data may be written + wrband: bool = false, + _10: u1 = 0, + /// Message available (unused on Linux) + msg: bool = false, + _12: u2 = 0, + /// Stream socket peer closed connection + rdhup: bool = false, + _15: u13 = 0, + // EPOLL input flags (higher-order bits; some are kernel-internal state) + // + /// Internal flag - wakeup generated by io_uring, used to detect + /// recursion back into the io_uring poll handler + uring_wake: bool = false, + /// Set exclusive wakeup mode for the target file descriptor + exclusive: bool = false, + /// Request the handling of system wakeup events so as to prevent system + /// suspends from happening while those events are being processed. + /// Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will + /// not be re-allowed until epoll_wait is called again after consuming the + /// wakeup event(s). + /// Requires CAP_BLOCK_SUSPEND + wakeup: bool = false, + /// Set the One Shot behaviour for the target file descriptor + oneshot: bool = false, + /// Set the Edge Triggered behaviour for the target file descriptor + et: bool = false, + + /// Alias to out on MIPS + /// Writing is now possible (normal data) + pub const wrnorm: Epoll = .{ .out = true }; + + // Deprecated Named constants + // EPOLL event types + pub const IN: u32 = @bitCast(Epoll{ .in = true }); + pub const PRI: u32 = @bitCast(Epoll{ .pri = true }); + pub const OUT: u32 = @bitCast(Epoll{ .out = true }); + pub const ERR: u32 = @bitCast(Epoll{ .err = true }); + pub const HUP: u32 = @bitCast(Epoll{ .hup = true }); + pub const NVAL: u32 = @bitCast(Epoll{ .nval = true }); + pub const RDNORM: u32 = @bitCast(Epoll{ .rdnorm = true }); + pub const RDBAND: u32 = @bitCast(Epoll{ .rdband = true }); + pub const WRNORM: u32 = @bitCast(wrnorm); + pub const WRBAND: u32 = @bitCast(Epoll{ .wrband = true }); + pub const MSG: u32 = @bitCast(Epoll{ .msg = true }); + pub const RDHUP: u32 = @bitCast(Epoll{ .rdhup = true }); + + // EPOLL input flags + pub const URING_WAKE: u32 = @bitCast(Epoll{ .uring_wake = true }); + pub const EXCLUSIVE: u32 = @bitCast(Epoll{ .exclusive = true }); + pub const WAKEUP: u32 = @bitCast(Epoll{ .wakeup = true }); + pub const ONESHOT: u32 = @bitCast(Epoll{ .oneshot = true }); + pub const ET: u32 = @bitCast(Epoll{ .et = true }); + + /// Flags for epoll_create1 pub const CLOEXEC = 1 << @bitOffsetOf(O, "CLOEXEC"); - pub const CTL_ADD = 1; - pub const CTL_DEL = 2; - pub const CTL_MOD = 3; + // Deprecated Op Constants use EpollOp enum type + pub const CTL_ADD: u32 = @intFromEnum(EpollOp.ctl_add); + pub const CTL_DEL: u32 = @intFromEnum(EpollOp.ctl_del); + pub const CTL_MOD: u32 = @intFromEnum(EpollOp.ctl_mod); +} else packed struct(u32) { + // EPOLL event types (lower 16 bits) + // + /// The 
associated file is available for read(2) operations + in: bool = false, + /// There is an exceptional condition on the file descriptor + pri: bool = false, + /// The associated file is available for write(2) operations + out: bool = false, + /// Error condition happened on the associated file descriptor + err: bool = false, + /// Hang up happened on the associated file descriptor + hup: bool = false, + /// Invalid request: fd not open + nval: bool = false, + /// Normal data may be read + rdnorm: bool = false, + /// Priority data may be read + rdband: bool = false, + // COMMIT: new flags + /// Writing is now possible (normal data) + wrnorm: bool = false, + /// Priority data may be written + wrband: bool = false, + /// Message available (unused on Linux) + msg: bool = false, + _12: u2 = 0, + /// Stream socket peer closed connection + rdhup: bool = false, + _15: u13 = 0, + // EPOLL input flags (higher-order bits; some are kernel-internal state) + // + /// Internal flag - wakeup generated by io_uring, used to detect + /// recursion back into the io_uring poll handler + uring_wake: bool = false, + /// Set exclusive wakeup mode for the target file descriptor + exclusive: bool = false, + /// Request the handling of system wakeup events so as to prevent system + /// suspends from happening while those events are being processed. + /// Assuming neither EPOLLET nor EPOLLONESHOT is set, system suspends will + /// not be re-allowed until epoll_wait is called again after consuming the + /// wakeup event(s). + /// Requires CAP_BLOCK_SUSPEND + wakeup: bool = false, + /// Set the One Shot behaviour for the target file descriptor + oneshot: bool = false, + /// Set the Edge Triggered behaviour for the target file descriptor + et: bool = false, + + // Deprecated Named constants + // EPOLL event types + pub const IN: u32 = @bitCast(Epoll{ .in = true }); + pub const PRI: u32 = @bitCast(Epoll{ .pri = true }); + pub const OUT: u32 = @bitCast(Epoll{ .out = true }); + pub const ERR: u32 = @bitCast(Epoll{ .err = true }); + pub const HUP: u32 = @bitCast(Epoll{ .hup = true }); + pub const NVAL: u32 = @bitCast(Epoll{ .nval = true }); + pub const RDNORM: u32 = @bitCast(Epoll{ .rdnorm = true }); + pub const RDBAND: u32 = @bitCast(Epoll{ .rdband = true }); + pub const WRNORM: u32 = @bitCast(Epoll{ .wrnorm = true }); + pub const WRBAND: u32 = @bitCast(Epoll{ .wrband = true }); + pub const MSG: u32 = @bitCast(Epoll{ .msg = true }); + pub const RDHUP: u32 = @bitCast(Epoll{ .rdhup = true }); + + // EPOLL input flags + pub const URING_WAKE: u32 = @bitCast(Epoll{ .uring_wake = true }); + pub const EXCLUSIVE: u32 = @bitCast(Epoll{ .exclusive = true }); + pub const WAKEUP: u32 = @bitCast(Epoll{ .wakeup = true }); + pub const ONESHOT: u32 = @bitCast(Epoll{ .oneshot = true }); + pub const ET: u32 = @bitCast(Epoll{ .et = true }); + + /// Flags for epoll_create1 + pub const CLOEXEC = 1 << @bitOffsetOf(O, "CLOEXEC"); - pub const IN = 0x001; - pub const PRI = 0x002; - pub const OUT = 0x004; - pub const RDNORM = 0x040; - pub const RDBAND = 0x080; - pub const WRNORM = if (is_mips) 0x004 else 0x100; - pub const WRBAND = if (is_mips) 0x100 else 0x200; - pub const MSG = 0x400; - pub const ERR = 0x008; - pub const HUP = 0x010; - pub const RDHUP = 0x2000; - pub const EXCLUSIVE = (@as(u32, 1) << 28); - pub const WAKEUP = (@as(u32, 1) << 29); - pub const ONESHOT = (@as(u32, 1) << 30); - pub const ET = (@as(u32, 1) << 31); + // Deprecated Op Constants use EpollOp enum type + pub const CTL_ADD: u32 = @intFromEnum(EpollOp.ctl_add); + pub const 
 
 pub const CLOCK = clockid_t;
@@ -5892,6 +6413,7 @@ pub const signalfd_siginfo = extern struct {
 };
 
 pub const in_port_t = u16;
+// TODO: change to AF type
 pub const sa_family_t = u16;
 pub const socklen_t = u32;
@@ -5912,7 +6434,7 @@ pub const sockaddr = extern struct {
 
     /// IPv4 socket address
     pub const in = extern struct {
-        family: sa_family_t = AF.INET,
+        family: sa_family_t = Af.INET,
         port: in_port_t,
         addr: u32,
         zero: [8]u8 = [8]u8{ 0, 0, 0, 0, 0, 0, 0, 0 },
@@ -5920,7 +6442,7 @@ pub const sockaddr = extern struct {
 
     /// IPv6 socket address
     pub const in6 = extern struct {
-        family: sa_family_t = AF.INET6,
+        family: sa_family_t = Af.INET6,
         port: in_port_t,
         flowinfo: u32,
         addr: [16]u8,
@@ -5929,13 +6451,13 @@ pub const sockaddr = extern struct {
 
     /// UNIX domain socket address
     pub const un = extern struct {
-        family: sa_family_t = AF.UNIX,
+        family: sa_family_t = Af.UNIX,
         path: [108]u8,
     };
 
     /// Packet socket address
     pub const ll = extern struct {
-        family: sa_family_t = AF.PACKET,
+        family: sa_family_t = Af.PACKET,
         protocol: u16,
         ifindex: i32,
         hatype: u16,
@@ -5946,7 +6468,7 @@ pub const sockaddr = extern struct {
 
     /// Netlink socket address
     pub const nl = extern struct {
-        family: sa_family_t = AF.NETLINK,
+        family: sa_family_t = Af.NETLINK,
         __pad1: c_ushort = 0,
 
         /// port ID
@@ -5957,7 +6479,7 @@ pub const sockaddr = extern struct {
     };
 
     pub const xdp = extern struct {
-        family: u16 = AF.XDP,
+        family: u16 = Af.XDP,
         flags: u16,
         ifindex: u32,
         queue_id: u32,
@@ -5966,7 +6488,7 @@ pub const sockaddr = extern struct {
 
     /// Address structure for vSockets
     pub const vm = extern struct {
-        family: sa_family_t = AF.VSOCK,
+        family: sa_family_t = Af.VSOCK,
         reserved1: u16 = 0,
         port: u32,
         cid: u32,
@@ -6302,667 +6824,6 @@ else
     fields: siginfo_fields_union,
 };
 
-// io_uring_params.flags
-
-/// io_context is polled
-pub const IORING_SETUP_IOPOLL = 1 << 0;
-
-/// SQ poll thread
-pub const IORING_SETUP_SQPOLL = 1 << 1;
-
-/// sq_thread_cpu is valid
-pub const IORING_SETUP_SQ_AFF = 1 << 2;
-
-/// app defines CQ size
-pub const IORING_SETUP_CQSIZE = 1 << 3;
-
-/// clamp SQ/CQ ring sizes
-pub const IORING_SETUP_CLAMP = 1 << 4;
-
-/// attach to existing wq
-pub const IORING_SETUP_ATTACH_WQ = 1 << 5;
-
-/// start with ring disabled
-pub const IORING_SETUP_R_DISABLED = 1 << 6;
-
-/// continue submit on error
-pub const IORING_SETUP_SUBMIT_ALL = 1 << 7;
-
-/// Cooperative task running. When requests complete, they often require
-/// forcing the submitter to transition to the kernel to complete. If this
-/// flag is set, work will be done when the task transitions anyway, rather
-/// than force an inter-processor interrupt reschedule. This avoids interrupting
-/// a task running in userspace, and saves an IPI.
-pub const IORING_SETUP_COOP_TASKRUN = 1 << 8;
-
-/// If COOP_TASKRUN is set, get notified if task work is available for
-/// running and a kernel transition would be needed to run it. This sets
-/// IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
-pub const IORING_SETUP_TASKRUN_FLAG = 1 << 9;
-
-/// SQEs are 128 byte
-pub const IORING_SETUP_SQE128 = 1 << 10;
-/// CQEs are 32 byte
-pub const IORING_SETUP_CQE32 = 1 << 11;
-
-/// Only one task is allowed to submit requests
-pub const IORING_SETUP_SINGLE_ISSUER = 1 << 12;
-
-/// Defer running task work to get events.
-/// Rather than running bits of task work whenever the task transitions try to do it just before it is needed.
-pub const IORING_SETUP_DEFER_TASKRUN = 1 << 13; - -/// Application provides ring memory -pub const IORING_SETUP_NO_MMAP = 1 << 14; - -/// Register the ring fd in itself for use with -/// IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index rather -/// than an fd. -pub const IORING_SETUP_REGISTERED_FD_ONLY = 1 << 15; - -/// Removes indirection through the SQ index array. -pub const IORING_SETUP_NO_SQARRAY = 1 << 16; - -/// IO submission data structure (Submission Queue Entry) -pub const io_uring_sqe = @import("linux/io_uring_sqe.zig").io_uring_sqe; - -pub const IoUring = @import("linux/IoUring.zig"); - -/// If sqe->file_index is set to this for opcodes that instantiate a new -/// direct descriptor (like openat/openat2/accept), then io_uring will allocate -/// an available direct descriptor instead of having the application pass one -/// in. The picked direct descriptor will be returned in cqe->res, or -ENFILE -/// if the space is full. -/// Available since Linux 5.19 -pub const IORING_FILE_INDEX_ALLOC = maxInt(u32); - -pub const IOSQE_BIT = enum(u8) { - FIXED_FILE, - IO_DRAIN, - IO_LINK, - IO_HARDLINK, - ASYNC, - BUFFER_SELECT, - CQE_SKIP_SUCCESS, - - _, -}; - -// io_uring_sqe.flags - -/// use fixed fileset -pub const IOSQE_FIXED_FILE = 1 << @intFromEnum(IOSQE_BIT.FIXED_FILE); - -/// issue after inflight IO -pub const IOSQE_IO_DRAIN = 1 << @intFromEnum(IOSQE_BIT.IO_DRAIN); - -/// links next sqe -pub const IOSQE_IO_LINK = 1 << @intFromEnum(IOSQE_BIT.IO_LINK); - -/// like LINK, but stronger -pub const IOSQE_IO_HARDLINK = 1 << @intFromEnum(IOSQE_BIT.IO_HARDLINK); - -/// always go async -pub const IOSQE_ASYNC = 1 << @intFromEnum(IOSQE_BIT.ASYNC); - -/// select buffer from buf_group -pub const IOSQE_BUFFER_SELECT = 1 << @intFromEnum(IOSQE_BIT.BUFFER_SELECT); - -/// don't post CQE if request succeeded -/// Available since Linux 5.17 -pub const IOSQE_CQE_SKIP_SUCCESS = 1 << @intFromEnum(IOSQE_BIT.CQE_SKIP_SUCCESS); - -pub const IORING_OP = enum(u8) { - NOP, - READV, - WRITEV, - FSYNC, - READ_FIXED, - WRITE_FIXED, - POLL_ADD, - POLL_REMOVE, - SYNC_FILE_RANGE, - SENDMSG, - RECVMSG, - TIMEOUT, - TIMEOUT_REMOVE, - ACCEPT, - ASYNC_CANCEL, - LINK_TIMEOUT, - CONNECT, - FALLOCATE, - OPENAT, - CLOSE, - FILES_UPDATE, - STATX, - READ, - WRITE, - FADVISE, - MADVISE, - SEND, - RECV, - OPENAT2, - EPOLL_CTL, - SPLICE, - PROVIDE_BUFFERS, - REMOVE_BUFFERS, - TEE, - SHUTDOWN, - RENAMEAT, - UNLINKAT, - MKDIRAT, - SYMLINKAT, - LINKAT, - MSG_RING, - FSETXATTR, - SETXATTR, - FGETXATTR, - GETXATTR, - SOCKET, - URING_CMD, - SEND_ZC, - SENDMSG_ZC, - READ_MULTISHOT, - WAITID, - FUTEX_WAIT, - FUTEX_WAKE, - FUTEX_WAITV, - FIXED_FD_INSTALL, - FTRUNCATE, - BIND, - LISTEN, - RECV_ZC, - - _, -}; -// io_uring_sqe.uring_cmd_flags (rw_flags in the Zig struct) - -/// use registered buffer; pass thig flag along with setting sqe->buf_index. 
-pub const IORING_URING_CMD_FIXED = 1 << 0; - -// io_uring_sqe.fsync_flags (rw_flags in the Zig struct) -pub const IORING_FSYNC_DATASYNC = 1 << 0; - -// io_uring_sqe.timeout_flags (rw_flags in the Zig struct) -pub const IORING_TIMEOUT_ABS = 1 << 0; -pub const IORING_TIMEOUT_UPDATE = 1 << 1; // Available since Linux 5.11 -pub const IORING_TIMEOUT_BOOTTIME = 1 << 2; // Available since Linux 5.15 -pub const IORING_TIMEOUT_REALTIME = 1 << 3; // Available since Linux 5.15 -pub const IORING_LINK_TIMEOUT_UPDATE = 1 << 4; // Available since Linux 5.15 -pub const IORING_TIMEOUT_ETIME_SUCCESS = 1 << 5; // Available since Linux 5.16 -pub const IORING_TIMEOUT_CLOCK_MASK = IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME; -pub const IORING_TIMEOUT_UPDATE_MASK = IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE; - -// io_uring_sqe.splice_flags (rw_flags in the Zig struct) -// extends splice(2) flags -pub const IORING_SPLICE_F_FD_IN_FIXED = 1 << 31; - -// POLL_ADD flags. -// Note that since sqe->poll_events (rw_flags in the Zig struct) is the flag space, the command flags for POLL_ADD are stored in sqe->len. - -/// Multishot poll. Sets IORING_CQE_F_MORE if the poll handler will continue to report CQEs on behalf of the same SQE. -pub const IORING_POLL_ADD_MULTI = 1 << 0; -/// Update existing poll request, matching sqe->addr as the old user_data field. -pub const IORING_POLL_UPDATE_EVENTS = 1 << 1; -pub const IORING_POLL_UPDATE_USER_DATA = 1 << 2; -pub const IORING_POLL_ADD_LEVEL = 1 << 3; - -// ASYNC_CANCEL flags. - -/// Cancel all requests that match the given key -pub const IORING_ASYNC_CANCEL_ALL = 1 << 0; -/// Key off 'fd' for cancelation rather than the request 'user_data'. -pub const IORING_ASYNC_CANCEL_FD = 1 << 1; -/// Match any request -pub const IORING_ASYNC_CANCEL_ANY = 1 << 2; -/// 'fd' passed in is a fixed descriptor. Available since Linux 6.0 -pub const IORING_ASYNC_CANCEL_FD_FIXED = 1 << 3; - -// send/sendmsg and recv/recvmsg flags (sqe->ioprio) - -/// If set, instead of first attempting to send or receive and arm poll if that yields an -EAGAIN result, -/// arm poll upfront and skip the initial transfer attempt. -pub const IORING_RECVSEND_POLL_FIRST = 1 << 0; -/// Multishot recv. Sets IORING_CQE_F_MORE if the handler will continue to report CQEs on behalf of the same SQE. -pub const IORING_RECV_MULTISHOT = 1 << 1; -/// Use registered buffers, the index is stored in the buf_index field. -pub const IORING_RECVSEND_FIXED_BUF = 1 << 2; -/// If set, SEND[MSG]_ZC should report the zerocopy usage in cqe.res for the IORING_CQE_F_NOTIF cqe. -pub const IORING_SEND_ZC_REPORT_USAGE = 1 << 3; -/// If set, send or recv will grab as many buffers from the buffer group ID given and send them all. -/// The completion result will be the number of buffers send, with the starting buffer ID in cqe as per usual. -/// The buffers be contigious from the starting buffer ID. -/// Used with IOSQE_BUFFER_SELECT. -pub const IORING_RECVSEND_BUNDLE = 1 << 4; -/// CQE.RES FOR IORING_CQE_F_NOTIF if IORING_SEND_ZC_REPORT_USAGE was requested -pub const IORING_NOTIF_USAGE_ZC_COPIED = 1 << 31; - -/// accept flags stored in sqe->iopri -pub const IORING_ACCEPT_MULTISHOT = 1 << 0; - -/// IORING_OP_MSG_RING command types, stored in sqe->addr -pub const IORING_MSG_RING_COMMAND = enum(u8) { - /// pass sqe->len as 'res' and off as user_data - DATA, - /// send a registered fd to another ring - SEND_FD, -}; - -// io_uring_sqe.msg_ring_flags (rw_flags in the Zig struct) - -/// Don't post a CQE to the target ring. 
Not applicable for IORING_MSG_DATA, obviously. -pub const IORING_MSG_RING_CQE_SKIP = 1 << 0; - -/// Pass through the flags from sqe->file_index (splice_fd_in in the zig struct) to cqe->flags */ -pub const IORING_MSG_RING_FLAGS_PASS = 1 << 1; - -// IO completion data structure (Completion Queue Entry) -pub const io_uring_cqe = extern struct { - /// io_uring_sqe.data submission passed back - user_data: u64, - - /// result code for this event - res: i32, - flags: u32, - - // Followed by 16 bytes of padding if initialized with IORING_SETUP_CQE32, doubling cqe size - - pub fn err(self: io_uring_cqe) E { - if (self.res > -4096 and self.res < 0) { - return @as(E, @enumFromInt(-self.res)); - } - return .SUCCESS; - } - - // On successful completion of the provided buffers IO request, the CQE flags field - // will have IORING_CQE_F_BUFFER set and the selected buffer ID will be indicated by - // the upper 16-bits of the flags field. - pub fn buffer_id(self: io_uring_cqe) !u16 { - if (self.flags & IORING_CQE_F_BUFFER != IORING_CQE_F_BUFFER) { - return error.NoBufferSelected; - } - return @as(u16, @intCast(self.flags >> IORING_CQE_BUFFER_SHIFT)); - } -}; - -// io_uring_cqe.flags - -/// If set, the upper 16 bits are the buffer ID -pub const IORING_CQE_F_BUFFER = 1 << 0; -/// If set, parent SQE will generate more CQE entries. -/// Available since Linux 5.13. -pub const IORING_CQE_F_MORE = 1 << 1; -/// If set, more data to read after socket recv -pub const IORING_CQE_F_SOCK_NONEMPTY = 1 << 2; -/// Set for notification CQEs. Can be used to distinct them from sends. -pub const IORING_CQE_F_NOTIF = 1 << 3; -/// If set, the buffer ID set in the completion will get more completions. -pub const IORING_CQE_F_BUF_MORE = 1 << 4; - -pub const IORING_CQE_BUFFER_SHIFT = 16; - -/// Magic offsets for the application to mmap the data it needs -pub const IORING_OFF_SQ_RING = 0; -pub const IORING_OFF_CQ_RING = 0x8000000; -pub const IORING_OFF_SQES = 0x10000000; - -/// Filled with the offset for mmap(2) -pub const io_sqring_offsets = extern struct { - /// offset of ring head - head: u32, - - /// offset of ring tail - tail: u32, - - /// ring mask value - ring_mask: u32, - - /// entries in ring - ring_entries: u32, - - /// ring flags - flags: u32, - - /// number of sqes not submitted - dropped: u32, - - /// sqe index array - array: u32, - - resv1: u32, - user_addr: u64, -}; - -// io_sqring_offsets.flags - -/// needs io_uring_enter wakeup -pub const IORING_SQ_NEED_WAKEUP = 1 << 0; -/// kernel has cqes waiting beyond the cq ring -pub const IORING_SQ_CQ_OVERFLOW = 1 << 1; -/// task should enter the kernel -pub const IORING_SQ_TASKRUN = 1 << 2; - -pub const io_cqring_offsets = extern struct { - head: u32, - tail: u32, - ring_mask: u32, - ring_entries: u32, - overflow: u32, - cqes: u32, - flags: u32, - resv: u32, - user_addr: u64, -}; - -// io_cqring_offsets.flags - -/// disable eventfd notifications -pub const IORING_CQ_EVENTFD_DISABLED = 1 << 0; - -// io_uring_enter flags -pub const IORING_ENTER_GETEVENTS = 1 << 0; -pub const IORING_ENTER_SQ_WAKEUP = 1 << 1; -pub const IORING_ENTER_SQ_WAIT = 1 << 2; -pub const IORING_ENTER_EXT_ARG = 1 << 3; -pub const IORING_ENTER_REGISTERED_RING = 1 << 4; - -pub const io_uring_params = extern struct { - sq_entries: u32, - cq_entries: u32, - flags: u32, - sq_thread_cpu: u32, - sq_thread_idle: u32, - features: u32, - wq_fd: u32, - resv: [3]u32, - sq_off: io_sqring_offsets, - cq_off: io_cqring_offsets, -}; - -// io_uring_params.features flags - -pub const IORING_FEAT_SINGLE_MMAP = 1 << 0; -pub 
const IORING_FEAT_NODROP = 1 << 1; -pub const IORING_FEAT_SUBMIT_STABLE = 1 << 2; -pub const IORING_FEAT_RW_CUR_POS = 1 << 3; -pub const IORING_FEAT_CUR_PERSONALITY = 1 << 4; -pub const IORING_FEAT_FAST_POLL = 1 << 5; -pub const IORING_FEAT_POLL_32BITS = 1 << 6; -pub const IORING_FEAT_SQPOLL_NONFIXED = 1 << 7; -pub const IORING_FEAT_EXT_ARG = 1 << 8; -pub const IORING_FEAT_NATIVE_WORKERS = 1 << 9; -pub const IORING_FEAT_RSRC_TAGS = 1 << 10; -pub const IORING_FEAT_CQE_SKIP = 1 << 11; -pub const IORING_FEAT_LINKED_FILE = 1 << 12; - -// io_uring_register opcodes and arguments -pub const IORING_REGISTER = enum(u32) { - REGISTER_BUFFERS, - UNREGISTER_BUFFERS, - REGISTER_FILES, - UNREGISTER_FILES, - REGISTER_EVENTFD, - UNREGISTER_EVENTFD, - REGISTER_FILES_UPDATE, - REGISTER_EVENTFD_ASYNC, - REGISTER_PROBE, - REGISTER_PERSONALITY, - UNREGISTER_PERSONALITY, - REGISTER_RESTRICTIONS, - REGISTER_ENABLE_RINGS, - - // extended with tagging - REGISTER_FILES2, - REGISTER_FILES_UPDATE2, - REGISTER_BUFFERS2, - REGISTER_BUFFERS_UPDATE, - - // set/clear io-wq thread affinities - REGISTER_IOWQ_AFF, - UNREGISTER_IOWQ_AFF, - - // set/get max number of io-wq workers - REGISTER_IOWQ_MAX_WORKERS, - - // register/unregister io_uring fd with the ring - REGISTER_RING_FDS, - UNREGISTER_RING_FDS, - - // register ring based provide buffer group - REGISTER_PBUF_RING, - UNREGISTER_PBUF_RING, - - // sync cancelation API - REGISTER_SYNC_CANCEL, - - // register a range of fixed file slots for automatic slot allocation - REGISTER_FILE_ALLOC_RANGE, - - // return status information for a buffer group - REGISTER_PBUF_STATUS, - - // set/clear busy poll settings - REGISTER_NAPI, - UNREGISTER_NAPI, - - REGISTER_CLOCK, - - // clone registered buffers from source ring to current ring - REGISTER_CLONE_BUFFERS, - - // send MSG_RING without having a ring - REGISTER_SEND_MSG_RING, - - // register a netdev hw rx queue for zerocopy - REGISTER_ZCRX_IFQ, - - // resize CQ ring - REGISTER_RESIZE_RINGS, - - REGISTER_MEM_REGION, - - // flag added to the opcode to use a registered ring fd - REGISTER_USE_REGISTERED_RING = 1 << 31, - - _, -}; - -/// io_uring_restriction->opcode values -pub const IOWQ_CATEGORIES = enum(u8) { - BOUND, - UNBOUND, -}; - -/// deprecated, see struct io_uring_rsrc_update -pub const io_uring_files_update = extern struct { - offset: u32, - resv: u32, - fds: u64, -}; - -/// Register a fully sparse file space, rather than pass in an array of all -1 file descriptors. 
-pub const IORING_RSRC_REGISTER_SPARSE = 1 << 0; - -pub const io_uring_rsrc_register = extern struct { - nr: u32, - flags: u32, - resv2: u64, - data: u64, - tags: u64, -}; - -pub const io_uring_rsrc_update = extern struct { - offset: u32, - resv: u32, - data: u64, -}; - -pub const io_uring_rsrc_update2 = extern struct { - offset: u32, - resv: u32, - data: u64, - tags: u64, - nr: u32, - resv2: u32, -}; - -pub const io_uring_notification_slot = extern struct { - tag: u64, - resv: [3]u64, -}; - -pub const io_uring_notification_register = extern struct { - nr_slots: u32, - resv: u32, - resv2: u64, - data: u64, - resv3: u64, -}; - -pub const io_uring_napi = extern struct { - busy_poll_to: u32, - prefer_busy_poll: u8, - _pad: [3]u8, - resv: u64, -}; - -/// Skip updating fd indexes set to this value in the fd table */ -pub const IORING_REGISTER_FILES_SKIP = -2; - -pub const IO_URING_OP_SUPPORTED = 1 << 0; - -pub const io_uring_probe_op = extern struct { - op: IORING_OP, - resv: u8, - /// IO_URING_OP_* flags - flags: u16, - resv2: u32, - - pub fn is_supported(self: @This()) bool { - return self.flags & IO_URING_OP_SUPPORTED != 0; - } -}; - -pub const io_uring_probe = extern struct { - /// Last opcode supported - last_op: IORING_OP, - /// Length of ops[] array below - ops_len: u8, - resv: u16, - resv2: [3]u32, - ops: [256]io_uring_probe_op, - - /// Is the operation supported on the running kernel. - pub fn is_supported(self: @This(), op: IORING_OP) bool { - const i = @intFromEnum(op); - if (i > @intFromEnum(self.last_op) or i >= self.ops_len) - return false; - return self.ops[i].is_supported(); - } -}; - -pub const io_uring_restriction = extern struct { - opcode: IORING_RESTRICTION, - arg: extern union { - /// IORING_RESTRICTION_REGISTER_OP - register_op: IORING_REGISTER, - - /// IORING_RESTRICTION_SQE_OP - sqe_op: IORING_OP, - - /// IORING_RESTRICTION_SQE_FLAGS_* - sqe_flags: u8, - }, - resv: u8, - resv2: [3]u32, -}; - -/// io_uring_restriction->opcode values -pub const IORING_RESTRICTION = enum(u16) { - /// Allow an io_uring_register(2) opcode - REGISTER_OP = 0, - - /// Allow an sqe opcode - SQE_OP = 1, - - /// Allow sqe flags - SQE_FLAGS_ALLOWED = 2, - - /// Require sqe flags (these flags must be set on each submission) - SQE_FLAGS_REQUIRED = 3, - - _, -}; - -pub const IO_URING_SOCKET_OP = enum(u16) { - SIOCIN = 0, - SIOCOUTQ = 1, - GETSOCKOPT = 2, - SETSOCKOPT = 3, -}; - -pub const io_uring_buf = extern struct { - addr: u64, - len: u32, - bid: u16, - resv: u16, -}; - -pub const io_uring_buf_ring = extern struct { - resv1: u64, - resv2: u32, - resv3: u16, - tail: u16, -}; - -/// argument for IORING_(UN)REGISTER_PBUF_RING -pub const io_uring_buf_reg = extern struct { - ring_addr: u64, - ring_entries: u32, - bgid: u16, - flags: Flags, - resv: [3]u64, - - pub const Flags = packed struct { - _0: u1 = 0, - /// Incremental buffer consumption. 
- inc: bool, - _: u14 = 0, - }; -}; - -pub const io_uring_getevents_arg = extern struct { - sigmask: u64, - sigmask_sz: u32, - pad: u32, - ts: u64, -}; - -/// Argument for IORING_REGISTER_SYNC_CANCEL -pub const io_uring_sync_cancel_reg = extern struct { - addr: u64, - fd: i32, - flags: u32, - timeout: kernel_timespec, - pad: [4]u64, -}; - -/// Argument for IORING_REGISTER_FILE_ALLOC_RANGE -/// The range is specified as [off, off + len) -pub const io_uring_file_index_range = extern struct { - off: u32, - len: u32, - resv: u64, -}; - -pub const io_uring_recvmsg_out = extern struct { - namelen: u32, - controllen: u32, - payloadlen: u32, - flags: u32, -}; - pub const utsname = extern struct { sysname: [64:0]u8, nodename: [64:0]u8, @@ -6973,28 +6834,20 @@ pub const utsname = extern struct { }; pub const HOST_NAME_MAX = 64; -pub const STATX_TYPE = 0x0001; -pub const STATX_MODE = 0x0002; -pub const STATX_NLINK = 0x0004; -pub const STATX_UID = 0x0008; -pub const STATX_GID = 0x0010; -pub const STATX_ATIME = 0x0020; -pub const STATX_MTIME = 0x0040; -pub const STATX_CTIME = 0x0080; -pub const STATX_INO = 0x0100; -pub const STATX_SIZE = 0x0200; -pub const STATX_BLOCKS = 0x0400; -pub const STATX_BASIC_STATS = 0x07ff; - -pub const STATX_BTIME = 0x0800; - -pub const STATX_ATTR_COMPRESSED = 0x0004; -pub const STATX_ATTR_IMMUTABLE = 0x0010; -pub const STATX_ATTR_APPEND = 0x0020; -pub const STATX_ATTR_NODUMP = 0x0040; -pub const STATX_ATTR_ENCRYPTED = 0x0800; -pub const STATX_ATTR_AUTOMOUNT = 0x1000; +// COMMIT: RenameFlags +pub const Rename = packed struct(u32) { + /// Don't overwrite target + noreplace: bool = false, + /// Exchange source and dest + exchange: bool = false, + /// Whiteout source + whiteout: bool = false, + _: u29 = 0, +}; +pub const SetXattr = packed struct(u32) { + _: u32 = 0, // TODO: add flags +}; pub const statx_timestamp = extern struct { sec: i64, nsec: u32, @@ -7004,13 +6857,13 @@ pub const statx_timestamp = extern struct { /// Renamed to `Statx` to not conflict with the `statx` function. pub const Statx = extern struct { /// Mask of bits indicating filled fields - mask: u32, + mask: Mask, /// Block size for filesystem I/O blksize: u32, /// Extra file attribute indicators - attributes: u64, + attributes: Attr, /// Number of hard links nlink: u32, @@ -7035,7 +6888,7 @@ pub const Statx = extern struct { blocks: u64, /// Mask to show what's supported in `attributes`. 
-    attributes_mask: u64,
+    attributes_mask: Attr,
 
     /// Last access file timestamp
     atime: statx_timestamp,
@@ -7062,8 +6915,129 @@ pub const Statx = extern struct {
     dev_minor: u32,
 
     __pad2: [14]u64,
+
+    // COMMIT: add new StatxMask fields
+    // https://github.com/torvalds/linux/blob/755fa5b4fb36627796af19932a432d343220ec63/include/uapi/linux/stat.h#L203
+    /// matches STATX_* in kernel
+    pub const Mask = packed struct(u32) {
+        /// Want/got stx_mode & S_IFMT
+        type: bool = false,
+        /// Want/got stx_mode & ~S_IFMT
+        mode: bool = false,
+        /// Want/got stx_nlink
+        nlink: bool = false,
+        /// Want/got stx_uid
+        uid: bool = false,
+        /// Want/got stx_gid
+        gid: bool = false,
+        /// Want/got stx_atime
+        atime: bool = false,
+        /// Want/got stx_mtime
+        mtime: bool = false,
+        /// Want/got stx_ctime
+        ctime: bool = false,
+        /// Want/got stx_ino
+        ino: bool = false,
+        /// Want/got stx_size
+        size: bool = false,
+        /// Want/got stx_blocks
+        blocks: bool = false,
+        /// Want/got stx_btime
+        btime: bool = false,
+        /// Got stx_mnt_id
+        mnt_id: bool = false,
+        /// Want/got direct I/O alignment info
+        dioalign: bool = false,
+        /// Want/got extended stx_mount_id
+        mnt_id_unique: bool = false,
+        /// Want/got stx_subvol
+        subvol: bool = false,
+        /// Want/got atomic_write_* fields
+        write_atomic: bool = false,
+        /// Want/got dio read alignment info
+        dio_read_align: bool = false,
+        /// Reserved for future struct statx expansion
+        _: u14 = 0,
+
+        /// The stuff in the normal stat struct (bits 0-10)
+        pub const basic_stats: Mask = .{
+            .type = true,
+            .mode = true,
+            .nlink = true,
+            .uid = true,
+            .gid = true,
+            .atime = true,
+            .mtime = true,
+            .ctime = true,
+            .ino = true,
+            .size = true,
+            .blocks = true,
+        };
+    };
+
+    // COMMIT: Statx as Packed Struct
+    // https://github.com/torvalds/linux/blob/755fa5b4fb36627796af19932a432d343220ec63/include/uapi/linux/stat.h#L248
+    /// matches STATX_ATTR_* in kernel
+    pub const Attr = packed struct(u64) {
+        _0: u2 = 0,
+        /// File is compressed by the fs
+        compressed: bool = false,
+        _1: u1 = 0,
+        /// File is marked immutable
+        immutable: bool = false,
+        /// File is append-only
+        append: bool = false,
+        /// File is not to be dumped
+        nodump: bool = false,
+        _2: u4 = 0,
+        /// File requires key to decrypt in fs
+        encrypted: bool = false,
+        /// Dir: Automount trigger
+        automount: bool = false,
+        /// Root of a mount
+        mount_root: bool = false,
+        _3: u6 = 0,
+        /// Verity protected file
+        verity: bool = false,
+        /// File is currently in DAX state
+        dax: bool = false,
+        /// File supports atomic write operations
+        write_atomic: bool = false,
+        _: u41 = 0,
+    };
 };
 
+// DEPRECATED aliases to Statx.Mask and Statx.Attr
+pub const STATX_TYPE: u32 = @bitCast(Statx.Mask{ .type = true });
+pub const STATX_MODE: u32 = @bitCast(Statx.Mask{ .mode = true });
+pub const STATX_NLINK: u32 = @bitCast(Statx.Mask{ .nlink = true });
+pub const STATX_UID: u32 = @bitCast(Statx.Mask{ .uid = true });
+pub const STATX_GID: u32 = @bitCast(Statx.Mask{ .gid = true });
+pub const STATX_ATIME: u32 = @bitCast(Statx.Mask{ .atime = true });
+pub const STATX_MTIME: u32 = @bitCast(Statx.Mask{ .mtime = true });
+pub const STATX_CTIME: u32 = @bitCast(Statx.Mask{ .ctime = true });
+pub const STATX_INO: u32 = @bitCast(Statx.Mask{ .ino = true });
+pub const STATX_SIZE: u32 = @bitCast(Statx.Mask{ .size = true });
+pub const STATX_BLOCKS: u32 = @bitCast(Statx.Mask{ .blocks = true });
+pub const STATX_BASIC_STATS: u32 = @bitCast(Statx.Mask.basic_stats);
+pub const STATX_BTIME: u32 = @bitCast(Statx.Mask{ .btime = true });
+pub const STATX_MNT_ID: u32 = @bitCast(Statx.Mask{ .mnt_id = true });
+pub const STATX_DIOALIGN: u32 = @bitCast(Statx.Mask{ .dioalign = true });
+pub const STATX_MNT_ID_UNIQUE: u32 = @bitCast(Statx.Mask{ .mnt_id_unique = true });
+pub const STATX_SUBVOL: u32 = @bitCast(Statx.Mask{ .subvol = true });
+pub const STATX_WRITE_ATOMIC: u32 = @bitCast(Statx.Mask{ .write_atomic = true });
+pub const STATX_DIO_READ_ALIGN: u32 = @bitCast(Statx.Mask{ .dio_read_align = true });
+
+pub const STATX_ATTR_COMPRESSED: u64 = @bitCast(Statx.Attr{ .compressed = true });
+pub const STATX_ATTR_IMMUTABLE: u64 = @bitCast(Statx.Attr{ .immutable = true });
+pub const STATX_ATTR_APPEND: u64 = @bitCast(Statx.Attr{ .append = true });
+pub const STATX_ATTR_NODUMP: u64 = @bitCast(Statx.Attr{ .nodump = true });
+pub const STATX_ATTR_ENCRYPTED: u64 = @bitCast(Statx.Attr{ .encrypted = true });
+pub const STATX_ATTR_AUTOMOUNT: u64 = @bitCast(Statx.Attr{ .automount = true });
+pub const STATX_ATTR_MOUNT_ROOT: u64 = @bitCast(Statx.Attr{ .mount_root = true });
+pub const STATX_ATTR_VERITY: u64 = @bitCast(Statx.Attr{ .verity = true });
+pub const STATX_ATTR_DAX: u64 = @bitCast(Statx.Attr{ .dax = true });
+pub const STATX_ATTR_WRITE_ATOMIC: u64 = @bitCast(Statx.Attr{ .write_atomic = true });
+
 pub const addrinfo = extern struct {
     flags: AI,
     family: i32,
@@ -7089,40 +7063,83 @@ pub const AI = packed struct(u32) {
 
 pub const IPPORT_RESERVED = 1024;
 
-pub const IPPROTO = struct {
-    pub const IP = 0;
-    pub const HOPOPTS = 0;
-    pub const ICMP = 1;
-    pub const IGMP = 2;
-    pub const IPIP = 4;
-    pub const TCP = 6;
-    pub const EGP = 8;
-    pub const PUP = 12;
-    pub const UDP = 17;
-    pub const IDP = 22;
-    pub const TP = 29;
-    pub const DCCP = 33;
-    pub const IPV6 = 41;
-    pub const ROUTING = 43;
-    pub const FRAGMENT = 44;
-    pub const RSVP = 46;
-    pub const GRE = 47;
-    pub const ESP = 50;
-    pub const AH = 51;
-    pub const ICMPV6 = 58;
-    pub const NONE = 59;
-    pub const DSTOPTS = 60;
-    pub const MTP = 92;
-    pub const BEETPH = 94;
-    pub const ENCAP = 98;
-    pub const PIM = 103;
-    pub const COMP = 108;
-    pub const SCTP = 132;
-    pub const MH = 135;
-    pub const UDPLITE = 136;
-    pub const MPLS = 137;
-    pub const RAW = 255;
-    pub const MAX = 256;
+/// Deprecated alias to IpProto
+pub const IPPROTO = IpProto;
+/// IP Protocol numbers
+pub const IpProto = enum(u16) {
+    ip = 0,
+    icmp = 1,
+    igmp = 2,
+    ipip = 4,
+    tcp = 6,
+    egp = 8,
+    pup = 12,
+    udp = 17,
+    idp = 22,
+    tp = 29,
+    dccp = 33,
+    ipv6 = 41,
+    routing = 43,
+    fragment = 44,
+    rsvp = 46,
+    gre = 47,
+    esp = 50,
+    ah = 51,
+    icmpv6 = 58,
+    none = 59,
+    dstopts = 60,
+    mtp = 92,
+    beetph = 94,
+    encap = 98,
+    pim = 103,
+    comp = 108,
+    sctp = 132,
+    mh = 135,
+    udplite = 136,
+    mpls = 137,
+    raw = 255,
+    max = 256,
+    _,
+
+    // Aliases
+    pub const hopopts = IpProto.ip;
+    pub const default = IpProto.ip;
+
+    // Deprecated legacy constants for backward compatibility; use the enum instead
+    pub const IP: u16 = @intFromEnum(IpProto.ip);
+    pub const HOPOPTS: u16 = @intFromEnum(hopopts);
+    pub const ICMP: u16 = @intFromEnum(IpProto.icmp);
+    pub const IGMP: u16 = @intFromEnum(IpProto.igmp);
+    pub const IPIP: u16 = @intFromEnum(IpProto.ipip);
+    pub const TCP: u16 = @intFromEnum(IpProto.tcp);
+    pub const EGP: u16 = @intFromEnum(IpProto.egp);
+    pub const PUP: u16 = @intFromEnum(IpProto.pup);
+    pub const UDP: u16 = @intFromEnum(IpProto.udp);
+    pub const IDP: u16 = @intFromEnum(IpProto.idp);
+    pub const TP: u16 = @intFromEnum(IpProto.tp);
+    pub const DCCP: u16 = @intFromEnum(IpProto.dccp);
+    pub const IPV6: u16 = @intFromEnum(IpProto.ipv6);
+    pub const ROUTING: u16 = @intFromEnum(IpProto.routing);
+    pub const FRAGMENT: u16 = @intFromEnum(IpProto.fragment);
+    pub const RSVP: u16 = @intFromEnum(IpProto.rsvp);
+    pub const GRE: u16 = @intFromEnum(IpProto.gre);
+    pub const ESP: u16 = @intFromEnum(IpProto.esp);
+    pub const AH: u16 = @intFromEnum(IpProto.ah);
+    pub const ICMPV6: u16 = @intFromEnum(IpProto.icmpv6);
+    pub const NONE: u16 = @intFromEnum(IpProto.none);
+    pub const DSTOPTS: u16 = @intFromEnum(IpProto.dstopts);
+    pub const MTP: u16 = @intFromEnum(IpProto.mtp);
+    pub const BEETPH: u16 = @intFromEnum(IpProto.beetph);
+    pub const ENCAP: u16 = @intFromEnum(IpProto.encap);
+    pub const PIM: u16 = @intFromEnum(IpProto.pim);
+    pub const COMP: u16 = @intFromEnum(IpProto.comp);
+    pub const SCTP: u16 = @intFromEnum(IpProto.sctp);
+    pub const MH: u16 = @intFromEnum(IpProto.mh);
+    pub const UDPLITE: u16 = @intFromEnum(IpProto.udplite);
+    pub const MPLS: u16 = @intFromEnum(IpProto.mpls);
+    pub const RAW: u16 = @intFromEnum(IpProto.raw);
+    pub const MAX: u16 = @intFromEnum(IpProto.max);
 };
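
For review context, a short sketch of what call-site migration looks like under this patch (hedged: it assumes `statx()` and `socket()` keep their current raw argument widths, so explicit conversions remain at the syscall boundary):

    const std = @import("std");
    const linux = std.os.linux;

    // Before: linux.STATX_TYPE | linux.STATX_MODE | linux.STATX_MTIME
    const want: linux.Statx.Mask = .{ .type = true, .mode = true, .mtime = true };

    // Before: linux.IPPROTO.TCP (a bare integer)
    const proto: linux.IpProto = .tcp;

    comptime {
        std.debug.assert(@as(u32, @bitCast(want)) == 0x43); // same bits as before
        std.debug.assert(@intFromEnum(proto) == 6);
    }
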
 
 pub const RR = struct {
@@ -8402,6 +8419,13 @@ pub const MADV = struct {
     pub const SOFT_OFFLINE = 101;
 };
 
+pub const Madvise = enum(u32) {
+    _, // TODO: add options
+};
+pub const Fadvise = enum(u32) {
+    _, // TODO: add options
+};
+
 pub const POSIX_FADV = switch (native_arch) {
     .s390x => if (@typeInfo(usize).int.bits == 64) struct {
         pub const NORMAL = 0;
diff --git a/lib/std/os/linux/IoUring.zig b/lib/std/os/linux/IoUring.zig
index 25d4d88fd02f..393fe8192817 100644
--- a/lib/std/os/linux/IoUring.zig
+++ b/lib/std/os/linux/IoUring.zig
@@ -3,6 +3,7 @@ const std = @import("std");
 const builtin = @import("builtin");
 const assert = std.debug.assert;
 const mem = std.mem;
+const math = std.math;
 const net = std.net;
 const posix = std.posix;
 const linux = std.os.linux;
@@ -11,71 +12,86 @@ const is_linux = builtin.os.tag == .linux;
 const page_size_min = std.heap.page_size_min;
 
 fd: posix.fd_t = -1,
-sq: SubmissionQueue,
-cq: CompletionQueue,
-flags: u32,
-features: u32,
+sq: Sq,
+cq: Cq,
+flags: uflags.Setup,
+features: uflags.Features,
+/// matches int_flags in liburing
+init_flags: uflags.Init,
 
 /// A friendly way to setup an io_uring, with default linux.io_uring_params.
-/// `entries` must be a power of two between 1 and 32768, although the kernel will make the final
-/// call on how many entries the submission and completion queues will ultimately have,
+/// `entries` must be a power of two between 1 and 32768, although the kernel
+/// will make the final call on how many entries the submission and completion
+/// queues will ultimately have,
 /// see https://github.com/torvalds/linux/blob/v5.8/fs/io_uring.c#L8027-L8050.
-/// Matches the interface of io_uring_queue_init() in liburing.
-pub fn init(entries: u16, flags: u32) !IoUring {
-    var params = mem.zeroInit(linux.io_uring_params, .{
+/// Matches the interface of `io_uring_queue_init()` in liburing.
+pub fn init(entries: u16, flags: uflags.Setup) !IoUring {
+    var params = mem.zeroInit(Params, .{
         .flags = flags,
         .sq_thread_idle = 1000,
     });
-    return try IoUring.init_params(entries, &params);
+    return try .init_params(entries, &params);
 }
 
-/// A powerful way to setup an io_uring, if you want to tweak linux.io_uring_params such as submission
-/// queue thread cpu affinity or thread idle timeout (the kernel and our default is 1 second).
-/// `params` is passed by reference because the kernel needs to modify the parameters.
-/// Matches the interface of io_uring_queue_init_params() in liburing.
-pub fn init_params(entries: u16, p: *linux.io_uring_params) !IoUring { +/// A powerful way to setup an io_uring, if you want to tweak +/// linux.io_uring_params such as submission queue thread cpu affinity or +/// thread idle timeout (the kernel and our default is 1 second). +/// `params` is passed by reference because the kernel needs to modify the +/// parameters. +/// Matches the interface of `io_uring_queue_init_params()` in liburing. +pub fn init_params(entries: u16, p: *Params) !IoUring { if (entries == 0) return error.EntriesZero; - if (!std.math.isPowerOfTwo(entries)) return error.EntriesNotPowerOfTwo; - + if (!math.isPowerOfTwo(entries)) return error.EntriesNotPowerOfTwo; assert(p.sq_entries == 0); - assert(p.cq_entries == 0 or p.flags & linux.IORING_SETUP_CQSIZE != 0); - assert(p.features == 0); - assert(p.wq_fd == 0 or p.flags & linux.IORING_SETUP_ATTACH_WQ != 0); + assert(p.features.empty()); assert(p.resv[0] == 0); assert(p.resv[1] == 0); assert(p.resv[2] == 0); + assert(p.cq_entries == 0 or p.flags.cqsize); + assert(p.wq_fd == 0 or p.flags.attach_wq); + + // flags compatibility + if (p.flags.sqpoll) assert(!(p.flags.coop_taskrun or p.flags.taskrun_flag or p.flags.defer_taskrun)); + if (p.flags.sq_aff) assert(p.flags.sqpoll); + if (p.flags.defer_taskrun) assert(p.flags.single_issuer); + const res = linux.io_uring_setup(entries, p); switch (linux.E.init(res)) { .SUCCESS => {}, .FAULT => return error.ParamsOutsideAccessibleAddressSpace, - // The resv array contains non-zero data, p.flags contains an unsupported flag, - // entries out of bounds, IORING_SETUP_SQ_AFF was specified without IORING_SETUP_SQPOLL, - // or IORING_SETUP_CQSIZE was specified but linux.io_uring_params.cq_entries was invalid: + // The resv array contains non-zero data, p.flags contains an + // unsupported flag, entries out of bounds, IORING_SETUP_SQ_AFF was + // specified without IORING_SETUP_SQPOLL, or IORING_SETUP_CQSIZE was + // specified but linux.io_uring_params.cq_entries was invalid: .INVAL => return error.ArgumentsInvalid, .MFILE => return error.ProcessFdQuotaExceeded, .NFILE => return error.SystemFdQuotaExceeded, .NOMEM => return error.SystemResources, - // IORING_SETUP_SQPOLL was specified but effective user ID lacks sufficient privileges, - // or a container seccomp policy prohibits io_uring syscalls: + // IORING_SETUP_SQPOLL was specified but effective user ID lacks + // sufficient privileges, or a container seccomp policy prohibits + // io_uring syscalls: .PERM => return error.PermissionDenied, .NOSYS => return error.SystemOutdated, else => |errno| return posix.unexpectedErrno(errno), } - const fd = @as(posix.fd_t, @intCast(res)); + const fd: posix.fd_t = @intCast(res); assert(fd >= 0); errdefer posix.close(fd); - // Kernel versions 5.4 and up use only one mmap() for the submission and completion queues. - // This is not an optional feature for us... if the kernel does it, we have to do it. - // The thinking on this by the kernel developers was that both the submission and the - // completion queue rings have sizes just over a power of two, but the submission queue ring - // is significantly smaller with u32 slots. By bundling both in a single mmap, the kernel - // gets the submission queue ring for free. + // Kernel versions 5.4 and up use only one mmap() for the submission and + // completion queues. + // This is not an optional feature for us... if the kernel does it, we have + // to do it. 
The thinking on this by the kernel developers was that both + // the submission and the completion queue rings have sizes just over a + // power of two, but the submission queue ring is significantly smaller + // with u32 slots. By bundling both in a single mmap, the kernel gets the + // submission queue ring for free. // See https://patchwork.kernel.org/patch/11115257 for the kernel patch. - // We do not support the double mmap() done before 5.4, because we want to keep the - // init/deinit mmap paths simple and because io_uring has had many bug fixes even since 5.4. - if ((p.features & linux.IORING_FEAT_SINGLE_MMAP) == 0) { + // We do not support the double mmap() done before 5.4, because we want to + // keep the init/deinit mmap paths simple and because io_uring has had many + // bug fixes even since 5.4. + if (!p.features.single_mmap) { return error.SystemOutdated; } @@ -84,18 +100,21 @@ pub fn init_params(entries: u16, p: *linux.io_uring_params) !IoUring { assert(p.cq_entries != 0); assert(p.cq_entries >= p.sq_entries); - // From here on, we only need to read from params, so pass `p` by value as immutable. - // The completion queue shares the mmap with the submission queue, so pass `sq` there too. - var sq = try SubmissionQueue.init(fd, p.*); + // From here on, we only need to read from params, so pass `p` by value as + // immutable. + // The completion queue shares the mmap with the submission queue, so pass + // `sq` there too. + var sq: Sq = try .init(fd, p.*); errdefer sq.deinit(); - var cq = try CompletionQueue.init(fd, p.*, sq); + var cq: Cq = try .init(fd, p.*, sq); errdefer cq.deinit(); // Check that our starting state is as we expect. assert(sq.head.* == 0); assert(sq.tail.* == 0); assert(sq.mask == p.sq_entries - 1); - // Allow flags.* to be non-zero, since the kernel may set IORING_SQ_NEED_WAKEUP at any time. + // Allow flags.* to be non-zero, since the kernel may set + // IORING_SQ_NEED_WAKEUP at any time. assert(sq.dropped.* == 0); assert(sq.array.len == p.sq_entries); assert(sq.sqes.len == p.sq_entries); @@ -108,12 +127,13 @@ pub fn init_params(entries: u16, p: *linux.io_uring_params) !IoUring { assert(cq.overflow.* == 0); assert(cq.cqes.len == p.cq_entries); - return IoUring{ + return .{ .fd = fd, .sq = sq, .cq = cq, .flags = p.flags, .features = p.features, + .init_flags = .{}, }; } @@ -126,17 +146,20 @@ pub fn deinit(self: *IoUring) void { self.fd = -1; } -/// Returns a pointer to a vacant SQE, or an error if the submission queue is full. -/// We follow the implementation (and atomics) of liburing's `io_uring_get_sqe()` exactly. +/// Returns a pointer to a vacant SQE, or an error if the submission queue is +/// full. We follow the implementation (and atomics) of liburing's +/// `io_uring_get_sqe()` exactly. /// However, instead of a null we return an error to force safe handling. -/// Any situation where the submission queue is full tends more towards a control flow error, -/// and the null return in liburing is more a C idiom than anything else, for lack of a better -/// alternative. In Zig, we have first-class error handling... so let's use it. -/// Matches the implementation of io_uring_get_sqe() in liburing. -pub fn get_sqe(self: *IoUring) !*linux.io_uring_sqe { +/// Any situation where the submission queue is full tends more towards a +/// control flow error, and the null return in liburing is more a C idiom than +/// anything else, for lack of a better alternative. In Zig, we have +/// first-class error handling... so let's use it. 
+/// Matches the implementation of `io_uring_get_sqe()` in liburing. +pub fn get_sqe(self: *IoUring) !*Sqe { const head = @atomicLoad(u32, self.sq.head, .acquire); - // Remember that these head and tail offsets wrap around every four billion operations. - // We must therefore use wrapping addition and subtraction to avoid a runtime crash. + // Remember that these head and tail offsets wrap around every four billion + // operations. We must therefore use wrapping addition and subtraction to + // avoid a runtime crash. const next = self.sq.sqe_tail +% 1; if (next -% head > self.sq.sqes.len) return error.SubmissionQueueFull; const sqe = &self.sq.sqes[self.sq.sqe_tail & self.sq.mask]; @@ -144,26 +167,28 @@ pub fn get_sqe(self: *IoUring) !*linux.io_uring_sqe { return sqe; } -/// Submits the SQEs acquired via get_sqe() to the kernel. You can call this once after you have -/// called get_sqe() multiple times to setup multiple I/O requests. -/// Returns the number of SQEs submitted, if not used alongside IORING_SETUP_SQPOLL. -/// If the io_uring instance is uses IORING_SETUP_SQPOLL, the value returned on success is not -/// guaranteed to match the amount of actually submitted sqes during this call. A value higher -/// or lower, including 0, may be returned. -/// Matches the implementation of io_uring_submit() in liburing. +/// Submits the SQEs acquired via `get_sqe()` to the kernel. You can call this +/// once after you have called `get_sqe()` multiple times to setup multiple I/O +/// requests. +/// Returns the number of SQEs submitted, if not used alongside +/// IORING_SETUP_SQPOLL. +/// If the io_uring instance uses IORING_SETUP_SQPOLL, the value returned on +/// success is not guaranteed to match the amount of actually submitted sqes +/// during this call. A value higher or lower, including 0, may be returned. +/// Matches the implementation of `io_uring_submit()` in liburing. pub fn submit(self: *IoUring) !u32 { return self.submit_and_wait(0); } -/// Like submit(), but allows waiting for events as well. +/// Like `submit()`, but allows waiting for events as well. /// Returns the number of SQEs submitted. -/// Matches the implementation of io_uring_submit_and_wait() in liburing. +/// Matches the implementation of `io_uring_submit_and_wait()` in liburing. pub fn submit_and_wait(self: *IoUring, wait_nr: u32) !u32 { const submitted = self.flush_sq(); - var flags: u32 = 0; + var flags: uflags.Enter = self.enter_flags(); if (self.sq_ring_needs_enter(&flags) or wait_nr > 0) { - if (wait_nr > 0 or (self.flags & linux.IORING_SETUP_IOPOLL) != 0) { - flags |= linux.IORING_ENTER_GETEVENTS; + if (wait_nr > 0 or self.flags.iopoll) { + flags.getevents = true; } return try self.enter(submitted, wait_nr, flags); } @@ -172,45 +197,53 @@ pub fn submit_and_wait(self: *IoUring, wait_nr: u32) !u32 { /// Tell the kernel we have submitted SQEs and/or want to wait for CQEs. /// Returns the number of SQEs submitted. -pub fn enter(self: *IoUring, to_submit: u32, min_complete: u32, flags: u32) !u32 { +pub fn enter(self: *IoUring, to_submit: u32, min_complete: u32, flags: uflags.Enter) !u32 { assert(self.fd >= 0); const res = linux.io_uring_enter(self.fd, to_submit, min_complete, flags, null); switch (linux.E.init(res)) { .SUCCESS => {}, - // The kernel was unable to allocate memory or ran out of resources for the request. - // The application should wait for some completions and try again: + // The kernel was unable to allocate memory or ran out of resources for + // the request. 
The application should wait for some completions and + // try again: .AGAIN => return error.SystemResources, - // The SQE `fd` is invalid, or IOSQE_FIXED_FILE was set but no files were registered: + // The SQE `fd` is invalid, or IOSQE_FIXED_FILE was set but no files + // were registered: .BADF => return error.FileDescriptorInvalid, // The file descriptor is valid, but the ring is not in the right state. // See io_uring_register(2) for how to enable the ring. .BADFD => return error.FileDescriptorInBadState, - // The application attempted to overcommit the number of requests it can have pending. - // The application should wait for some completions and try again: + // The application attempted to overcommit the number of requests it + // can have pending. The application should wait for some completions + // and try again: .BUSY => return error.CompletionQueueOvercommitted, - // The SQE is invalid, or valid but the ring was setup with IORING_SETUP_IOPOLL: + // The SQE is invalid, or valid but the ring was setup with + // IORING_SETUP_IOPOLL: .INVAL => return error.SubmissionQueueEntryInvalid, - // The buffer is outside the process' accessible address space, or IORING_OP_READ_FIXED - // or IORING_OP_WRITE_FIXED was specified but no buffers were registered, or the range - // described by `addr` and `len` is not within the buffer registered at `buf_index`: + // The buffer is outside the process' accessible address space, or + // IORING_OP_READ_FIXED or IORING_OP_WRITE_FIXED was specified but no + // buffers were registered, or the range described by `addr` and `len` + // is not within the buffer registered at `buf_index`: .FAULT => return error.BufferInvalid, .NXIO => return error.RingShuttingDown, - // The kernel believes our `self.fd` does not refer to an io_uring instance, - // or the opcode is valid but not supported by this kernel (more likely): + // The kernel believes our `self.fd` does not refer to an io_uring + // instance, or the opcode is valid but not supported by this kernel + // (more likely): .OPNOTSUPP => return error.OpcodeNotSupported, - // The operation was interrupted by a delivery of a signal before it could complete. - // This can happen while waiting for events with IORING_ENTER_GETEVENTS: + // The operation was interrupted by a delivery of a signal before it + // could complete. This can happen while waiting for events with + // IORING_ENTER_GETEVENTS: .INTR => return error.SignalInterrupt, else => |errno| return posix.unexpectedErrno(errno), } - return @as(u32, @intCast(res)); + return @intCast(res); } /// Sync internal state with kernel ring state on the SQ side. -/// Returns the number of all pending events in the SQ ring, for the shared ring. -/// This return value includes previously flushed SQEs, as per liburing. -/// The rationale is to suggest that an io_uring_enter() call is needed rather than not. -/// Matches the implementation of __io_uring_flush_sq() in liburing. +/// Returns the number of all pending events in the SQ ring, for the shared +/// ring. This return value includes previously flushed SQEs, as per liburing. +/// The rationale is to suggest that an `io_uring_enter()` call is needed rather +/// than not. +/// Matches the implementation of `__io_uring_flush_sq()` in liburing. pub fn flush_sq(self: *IoUring) u32 { if (self.sq.sqe_head != self.sq.sqe_tail) { // Fill in SQEs that we have queued up, adding them to the kernel ring. 
@@ -222,64 +255,75 @@ pub fn flush_sq(self: *IoUring) u32 { tail +%= 1; self.sq.sqe_head +%= 1; } - // Ensure that the kernel can actually see the SQE updates when it sees the tail update. + // Ensure that the kernel can actually see the SQE updates when it sees + // the tail update. @atomicStore(u32, self.sq.tail, tail, .release); } return self.sq_ready(); } /// Returns true if we are not using an SQ thread (thus nobody submits but us), -/// or if IORING_SQ_NEED_WAKEUP is set and the SQ thread must be explicitly awakened. -/// For the latter case, we set the SQ thread wakeup flag. -/// Matches the implementation of sq_ring_needs_enter() in liburing. -pub fn sq_ring_needs_enter(self: *IoUring, flags: *u32) bool { - assert(flags.* == 0); - if ((self.flags & linux.IORING_SETUP_SQPOLL) == 0) return true; - if ((@atomicLoad(u32, self.sq.flags, .unordered) & linux.IORING_SQ_NEED_WAKEUP) != 0) { - flags.* |= linux.IORING_ENTER_SQ_WAKEUP; +/// or if IORING_SQ_NEED_WAKEUP is set and the SQ thread must be explicitly +/// awakened. For the latter case, we set the SQ thread wakeup flag. +/// Matches the implementation of `sq_ring_needs_enter()` in liburing. +pub fn sq_ring_needs_enter(self: *IoUring, flags: *uflags.Enter) bool { + assert(flags.*.valid_init_flags()); + if (!self.flags.sqpoll) return true; + if (@atomicLoad(Sq.Flags, self.sq.flags, .unordered).need_wakeup) { + flags.*.sq_wakeup = true; return true; } return false; } -/// Returns the number of flushed and unflushed SQEs pending in the submission queue. -/// In other words, this is the number of SQEs in the submission queue, i.e. its length. -/// These are SQEs that the kernel is yet to consume. -/// Matches the implementation of io_uring_sq_ready in liburing. +/// Returns the number of flushed and unflushed SQEs pending in the submission +/// queue. In other words, this is the number of SQEs in the submission queue, +/// i.e. its length. These are SQEs that the kernel is yet to consume. +/// Matches the implementation of `io_uring_sq_ready()` in liburing. pub fn sq_ready(self: *IoUring) u32 { - // Always use the shared ring state (i.e. head and not sqe_head) to avoid going out of sync, - // see https://github.com/axboe/liburing/issues/92. + // Always use the shared ring state (i.e. head and not sqe_head) to avoid + // going out of sync, see https://github.com/axboe/liburing/issues/92. return self.sq.sqe_tail -% @atomicLoad(u32, self.sq.head, .acquire); } /// Returns the number of CQEs in the completion queue, i.e. its length. /// These are CQEs that the application is yet to consume. -/// Matches the implementation of io_uring_cq_ready in liburing. +/// Matches the implementation of `io_uring_cq_ready()` in liburing. pub fn cq_ready(self: *IoUring) u32 { return @atomicLoad(u32, self.cq.tail, .acquire) -% self.cq.head.*; } -/// Copies as many CQEs as are ready, and that can fit into the destination `cqes` slice. -/// If none are available, enters into the kernel to wait for at most `wait_nr` CQEs. +/// Copies as many CQEs as are ready, and that can fit into the destination +/// `cqes` slice. If none are available, enters into the kernel to wait for at +/// most `wait_nr` CQEs. /// Returns the number of CQEs copied, advancing the CQ ring. -/// Provides all the wait/peek methods found in liburing, but with batching and a single method. -/// The rationale for copying CQEs rather than copying pointers is that pointers are 8 bytes -/// whereas CQEs are not much more at only 16 bytes, and this provides a safer faster interface. 
-/// Safer, because you no longer need to call cqe_seen(), avoiding idempotency bugs. -/// Faster, because we can now amortize the atomic store release to `cq.head` across the batch. +/// Provides all the wait/peek methods found in liburing, but with batching and +/// a single method. +/// The rationale for copying CQEs rather than copying pointers is that +/// pointers are 8 bytes whereas CQEs are not much more at only 16 bytes, and +/// this provides a safer faster interface. +/// Safer, because you no longer need to call `cqe_seen()`, avoiding idempotency +/// bugs. Faster, because we can now amortize the atomic store release to +/// `cq.head` across the batch. /// See https://github.com/axboe/liburing/issues/103#issuecomment-686665007. -/// Matches the implementation of io_uring_peek_batch_cqe() in liburing, but supports waiting. -pub fn copy_cqes(self: *IoUring, cqes: []linux.io_uring_cqe, wait_nr: u32) !u32 { +/// Matches the implementation of `io_uring_peek_batch_cqe()` in liburing, but +/// supports waiting. +pub fn copy_cqes(self: *IoUring, cqes: []Cqe, wait_nr: u32) !u32 { const count = self.copy_cqes_ready(cqes); if (count > 0) return count; if (self.cq_ring_needs_flush() or wait_nr > 0) { - _ = try self.enter(0, wait_nr, linux.IORING_ENTER_GETEVENTS); + const flags = blk: { + var flags = self.enter_flags(); + flags.getevents = true; + break :blk flags; + }; + _ = try self.enter(0, wait_nr, flags); return self.copy_cqes_ready(cqes); } return 0; } -fn copy_cqes_ready(self: *IoUring, cqes: []linux.io_uring_cqe) u32 { +fn copy_cqes_ready(self: *IoUring, cqes: []Cqe) u32 { const ready = self.cq_ready(); const count = @min(cqes.len, ready); const head = self.cq.head.* & self.cq.mask; @@ -298,89 +342,118 @@ fn copy_cqes_ready(self: *IoUring, cqes: []linux.io_uring_cqe) u32 { return count; } -/// Returns a copy of an I/O completion, waiting for it if necessary, and advancing the CQ ring. -/// A convenience method for `copy_cqes()` for when you don't need to batch or peek. -pub fn copy_cqe(ring: *IoUring) !linux.io_uring_cqe { - var cqes: [1]linux.io_uring_cqe = undefined; +/// Returns a copy of an I/O completion, waiting for it if necessary, and +/// advancing the CQ ring. +/// A convenience method for `copy_cqes()` for when you don't need to batch or +/// peek. +pub fn copy_cqe(ring: *IoUring) !Cqe { + var cqes: [1]Cqe = undefined; while (true) { const count = try ring.copy_cqes(&cqes, 1); if (count > 0) return cqes[0]; } } -/// Matches the implementation of cq_ring_needs_flush() in liburing. +/// Matches the implementation of `cq_ring_needs_flush()` in liburing. pub fn cq_ring_needs_flush(self: *IoUring) bool { - return (@atomicLoad(u32, self.sq.flags, .unordered) & linux.IORING_SQ_CQ_OVERFLOW) != 0; + const sq_flags = @atomicLoad(Sq.Flags, self.sq.flags, .unordered); + if (sq_flags.cq_overflow or sq_flags.taskrun) return true; + return false; } /// For advanced use cases only that implement custom completion queue methods. -/// If you use copy_cqes() or copy_cqe() you must not call cqe_seen() or cq_advance(). -/// Must be called exactly once after a zero-copy CQE has been processed by your application. +/// If you use `copy_cqes()` or `copy_cqe()` you must not call `cqe_seen()` or +/// `cq_advance()`. Must be called exactly once after a zero-copy CQE has been +/// processed by your application. /// Not idempotent, calling more than once will result in other CQEs being lost. -/// Matches the implementation of cqe_seen() in liburing. 
-pub fn cqe_seen(self: *IoUring, cqe: *linux.io_uring_cqe) void {
+/// Matches the implementation of `cqe_seen()` in liburing.
+pub fn cqe_seen(self: *IoUring, cqe: *Cqe) void {
     _ = cqe;
     self.cq_advance(1);
 }
 
 /// For advanced use cases only that implement custom completion queue methods.
-/// Matches the implementation of cq_advance() in liburing.
+/// Matches the implementation of `cq_advance()` in liburing.
 pub fn cq_advance(self: *IoUring, count: u32) void {
     if (count > 0) {
-        // Ensure the kernel only sees the new head value after the CQEs have been read.
+        // Ensure the kernel only sees the new head value after the CQEs have
+        // been read.
         @atomicStore(u32, self.cq.head, self.cq.head.* +% count, .release);
     }
 }
 
-/// Queues (but does not submit) an SQE to perform an `fsync(2)`.
-/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
-/// For example, for `fdatasync()` you can set `IORING_FSYNC_DATASYNC` in the SQE's `rw_flags`.
-/// N.B. While SQEs are initiated in the order in which they appear in the submission queue,
-/// operations execute in parallel and completions are unordered. Therefore, an application that
-/// submits a write followed by an fsync in the submission queue cannot expect the fsync to
-/// apply to the write, since the fsync may complete before the write is issued to the disk.
-/// You should preferably use `link_with_next_sqe()` on a write's SQE to link it with an fsync,
-/// or else insert a full write barrier using `drain_previous_sqes()` when queueing an fsync.
-pub fn fsync(self: *IoUring, user_data: u64, fd: posix.fd_t, flags: u32) !*linux.io_uring_sqe {
+/// Enable/disable setting of iowait by the kernel.
+/// matches `io_uring_set_iowait` in liburing
+pub fn set_iowait(self: *IoUring, enable_iowait: bool) !void {
+    if (!self.features.no_iowait) {
+        return error.SystemOutdated;
+    }
+    self.init_flags.no_iowait = !enable_iowait;
+}
+
+/// matches `ring_enter_flags()` in liburing
+pub fn enter_flags(self: *IoUring) uflags.Enter {
+    return self.init_flags.enter_flags();
+}
+
+/// Queues (but does not submit) an SQE to perform a `splice(2)`.
+/// Either `fd_in` or `fd_out` must be a pipe.
+/// If `fd_in` refers to a pipe, `off_in` is ignored and must be set to
+/// math.maxInt(u64).
+/// If `fd_in` does not refer to a pipe and `off_in` is maxInt(u64), then `len`
+/// bytes are read from `fd_in` starting from the file offset, which is
+/// incremented by the number of bytes read.
+/// If `fd_in` does not refer to a pipe and `off_in` is not maxInt(u64), then
+/// the starting offset of `fd_in` will be `off_in`.
+///
+/// This splice operation can be used to implement sendfile by splicing to an
+/// intermediate pipe first, then splice to the final destination. In fact, the
+/// implementation of sendfile in kernel uses splice internally.
+///
+/// NOTE that even if `fd_in` or `fd_out` refers to a pipe, the splice operation
+/// can still fail with EINVAL if one of the fd doesn't explicitly support
+/// splice operation, e.g. reading from terminal is unsupported from kernel 5.7
+/// to 5.11. See https://github.com/axboe/liburing/issues/291
+///
+/// Returns a pointer to the SQE so that you can further modify the SQE for
+/// advanced use cases.
+pub fn splice( + self: *IoUring, + user_data: u64, + fd_in: posix.fd_t, + off_in: u64, + fd_out: posix.fd_t, + off_out: u64, + len: usize, +) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_fsync(fd, flags); + sqe.prep_splice(fd_in, off_in, fd_out, off_out, len); sqe.user_data = user_data; return sqe; } -/// Queues (but does not submit) an SQE to perform a no-op. -/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases. -/// A no-op is more useful than may appear at first glance. -/// For example, you could call `drain_previous_sqes()` on the returned SQE, to use the no-op to -/// know when the ring is idle before acting on a kill signal. -pub fn nop(self: *IoUring, user_data: u64) !*linux.io_uring_sqe { +// COMMIT: ignored flags for splice and tee lets see if they become important +// in the future +pub fn tee( + self: *IoUring, + user_data: u64, + fd_in: posix.fd_t, + fd_out: posix.fd_t, + len: usize, +) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_nop(); + sqe.prep_tee(fd_in, fd_out, len); sqe.user_data = user_data; return sqe; } -/// Used to select how the read should be handled. -pub const ReadBuffer = union(enum) { - /// io_uring will read directly into this buffer - buffer: []u8, - - /// io_uring will read directly into these buffers using readv. - iovecs: []const posix.iovec, - - /// io_uring will select a buffer that has previously been provided with `provide_buffers`. - /// The buffer group reference by `group_id` must contain at least one buffer for the read to work. - /// `len` controls the number of bytes to read into the selected buffer. - buffer_selection: struct { - group_id: u16, - len: usize, - }, -}; - -/// Queues (but does not submit) an SQE to perform a `read(2)` or `preadv(2)` depending on the buffer type. -/// * Reading into a `ReadBuffer.buffer` uses `read(2)` +/// Queues (but does not submit) an SQE to perform a `pread(2)` or `preadv(2)` +/// depending on the buffer type. +/// * Reading into a `ReadBuffer.buffer` uses `pread(2)` /// * Reading into a `ReadBuffer.iovecs` uses `preadv(2)` -/// If you want to do a `preadv2(2)` then set `rw_flags` on the returned SQE. See https://man7.org/linux/man-pages/man2/preadv2.2.html +/// +/// If you want to do a `preadv2(2)` then set `rw_flags` on the returned SQE. +/// See https://man7.org/linux/man-pages/man2/preadv2.2.html /// /// Returns a pointer to the SQE. pub fn read( @@ -389,14 +462,14 @@ pub fn read( fd: posix.fd_t, buffer: ReadBuffer, offset: u64, -) !*linux.io_uring_sqe { +) !*Sqe { const sqe = try self.get_sqe(); switch (buffer) { .buffer => |slice| sqe.prep_read(fd, slice, offset), .iovecs => |vecs| sqe.prep_readv(fd, vecs, offset), .buffer_selection => |selection| { - sqe.prep_rw(.READ, fd, 0, selection.len, offset); - sqe.flags |= linux.IOSQE_BUFFER_SELECT; + sqe.prep_rw(.read, fd, 0, selection.len, offset); + sqe.flags.buffer_select = true; sqe.buf_index = selection.group_id; }, } @@ -404,375 +477,305 @@ pub fn read( return sqe; } -/// Queues (but does not submit) an SQE to perform a `write(2)`. -/// Returns a pointer to the SQE. -pub fn write( - self: *IoUring, - user_data: u64, - fd: posix.fd_t, - buffer: []const u8, - offset: u64, -) !*linux.io_uring_sqe { - const sqe = try self.get_sqe(); - sqe.prep_write(fd, buffer, offset); - sqe.user_data = user_data; - return sqe; -} - -/// Queues (but does not submit) an SQE to perform a `splice(2)` -/// Either `fd_in` or `fd_out` must be a pipe. 
-/// If `fd_in` refers to a pipe, `off_in` is ignored and must be set to std.math.maxInt(u64).
-/// If `fd_in` does not refer to a pipe and `off_in` is maxInt(u64), then `len` are read
-/// from `fd_in` starting from the file offset, which is incremented by the number of bytes read.
-/// If `fd_in` does not refer to a pipe and `off_in` is not maxInt(u64), then the starting offset of `fd_in` will be `off_in`.
-/// This splice operation can be used to implement sendfile by splicing to an intermediate pipe first,
-/// then splice to the final destination. In fact, the implementation of sendfile in kernel uses splice internally.
-///
-/// NOTE that even if fd_in or fd_out refers to a pipe, the splice operation can still fail with EINVAL if one of the
-/// fd doesn't explicitly support splice peration, e.g. reading from terminal is unsupported from kernel 5.7 to 5.11.
-/// See https://github.com/axboe/liburing/issues/291
-///
-/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
-pub fn splice(self: *IoUring, user_data: u64, fd_in: posix.fd_t, off_in: u64, fd_out: posix.fd_t, off_out: u64, len: usize) !*linux.io_uring_sqe {
-    const sqe = try self.get_sqe();
-    sqe.prep_splice(fd_in, off_in, fd_out, off_out, len);
-    sqe.user_data = user_data;
-    return sqe;
-}
-
 /// Queues (but does not submit) an SQE to perform a IORING_OP_READ_FIXED.
-/// The `buffer` provided must be registered with the kernel by calling `register_buffers` first.
-/// The `buffer_index` must be the same as its index in the array provided to `register_buffers`.
+/// The `buffer` provided must be registered with the kernel by calling
+/// `register_buffers()` first. The `buffer_index` must be the same as its
+/// index in the array provided to `register_buffers()`.
 ///
-/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
+/// Returns a pointer to the SQE so that you can further modify the SQE for
+/// advanced use cases.
 pub fn read_fixed(
     self: *IoUring,
     user_data: u64,
     fd: posix.fd_t,
-    buffer: *posix.iovec,
+    buffer: ReadBuffer,
     offset: u64,
     buffer_index: u16,
-) !*linux.io_uring_sqe {
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_read_fixed(fd, buffer, offset, buffer_index);
+    switch (buffer) {
+        .buffer => |slice| sqe.prep_read_fixed(fd, slice, offset, buffer_index),
+        .iovecs => |vecs| sqe.prep_readv_fixed(fd, vecs, offset, buffer_index),
+        .buffer_selection => |selection| {
+            sqe.prep_rw(.read_fixed, fd, 0, selection.len, offset);
+            sqe.flags.buffer_select = true;
+            sqe.buf_index = selection.group_id;
+        },
+    }
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `pwritev()`.
-/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases.
-/// For example, if you want to do a `pwritev2()` then set `rw_flags` on the returned SQE.
-/// See https://linux.die.net/man/2/pwritev.
-pub fn writev(
+/// Queues (but does not submit) an SQE to perform a `pwrite(2)` or `pwritev(2)`
+/// depending on the write buffer type.
+/// * Writing from a `WriteBuffer.buffer` uses `pwrite(2)`
+/// * Writing from a `WriteBuffer.iovecs` uses `pwritev(2)`
+///
+/// Returns a pointer to the SQE so that you can further modify the SQE for
+/// advanced use cases.
+/// For example, if you want to do a `pwritev2()` then set `rw_flags` on the
+/// returned SQE. See https://linux.die.net/man/2/pwritev.
+pub fn write( self: *IoUring, user_data: u64, fd: posix.fd_t, - iovecs: []const posix.iovec_const, + buffer: WriteBuffer, offset: u64, -) !*linux.io_uring_sqe { +) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_writev(fd, iovecs, offset); + switch (buffer) { + .buffer => |slice| sqe.prep_write(fd, slice, offset), + .iovecs => |vecs| sqe.prep_writev(fd, vecs, offset), + } sqe.user_data = user_data; return sqe; } /// Queues (but does not submit) an SQE to perform a IORING_OP_WRITE_FIXED. -/// The `buffer` provided must be registered with the kernel by calling `register_buffers` first. -/// The `buffer_index` must be the same as its index in the array provided to `register_buffers`. +/// The `buffer` provided must be registered with the kernel by calling +/// `register_buffers()` first. The `buffer_index` must be the same as its index +/// in the array provided to `register_buffers()`. /// -/// Returns a pointer to the SQE so that you can further modify the SQE for advanced use cases. +/// Returns a pointer to the SQE so that you can further modify the SQE for +/// advanced use cases. pub fn write_fixed( self: *IoUring, user_data: u64, fd: posix.fd_t, - buffer: *posix.iovec, + buffer: WriteBuffer, offset: u64, buffer_index: u16, -) !*linux.io_uring_sqe { +) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_write_fixed(fd, buffer, offset, buffer_index); + switch (buffer) { + .buffer => |slice| { + sqe.prep_write_fixed(fd, slice, offset, buffer_index); + }, + .iovecs => |vecs| { + sqe.prep_writev_fixed(fd, vecs, offset, buffer_index); + }, + } sqe.user_data = user_data; return sqe; } -/// Queues (but does not submit) an SQE to perform an `accept4(2)` on a socket. +/// Queues (but does not submit) an SQE to perform a `recvmsg(2)`. /// Returns a pointer to the SQE. -/// Available since 5.5 -pub fn accept( +/// Available since 5.3 +pub fn recvmsg( self: *IoUring, user_data: u64, fd: posix.fd_t, - addr: ?*posix.sockaddr, - addrlen: ?*posix.socklen_t, - flags: u32, -) !*linux.io_uring_sqe { + msg: *posix.msghdr, + flags: linux.Msg, +) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_accept(fd, addr, addrlen, flags); + sqe.prep_recvmsg(fd, msg, flags); sqe.user_data = user_data; return sqe; } -/// Queues an multishot accept on a socket. -/// -/// Multishot variant allows an application to issue a single accept request, -/// which will repeatedly trigger a CQE when a connection request comes in. -/// While IORING_CQE_F_MORE flag is set in CQE flags accept will generate -/// further CQEs. -/// -/// Available since 5.19 -pub fn accept_multishot( +/// Queues (but does not submit) an SQE to perform a multishot `recvmsg(2)`. +/// Returns a pointer to the SQE. +pub fn recvmsg_multishot( self: *IoUring, user_data: u64, fd: posix.fd_t, - addr: ?*posix.sockaddr, - addrlen: ?*posix.socklen_t, - flags: u32, -) !*linux.io_uring_sqe { + msg: *posix.msghdr, + flags: linux.Msg, +) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_multishot_accept(fd, addr, addrlen, flags); + sqe.prep_recvmsg_multishot(fd, msg, flags); sqe.user_data = user_data; return sqe; } -/// Queues an accept using direct (registered) file descriptors. -/// -/// To use an accept direct variant, the application must first have registered -/// a file table (with register_files). An unused table index will be -/// dynamically chosen and returned in the CQE res field. 
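A sketch of the tagged-union call sites for `read()` and `write()` above (a fragment: `ring`, `in_fd`, and `out_fd` are assumed to exist, and `posix.iovec_const` uses the current `base`/`len` field names):

    var in_buf: [4096]u8 = undefined;
    // pread(2) into a plain buffer:
    _ = try ring.read(0x10, in_fd, .{ .buffer = &in_buf }, 0);
    // pwrite(2) from a plain buffer:
    _ = try ring.write(0x11, out_fd, .{ .buffer = "hello" }, 0);
    // pwritev(2) from an iovec list:
    const iovs = [_]posix.iovec_const{.{ .base = "hi", .len = 2 }};
    _ = try ring.write(0x12, out_fd, .{ .iovecs = &iovs }, 0);
    _ = try ring.submit();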
-/// -/// After creation, they can be used by setting IOSQE_FIXED_FILE in the SQE -/// flags member, and setting the SQE fd field to the direct descriptor value -/// rather than the regular file descriptor. -/// -/// Available since 5.19 -pub fn accept_direct( +/// Queues (but does not submit) an SQE to perform a `sendmsg(2)`. +/// Returns a pointer to the SQE. +/// Available since 5.3 +pub fn sendmsg( self: *IoUring, user_data: u64, fd: posix.fd_t, - addr: ?*posix.sockaddr, - addrlen: ?*posix.socklen_t, - flags: u32, -) !*linux.io_uring_sqe { + msg: *const posix.msghdr_const, + flags: linux.Msg, +) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_accept_direct(fd, addr, addrlen, flags, linux.IORING_FILE_INDEX_ALLOC); + sqe.prep_sendmsg(fd, msg, flags); sqe.user_data = user_data; return sqe; } -/// Queues an multishot accept using direct (registered) file descriptors. -/// Available since 5.19 -pub fn accept_multishot_direct( +/// Queues (but does not submit) an SQE to perform a `poll(2)`. +/// Returns a pointer to the SQE. +pub fn poll_add( self: *IoUring, user_data: u64, fd: posix.fd_t, - addr: ?*posix.sockaddr, - addrlen: ?*posix.socklen_t, - flags: u32, -) !*linux.io_uring_sqe { + poll_mask: linux.Epoll, +) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_multishot_accept_direct(fd, addr, addrlen, flags); + sqe.prep_poll_add(fd, poll_mask); sqe.user_data = user_data; return sqe; } -/// Queue (but does not submit) an SQE to perform a `connect(2)` on a socket. +/// Queues (but does not submit) an SQE to perform a multishot `poll(2)`. /// Returns a pointer to the SQE. -pub fn connect( +pub fn poll_multishot( self: *IoUring, user_data: u64, fd: posix.fd_t, - addr: *const posix.sockaddr, - addrlen: posix.socklen_t, -) !*linux.io_uring_sqe { - const sqe = try self.get_sqe(); - sqe.prep_connect(fd, addr, addrlen); - sqe.user_data = user_data; + poll_mask: linux.Epoll, +) !*Sqe { + const sqe = try self.poll_add(user_data, fd, poll_mask); + sqe.len = @bitCast(uflags.Poll{ .add_multi = true }); return sqe; } -/// Queues (but does not submit) an SQE to perform a `epoll_ctl(2)`. +/// Queues (but does not submit) an SQE to remove an existing poll operation. /// Returns a pointer to the SQE. -pub fn epoll_ctl( +pub fn poll_remove( self: *IoUring, user_data: u64, - epfd: posix.fd_t, - fd: posix.fd_t, - op: u32, - ev: ?*linux.epoll_event, -) !*linux.io_uring_sqe { + target_user_data: u64, +) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_epoll_ctl(epfd, fd, op, ev); + sqe.prep_poll_remove(target_user_data); sqe.user_data = user_data; return sqe; } -/// Used to select how the recv call should be handled. -pub const RecvBuffer = union(enum) { - /// io_uring will recv directly into this buffer - buffer: []u8, - - /// io_uring will select a buffer that has previously been provided with `provide_buffers`. - /// The buffer group referenced by `group_id` must contain at least one buffer for the recv call to work. - /// `len` controls the number of bytes to read into the selected buffer. - buffer_selection: struct { - group_id: u16, - len: usize, - }, -}; - -/// Queues (but does not submit) an SQE to perform a `recv(2)`. -/// Returns a pointer to the SQE. -/// Available since 5.6 -pub fn recv( +/// Queues (but does not submit) an SQE to update the user data of an existing +/// poll operation. Returns a pointer to the SQE. 
+pub fn poll_update( self: *IoUring, user_data: u64, - fd: posix.fd_t, - buffer: RecvBuffer, - flags: u32, -) !*linux.io_uring_sqe { + old_user_data: u64, + new_user_data: u64, + poll_mask: linux.Epoll, + flags: uflags.Poll, +) !*Sqe { const sqe = try self.get_sqe(); - switch (buffer) { - .buffer => |slice| sqe.prep_recv(fd, slice, flags), - .buffer_selection => |selection| { - sqe.prep_rw(.RECV, fd, 0, selection.len, 0); - sqe.rw_flags = flags; - sqe.flags |= linux.IOSQE_BUFFER_SELECT; - sqe.buf_index = selection.group_id; - }, - } + sqe.prep_poll_update(old_user_data, new_user_data, poll_mask, flags); sqe.user_data = user_data; return sqe; } -/// Queues (but does not submit) an SQE to perform a `send(2)`. -/// Returns a pointer to the SQE. -/// Available since 5.6 -pub fn send( - self: *IoUring, - user_data: u64, - fd: posix.fd_t, - buffer: []const u8, - flags: u32, -) !*linux.io_uring_sqe { +/// Queues (but does not submit) an SQE to perform an `fsync(2)`. +/// Returns a pointer to the SQE so that you can further modify the SQE for +/// advanced use cases. +/// For example, for `fdatasync()` you can set `IORING_FSYNC_DATASYNC` in the +/// SQE's `rw_flags`. +/// N.B. While SQEs are initiated in the order in which they appear in the +/// submission queue, operations execute in parallel and completions are +/// unordered. Therefore, an application that submits a write followed by an +/// fsync in the submission queue cannot expect the fsync to apply to the write, +/// since the fsync may complete before the write is issued to the disk. +/// You should preferably use `link_with_next_sqe()` on a write's SQE to link +/// it with an fsync, or else insert a full write barrier using +/// `drain_previous_sqes()` when queueing an fsync. +pub fn fsync(self: *IoUring, user_data: u64, fd: posix.fd_t, flags: uflags.Fsync) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_send(fd, buffer, flags); + sqe.prep_fsync(fd, flags); sqe.user_data = user_data; return sqe; } -/// Queues (but does not submit) an SQE to perform an async zerocopy `send(2)`. +/// Queues (but does not submit) an SQE to perform a no-op. +/// Returns a pointer to the SQE so that you can further modify the SQE for +/// advanced use cases. +/// A no-op is more useful than may appear at first glance. +/// For example, you could call `drain_previous_sqes()` on the returned SQE, to +/// use the no-op to know when the ring is idle before acting on a kill signal. +pub fn nop(self: *IoUring, user_data: u64) !*Sqe { + const sqe = try self.get_sqe(); + sqe.prep_nop(); + sqe.user_data = user_data; + return sqe; +} + +/// Queues (but does not submit) an SQE to register a timeout operation. +/// Returns a pointer to the SQE. /// -/// This operation will most likely produce two CQEs. The flags field of the -/// first cqe may likely contain IORING_CQE_F_MORE, which means that there will -/// be a second cqe with the user_data field set to the same value. The user -/// must not modify the data buffer until the notification is posted. The first -/// cqe follows the usual rules and so its res field will contain the number of -/// bytes sent or a negative error code. The notification's res field will be -/// set to zero and the flags field will contain IORING_CQE_F_NOTIF. The two -/// step model is needed because the kernel may hold on to buffers for a long -/// time, e.g. waiting for a TCP ACK. Notifications responsible for controlling -/// the lifetime of the buffers. Even errored requests may generate a -/// notification. 
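A sketch of the write-then-fsync ordering that the `fsync()` doc comment above recommends (assuming an initialized `ring` and an open `fd`; the zero-initialized `uflags.Fsync` value is assumed to mean "no flags"):

    const w = try ring.write(0x20, fd, .{ .buffer = "payload" }, 0);
    w.link_with_next_sqe(); // the fsync below now orders after this write
    _ = try ring.fsync(0x21, fd, .{});
    _ = try ring.submit();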
+/// The timeout will complete when either the timeout expires, or after the
+/// specified number of events complete (if `count` is greater than `0`).
 ///
-/// Available since 6.0
-pub fn send_zc(
+/// `flags` may be zero-initialized for a relative timeout, or have the
+/// `IORING_TIMEOUT_ABS` bit set for an absolute timeout.
+///
+/// The completion event result will be `-ETIME` if the timeout completed
+/// through expiration, `0` if the timeout completed after the specified number
+/// of events, or `-ECANCELED` if the timeout was removed before it expired.
+///
+/// io_uring timeouts use the `CLOCK.MONOTONIC` clock source.
+pub fn timeout(
     self: *IoUring,
     user_data: u64,
-    fd: posix.fd_t,
-    buffer: []const u8,
-    send_flags: u32,
-    zc_flags: u16,
-) !*linux.io_uring_sqe {
+    ts: *const linux.kernel_timespec,
+    count: u32,
+    flags: uflags.Timeout,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_send_zc(fd, buffer, send_flags, zc_flags);
+    sqe.prep_timeout(ts, count, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform an async zerocopy `send(2)`.
+/// Queues (but does not submit) an SQE to remove an existing timeout operation.
 /// Returns a pointer to the SQE.
-/// Available since 6.0
-pub fn send_zc_fixed(
+///
+/// The timeout is identified by its `user_data`.
+///
+/// The completion event result will be `0` if the timeout was found and
+/// cancelled successfully. Otherwise:
+/// `-EBUSY` if the timeout was found but expiration was already in progress, or
+/// `-ENOENT` if the timeout was not found.
+pub fn timeout_remove(
     self: *IoUring,
     user_data: u64,
-    fd: posix.fd_t,
-    buffer: []const u8,
-    send_flags: u32,
-    zc_flags: u16,
-    buf_index: u16,
-) !*linux.io_uring_sqe {
+    timeout_user_data: u64,
+    flags: uflags.Timeout,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_send_zc_fixed(fd, buffer, send_flags, zc_flags, buf_index);
+    sqe.prep_timeout_remove(timeout_user_data, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `recvmsg(2)`.
-/// Returns a pointer to the SQE.
-/// Available since 5.3
-pub fn recvmsg(
+/// Queues (but does not submit) an SQE to update an existing timeout operation.
+/// Returns a pointer to the SQE.
+///
+/// The timeout is identified by `timeout_user_data`.
+pub fn timeout_update(
     self: *IoUring,
     user_data: u64,
-    fd: posix.fd_t,
-    msg: *posix.msghdr,
-    flags: u32,
-) !*linux.io_uring_sqe {
+    timeout_user_data: u64,
+    ts: *const linux.kernel_timespec,
+    flags: uflags.Timeout,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_recvmsg(fd, msg, flags);
+    sqe.prep_timeout_update(timeout_user_data, ts, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `sendmsg(2)`.
+/// Queues (but does not submit) an SQE to perform an `accept4(2)` on a socket.
 /// Returns a pointer to the SQE.
-/// Available since 5.3
-pub fn sendmsg(
-    self: *IoUring,
-    user_data: u64,
-    fd: posix.fd_t,
-    msg: *const posix.msghdr_const,
-    flags: u32,
-) !*linux.io_uring_sqe {
-    const sqe = try self.get_sqe();
-    sqe.prep_sendmsg(fd, msg, flags);
-    sqe.user_data = user_data;
-    return sqe;
-}
-
-/// Queues (but does not submit) an SQE to perform an async zerocopy `sendmsg(2)`.
-/// Returns a pointer to the SQE.
-/// Available since 6.1
-pub fn sendmsg_zc(
-    self: *IoUring,
-    user_data: u64,
-    fd: posix.fd_t,
-    msg: *const posix.msghdr_const,
-    flags: u32,
-) !*linux.io_uring_sqe {
-    const sqe = try self.get_sqe();
-    sqe.prep_sendmsg_zc(fd, msg, flags);
-    sqe.user_data = user_data;
-    return sqe;
-}
-
-/// Queues (but does not submit) an SQE to perform an `openat(2)`.
-/// Returns a pointer to the SQE.
-/// Available since 5.6.
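A relative-timeout sketch for the timeout entries above (assuming `copy_cqe()` and `Cqe.err()` keep their pre-rename behaviour of waiting for, and classifying, one completion):

    var ts: linux.kernel_timespec = .{ .sec = 1, .nsec = 0 };
    _ = try ring.timeout(0x30, &ts, 0, .{}); // zero-initialized flags: relative
    _ = try ring.submit();
    const cqe = try ring.copy_cqe();
    switch (cqe.err()) {
        .TIME => {}, // expired normally
        .CANCELED => {}, // removed via timeout_remove()
        else => {},
    }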
-pub fn openat(
+/// Available since 5.5
+// TODO: can we make the sockaddr and socklen_t combo in our API better?
+// Investigate this.
+pub fn accept(
     self: *IoUring,
     user_data: u64,
     fd: posix.fd_t,
-    path: [*:0]const u8,
-    flags: linux.O,
-    mode: posix.mode_t,
-) !*linux.io_uring_sqe {
+    addr: ?*posix.sockaddr,
+    addrlen: ?*posix.socklen_t,
+    flags: linux.Sock,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_openat(fd, path, flags, mode);
+    sqe.prep_accept(fd, addr, addrlen, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues an openat using direct (registered) file descriptors.
+/// Queues an accept using direct (registered) file descriptors.
 ///
 /// To use an accept direct variant, the application must first have registered
 /// a file table (with register_files). An unused table index will be
@@ -782,83 +785,88 @@ pub fn openat(
 /// flags member, and setting the SQE fd field to the direct descriptor value
 /// rather than the regular file descriptor.
 ///
-/// Available since 5.15
-pub fn openat_direct(
+/// Available since 5.19
+pub fn accept_direct(
     self: *IoUring,
     user_data: u64,
     fd: posix.fd_t,
-    path: [*:0]const u8,
-    flags: linux.O,
-    mode: posix.mode_t,
-    file_index: u32,
-) !*linux.io_uring_sqe {
+    addr: ?*posix.sockaddr,
+    addrlen: ?*posix.socklen_t,
+    flags: linux.Sock,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_openat_direct(fd, path, flags, mode, file_index);
+    sqe.prep_accept_direct(fd, addr, addrlen, flags, constants.FILE_INDEX_ALLOC);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `close(2)`.
-/// Returns a pointer to the SQE.
-/// Available since 5.6.
-pub fn close(self: *IoUring, user_data: u64, fd: posix.fd_t) !*linux.io_uring_sqe {
+/// Queues a multishot accept on a socket.
+///
+/// Multishot variant allows an application to issue a single accept request,
+/// which will repeatedly trigger a CQE when a connection request comes in.
+/// While the IORING_CQE_F_MORE flag is set in the CQE flags, accept will
+/// generate further CQEs.
+///
+/// Available since 5.19
+pub fn accept_multishot(
+    self: *IoUring,
+    user_data: u64,
+    fd: posix.fd_t,
+    addr: ?*posix.sockaddr,
+    addrlen: ?*posix.socklen_t,
+    flags: linux.Sock,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_close(fd);
+    sqe.prep_multishot_accept(fd, addr, addrlen, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues close of registered file descriptor.
-/// Available since 5.15
-pub fn close_direct(self: *IoUring, user_data: u64, file_index: u32) !*linux.io_uring_sqe {
+/// Queues a multishot accept using direct (registered) file descriptors.
+/// Available since 5.19
+pub fn accept_multishot_direct(
+    self: *IoUring,
+    user_data: u64,
+    fd: posix.fd_t,
+    addr: ?*posix.sockaddr,
+    addrlen: ?*posix.socklen_t,
+    flags: linux.Sock,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_close_direct(file_index);
+    sqe.prep_multishot_accept_direct(fd, addr, addrlen, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to register a timeout operation.
+/// Queues (but does not submit) an SQE to remove an existing operation.
 /// Returns a pointer to the SQE.
 ///
-/// The timeout will complete when either the timeout expires, or after the specified number of
-/// events complete (if `count` is greater than `0`).
-///
-/// `flags` may be `0` for a relative timeout, or `IORING_TIMEOUT_ABS` for an absolute timeout.
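A sketch of the F_MORE re-arm rule from the `accept_multishot()` doc comment above (`listen_fd` is assumed; the `more` flag accessor is hypothetical, standing in for IORING_CQE_F_MORE in the new packed `Cqe.Flags`):

    _ = try ring.accept_multishot(0x40, listen_fd, null, null, .{});
    _ = try ring.submit();
    while (true) {
        const cqe = try ring.copy_cqe();
        if (cqe.res < 0) break; // accept failed or was cancelled
        const conn_fd: posix.fd_t = cqe.res;
        _ = conn_fd; // hand the connection off to the application
        // Hypothetical field name: once IORING_CQE_F_MORE is no longer set,
        // the multishot accept is finished and must be re-armed.
        if (!cqe.flags.more) break;
    }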
-///
-/// The completion event result will be `-ETIME` if the timeout completed through expiration,
-/// `0` if the timeout completed after the specified number of events, or `-ECANCELED` if the
-/// timeout was removed before it expired.
+/// The operation is identified by its `user_data`.
 ///
-/// io_uring timeouts use the `CLOCK.MONOTONIC` clock source.
-pub fn timeout(
+/// The completion event result will be `0` if the operation was found and
+/// cancelled successfully. Otherwise:
+/// `-EALREADY` if the operation was found but was already in progress, or
+/// `-ENOENT` if the operation was not found.
+pub fn cancel(
     self: *IoUring,
     user_data: u64,
-    ts: *const linux.kernel_timespec,
-    count: u32,
-    flags: u32,
-) !*linux.io_uring_sqe {
+    cancel_user_data: u64,
+    flags: uflags.AsyncCancel,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_timeout(ts, count, flags);
+    sqe.prep_cancel(cancel_user_data, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to remove an existing timeout operation.
-/// Returns a pointer to the SQE.
-///
-/// The timeout is identified by its `user_data`.
-///
-/// The completion event result will be `0` if the timeout was found and cancelled successfully,
-/// `-EBUSY` if the timeout was found but expiration was already in progress, or
-/// `-ENOENT` if the timeout was not found.
-pub fn timeout_remove(
+/// Queues (but does not submit) an SQE to cancel in-flight operations targeting
+/// the given file descriptor. Returns a pointer to the SQE.
+pub fn cancel_fd(
     self: *IoUring,
     user_data: u64,
-    timeout_user_data: u64,
-    flags: u32,
-) !*linux.io_uring_sqe {
+    fd: linux.fd_t,
+    flags: uflags.AsyncCancel,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_timeout_remove(timeout_user_data, flags);
+    sqe.prep_cancel_fd(fd, flags);
     sqe.user_data = user_data;
     return sqe;
 }
@@ -866,659 +874,2439 @@ pub fn timeout_remove(
 /// Queues (but does not submit) an SQE to add a link timeout operation.
 /// Returns a pointer to the SQE.
 ///
-/// You need to set linux.IOSQE_IO_LINK to flags of the target operation
-/// and then call this method right after the target operation.
+/// You need to set IOSQE_IO_LINK in the flags of the target operation and
+/// then call this method right after the target operation.
 /// See https://lwn.net/Articles/803932/ for detail.
 ///
 /// If the dependent request finishes before the linked timeout, the timeout
 /// is canceled. If the timeout finishes before the dependent request, the
 /// dependent request will be canceled.
 ///
-/// The completion event result of the link_timeout will be
-/// `-ETIME` if the timeout finishes before the dependent request
-/// (in this case, the completion event result of the dependent request will
-/// be `-ECANCELED`), or
+/// The completion event result of the link_timeout will be either:
+/// `-ETIME` if the timeout finishes before the dependent request (in this case,
+/// the completion event result of the dependent request will be `-ECANCELED`), or
 /// `-EALREADY` if the dependent request finishes before the linked timeout.
 pub fn link_timeout(
     self: *IoUring,
     user_data: u64,
     ts: *const linux.kernel_timespec,
-    flags: u32,
-) !*linux.io_uring_sqe {
+    flags: uflags.Timeout,
+) !*Sqe {
     const sqe = try self.get_sqe();
     sqe.prep_link_timeout(ts, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `poll(2)`.
+/// Queues (but does not submit) an SQE to perform a `connect(2)` on a socket.
 /// Returns a pointer to the SQE.
-pub fn poll_add(
+pub fn connect(
     self: *IoUring,
     user_data: u64,
     fd: posix.fd_t,
-    poll_mask: u32,
-) !*linux.io_uring_sqe {
+    addr: *const posix.sockaddr,
+    addrlen: posix.socklen_t,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_poll_add(fd, poll_mask);
+    sqe.prep_connect(fd, addr, addrlen);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to remove an existing poll operation.
+/// Queues (but does not submit) an SQE to perform a `bind(2)` on a socket.
 /// Returns a pointer to the SQE.
-pub fn poll_remove(
+/// Available since 6.11
+pub fn bind(
     self: *IoUring,
     user_data: u64,
-    target_user_data: u64,
-) !*linux.io_uring_sqe {
+    fd: posix.fd_t,
+    addr: *const posix.sockaddr,
+    addrlen: posix.socklen_t,
+    // liburing doesn't have this flag, hence 0 should be passed.
+    // TODO: consider removing this and all flags like this.
+    flags: u32,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_poll_remove(target_user_data);
+    sqe.prep_bind(fd, addr, addrlen, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to update the user data of an existing poll
-/// operation. Returns a pointer to the SQE.
-pub fn poll_update(
+/// Queues (but does not submit) an SQE to perform a `listen(2)` on a socket.
+/// Returns a pointer to the SQE.
+/// Available since 6.11
+pub fn listen(
     self: *IoUring,
     user_data: u64,
-    old_user_data: u64,
-    new_user_data: u64,
-    poll_mask: u32,
+    fd: posix.fd_t,
+    backlog: usize,
+    // liburing doesn't have this flag, hence 0 should be passed.
+    // TODO: consider removing this and all flags like this.
     flags: u32,
-) !*linux.io_uring_sqe {
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_poll_update(old_user_data, new_user_data, poll_mask, flags);
+    sqe.prep_listen(fd, backlog, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform an `fallocate(2)`.
+/// Queues (but does not submit) an SQE to perform an `epoll_wait(2)`.
 /// Returns a pointer to the SQE.
-pub fn fallocate(
+pub fn epoll_wait(
     self: *IoUring,
     user_data: u64,
     fd: posix.fd_t,
-    mode: i32,
-    offset: u64,
-    len: u64,
-) !*linux.io_uring_sqe {
+    events: ?*linux.epoll_event,
+    max_events: u32,
+    flags: linux.Epoll,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_fallocate(fd, mode, offset, len);
+    sqe.prep_epoll_wait(fd, events, max_events, flags);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform an `statx(2)`.
+/// Queues (but does not submit) an SQE to perform an `epoll_ctl(2)`.
 /// Returns a pointer to the SQE.
-pub fn statx(
+pub fn epoll_ctl(
     self: *IoUring,
     user_data: u64,
+    epfd: posix.fd_t,
     fd: posix.fd_t,
-    path: [:0]const u8,
-    flags: u32,
-    mask: u32,
-    buf: *linux.Statx,
-) !*linux.io_uring_sqe {
+    op: linux.EpollOp,
+    ev: ?*linux.epoll_event,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_statx(fd, path, flags, mask, buf);
+    sqe.prep_epoll_ctl(epfd, fd, op, ev);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to remove an existing operation.
-/// Returns a pointer to the SQE.
-///
-/// The operation is identified by its `user_data`.
-///
-/// The completion event result will be `0` if the operation was found and cancelled successfully,
-/// `-EALREADY` if the operation was found but was already in progress, or
-/// `-ENOENT` if the operation was not found.
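A server-setup sketch for the new `bind()`/`listen()` entries (6.11+), assuming `sockaddr.in`'s `family` field defaults to AF.INET as in the current std:

    const fd = try posix.socket(posix.AF.INET, posix.SOCK.STREAM, 0);
    var addr: linux.sockaddr.in = .{
        .port = std.mem.nativeToBig(u16, 8080),
        .addr = 0, // INADDR_ANY
    };
    const b = try ring.bind(0x50, fd, @ptrCast(&addr), @sizeOf(@TypeOf(addr)), 0);
    b.link_with_next_sqe(); // only listen once the bind has completed
    _ = try ring.listen(0x51, fd, 128, 0);
    _ = try ring.submit();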
-pub fn cancel(
+/// Queues (but does not submit) an SQE to update previously registered file
+/// descriptors. Returns a pointer to the SQE.
+pub fn files_update(
     self: *IoUring,
     user_data: u64,
-    cancel_user_data: u64,
-    flags: u32,
-) !*linux.io_uring_sqe {
+    fds: []const linux.fd_t,
+    offset: u32,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_cancel(cancel_user_data, flags);
+    sqe.prep_files_update(fds, offset);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `shutdown(2)`.
+/// Queues (but does not submit) an SQE to perform an `fallocate(2)`.
 /// Returns a pointer to the SQE.
-///
-/// The operation is identified by its `user_data`.
-pub fn shutdown(
+pub fn fallocate(
     self: *IoUring,
     user_data: u64,
-    sockfd: posix.socket_t,
-    how: u32,
-) !*linux.io_uring_sqe {
+    fd: posix.fd_t,
+    mode: i32,
+    offset: u64,
+    len: u64,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_shutdown(sockfd, how);
+    sqe.prep_fallocate(fd, mode, offset, len);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `renameat2(2)`.
+/// Queues (but does not submit) an SQE to perform an `openat(2)`.
 /// Returns a pointer to the SQE.
-pub fn renameat(
+/// Available since 5.6.
+pub fn openat(
     self: *IoUring,
     user_data: u64,
-    old_dir_fd: posix.fd_t,
-    old_path: [*:0]const u8,
-    new_dir_fd: posix.fd_t,
-    new_path: [*:0]const u8,
-    flags: u32,
-) !*linux.io_uring_sqe {
+    fd: posix.fd_t,
+    path: [*:0]const u8,
+    flags: linux.O,
+    mode: posix.mode_t,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_renameat(old_dir_fd, old_path, new_dir_fd, new_path, flags);
+    sqe.prep_openat(fd, path, flags, mode);
     sqe.user_data = user_data;
     return sqe;
 }
+// COMMIT: ignore openat2* for now.
 
-/// Queues (but does not submit) an SQE to perform a `unlinkat(2)`.
-/// Returns a pointer to the SQE.
-pub fn unlinkat(
+/// Queues an openat using direct (registered) file descriptors.
+///
+/// To use an openat direct variant, the application must first have registered
+/// a file table (with register_files()). An unused table index will be
+/// dynamically chosen and returned in the CQE res field.
+///
+/// After creation, they can be used by setting IOSQE_FIXED_FILE in the SQE
+/// flags member, and setting the SQE fd field to the direct descriptor value
+/// rather than the regular file descriptor.
+///
+/// Available since 5.15
+pub fn openat_direct(
     self: *IoUring,
     user_data: u64,
-    dir_fd: posix.fd_t,
+    fd: posix.fd_t,
     path: [*:0]const u8,
-    flags: u32,
-) !*linux.io_uring_sqe {
+    flags: linux.O,
+    mode: posix.mode_t,
+    file_index: u32,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_unlinkat(dir_fd, path, flags);
+    sqe.prep_openat_direct(fd, path, flags, mode, file_index);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `mkdirat(2)`.
+/// Queues (but does not submit) an SQE to perform an `open(2)`.
 /// Returns a pointer to the SQE.
-pub fn mkdirat(
+pub fn open(
     self: *IoUring,
     user_data: u64,
-    dir_fd: posix.fd_t,
     path: [*:0]const u8,
+    flags: linux.O,
     mode: posix.mode_t,
-) !*linux.io_uring_sqe {
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_mkdirat(dir_fd, path, mode);
+    sqe.prep_openat(linux.At.fdcwd, path, flags, mode);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `symlinkat(2)`.
-/// Returns a pointer to the SQE.
-pub fn symlinkat(
+/// Queues an open using direct (registered) file descriptors.
+///
+/// To use an open direct variant, the application must first have registered
+/// a file table (with register_files()). 
An unused table index will be
+/// dynamically chosen and returned in the CQE res field.
+///
+/// After creation, they can be used by setting IOSQE_FIXED_FILE in the SQE
+/// flags member, and setting the SQE fd field to the direct descriptor value
+/// rather than the regular file descriptor.
+pub fn open_direct(
     self: *IoUring,
     user_data: u64,
-    target: [*:0]const u8,
-    new_dir_fd: posix.fd_t,
-    link_path: [*:0]const u8,
-) !*linux.io_uring_sqe {
+    path: [*:0]const u8,
+    flags: linux.O,
+    mode: posix.mode_t,
+    file_index: u32,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_symlinkat(target, new_dir_fd, link_path);
+    sqe.prep_openat_direct(linux.At.fdcwd, path, flags, mode, file_index);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `linkat(2)`.
+/// Queues (but does not submit) an SQE to perform a `close(2)`.
 /// Returns a pointer to the SQE.
-pub fn linkat(
-    self: *IoUring,
-    user_data: u64,
-    old_dir_fd: posix.fd_t,
-    old_path: [*:0]const u8,
-    new_dir_fd: posix.fd_t,
-    new_path: [*:0]const u8,
-    flags: u32,
-) !*linux.io_uring_sqe {
+/// Available since 5.6.
+pub fn close(self: *IoUring, user_data: u64, fd: posix.fd_t) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_linkat(old_dir_fd, old_path, new_dir_fd, new_path, flags);
+    sqe.prep_close(fd);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues close of registered file descriptor.
+/// Available since 5.15
+pub fn close_direct(self: *IoUring, user_data: u64, file_index: u32) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_close_direct(file_index);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to provide a group of buffers used for commands that read/receive data.
+/// Queues (but does not submit) an SQE to perform an `statx(2)`.
 /// Returns a pointer to the SQE.
-///
-/// Provided buffers can be used in `read`, `recv` or `recvmsg` commands via .buffer_selection.
-///
-/// The kernel expects a contiguous block of memory of size (buffers_count * buffer_size).
-pub fn provide_buffers(
+pub fn statx(
     self: *IoUring,
     user_data: u64,
-    buffers: [*]u8,
-    buffer_size: usize,
-    buffers_count: usize,
-    group_id: usize,
-    buffer_id: usize,
-) !*linux.io_uring_sqe {
+    fd: posix.fd_t,
+    path: [:0]const u8,
+    flags: linux.At,
+    mask: linux.Statx.Mask,
+    buf: *linux.Statx,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_provide_buffers(buffers, buffer_size, buffers_count, group_id, buffer_id);
+    sqe.prep_statx(fd, path, flags, mask, buf);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to remove a group of provided buffers.
+// COMMIT: don't implement f/madvise64 for now; I doubt it is used by a lot of
+// people in practice.
+/// Queues (but does not submit) an SQE to perform a `posix_fadvise(2)`.
 /// Returns a pointer to the SQE.
-pub fn remove_buffers(
+pub fn fadvice(
     self: *IoUring,
     user_data: u64,
-    buffers_count: usize,
-    group_id: usize,
-) !*linux.io_uring_sqe {
+    fd: posix.fd_t,
+    offset: u64,
+    len: u32,
+    advice: linux.Fadvice,
+) !*Sqe {
     const sqe = try self.get_sqe();
-    sqe.prep_remove_buffers(buffers_count, group_id);
+    sqe.prep_fadvice(fd, offset, len, advice);
     sqe.user_data = user_data;
     return sqe;
 }
 
-/// Queues (but does not submit) an SQE to perform a `waitid(2)`.
+/// Queues (but does not submit) an SQE to perform an `madvise(2)`.
 /// Returns a pointer to the SQE.
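A sketch of the typed `statx()` call above (the `mode`/`mtime` mask fields follow the packed-struct style used elsewhere in this patch; `dir_fd` is assumed):

    var stx: linux.Statx = undefined;
    _ = try ring.statx(0x60, dir_fd, "build.zig", .{}, .{
        .mode = true,
        .mtime = true,
    }, &stx);
    _ = try ring.submit();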
-pub fn waitid( +pub fn madvice( self: *IoUring, user_data: u64, - id_type: linux.P, - id: i32, - infop: *linux.siginfo_t, - options: u32, - flags: u32, -) !*linux.io_uring_sqe { + memory: []u8, + advice: linux.Fadvice, +) !*Sqe { const sqe = try self.get_sqe(); - sqe.prep_waitid(id_type, id, infop, options, flags); + sqe.prep_madvice(memory, advice); sqe.user_data = user_data; return sqe; } -/// Registers an array of file descriptors. -/// Every time a file descriptor is put in an SQE and submitted to the kernel, the kernel must -/// retrieve a reference to the file, and once I/O has completed the file reference must be -/// dropped. The atomic nature of this file reference can be a slowdown for high IOPS workloads. -/// This slowdown can be avoided by pre-registering file descriptors. -/// To refer to a registered file descriptor, IOSQE_FIXED_FILE must be set in the SQE's flags, -/// and the SQE's fd must be set to the index of the file descriptor in the registered array. -/// Registering file descriptors will wait for the ring to idle. -/// Files are automatically unregistered by the kernel when the ring is torn down. -/// An application need unregister only if it wants to register a new array of file descriptors. -pub fn register_files(self: *IoUring, fds: []const posix.fd_t) !void { - assert(self.fd >= 0); - const res = linux.io_uring_register( - self.fd, - .REGISTER_FILES, - @as(*const anyopaque, @ptrCast(fds.ptr)), - @as(u32, @intCast(fds.len)), - ); - try handle_registration_result(res); -} - -/// Updates registered file descriptors. -/// -/// Updates are applied starting at the provided offset in the original file descriptors slice. -/// There are three kind of updates: -/// * turning a sparse entry (where the fd is -1) into a real one -/// * removing an existing entry (set the fd to -1) -/// * replacing an existing entry with a new fd -/// Adding new file descriptors must be done with `register_files`. -pub fn register_files_update(self: *IoUring, offset: u32, fds: []const posix.fd_t) !void { - assert(self.fd >= 0); - - const FilesUpdate = extern struct { - offset: u32, - resv: u32, - fds: u64 align(8), - }; - var update = FilesUpdate{ - .offset = offset, - .resv = @as(u32, 0), - .fds = @as(u64, @intFromPtr(fds.ptr)), - }; - - const res = linux.io_uring_register( - self.fd, - .REGISTER_FILES_UPDATE, - @as(*const anyopaque, @ptrCast(&update)), - @as(u32, @intCast(fds.len)), - ); - try handle_registration_result(res); +/// Queues (but does not submit) an SQE to perform a `send(2)`. +/// Returns a pointer to the SQE. +/// Available since 5.6 +pub fn send( + self: *IoUring, + user_data: u64, + sockfd: posix.fd_t, + buffer: []const u8, + flags: linux.Msg, +) !*Sqe { + const sqe = try self.get_sqe(); + sqe.prep_send(sockfd, buffer, flags); + sqe.user_data = user_data; + return sqe; } -/// Registers an empty (-1) file table of `nr_files` number of file descriptors. -pub fn register_files_sparse(self: *IoUring, nr_files: u32) !void { - assert(self.fd >= 0); - - const reg = &linux.io_uring_rsrc_register{ - .nr = nr_files, - .flags = linux.IORING_RSRC_REGISTER_SPARSE, - .resv2 = 0, - .data = 0, - .tags = 0, - }; - - const res = linux.io_uring_register( - self.fd, - .REGISTER_FILES2, - @ptrCast(reg), - @as(u32, @sizeOf(linux.io_uring_rsrc_register)), - ); - - return handle_registration_result(res); +/// Queues (but does not submit) an SQE to perform a bundled `send(2)`. +/// Returns a pointer to the SQE. 
+pub fn send_bundle(
+    self: *IoUring,
+    user_data: u64,
+    sockfd: posix.fd_t,
+    len: u64,
+    flags: linux.Msg,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_send_bundle(sockfd, len, flags);
+    sqe.user_data = user_data;
+    return sqe;
 }
 
-// Registers range for fixed file allocations.
-// Available since 6.0
-pub fn register_file_alloc_range(self: *IoUring, offset: u32, len: u32) !void {
-    assert(self.fd >= 0);
-
-    const range = &linux.io_uring_file_index_range{
-        .off = offset,
-        .len = len,
-        .resv = 0,
-    };
-
-    const res = linux.io_uring_register(
-        self.fd,
-        .REGISTER_FILE_ALLOC_RANGE,
-        @ptrCast(range),
-        @as(u32, @sizeOf(linux.io_uring_file_index_range)),
-    );
-
-    return handle_registration_result(res);
+/// Queues (but does not submit) an SQE to perform a `sendto(2)`.
+/// Returns a pointer to the SQE.
+pub fn send_to(
+    self: *IoUring,
+    user_data: u64,
+    sockfd: posix.fd_t,
+    buffer: []const u8,
+    flags: linux.Msg,
+    addr: *const linux.sockaddr,
+    addrlen: linux.socklen_t,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_send_to(sockfd, buffer, flags, addr, addrlen);
+    sqe.user_data = user_data;
+    return sqe;
 }
 
-/// Registers the file descriptor for an eventfd that will be notified of completion events on
-/// an io_uring instance.
-/// Only a single a eventfd can be registered at any given point in time.
-pub fn register_eventfd(self: *IoUring, fd: posix.fd_t) !void {
-    assert(self.fd >= 0);
-    const res = linux.io_uring_register(
-        self.fd,
-        .REGISTER_EVENTFD,
-        @as(*const anyopaque, @ptrCast(&fd)),
-        1,
-    );
-    try handle_registration_result(res);
+/// Queues (but does not submit) an SQE to perform an async zerocopy `send(2)`.
+///
+/// This operation will most likely produce two CQEs. The flags field of the
+/// first cqe will likely contain IORING_CQE_F_MORE, which means that there will
+/// be a second cqe with the user_data field set to the same value. The user
+/// must not modify the data buffer until the notification is posted. The first
+/// cqe follows the usual rules and so its res field will contain the number of
+/// bytes sent or a negative error code. The notification's res field will be
+/// set to zero and the flags field will contain IORING_CQE_F_NOTIF. The two
+/// step model is needed because the kernel may hold on to buffers for a long
+/// time, e.g. waiting for a TCP ACK. Notifications are responsible for
+/// controlling the lifetime of the buffers. Even errored requests may generate
+/// a notification.
+///
+/// Available since 6.0
+pub fn send_zc(
+    self: *IoUring,
+    user_data: u64,
+    sockfd: posix.fd_t,
+    buffer: []const u8,
+    send_flags: linux.Msg,
+    zc_flags: Sqe.SendRecv,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_send_zc(sockfd, buffer, send_flags, zc_flags);
+    sqe.user_data = user_data;
+    return sqe;
 }
 
-/// Registers the file descriptor for an eventfd that will be notified of completion events on
-/// an io_uring instance. Notifications are only posted for events that complete in an async manner.
-/// This means that events that complete inline while being submitted do not trigger a notification event.
-/// Only a single eventfd can be registered at any given point in time.
-pub fn register_eventfd_async(self: *IoUring, fd: posix.fd_t) !void {
-    assert(self.fd >= 0);
-    const res = linux.io_uring_register(
-        self.fd,
-        .REGISTER_EVENTFD_ASYNC,
-        @as(*const anyopaque, @ptrCast(&fd)),
-        1,
-    );
-    try handle_registration_result(res);
+/// Queues (but does not submit) an SQE to perform an async zerocopy `send(2)`.
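The two-CQE lifetime rule from the `send_zc()` doc comment above, as a sketch with only this operation in flight (`more`/`notif` are hypothetical packed-flag names standing in for IORING_CQE_F_MORE / IORING_CQE_F_NOTIF):

    _ = try ring.send_zc(0x70, sock_fd, payload, .{}, .{});
    _ = try ring.submit();
    const first = try ring.copy_cqe(); // res: bytes sent or a negative error
    if (first.flags.more) {
        // `payload` must stay untouched until the notification lands.
        const notif = try ring.copy_cqe();
        std.debug.assert(notif.flags.notif); // buffer may be reused now
    }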
+/// Returns a pointer to the SQE.
+/// Available since 6.0
+pub fn send_zc_fixed(
+    self: *IoUring,
+    user_data: u64,
+    sockfd: posix.fd_t,
+    buffer: []const u8,
+    send_flags: linux.Msg,
+    zc_flags: Sqe.SendRecv,
+    buf_index: u16,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_send_zc_fixed(sockfd, buffer, send_flags, zc_flags, buf_index);
+    sqe.user_data = user_data;
+    return sqe;
 }
 
-/// Unregister the registered eventfd file descriptor.
-pub fn unregister_eventfd(self: *IoUring) !void {
-    assert(self.fd >= 0);
-    const res = linux.io_uring_register(
-        self.fd,
-        .UNREGISTER_EVENTFD,
-        null,
-        0,
-    );
-    try handle_registration_result(res);
+/// Queues (but does not submit) an SQE to perform an async zerocopy `sendmsg(2)`.
+/// Returns a pointer to the SQE.
+/// Available since 6.1
+pub fn sendmsg_zc(
+    self: *IoUring,
+    user_data: u64,
+    fd: posix.fd_t,
+    msg: *const posix.msghdr_const,
+    flags: linux.Msg,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_sendmsg_zc(fd, msg, flags);
+    sqe.user_data = user_data;
+    return sqe;
 }
 
-pub fn register_napi(self: *IoUring, napi: *linux.io_uring_napi) !void {
-    assert(self.fd >= 0);
-    const res = linux.io_uring_register(self.fd, .REGISTER_NAPI, napi, 1);
-    try handle_registration_result(res);
+/// Queues (but does not submit) an SQE to perform a fixed async zerocopy
+/// `sendmsg(2)`. Returns a pointer to the SQE.
+pub fn sendmsg_zc_fixed(
+    self: *IoUring,
+    user_data: u64,
+    fd: posix.fd_t,
+    msg: *const posix.msghdr_const,
+    flags: linux.Msg,
+    buf_index: u16,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_sendmsg_zc_fixed(fd, msg, flags, buf_index);
+    sqe.user_data = user_data;
+    return sqe;
 }
 
-pub fn unregister_napi(self: *IoUring, napi: *linux.io_uring_napi) !void {
-    assert(self.fd >= 0);
-    const res = linux.io_uring_register(self.fd, .UNREGISTER_NAPI, napi, 1);
-    try handle_registration_result(res);
+/// Queues (but does not submit) an SQE to perform a `recv(2)`.
+/// Returns a pointer to the SQE.
+/// Available since 5.6
+pub fn recv(
+    self: *IoUring,
+    user_data: u64,
+    fd: posix.fd_t,
+    buffer: RecvBuffer,
+    flags: linux.Msg,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    switch (buffer) {
+        .buffer => |slice| sqe.prep_recv(fd, slice, flags),
+        .buffer_selection => |selection| {
+            sqe.prep_rw(.recv, fd, 0, selection.len, 0);
+            sqe.rw_flags = @bitCast(flags);
+            sqe.flags.buffer_select = true;
+            sqe.buf_index = selection.group_id;
+        },
+    }
+    sqe.user_data = user_data;
+    return sqe;
 }
 
-/// Registers an array of buffers for use with `read_fixed` and `write_fixed`.
-pub fn register_buffers(self: *IoUring, buffers: []const posix.iovec) !void {
-    assert(self.fd >= 0);
-    const res = linux.io_uring_register(
-        self.fd,
-        .REGISTER_BUFFERS,
-        buffers.ptr,
-        @as(u32, @intCast(buffers.len)),
-    );
-    try handle_registration_result(res);
+/// Queues (but does not submit) an SQE to perform a multishot `recv(2)`.
+/// Returns a pointer to the SQE.
+pub fn recv_multishot(
+    self: *IoUring,
+    user_data: u64,
+    sockfd: posix.fd_t,
+    buffer: RecvBuffer,
+    flags: linux.Msg,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    switch (buffer) {
+        .buffer => |slice| sqe.prep_recv_multishot(sockfd, slice, flags),
+        .buffer_selection => |selection| {
+            sqe.prep_rw(.recv, sockfd, 0, selection.len, 0);
+            sqe.ioprio = .{ .send_recv = .{ .recv_multishot = true } };
+            sqe.rw_flags = @bitCast(flags);
+            sqe.flags.buffer_select = true;
+            sqe.buf_index = selection.group_id;
+        },
+    }
+    sqe.user_data = user_data;
+    return sqe;
 }
 
-/// Unregister the registered buffers.
-pub fn unregister_buffers(self: *IoUring) !void {
-    assert(self.fd >= 0);
-    const res = linux.io_uring_register(self.fd, .UNREGISTER_BUFFERS, null, 0);
-    switch (linux.E.init(res)) {
-        .SUCCESS => {},
-        .NXIO => return error.BuffersNotRegistered,
-        else => |errno| return posix.unexpectedErrno(errno),
-    }
+/// Queues (but does not submit) an SQE to provide a group of buffers used for
+/// commands that read/receive data. Returns a pointer to the SQE.
+///
+/// Provided buffers can be used in `read`, `recv` or `recvmsg` commands via
+/// buffer_selection.
+///
+/// The kernel expects a contiguous block of memory of size (buffers_count *
+/// buffer_size).
+// TODO: why not take a slice instead of a pointer plus `buffers_count`?
+pub fn provide_buffers(
+    self: *IoUring,
+    user_data: u64,
+    buffers: [*]u8,
+    buffer_size: usize,
+    buffers_count: usize,
+    group_id: usize,
+    buffer_id: usize,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_provide_buffers(buffers, buffer_size, buffers_count, group_id, buffer_id);
+    sqe.user_data = user_data;
+    return sqe;
 }
 
-/// Returns a io_uring_probe which is used to probe the capabilities of the
-/// io_uring subsystem of the running kernel. The io_uring_probe contains the
-/// list of supported operations.
-pub fn get_probe(self: *IoUring) !linux.io_uring_probe {
-    var probe = mem.zeroInit(linux.io_uring_probe, .{});
-    const res = linux.io_uring_register(self.fd, .REGISTER_PROBE, &probe, probe.ops.len);
-    try handle_register_buf_ring_result(res);
-    return probe;
+/// Queues (but does not submit) an SQE to remove a group of provided buffers.
+/// Returns a pointer to the SQE.
+pub fn remove_buffers(
+    self: *IoUring,
+    user_data: u64,
+    buffers_count: usize,
+    group_id: usize,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_remove_buffers(buffers_count, group_id);
+    sqe.user_data = user_data;
+    return sqe;
 }
 
-fn handle_registration_result(res: usize) !void {
-    switch (linux.E.init(res)) {
-        .SUCCESS => {},
-        // One or more fds in the array are invalid, or the kernel does not support sparse sets:
-        .BADF => return error.FileDescriptorInvalid,
-        .BUSY => return error.FilesAlreadyRegistered,
-        .INVAL => return error.FilesEmpty,
-        // Adding `nr_args` file references would exceed the maximum allowed number of files the
-        // user is allowed to have according to the per-user RLIMIT_NOFILE resource limit and
-        // the CAP_SYS_RESOURCE capability is not set, or `nr_args` exceeds the maximum allowed
-        // for a fixed file set (older kernels have a limit of 1024 files vs 64K files):
+/// Queues (but does not submit) an SQE to perform a `shutdown(2)`.
+/// Returns a pointer to the SQE.
+pub fn shutdown(
+    self: *IoUring,
+    user_data: u64,
+    sockfd: posix.socket_t,
+    how: linux.Shut,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_shutdown(sockfd, how);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to perform an `unlinkat(2)`.
+/// Returns a pointer to the SQE.
+pub fn unlinkat(
+    self: *IoUring,
+    user_data: u64,
+    dir_fd: posix.fd_t,
+    path: [*:0]const u8,
+    flags: linux.At,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_unlinkat(dir_fd, path, flags);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to perform an `unlink(2)`.
+/// Returns a pointer to the SQE.
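A buffer-group sketch pairing `provide_buffers()` above with a buffer-selecting `recv()` (`sock_fd` is assumed, and the pool must stay alive while registered):

    const group_id: u16 = 1;
    var pool: [8][1024]u8 = undefined;
    _ = try ring.provide_buffers(0x80, @ptrCast(&pool), 1024, 8, group_id, 0);
    _ = try ring.submit();
    // Let the kernel pick one of the 8 buffers above for this recv:
    _ = try ring.recv(0x81, sock_fd, .{
        .buffer_selection = .{ .group_id = group_id, .len = 1024 },
    }, .{});
    _ = try ring.submit();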
+pub fn unlink(
+    self: *IoUring,
+    user_data: u64,
+    path: [*:0]const u8,
+    flags: linux.At,
+) !*Sqe {
+    return try self.unlinkat(user_data, linux.At.fdcwd, path, flags);
+}
+
+/// Queues (but does not submit) an SQE to perform a `renameat2(2)`.
+/// Returns a pointer to the SQE.
+pub fn renameat(
+    self: *IoUring,
+    user_data: u64,
+    old_dir_fd: posix.fd_t,
+    old_path: [*:0]const u8,
+    new_dir_fd: posix.fd_t,
+    new_path: [*:0]const u8,
+    flags: linux.Rename,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_renameat(old_dir_fd, old_path, new_dir_fd, new_path, flags);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to perform a `rename(2)`.
+/// Returns a pointer to the SQE.
+pub fn rename(
+    self: *IoUring,
+    user_data: u64,
+    old_path: [*:0]const u8,
+    new_path: [*:0]const u8,
+    flags: linux.Rename,
+) !*Sqe {
+    return try self.renameat(user_data, linux.At.fdcwd, old_path, linux.At.fdcwd, new_path, flags);
+}
+
+/// Queues (but does not submit) an SQE to perform a `sync_file_range(2)`.
+/// Returns a pointer to the SQE.
+pub fn sync_file_range(
+    self: *IoUring,
+    user_data: u64,
+    fd: posix.fd_t,
+    len: u32,
+    offset: u64,
+    flags: linux.SyncFileRange,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_sync_file_range(fd, len, offset, flags);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to perform a `mkdirat(2)`.
+/// Returns a pointer to the SQE.
+pub fn mkdirat(
+    self: *IoUring,
+    user_data: u64,
+    dir_fd: posix.fd_t,
+    path: [*:0]const u8,
+    mode: posix.mode_t,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_mkdirat(dir_fd, path, mode);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to perform a `mkdir(2)`.
+/// Returns a pointer to the SQE.
+pub fn mkdir(
+    self: *IoUring,
+    user_data: u64,
+    path: [*:0]const u8,
+    mode: posix.mode_t,
+) !*Sqe {
+    return try self.mkdirat(user_data, linux.At.fdcwd, path, mode);
+}
+
+/// Queues (but does not submit) an SQE to perform a `symlinkat(2)`.
+/// Returns a pointer to the SQE.
+pub fn symlinkat(
+    self: *IoUring,
+    user_data: u64,
+    target: [*:0]const u8,
+    new_dir_fd: posix.fd_t,
+    link_path: [*:0]const u8,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_symlinkat(target, new_dir_fd, link_path);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to perform a `symlink(2)`.
+/// Returns a pointer to the SQE.
+pub fn symlink(
+    self: *IoUring,
+    user_data: u64,
+    target: [*:0]const u8,
+    link_path: [*:0]const u8,
+) !*Sqe {
+    return try self.symlinkat(user_data, target, linux.At.fdcwd, link_path);
+}
+
+/// Queues (but does not submit) an SQE to perform a `linkat(2)`.
+/// Returns a pointer to the SQE.
+pub fn linkat(
+    self: *IoUring,
+    user_data: u64,
+    old_dir_fd: posix.fd_t,
+    old_path: [*:0]const u8,
+    new_dir_fd: posix.fd_t,
+    new_path: [*:0]const u8,
+    flags: linux.At,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_linkat(old_dir_fd, old_path, new_dir_fd, new_path, flags);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to perform a `link(2)`.
+/// Returns a pointer to the SQE.
+pub fn link(
+    self: *IoUring,
+    user_data: u64,
+    old_path: [*:0]const u8,
+    new_path: [*:0]const u8,
+    flags: linux.At,
+) !*Sqe {
+    return try self.linkat(user_data, linux.At.fdcwd, old_path, linux.At.fdcwd, new_path, flags);
+}
+
+/// Queues (but does not submit) an SQE to send a CQE to an io_uring file
+/// descriptor. The use case for this can be anything from simply waking up
+/// someone waiting on the targeted ring to passing messages between the two
+/// rings.
+/// Returns a pointer to the SQE.
+pub fn msg_ring(
+    self: *IoUring,
+    user_data: u64,
+    fd: linux.fd_t,
+    len: u32,
+    data: u64,
+    flags: uflags.MsgRing,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_msg_ring(fd, len, data, flags);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to send a CQE to an io_uring file
+/// descriptor. See `msg_ring()`.
+/// This has an additional `cqe_flags` parameter that allows you to set the
+/// CQE's `flags` field when sending a message.
+/// Returns a pointer to the SQE.
+pub fn msg_ring_cqe_flags(
+    self: *IoUring,
+    user_data: u64,
+    fd: linux.fd_t,
+    len: u32,
+    data: u64,
+    msg_flags: uflags.MsgRing,
+    cqe_flags: Cqe.Flags,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_msg_ring_cqe_flags(
+        fd,
+        len,
+        data,
+        msg_flags,
+        cqe_flags,
+    );
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to send a direct file descriptor to
+/// another ring.
+/// Returns a pointer to the SQE.
+pub fn msg_ring_fd(
+    self: *IoUring,
+    user_data: u64,
+    fd: linux.fd_t,
+    source_fd: linux.fd_t,
+    target_fd: linux.fd_t,
+    data: u64,
+    flags: uflags.MsgRing,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_msg_ring_fd(
+        fd,
+        source_fd,
+        target_fd,
+        data,
+        flags,
+    );
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to send a direct file descriptor to
+/// another ring. See `msg_ring_fd()`.
+/// `msg_ring_fd_alloc()` is similar to `msg_ring_fd()`, but doesn't specify a
+/// target_fd for the descriptor. Instead, this target_fd is allocated in the
+/// target ring and returned in the CQE res field.
+/// Returns a pointer to the SQE.
+pub fn msg_ring_fd_alloc(
+    self: *IoUring,
+    user_data: u64,
+    fd: linux.fd_t,
+    source_fd: linux.fd_t,
+    data: u64,
+    flags: uflags.MsgRing,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_msg_ring_fd_alloc(
+        fd,
+        source_fd,
+        data,
+        flags,
+    );
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to get an extended attribute value.
+/// The `from` parameter selects the source (path or fd) to get the extended
+/// attribute from.
+/// Returns a pointer to the SQE.
+pub fn getxattr(
+    self: *IoUring,
+    user_data: u64,
+    name: []const u8,
+    value: []const u8,
+    from: XattrSource,
+    len: u32,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    switch (from) {
+        .path => |path_| sqe.prep_getxattr(name, value, path_, len),
+        .fd => |fd_| sqe.prep_fgetxattr(name, value, fd_, len),
+    }
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to set an extended attribute value.
+/// The `on` parameter selects the target (path or fd) to set the extended
+/// attribute on.
+/// Returns a pointer to the SQE.
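A wake-up sketch for `msg_ring()` between two rings (`other` is a second, independently initialized IoUring):

    _ = try ring.msg_ring(0x90, other.fd, 0, 0xcafe, .{});
    _ = try ring.submit();
    // `other` receives a CQE whose res is the len argument (0 here) and
    // whose user_data is the data argument:
    const cqe = try other.copy_cqe();
    std.debug.assert(cqe.user_data == 0xcafe);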
+pub fn setxattr(
+    self: *IoUring,
+    user_data: u64,
+    name: []const u8,
+    value: []const u8,
+    on: XattrSource,
+    flags: linux.SetXattr,
+    len: u32,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    switch (on) {
+        .path => |path_| sqe.prep_setxattr(name, value, path_, flags, len),
+        .fd => |fd_| sqe.prep_fsetxattr(name, value, fd_, flags, len),
+    }
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Prepares a socket creation request.
+/// New socket fd will be returned in completion result.
+/// Available since 5.19
+pub fn socket(
+    self: *IoUring,
+    user_data: u64,
+    domain: linux.Af,
+    socket_type: linux.Sock,
+    protocol: linux.IpProto,
+    flags: u32, // currently unused
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_socket(domain, socket_type, protocol, flags);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Prepares a socket creation request for registered file at index `file_index`.
+/// Available since 5.19
+pub fn socket_direct(
+    self: *IoUring,
+    user_data: u64,
+    domain: linux.Af,
+    socket_type: linux.Sock,
+    protocol: linux.IpProto,
+    flags: u32, // currently unused
+    file_index: u32,
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_socket_direct(domain, socket_type, protocol, flags, file_index);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Prepares a socket creation request for registered file, index chosen by
+/// kernel (file index alloc).
+/// File index will be returned in CQE res field.
+/// Available since 5.19
+pub fn socket_direct_alloc(
+    self: *IoUring,
+    user_data: u64,
+    domain: linux.Af,
+    socket_type: linux.Sock,
+    protocol: linux.IpProto,
+    flags: u32, // currently unused
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_socket_direct_alloc(domain, socket_type, protocol, flags);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Prepares a cmd request for a socket.
+/// See: https://man7.org/linux/man-pages/man3/io_uring_prep_cmd.3.html
+/// Available since 6.7.
+pub fn cmd_sock(
+    self: *IoUring,
+    user_data: u64,
+    cmd_op: SocketOp,
+    fd: linux.fd_t,
+    level: linux.Sol,
+    optname: linux.So,
+    optval: u64, // pointer to the option value
+    optlen: u32, // size of the option value
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_cmd_sock(cmd_op, fd, level, optname, optval, optlen);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Queues (but does not submit) an SQE to perform a `waitid(2)`.
+/// Returns a pointer to the SQE.
+pub fn waitid(
+    self: *IoUring,
+    user_data: u64,
+    id_type: linux.P,
+    id: i32,
+    infop: *linux.siginfo_t,
+    options: linux.W,
+    flags: u32, // currently unused; 0 should be passed
+) !*Sqe {
+    const sqe = try self.get_sqe();
+    sqe.prep_waitid(id_type, id, infop, options, flags);
+    sqe.user_data = user_data;
+    return sqe;
+}
+
+/// Registers an empty (sparse) buffer table with `nr` entries.
+pub fn register_buffers_sparse(self: *IoUring, nr: u32) !void {
+    assert(self.fd >= 0);
+
+    const reg: RsrcRegister = .{
+        .flags = .{ .register_sparse = true },
+        .nr = nr,
+    };
+
+    const res = linux.io_uring_register(self.fd, .register_buffers2, &reg, @sizeOf(RsrcRegister));
+    try handle_registration_result(res);
+}
+
+/// Registers an array of buffers for use with `read_fixed`, `readv_fixed`,
+/// `write_fixed` and `writev_fixed`.
+pub fn register_buffers(self: *IoUring, buffers: []const posix.iovec) !void {
+    assert(self.fd >= 0);
+
+    const res = linux.io_uring_register(self.fd, .register_buffers, buffers.ptr, @intCast(buffers.len));
+    try handle_registration_result(res);
+}
+
+/// Unregister the registered buffers.
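A fixed-buffer sketch tying `register_buffers()` above to `read_fixed()` from earlier in this patch (the buffer must outlive the registration; `fd` is assumed):

    var buf: [4096]u8 = undefined;
    const iovs = [_]posix.iovec{.{ .base = &buf, .len = buf.len }};
    try ring.register_buffers(&iovs);
    // buffer_index 0 refers to iovs[0] registered above:
    _ = try ring.read_fixed(0xa0, fd, .{ .buffer = &buf }, 0, 0);
    _ = try ring.submit();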
+pub fn unregister_buffers(self: *IoUring) !void {
+    assert(self.fd >= 0);
+    const res = linux.io_uring_register(self.fd, .unregister_buffers, null, 0);
+    switch (linux.E.init(res)) {
+        .SUCCESS => {},
+        .NXIO => return error.BuffersNotRegistered,
+        else => |errno| return posix.unexpectedErrno(errno),
+    }
+}
+
+/// Updates registered file descriptors.
+///
+/// Updates are applied starting at the provided offset in the original file
+/// descriptors slice.
+/// There are three kinds of updates:
+/// * turning a sparse entry (where the fd is -1) into a real one
+/// * removing an existing entry (set the fd to -1)
+/// * replacing an existing entry with a new fd
+///
+/// Adding new file descriptors must be done with `register_files`.
+pub fn register_files_update(self: *IoUring, offset: u32, fds: []const posix.fd_t) !void {
+    assert(self.fd >= 0);
+
+    var update = mem.zeroInit(RsrcUpdate, .{
+        .offset = offset,
+        .data = @intFromPtr(fds.ptr),
+    });
+
+    const res = linux.io_uring_register(self.fd, .register_files_update, &update, @intCast(fds.len));
+    try handle_registration_result(res);
+}
+
+/// Registers an empty (-1) file table of `nr_files` number of file descriptors.
+pub fn register_files_sparse(self: *IoUring, nr_files: u32) !void {
+    assert(self.fd >= 0);
+
+    const reg = mem.zeroInit(RsrcRegister, .{
+        .nr = nr_files,
+        .flags = .{ .register_sparse = true },
+    });
+
+    const res = linux.io_uring_register(self.fd, .register_files2, &reg, @sizeOf(RsrcRegister));
+
+    return handle_registration_result(res);
+}
+
+/// Registers an array of file descriptors.
+///
+/// Every time a file descriptor is put in an SQE and submitted to the kernel,
+/// the kernel must retrieve a reference to the file, and once I/O has
+/// completed, the file reference must be dropped. The atomic nature of this
+/// file reference can be a slowdown for high IOPS workloads. This slowdown can
+/// be avoided by pre-registering file descriptors.
+///
+/// To refer to a registered file descriptor, IOSQE_FIXED_FILE must be set in
+/// the SQE's flags, and the SQE's fd must be set to the index of the file
+/// descriptor in the registered array.
+///
+/// Registering file descriptors will wait for the ring to idle and files are
+/// automatically unregistered by the kernel when the ring is torn down.
+///
+/// An application need only unregister if it wants to register a new array of
+/// file descriptors.
+pub fn register_files(self: *IoUring, fds: []const posix.fd_t) !void {
+    assert(self.fd >= 0);
+    const res = linux.io_uring_register(self.fd, .register_files, fds.ptr, @intCast(fds.len));
+    try handle_registration_result(res);
+}
+
+/// Unregisters all registered file descriptors previously associated with the
+/// ring.
+pub fn unregister_files(self: *IoUring) !void {
+    assert(self.fd >= 0);
+    const res = linux.io_uring_register(self.fd, .unregister_files, null, 0);
+    switch (linux.E.init(res)) {
+        .SUCCESS => {},
+        .NXIO => return error.FilesNotRegistered,
+        else => |errno| return posix.unexpectedErrno(errno),
+    }
+}
+
+/// Registers the file descriptor for an eventfd that will be notified of
+/// completion events on an io_uring instance.
+/// Only a single eventfd can be registered at any given point in time.
+pub fn register_eventfd(self: *IoUring, fd: posix.fd_t) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .register_eventfd, &fd, 1); + try handle_registration_result(res); +} + +/// Registers the file descriptor for an eventfd that will be notified of +/// completion events on an io_uring instance. Notifications are only posted +/// for events that complete in an async manner. This means that events that +/// complete inline while being submitted do not trigger a notification event. +/// Only a single eventfd can be registered at any given point in time. +pub fn register_eventfd_async(self: *IoUring, fd: posix.fd_t) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .register_eventfd_async, &fd, 1); + try handle_registration_result(res); +} + +/// Unregister the registered eventfd file descriptor. +pub fn unregister_eventfd(self: *IoUring) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .unregister_eventfd, null, 0); + try handle_registration_result(res); +} + +pub fn register_probe(self: *IoUring, probe: []Probe) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .register_probe, probe.ptr, @intCast(probe.len)); + try handle_registration_result(res); +} + +/// See https://github.com/axboe/liburing/issues/357 for how to use personality +/// matches `io_uring_register_personality()` in liburing +pub fn register_personality(self: *IoUring) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .register_personality, null, 0); + try handle_registration_result(res); +} + +pub fn unregister_personality(self: *IoUring, credential_id: u32) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .unregister_personality, null, credential_id); + try handle_registration_result(res); +} + +pub fn register_restrictions(self: *IoUring, restriction: []Restriction) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .register_restrictions, restriction.ptr, @intCast(restriction.len)); + try handle_registration_result(res); +} + +pub fn enable_rings(self: *IoUring) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .register_enable_rings, null, 0); + try handle_registration_result(res); +} + +pub fn register_iowq_aff(self: *IoUring, cpusz: u32, mask: *linux.cpu_set_t) !void { + assert(self.fd >= 0); + + if (cpusz >= math.maxInt(u32)) return error.ArgumentsInvalid; + + const res = linux.io_uring_register(self.fd, .register_iowq_aff, mask, cpusz); + try handle_registration_result(res); +} + +pub fn unregister_iowq_aff(self: *IoUring) !void { + assert(self.fd >= 0); + + const res = linux.io_uring_register(self.fd, .unregister_iowq_aff, null, 0); + try handle_registration_result(res); +} + +/// `max_workers`: `max_workers[0]` should contain the maximum number of +/// desired bounded workers, and the `max_workers[1]` the maximum number of +/// desired unbounded workers. 
+/// If both values are set to 0, the existing values are returned +/// Read `io_uring_register_iowq_max_workers(3)` for more info +pub fn register_iowq_max_workers(self: *IoUring, max_workers: [2]u32) !void { + assert(self.fd >= 0); + + const res = linux.io_uring_register(self.fd, .register_iowq_max_workers, &max_workers, 2); + try handle_registration_result(res); +} + +/// See `io_uring_register_sync_cancel(3)` +pub fn register_sync_cancel(self: *IoUring, cancel_reg: *SyncCancelRegister) !void { + assert(self.fd >= 0); + + const res = linux.io_uring_register(self.fd, .register_sync_cancel, cancel_reg, 1); + try handle_registration_result(res); +} + +/// See `io_uring_register_sync_msg(3)` +pub fn register_sync_msg(self: *IoUring, sqe: *Sqe) !void { + assert(self.fd >= 0); + + const res = linux.io_uring_register(-1, .register_send_msg_ring, sqe, 1); + try handle_registration_result(res); +} + +// COMMIT: fix register file alloc range taking @sizeOf(FileIndexRange) instead of zero in register syscall +/// Registers range for fixed file allocations. +/// Available since 6.0 +pub fn register_file_alloc_range(self: *IoUring, offset: u32, len: u32) !void { + assert(self.fd >= 0); + + const range: FileIndexRange = .{ + .off = offset, + .len = len, + .resv = 0, + }; + + const res = linux.io_uring_register(self.fd, .register_file_alloc_range, &range, 0); + + return handle_registration_result(res); +} + +pub fn register_napi(self: *IoUring, napi: *Napi) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .register_napi, napi, 1); + try handle_registration_result(res); +} + +pub fn unregister_napi(self: *IoUring, napi: *Napi) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .unregister_napi, napi, 1); + try handle_registration_result(res); +} + +pub fn register_clock(self: *IoUring, clock_reg: *ClockRegister) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .register_clock, clock_reg, 0); + try handle_registration_result(res); +} + +pub fn register_ifq(self: *IoUring, ifq_reg: *ZcrxIfqRegister) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .register_zcrx_ifq, ifq_reg, 1); + try handle_registration_result(res); +} + +pub fn register_resize_rings(self: *IoUring, params: *Params) !void { + _ = params; // autofix + assert(self.fd >= 0); + return error.Unimplemented; +} + +pub fn register_region(self: *IoUring, mem_reg: *MemRegionRegister) !void { + assert(self.fd >= 0); + const res = linux.io_uring_register(self.fd, .register_mem_region, mem_reg, 1); + try handle_registration_result(res); +} + +/// Returns a Probe which is used to probe the capabilities of the +/// io_uring subsystem of the running kernel. The Probe contains the +/// list of supported operations. 
+pub fn get_probe(self: *IoUring) !Probe {
+    var probe = mem.zeroInit(Probe, .{});
+    const res = linux.io_uring_register(self.fd, .register_probe, &probe, probe.ops.len);
+    try handle_register_buf_ring_result(res);
+    return probe;
+}
+
+fn handle_registration_result(res: usize) !void {
+    switch (linux.E.init(res)) {
+        .SUCCESS => {},
+        // One or more fds in the array are invalid, or the kernel does not
+        // support sparse sets:
+        .BADF => return error.FileDescriptorInvalid,
+        .BUSY => return error.FilesAlreadyRegistered,
+        .INVAL => return error.FilesEmpty,
+        // Adding `nr_args` file references would exceed the maximum allowed
+        // number of files the user is allowed to have according to the
+        // per-user RLIMIT_NOFILE resource limit and the CAP_SYS_RESOURCE
+        // capability is not set, or `nr_args` exceeds the maximum allowed
+        // for a fixed file set (older kernels have a limit of 1024 files vs
+        // 64K files):
         .MFILE => return error.UserFdQuotaExceeded,
-        // Insufficient kernel resources, or the caller had a non-zero RLIMIT_MEMLOCK soft
-        // resource limit but tried to lock more memory than the limit permitted (not enforced
-        // when the process is privileged with CAP_IPC_LOCK):
+        // Insufficient kernel resources, or the caller had a non-zero
+        // RLIMIT_MEMLOCK soft resource limit but tried to lock more memory
+        // than the limit permitted (not enforced when the process is
+        // privileged with CAP_IPC_LOCK):
         .NOMEM => return error.SystemResources,
-        // Attempt to register files on a ring already registering files or being torn down:
+        // Attempt to register files on a ring already registering files or
+        // being torn down:
         .NXIO => return error.RingShuttingDownOrAlreadyRegisteringFiles,
         else => |errno| return posix.unexpectedErrno(errno),
     }
-}
+}
+
+/// Prepares set socket option for the optname argument, at the protocol
+/// level specified by the level argument.
+/// Available since 6.7.
+pub fn setsockopt(
+    self: *IoUring,
+    user_data: u64,
+    fd: linux.fd_t,
+    level: linux.Sol,
+    optname: linux.So,
+    opt: []const u8,
+) !*Sqe {
+    return try self.cmd_sock(
+        user_data,
+        .setsockopt,
+        fd,
+        level,
+        optname,
+        @intFromPtr(opt.ptr),
+        @intCast(opt.len),
+    );
+}
+
+/// Prepares get socket option to retrieve the value for the option specified
+/// by the option_name argument for the socket specified by the fd argument.
+/// Available since 6.7.
+pub fn getsockopt(
+    self: *IoUring,
+    user_data: u64,
+    fd: linux.fd_t,
+    level: linux.Sol,
+    optname: linux.So,
+    opt: []u8,
+) !*Sqe {
+    return try self.cmd_sock(
+        user_data,
+        .getsockopt,
+        fd,
+        level,
+        optname,
+        @intFromPtr(opt.ptr),
+        @intCast(opt.len),
+    );
+}
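For instance, setting SO_REUSEADDR through the ring might look like this sketch (the `.socket` and `.reuseaddr` tag names on `linux.Sol` and `linux.So` are assumptions):

    const one = mem.toBytes(@as(c_int, 1));
    _ = try ring.setsockopt(0x20, fd, .socket, .reuseaddr, &one);
    _ = try ring.submit();
    // The matching CQE carries 0 in res on success, -errno on failure.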
+
+// TODO: move buf_ring fns into BufferRing type
+/// Registers a shared buffer ring to be used with provided buffers. `entries`
+/// number of `io_uring_buf` structures is memory mapped and shared by the
+/// kernel.
+///
+/// `entries` is the number of entries requested in the buffer ring and must be
+/// a power of 2.
+/// `fd` is IoUring.fd for which the provided buffer ring is being registered.
+/// `group_id` is the chosen buffer group ID, unique in IoUring.
+/// matches `io_uring_setup_buf_ring()` in liburing
+pub fn init_buffer_ring(
+    self: *IoUring,
+    entries: u16,
+    group_id: u16,
+    flags: BufferRegister.Flags,
+) !*align(page_size_min) BufferRing {
+    assert(self.fd >= 0);
+    if (entries == 0 or entries > math.maxInt(u16)) return error.EntriesNotInRange;
+    if (!math.isPowerOfTwo(entries)) return error.EntriesNotPowerOfTwo;
+
+    const mmap_size: usize = entries * @sizeOf(Buffer);
+    const mmap = try posix.mmap(
+        null,
+        mmap_size,
+        posix.PROT.READ | posix.PROT.WRITE,
+        .{ .TYPE = .PRIVATE, .ANONYMOUS = true },
+        -1,
+        0,
+    );
+    errdefer posix.munmap(mmap);
+    assert(mmap.len == mmap_size);
+
+    const buffer_ring: *align(page_size_min) BufferRing = @ptrCast(mmap.ptr);
+    var reg = mem.zeroInit(BufferRegister, .{
+        .ring_addr = @intFromPtr(buffer_ring),
+        .ring_entries = entries,
+        .bgid = group_id,
+        .flags = flags,
+    });
+    try self.register_buffer_ring(&reg);
+    buffer_ring.init();
+    return buffer_ring;
+}
+
+/// matches `io_uring_register_buf_ring` in liburing
+pub fn register_buffer_ring(self: *IoUring, buf_reg: *BufferRegister) !void {
+    var res = linux.io_uring_register(self.fd, .register_pbuf_ring, buf_reg, 1);
+    if (linux.E.init(res) == .INVAL and buf_reg.flags.iou_pbuf_ring_inc) {
+        // Retry without incremental buffer consumption. It is available
+        // since kernel 6.12; older kernels return INVAL.
+        buf_reg.flags.iou_pbuf_ring_inc = false;
+        res = linux.io_uring_register(self.fd, .register_pbuf_ring, buf_reg, 1);
+    }
+    try handle_register_buf_ring_result(res);
+}
+
+/// matches `io_uring_unregister_buf_ring` in liburing
+pub fn unregister_buffer_ring(self: *IoUring, buf_group_id: u16) !void {
+    var reg = mem.zeroInit(BufferRegister, .{
+        .bgid = buf_group_id,
+    });
+    const res = linux.io_uring_register(self.fd, .unregister_pbuf_ring, &reg, 1);
+    try handle_register_buf_ring_result(res);
+}
+
+fn handle_register_buf_ring_result(res: usize) !void {
+    switch (linux.E.init(res)) {
+        .SUCCESS => {},
+        .INVAL => return error.ArgumentsInvalid,
+        else => |errno| return posix.unexpectedErrno(errno),
+    }
+}
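A sketch of driving `init_buffer_ring` by hand (the entry count 8 and group ID 1 are arbitrary; the backing buffers' lifetimes are the caller's responsibility):

    const br = try ring.init_buffer_ring(8, 1, .{});
    defer br.deinit(&ring, 8, 1);
    var bufs: [8][512]u8 = undefined;
    const m = br.mask(8);
    for (&bufs, 0..) |*b, i| br.add(b, @intCast(i), m, @intCast(i));
    br.advance(8);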
+
+/// IO completion data structure (Completion Queue Entry)
+pub const Cqe = extern struct {
+    /// sqe.user_data value passed back
+    user_data: u64,
+    /// result code for this event
+    res: i32,
+    flags: Flags,
+    // TODO: add support for the IORING_SETUP_CQE32 case
+    /// If the ring is initialized with IORING_SETUP_CQE32, then this field
+    /// contains 16-bytes of padding, doubling the size of the CQE.
+    // big_cqe: ?[2]u64,
+
+    /// cqe.flags
+    pub const Flags = packed struct(u32) {
+        /// IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
+        f_buffer: bool = false,
+        /// IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
+        f_more: bool = false,
+        /// IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket
+        /// recv
+        f_sock_nonempty: bool = false,
+        /// IORING_CQE_F_NOTIF Set for notification CQEs. Can be used to
+        /// distinguish them from sends.
+        f_notif: bool = false,
+        /// IORING_CQE_F_BUF_MORE If set, the buffer ID set in the completion
+        /// will get more completions. In other words, the buffer is being
+        /// partially consumed, and will be used by the kernel for more
+        /// completions. This is only set for buffers used via the incremental
+        /// buffer consumption, as provided by a ring buffer setup with
+        /// IOU_PBUF_RING_INC. For any other provided buffer type, every
+        /// completion with a buffer passed back automatically returns the
+        /// buffer to the application.
+        f_buf_more: bool = false,
+        // COMMIT: new flags
+        /// IORING_CQE_F_SKIP If set, then the application/liburing must ignore
+        /// this CQE. Its only purpose is to fill a gap in the ring, if posting
+        /// a large CQE is attempted when the ring has just a single small
+        /// CQE worth of space left before wrapping.
+        f_skip: bool = false,
+        _7: u9 = 0,
+        /// IORING_CQE_F_32 If set, this is a 32b/big-cqe posting. Use with
+        /// rings setup in a mixed CQE mode, where both 16b and 32b CQEs may be
+        /// posted to the CQ ring.
+        f_32: bool = false,
+        _17: u16 = 0,
+    };
+
+    /// Retrieves the 64-bit cqe `user_data` as `*T` after completion of an
+    /// Sqe; this data is passed through `Sqe` -> `Cqe` unchanged.
+    pub fn get_data(cqe: Cqe, comptime T: type) *T {
+        return @ptrFromInt(cqe.user_data);
+    }
+
+    pub fn err(self: Cqe) linux.E {
+        if (self.res > -4096 and self.res < 0) {
+            return @enumFromInt(-self.res);
+        }
+        return .SUCCESS;
+    }
+
+    /// On successful completion of the provided buffers IO request, the CQE
+    /// flags field will have IORING_CQE_F_BUFFER set and the selected buffer
+    /// ID will be indicated by the upper 16-bits of the flags field.
+    pub fn buffer_id(self: Cqe) !u16 {
+        if (!self.flags.f_buffer) {
+            return error.NoBufferSelected;
+        }
+        return @intCast(@as(u32, @bitCast(self.flags)) >> constants.CQE_BUFFER_SHIFT);
+    }
+};
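Reading completions with these helpers could look like this sketch (the `copy_cqes` helper is assumed from the rest of this file, and the error name is illustrative):

    var cqes: [256]Cqe = undefined;
    const n = try ring.copy_cqes(&cqes, 1);
    for (cqes[0..n]) |cqe| {
        switch (cqe.err()) {
            .SUCCESS => {},
            .NOBUFS => return error.ProvidedBuffersExhausted, // illustrative
            else => |e| return posix.unexpectedErrno(e),
        }
        if (cqe.flags.f_buffer) {
            const bid = try cqe.buffer_id(); // upper 16 bits of cqe.flags
            _ = bid;
        }
    }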
+
+/// IO submission data structure (Submission Queue Entry)
+/// matches `io_uring_sqe` in liburing
+pub const Sqe = extern struct {
+    /// type of operation for this sqe
+    opcode: Op,
+    /// IOSQE_* flags
+    flags: IoSqe,
+    /// ioprio for the request
+    ioprio: packed union {
+        send_recv: SendRecv,
+        accept: Accept,
+        const Ioprio = @This();
+
+        pub fn init_empty() Ioprio {
+            return @bitCast(@as(u16, 0));
+        }
+    },
+    /// file descriptor to do IO on
+    fd: i32,
+    /// offset into file
+    off: u64,
+    /// pointer to buffer or iovecs
+    addr: u64,
+    /// buffer size or number of iovecs
+    len: u32,
+    /// flags for any Sqe operation
+    /// rw_flags | fsync_flags | poll_event | poll32_event | sync_range_flags |
+    /// msg_flags | timeout_flags | accept_flags | cancel_flags | open_flags |
+    /// statx_flags | fadvise_advice | splice_flags | rename_flags |
+    /// unlink_flags | hardlink_flags | xattr_flags | msg_ring_flags |
+    /// uring_cmd_flags | waitid_flags | futex_flags | install_fd_flags |
+    /// nop_flags | pipe_flags
+    rw_flags: u32,
+    /// data to be passed back at completion time
+    user_data: u64,
+    /// index into fixed buffers or for grouped buffer selection
+    buf_index: u16,
+    personality: u16,
+    splice_fd_in: i32,
+    addr3: u64,
+    resv: u64,
+
+    /// sqe.flags
+    pub const IoSqe = packed struct(u8) {
+        /// use fixed fileset
+        fixed_file: bool = false,
+        /// issue after inflight IO
+        io_drain: bool = false,
+        /// links next sqe
+        io_link: bool = false,
+        /// like LINK, but stronger
+        io_hardlink: bool = false,
+        /// always go async
+        async: bool = false,
+        /// select buffer from sqe->buf_group
+        buffer_select: bool = false,
+        /// don't post CQE if request succeeded
+        cqe_skip_success: bool = false,
+        _: u1 = 0,
+    };
+
+    /// send/sendmsg and recv/recvmsg flags (sqe.ioprio)
+    pub const SendRecv = packed struct(u16) {
+        /// IORING_RECVSEND_POLL_FIRST
+        /// If set, instead of first attempting to send or receive and arming
+        /// poll if that yields an -EAGAIN result, arm poll upfront and skip
+        /// the initial transfer attempt.
+        recvsend_poll_first: bool = false,
+        /// IORING_RECV_MULTISHOT
+        /// Multishot recv. Sets IORING_CQE_F_MORE if the handler will continue
+        /// to report CQEs on behalf of the same SQE.
+        recv_multishot: bool = false,
+        /// IORING_RECVSEND_FIXED_BUF
+        /// Use registered buffers, the index is stored in the buf_index field.
+        recvsend_fixed_buf: bool = false,
+        /// IORING_SEND_ZC_REPORT_USAGE
+        /// If set, SEND[MSG]_ZC should report the zerocopy usage in cqe.res
+        /// for the IORING_CQE_F_NOTIF cqe. 0 is reported if zerocopy was
+        /// actually possible. IORING_NOTIF_USAGE_ZC_COPIED if data was copied
+        /// (at least partially).
+        send_zc_report_usage: bool = false,
+        /// IORING_RECVSEND_BUNDLE
+        /// Used with IOSQE_BUFFER_SELECT. If set, send or recv will grab as
+        /// many buffers from the buffer group ID given and send them all.
+        /// The completion result will be the number of buffers sent, with the
+        /// starting buffer ID in cqe.flags as per usual for provided buffer
+        /// usage. The buffers will be contiguous from the starting buffer ID.
+        recvsend_bundle: bool = false,
+        // COMMIT: new flags
+        /// IORING_SEND_VECTORIZED
+        /// If set, SEND[_ZC] will take a pointer to an iovec to allow
+        /// vectorized send operations.
+        send_vectorized: bool = false,
+        _: u10 = 0,
+    };
+
+    /// accept flags stored in sqe.ioprio
+    pub const Accept = packed struct(u16) {
+        multishot: bool = false,
+        // COMMIT: new Flags
+        dontwait: bool = false,
+        poll_first: bool = false,
+        _: u13 = 0,
+    };
+
+    pub fn prep_nop(sqe: *Sqe) void {
+        sqe.* = .{
+            .opcode = .nop,
+            .flags = .{},
+            .ioprio = .init_empty(),
+            .fd = 0,
+            .off = 0,
+            .addr = 0,
+            .len = 0,
+            .rw_flags = 0,
+            .user_data = 0,
+            .buf_index = 0,
+            .personality = 0,
+            .splice_fd_in = 0,
+            .addr3 = 0,
+            .resv = 0,
+        };
+    }
+
+    pub fn prep_fsync(sqe: *Sqe, fd: linux.fd_t, flags: uflags.Fsync) void {
+        sqe.* = .{
+            .opcode = .fsync,
+            .flags = .{},
+            .ioprio = .init_empty(),
+            .fd = fd,
+            .off = 0,
+            .addr = 0,
+            .len = 0,
+            .rw_flags = @bitCast(flags),
+            .user_data = 0,
+            .buf_index = 0,
+            .personality = 0,
+            .splice_fd_in = 0,
+            .addr3 = 0,
+            .resv = 0,
+        };
+    }
+
+    pub fn prep_rw(
+        sqe: *Sqe,
+        op: Op,
+        fd: linux.fd_t,
+        addr: u64,
+        len: usize,
+        offset: u64,
+    ) void {
+        sqe.* = .{
+            .opcode = op,
+            .flags = .{},
+            .ioprio = .init_empty(),
+            .fd = fd,
+            .off = offset,
+            .addr = addr,
+            .len = @intCast(len),
+            .rw_flags = 0,
+            .user_data = 0,
+            .buf_index = 0,
+            .personality = 0,
+            .splice_fd_in = 0,
+            .addr3 = 0,
+            .resv = 0,
+        };
+    }
+
+    pub fn prep_write(sqe: *Sqe, fd: linux.fd_t, buffer: []const u8, offset: u64) void {
+        sqe.prep_rw(.write, fd, @intFromPtr(buffer.ptr), buffer.len, offset);
+    }
+
+    pub fn prep_writev(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        iovecs: []const std.posix.iovec_const,
+        offset: u64,
+    ) void {
+        sqe.prep_rw(.writev, fd, @intFromPtr(iovecs.ptr), iovecs.len, offset);
+    }
+
+    pub fn prep_write_fixed(sqe: *Sqe, fd: linux.fd_t, buffer: []const u8, offset: u64, buffer_index: u16) void {
+        sqe.prep_rw(.write_fixed, fd, @intFromPtr(buffer.ptr), buffer.len, offset);
+        sqe.buf_index = buffer_index;
+    }
+
+    pub fn prep_writev_fixed(sqe: *Sqe, fd: linux.fd_t, iovecs: []const posix.iovec_const, offset: u64, buffer_index: u16) void {
+        sqe.prep_rw(.write_fixed, fd, @intFromPtr(iovecs.ptr), iovecs.len, offset);
+        sqe.buf_index = buffer_index;
+    }
+
+    pub fn prep_splice(sqe: *Sqe, fd_in: linux.fd_t, off_in: u64, fd_out: linux.fd_t, off_out: u64, len: usize) void {
+        sqe.prep_rw(.splice, fd_out, undefined, len, off_out);
+        sqe.addr = off_in;
+        sqe.splice_fd_in = fd_in;
+    }
+
+    pub fn prep_tee(sqe: *Sqe, fd_in: linux.fd_t, fd_out: linux.fd_t, len: usize) void {
+        sqe.prep_rw(.tee, fd_out, undefined, len, 0);
+        sqe.addr = undefined;
+        sqe.splice_fd_in = fd_in;
+    }
+
+    pub fn prep_read(sqe: *Sqe, fd: linux.fd_t, buffer: []u8, offset: u64) void {
+        sqe.prep_rw(.read, fd, @intFromPtr(buffer.ptr), buffer.len, offset);
+    }
+
+    pub fn prep_readv(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        iovecs: []const std.posix.iovec,
+        offset: u64,
+    ) void {
+        sqe.prep_rw(.readv, fd, @intFromPtr(iovecs.ptr), iovecs.len, offset);
+    }
+
+    pub fn prep_read_fixed(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        buffer: []u8,
+        offset: u64,
+        buffer_index: u16,
+    ) void {
+        sqe.prep_rw(.read_fixed, fd, @intFromPtr(buffer.ptr), buffer.len, offset);
+        sqe.buf_index = buffer_index;
+    }
+
+    pub fn prep_readv_fixed(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        iovecs: []const std.posix.iovec,
+        offset: u64,
+        buffer_index: u16,
+    ) void {
+        sqe.prep_rw(.read_fixed, fd, @intFromPtr(iovecs.ptr), iovecs.len, offset);
+        sqe.buf_index = buffer_index;
+    }
+
+    pub fn prep_accept(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        addr: ?*linux.sockaddr,
+        addrlen: ?*linux.socklen_t,
+        flags: linux.Sock,
+    ) void {
+        // `addr` holds a pointer to `sockaddr`, and `addr2` holds a pointer to
+        // `socklen_t`.
+        // `addr2` maps to `sqe.off` (u64) instead of `sqe.len` (which is only
+        // a u32).
+        sqe.prep_rw(.accept, fd, @intFromPtr(addr), 0, @intFromPtr(addrlen));
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    /// accept directly into the fixed file table
+    pub fn prep_accept_direct(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        addr: ?*linux.sockaddr,
+        addrlen: ?*linux.socklen_t,
+        flags: linux.Sock,
+        file_index: u32,
+    ) void {
+        prep_accept(sqe, fd, addr, addrlen, flags);
+        set_target_fixed_file(sqe, file_index);
+    }
+
+    pub fn prep_multishot_accept(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        addr: ?*linux.sockaddr,
+        addrlen: ?*linux.socklen_t,
+        flags: linux.Sock,
+    ) void {
+        prep_accept(sqe, fd, addr, addrlen, flags);
+        sqe.ioprio = .{ .accept = .{ .multishot = true } };
+    }
+
+    /// multishot accept directly into the fixed file table
+    pub fn prep_multishot_accept_direct(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        addr: ?*linux.sockaddr,
+        addrlen: ?*linux.socklen_t,
+        flags: linux.Sock,
+    ) void {
+        prep_multishot_accept(sqe, fd, addr, addrlen, flags);
+        set_target_fixed_file(sqe, constants.FILE_INDEX_ALLOC);
+    }
+
+    fn set_target_fixed_file(sqe: *Sqe, file_index: u32) void {
+        const sqe_file_index: u32 = if (file_index == constants.FILE_INDEX_ALLOC)
+            constants.FILE_INDEX_ALLOC
+        else
+            // 0 means no fixed files, indexes should be encoded as "index + 1"
+            file_index + 1;
+        // This field is overloaded in liburing:
+        //   splice_fd_in: i32
+        //   sqe_file_index: u32
+        sqe.splice_fd_in = @bitCast(sqe_file_index);
+    }
+
+    pub fn prep_connect(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        addr: *const linux.sockaddr,
+        addrlen: linux.socklen_t,
+    ) void {
+        // `addrlen` maps to `sqe.off` (u64) instead of `sqe.len` (which is
+        // only a u32).
+        sqe.prep_rw(.connect, fd, @intFromPtr(addr), 0, addrlen);
+    }
+
+    pub fn prep_epoll_wait(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        event: ?*linux.epoll_event,
+        max_events: u32,
+        flags: linux.Epoll,
+    ) void {
+        sqe.prep_rw(.epoll_wait, fd, @intFromPtr(event), max_events, 0);
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_epoll_ctl(
+        sqe: *Sqe,
+        epfd: linux.fd_t,
+        fd: linux.fd_t,
+        op: linux.EpollOp,
+        ev: ?*linux.epoll_event,
+    ) void {
+        sqe.prep_rw(.epoll_ctl, epfd, @intFromPtr(ev), @intFromEnum(op), @intCast(fd));
+    }
+
+    pub fn prep_recv(sqe: *Sqe, fd: linux.fd_t, buffer: []u8, flags: linux.Msg) void {
+        sqe.prep_rw(.recv, fd, @intFromPtr(buffer.ptr), buffer.len, 0);
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_recv_multishot(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        buffer: []u8,
+        flags: linux.Msg,
+    ) void {
+        sqe.prep_recv(fd, buffer, flags);
+        sqe.ioprio = .{ .send_recv = .{ .recv_multishot = true } };
+    }
+
+    pub fn prep_recvmsg(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        msg: *linux.msghdr,
+        flags: linux.Msg,
+    ) void {
+        sqe.prep_rw(.recvmsg, fd, @intFromPtr(msg), 1, 0);
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_recvmsg_multishot(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        msg: *linux.msghdr,
+        flags: linux.Msg,
+    ) void {
+        sqe.prep_recvmsg(fd, msg, flags);
+        sqe.ioprio = .{ .send_recv = .{ .recv_multishot = true } };
+    }
+
+    // COMMIT: fix send[|recv] flag param type
+    pub fn prep_send(sqe: *Sqe, sockfd: linux.fd_t, buffer: []const u8, flags: linux.Msg) void {
+        sqe.prep_rw(.send, sockfd, @intFromPtr(buffer.ptr), buffer.len, 0);
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_send_bundle(sqe: *Sqe, sockfd: linux.fd_t, len: u64, flags: linux.Msg) void {
+        sqe.prep_rw(.send, sockfd, undefined, len, 0);
+        sqe.rw_flags = @bitCast(flags);
+        sqe.ioprio = .{ .send_recv = .{ .recvsend_bundle = true } };
+    }
+
+    pub fn prep_send_to(
+        sqe: *Sqe,
+        sockfd: linux.fd_t,
+        buffer: []const u8,
+        flags: linux.Msg,
+        addr: *const linux.sockaddr,
+        addrlen: linux.socklen_t,
+    ) void {
+        // addr2 maps to sqe.off and addr_len maps to sqe.splice_fd_in
+        sqe.prep_send(sockfd, buffer, flags);
+        sqe.off = @intFromPtr(addr);
+        sqe.splice_fd_in = @intCast(addrlen);
+    }
+
+    pub fn prep_send_zc(sqe: *Sqe, fd: linux.fd_t, buffer: []const u8, flags: linux.Msg, zc_flags: Sqe.SendRecv) void {
+        sqe.prep_rw(.send_zc, fd, @intFromPtr(buffer.ptr), buffer.len, 0);
+        sqe.rw_flags = @bitCast(flags);
+        sqe.ioprio = .{ .send_recv = zc_flags };
+    }
+
+    pub fn prep_send_zc_fixed(sqe: *Sqe, fd: linux.fd_t, buffer: []const u8, flags: linux.Msg, zc_flags: Sqe.SendRecv, buf_index: u16) void {
+        const zc_flags_fixed = if (zc_flags.recvsend_fixed_buf) zc_flags else blk: {
+            var updated_flags = zc_flags;
+            updated_flags.recvsend_fixed_buf = true;
+            break :blk updated_flags;
+        };
+        sqe.prep_send_zc(fd, buffer, flags, zc_flags_fixed);
+        sqe.buf_index = buf_index;
+    }
+
+    pub fn prep_sendmsg(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        msg: *const linux.msghdr_const,
+        flags: linux.Msg,
+    ) void {
+        sqe.prep_rw(.sendmsg, fd, @intFromPtr(msg), 1, 0);
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_sendmsg_zc(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        msg: *const linux.msghdr_const,
+        flags: linux.Msg,
+    ) void {
+        sqe.prep_sendmsg(fd, msg, flags);
+        sqe.opcode = .sendmsg_zc;
+    }
+
+    pub fn prep_sendmsg_zc_fixed(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        msg: *const linux.msghdr_const,
+        flags: linux.Msg,
+        buf_index: u16,
+    ) void {
+        sqe.prep_sendmsg_zc(fd, msg, flags);
+        sqe.ioprio = .{ .send_recv = .{ .recvsend_fixed_buf = true } };
+        sqe.buf_index = buf_index;
+    }
+
+    pub fn prep_openat(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        path: [*:0]const u8,
+        flags: linux.O,
+        mode: linux.mode_t,
+    ) void {
+        sqe.prep_rw(.openat, fd, @intFromPtr(path), mode, 0);
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_openat_direct(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        path: [*:0]const u8,
+        flags: linux.O,
+        mode: linux.mode_t,
+        file_index: u32,
+    ) void {
+        prep_openat(sqe, fd, path, flags, mode);
+        set_target_fixed_file(sqe, file_index);
+    }
+
+    pub fn prep_close(sqe: *Sqe, fd: linux.fd_t) void {
+        sqe.* = .{
+            .opcode = .close,
+            .flags = .{},
+            .ioprio = .init_empty(),
+            .fd = fd,
+            .off = 0,
+            .addr = 0,
+            .len = 0,
+            .rw_flags = 0,
+            .user_data = 0,
+            .buf_index = 0,
+            .personality = 0,
+            .splice_fd_in = 0,
+            .addr3 = 0,
+            .resv = 0,
+        };
+    }
+
+    pub fn prep_close_direct(sqe: *Sqe, file_index: u32) void {
+        prep_close(sqe, 0);
+        set_target_fixed_file(sqe, file_index);
+    }
+
+    pub fn prep_timeout(
+        sqe: *Sqe,
+        ts: *const linux.kernel_timespec,
+        count: u32,
+        flags: uflags.Timeout,
+    ) void {
+        sqe.prep_rw(.timeout, -1, @intFromPtr(ts), 1, count);
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_timeout_remove(sqe: *Sqe, timeout_user_data: u64, flags: uflags.Timeout) void {
+        sqe.prep_rw(.timeout_remove, -1, timeout_user_data, 0, 0);
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_timeout_update(sqe: *Sqe, timeout_user_data: u64, ts: *const linux.kernel_timespec, flags: uflags.Timeout) void {
+        sqe.prep_rw(.timeout_remove, -1, timeout_user_data, 0, @intFromPtr(ts));
+        const enable_timeout_update = if (flags.timeout_update) flags else blk: {
+            var tflags = flags;
+            tflags.timeout_update = true;
+            break :blk tflags;
+        };
+        sqe.rw_flags = @bitCast(enable_timeout_update);
+    }
+
+    pub fn prep_link_timeout(
+        sqe: *Sqe,
+        ts: *const linux.kernel_timespec,
+        flags: uflags.Timeout,
+    ) void {
+        sqe.prep_rw(.link_timeout, -1, @intFromPtr(ts), 1, 0);
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_poll_add(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        poll_mask: linux.Epoll,
+    ) void {
+        sqe.prep_rw(.poll_add, fd, @intFromPtr(@as(?*anyopaque, null)), 0, 0);
+        // Poll masks previously used to comprise of 16 bits in the flags union
+        // of a SQE, but were then extended to comprise of 32 bits in order to
+        // make room for additional option flags. To ensure that the correct
+        // bits of poll masks are consistently and properly read across
+        // multiple kernel versions, poll masks are enforced to be
+        // little-endian. https://www.spinics.net/lists/io-uring/msg02848.html
+        sqe.rw_flags = std.mem.nativeToLittle(u32, @bitCast(poll_mask));
+    }
+
+    pub fn prep_poll_remove(
+        sqe: *Sqe,
+        target_user_data: u64,
+    ) void {
+        sqe.prep_rw(.poll_remove, -1, target_user_data, 0, 0);
+    }
+
+    pub fn prep_poll_update(
+        sqe: *Sqe,
+        old_user_data: u64,
+        new_user_data: u64,
+        poll_mask: linux.Epoll,
+        flags: uflags.Poll,
+    ) void {
+        sqe.prep_rw(.poll_remove, -1, old_user_data, @as(u32, @bitCast(flags)), new_user_data);
+        // Poll masks previously used to comprise of 16 bits in the flags union
+        // of a SQE, but were then extended to comprise of 32 bits in order to
+        // make room for additional option flags. To ensure that the correct
+        // bits of poll masks are consistently and properly read across
+        // multiple kernel versions, poll masks are enforced to be
+        // little-endian.
https://www.spinics.net/lists/io-uring/msg02848.html + sqe.rw_flags = std.mem.nativeToLittle(u32, @bitCast(poll_mask)); + } + + pub fn prep_fallocate( + sqe: *Sqe, + fd: linux.fd_t, + mode: i32, + offset: u64, + len: u64, + ) void { + sqe.* = .{ + .opcode = .fallocate, + .flags = .{}, + .ioprio = .init_empty(), + .fd = fd, + .off = offset, + .addr = len, + .len = @intCast(mode), + .rw_flags = 0, + .user_data = 0, + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + } + + pub fn prep_statx( + sqe: *Sqe, + fd: linux.fd_t, + path: [*:0]const u8, + flags: linux.At, + mask: linux.Statx.Mask, + buf: *linux.Statx, + ) void { + sqe.prep_rw(.statx, fd, @intFromPtr(path), @as(u32, @bitCast(mask)), @intFromPtr(buf)); + sqe.rw_flags = @bitCast(flags); + } + + pub fn prep_fadvice( + sqe: *Sqe, + fd: linux.fd_t, + offset: u64, + len: u32, + advice: linux.Fadvice, + ) void { + sqe.prep_rw(.fadvise, fd, undefined, len, offset); + sqe.rw_flags = @intFromEnum(advice); + } + + pub fn prep_madvice( + sqe: *Sqe, + memory: []u8, + advice: linux.Madvice, + ) void { + sqe.prep_rw(.madvise, -1, @intFromPtr(memory.ptr), memory.len, 0); + sqe.rw_flags = @intFromEnum(advice); + } + + pub fn prep_cancel( + sqe: *Sqe, + cancel_user_data: u64, + flags: uflags.AsyncCancel, + ) void { + sqe.prep_rw(.async_cancel, -1, cancel_user_data, 0, 0); + sqe.rw_flags = @bitCast(flags); + } + + pub fn prep_cancel_fd( + sqe: *Sqe, + fd: linux.fd_t, + flags: uflags.AsyncCancel, + ) void { + sqe.prep_rw(.async_cancel, fd, undefined, 0, 0); + const enable_cancel_fd = if (flags.cancel_fd) flags else blk: { + var cancel_flags = flags; + cancel_flags.cancel_fd = true; + break :blk cancel_flags; + }; + sqe.rw_flags = @bitCast(enable_cancel_fd); + } + + pub fn prep_shutdown( + sqe: *Sqe, + sockfd: linux.socket_t, + how: linux.Shut, + ) void { + sqe.prep_rw(.shutdown, sockfd, 0, @intFromEnum(how), 0); + } + + pub fn prep_renameat( + sqe: *Sqe, + old_dir_fd: linux.fd_t, + old_path: [*:0]const u8, + new_dir_fd: linux.fd_t, + new_path: [*:0]const u8, + flags: linux.Rename, + ) void { + sqe.prep_rw( + .renameat, + old_dir_fd, + @intFromPtr(old_path), + 0, + @intFromPtr(new_path), + ); + sqe.len = @bitCast(new_dir_fd); + sqe.rw_flags = @bitCast(flags); + } + + pub fn prep_unlinkat( + sqe: *Sqe, + dir_fd: linux.fd_t, + path: [*:0]const u8, + flags: linux.At, + ) void { + sqe.prep_rw(.unlinkat, dir_fd, @intFromPtr(path), 0, 0); + sqe.rw_flags = @bitCast(flags); + } + + pub fn prep_sync_file_range( + sqe: *Sqe, + fd: posix.fd_t, + len: u32, + offset: u64, + flags: linux.SyncFileRange, // TODO: add flags + ) void { + sqe.prep_rw(.sync_file_range, fd, undefined, len, offset); + sqe.rw_flags = @bitCast(flags); + } + + pub fn prep_mkdirat( + sqe: *Sqe, + dir_fd: linux.fd_t, + path: [*:0]const u8, + mode: linux.mode_t, + ) void { + sqe.prep_rw(.mkdirat, dir_fd, @intFromPtr(path), mode, 0); + } + + pub fn prep_symlinkat( + sqe: *Sqe, + target: [*:0]const u8, + new_dir_fd: linux.fd_t, + link_path: [*:0]const u8, + ) void { + sqe.prep_rw( + .symlinkat, + new_dir_fd, + @intFromPtr(target), + 0, + @intFromPtr(link_path), + ); + } + + pub fn prep_linkat( + sqe: *Sqe, + old_dir_fd: linux.fd_t, + old_path: [*:0]const u8, + new_dir_fd: linux.fd_t, + new_path: [*:0]const u8, + flags: linux.At, + ) void { + sqe.prep_rw( + .linkat, + old_dir_fd, + @intFromPtr(old_path), + 0, + @intFromPtr(new_path), + ); + sqe.len = @bitCast(new_dir_fd); + sqe.rw_flags = @bitCast(flags); + } + + pub fn prep_msg_ring( + sqe: *Sqe, + fd: 
linux.fd_t,
+        len: u32,
+        data: u64,
+        flags: uflags.MsgRing,
+    ) void {
+        sqe.prep_rw(
+            .msg_ring,
+            fd,
+            undefined,
+            len,
+            data,
+        );
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_msg_ring_cqe_flags(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        len: u32,
+        data: u64,
+        msg_flags: uflags.MsgRing,
+        cqe_flags: Cqe.Flags,
+    ) void {
+        const enable_flags_pass = blk: {
+            var flags = msg_flags;
+            flags.flags_pass = true;
+            break :blk flags;
+        };
+        sqe.prep_msg_ring(fd, len, data, enable_flags_pass);
+        // sqe.file_index in liburing maps to splice_fd_in in Zig sqe
+        sqe.splice_fd_in = @intCast(@as(u32, @bitCast(cqe_flags)));
+    }
+
+    pub fn prep_msg_ring_fd(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        source_fd: linux.fd_t,
+        target_fd: linux.fd_t,
+        data: u64,
+        flags: uflags.MsgRing,
+    ) void {
+        sqe.prep_rw(
+            .msg_ring,
+            fd,
+            @intFromEnum(MsgRingCmd.send_fd),
+            0,
+            data,
+        );
+        sqe.addr3 = @intCast(source_fd);
+        sqe.rw_flags = @bitCast(flags);
+        sqe.set_target_fixed_file(@intCast(target_fd));
+    }
+
+    pub fn prep_msg_ring_fd_alloc(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        source_fd: linux.fd_t,
+        data: u64,
+        flags: uflags.MsgRing,
+    ) void {
+        sqe.prep_rw(
+            .msg_ring,
+            fd,
+            @intFromEnum(MsgRingCmd.send_fd),
+            0,
+            data,
+        );
+        sqe.addr3 = @intCast(source_fd);
+        sqe.rw_flags = @bitCast(flags);
+        sqe.set_target_fixed_file(constants.FILE_INDEX_ALLOC);
+    }
+
+    pub fn prep_getxattr(
+        sqe: *Sqe,
+        name: []const u8,
+        value: []const u8,
+        path: []const u8,
+        len: u32,
+    ) void {
+        sqe.prep_rw(
+            .getxattr,
+            0,
+            @intFromPtr(name.ptr),
+            len,
+            @intFromPtr(value.ptr),
+        );
+        sqe.addr3 = @intFromPtr(path.ptr);
+    }
+
+    pub fn prep_fgetxattr(
+        sqe: *Sqe,
+        name: []const u8,
+        value: []const u8,
+        fd: linux.fd_t,
+        len: u32,
+    ) void {
+        sqe.prep_rw(
+            .fgetxattr,
+            fd,
+            @intFromPtr(name.ptr),
+            len,
+            @intFromPtr(value.ptr),
+        );
+    }
+
+    pub fn prep_setxattr(
+        sqe: *Sqe,
+        name: []const u8,
+        value: []const u8,
+        path: []const u8,
+        flags: linux.SetXattr,
+        len: u32,
+    ) void {
+        sqe.prep_rw(
+            .setxattr,
+            0,
+            @intFromPtr(name.ptr),
+            len,
+            @intFromPtr(value.ptr),
+        );
+        sqe.addr3 = @intFromPtr(path.ptr);
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_fsetxattr(
+        sqe: *Sqe,
+        name: []const u8,
+        value: []const u8,
+        fd: linux.fd_t,
+        flags: linux.SetXattr,
+        len: u32,
+    ) void {
+        sqe.prep_rw(
+            .fsetxattr,
+            fd,
+            @intFromPtr(name.ptr),
+            len,
+            @intFromPtr(value.ptr),
+        );
+        sqe.rw_flags = @bitCast(flags);
+    }
+
+    pub fn prep_files_update(
+        sqe: *Sqe,
+        fds: []const linux.fd_t,
+        offset: u32,
+    ) void {
+        sqe.prep_rw(.files_update, -1, @intFromPtr(fds.ptr), fds.len, @intCast(offset));
+    }
-/// Unregisters all registered file descriptors previously associated with the ring.
-pub fn unregister_files(self: *IoUring) !void {
-    assert(self.fd >= 0);
-    const res = linux.io_uring_register(self.fd, .UNREGISTER_FILES, null, 0);
-    switch (linux.E.init(res)) {
-        .SUCCESS => {},
-        .NXIO => return error.FilesNotRegistered,
-        else => |errno| return posix.unexpectedErrno(errno),
+    pub fn prep_files_update_alloc(
+        sqe: *Sqe,
+        fds: []linux.fd_t,
+    ) void {
+        sqe.prep_rw(.files_update, -1, @intFromPtr(fds.ptr), fds.len, constants.FILE_INDEX_ALLOC);
     }
-}

-/// Prepares a socket creation request.
-/// New socket fd will be returned in completion result.
-/// Available since 5.19 -pub fn socket( - self: *IoUring, - user_data: u64, - domain: u32, - socket_type: u32, - protocol: u32, - flags: u32, -) !*linux.io_uring_sqe { - const sqe = try self.get_sqe(); - sqe.prep_socket(domain, socket_type, protocol, flags); - sqe.user_data = user_data; - return sqe; -} + // TODO: why can't slice be used here ? + pub fn prep_provide_buffers( + sqe: *Sqe, + buffers: [*]u8, + buffer_len: usize, + num: usize, + group_id: usize, + buffer_id: usize, + ) void { + const ptr = @intFromPtr(buffers); + sqe.prep_rw(.provide_buffers, @intCast(num), ptr, buffer_len, buffer_id); + sqe.buf_index = @intCast(group_id); + } -/// Prepares a socket creation request for registered file at index `file_index`. -/// Available since 5.19 -pub fn socket_direct( - self: *IoUring, - user_data: u64, - domain: u32, - socket_type: u32, - protocol: u32, - flags: u32, - file_index: u32, -) !*linux.io_uring_sqe { - const sqe = try self.get_sqe(); - sqe.prep_socket_direct(domain, socket_type, protocol, flags, file_index); - sqe.user_data = user_data; - return sqe; -} + pub fn prep_remove_buffers( + sqe: *Sqe, + num: usize, + group_id: usize, + ) void { + sqe.prep_rw(.remove_buffers, @intCast(num), 0, 0, 0); + sqe.buf_index = @intCast(group_id); + } -/// Prepares a socket creation request for registered file, index chosen by kernel (file index alloc). -/// File index will be returned in CQE res field. -/// Available since 5.19 -pub fn socket_direct_alloc( - self: *IoUring, - user_data: u64, - domain: u32, - socket_type: u32, - protocol: u32, - flags: u32, -) !*linux.io_uring_sqe { - const sqe = try self.get_sqe(); - sqe.prep_socket_direct_alloc(domain, socket_type, protocol, flags); - sqe.user_data = user_data; - return sqe; -} + pub fn prep_socket( + sqe: *Sqe, + domain: linux.Af, + socket_type: linux.Sock, + protocol: linux.IpProto, + flags: u32, // flags is unused + ) void { + sqe.prep_rw(.socket, @intFromEnum(domain), 0, @intFromEnum(protocol), @as(u32, @bitCast(socket_type))); + sqe.rw_flags = flags; + } -/// Queues (but does not submit) an SQE to perform an `bind(2)` on a socket. -/// Returns a pointer to the SQE. -/// Available since 6.11 -pub fn bind( - self: *IoUring, - user_data: u64, - fd: posix.fd_t, - addr: *const posix.sockaddr, - addrlen: posix.socklen_t, - flags: u32, -) !*linux.io_uring_sqe { - const sqe = try self.get_sqe(); - sqe.prep_bind(fd, addr, addrlen, flags); - sqe.user_data = user_data; - return sqe; -} + pub fn prep_socket_direct( + sqe: *Sqe, + domain: linux.Af, + socket_type: linux.Sock, + protocol: linux.IpProto, + flags: u32, // flags is unused + file_index: u32, + ) void { + prep_socket(sqe, domain, socket_type, protocol, flags); + set_target_fixed_file(sqe, file_index); + } -/// Queues (but does not submit) an SQE to perform an `listen(2)` on a socket. -/// Returns a pointer to the SQE. -/// Available since 6.11 -pub fn listen( - self: *IoUring, - user_data: u64, - fd: posix.fd_t, - backlog: usize, - flags: u32, -) !*linux.io_uring_sqe { - const sqe = try self.get_sqe(); - sqe.prep_listen(fd, backlog, flags); - sqe.user_data = user_data; - return sqe; -} + pub fn prep_socket_direct_alloc( + sqe: *Sqe, + domain: linux.Af, + socket_type: linux.Sock, + protocol: linux.IpProto, + flags: u32, // flags is unused + ) void { + prep_socket(sqe, domain, socket_type, protocol, flags); + set_target_fixed_file(sqe, constants.FILE_INDEX_ALLOC); + } -/// Prepares an cmd request for a socket. 
-/// See: https://man7.org/linux/man-pages/man3/io_uring_prep_cmd.3.html
-/// Available since 6.7.
-pub fn cmd_sock(
-    self: *IoUring,
-    user_data: u64,
-    cmd_op: linux.IO_URING_SOCKET_OP,
-    fd: linux.fd_t,
-    level: u32, // linux.SOL
-    optname: u32, // linux.SO
-    optval: u64, // pointer to the option value
-    optlen: u32, // size of the option value
-) !*linux.io_uring_sqe {
-    const sqe = try self.get_sqe();
-    sqe.prep_cmd_sock(cmd_op, fd, level, optname, optval, optlen);
-    sqe.user_data = user_data;
-    return sqe;
-}
+    pub fn prep_waitid(
+        sqe: *Sqe,
+        id_type: linux.P,
+        id: i32,
+        infop: *linux.siginfo_t,
+        options: linux.W,
+        flags: u32, // flags is unused
+    ) void {
+        sqe.prep_rw(.waitid, id, 0, @intFromEnum(id_type), @intFromPtr(infop));
+        sqe.rw_flags = flags;
+        sqe.splice_fd_in = @bitCast(options);
+    }

-/// Prepares set socket option for the optname argument, at the protocol
-/// level specified by the level argument.
-/// Available since 6.7.n
-pub fn setsockopt(
-    self: *IoUring,
-    user_data: u64,
-    fd: linux.fd_t,
-    level: u32, // linux.SOL
-    optname: u32, // linux.SO
-    opt: []const u8,
-) !*linux.io_uring_sqe {
-    return try self.cmd_sock(
-        user_data,
-        .SETSOCKOPT,
-        fd,
-        level,
-        optname,
-        @intFromPtr(opt.ptr),
-        @intCast(opt.len),
-    );
-}
+    // TODO: maybe remove unused flag fields?
+    pub fn prep_bind(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        addr: *const linux.sockaddr,
+        addrlen: linux.socklen_t,
+        flags: u32, // flags is unused and doesn't exist in io_uring's api
+    ) void {
+        sqe.prep_rw(.bind, fd, @intFromPtr(addr), 0, addrlen);
+        sqe.rw_flags = flags;
+    }

-/// Prepares get socket option to retrieve the value for the option specified by
-/// the option_name argument for the socket specified by the fd argument.
-/// Available since 6.7.
-pub fn getsockopt(
-    self: *IoUring,
-    user_data: u64,
-    fd: linux.fd_t,
-    level: u32, // linux.SOL
-    optname: u32, // linux.SO
-    opt: []u8,
-) !*linux.io_uring_sqe {
-    return try self.cmd_sock(
-        user_data,
-        .GETSOCKOPT,
-        fd,
-        level,
-        optname,
-        @intFromPtr(opt.ptr),
-        @intCast(opt.len),
-    );
-}
+    pub fn prep_listen(
+        sqe: *Sqe,
+        fd: linux.fd_t,
+        backlog: usize,
+        flags: u32, // flags is unused and doesn't exist in io_uring's api
+    ) void {
+        sqe.prep_rw(.listen, fd, 0, backlog, 0);
+        sqe.rw_flags = flags;
+    }
+
+    pub fn prep_cmd_sock(
+        sqe: *Sqe,
+        cmd_op: SocketOp,
+        fd: linux.fd_t,
+        level: linux.Sol,
+        optname: linux.So,
+        optval: u64,
+        optlen: u32,
+    ) void {
+        sqe.prep_rw(.uring_cmd, fd, 0, 0, 0);
+        // off is overloaded with cmd_op, https://github.com/axboe/liburing/blob/e1003e496e66f9b0ae06674869795edf772d5500/src/include/liburing/io_uring.h#L39
+        sqe.off = @intFromEnum(cmd_op);
+        // addr is overloaded, https://github.com/axboe/liburing/blob/e1003e496e66f9b0ae06674869795edf772d5500/src/include/liburing/io_uring.h#L46
+        sqe.addr = @bitCast(packed struct {
+            level: u32,
+            optname: u32,
+        }{
+            .level = @intFromEnum(level),
+            .optname = @intFromEnum(optname),
+        });
+        // splice_fd_in is overloaded u32 -> i32
+        sqe.splice_fd_in = @bitCast(optlen);
+        // addr3 is overloaded, https://github.com/axboe/liburing/blob/e1003e496e66f9b0ae06674869795edf772d5500/src/include/liburing/io_uring.h#L102
+        sqe.addr3 = optval;
+    }
+
+    pub fn set_flags(sqe: *Sqe, flags: Sqe.IoSqe) void {
+        const updated_flags = @as(u8, @bitCast(sqe.flags)) | @as(u8, @bitCast(flags));
+        sqe.flags = @bitCast(updated_flags);
+    }
+
+    /// This SQE forms a link with the next SQE in the submission ring. Next SQE
+    /// will not be started before this one completes.
Forms a chain of SQEs. + pub fn link_next(sqe: *Sqe) void { + sqe.flags.io_link = true; + } +}; -pub const SubmissionQueue = struct { +/// matches `io_uring_sq` in liburing +pub const Sq = struct { head: *u32, tail: *u32, mask: u32, - flags: *u32, + flags: *Flags, dropped: *u32, array: []u32, - sqes: []linux.io_uring_sqe, + sqes: []Sqe, mmap: []align(page_size_min) u8, mmap_sqes: []align(page_size_min) u8, - // We use `sqe_head` and `sqe_tail` in the same way as liburing: // We increment `sqe_tail` (but not `tail`) for each call to `get_sqe()`. - // We then set `tail` to `sqe_tail` once, only when these events are actually submitted. - // This allows us to amortize the cost of the @atomicStore to `tail` across multiple SQEs. + // We then set `tail` to `sqe_tail` once, only when these events are + // actually submitted. This allows us to amortize the cost of the + // @atomicStore to `tail` across multiple SQEs. sqe_head: u32 = 0, sqe_tail: u32 = 0, - pub fn init(fd: posix.fd_t, p: linux.io_uring_params) !SubmissionQueue { + /// sq_ring.flags + pub const Flags = packed struct(u32) { + /// needs io_uring_enter wakeup + need_wakeup: bool = false, + /// CQ ring is overflown + cq_overflow: bool = false, + /// task should enter the kernel + taskrun: bool = false, + _: u29 = 0, + }; + + pub fn init(fd: posix.fd_t, p: Params) !Sq { assert(fd >= 0); - assert((p.features & linux.IORING_FEAT_SINGLE_MMAP) != 0); + assert(p.features.single_mmap); const size = @max( p.sq_off.array + p.sq_entries * @sizeOf(u32), - p.cq_off.cqes + p.cq_entries * @sizeOf(linux.io_uring_cqe), + p.cq_off.cqes + p.cq_entries * @sizeOf(Cqe), ); const mmap = try posix.mmap( null, @@ -1526,31 +3314,32 @@ pub const SubmissionQueue = struct { posix.PROT.READ | posix.PROT.WRITE, .{ .TYPE = .SHARED, .POPULATE = true }, fd, - linux.IORING_OFF_SQ_RING, + constants.OFF_SQ_RING, ); errdefer posix.munmap(mmap); assert(mmap.len == size); - // The motivation for the `sqes` and `array` indirection is to make it possible for the - // application to preallocate static linux.io_uring_sqe entries and then replay them when needed. - const size_sqes = p.sq_entries * @sizeOf(linux.io_uring_sqe); + // The motivation for the `sqes` and `array` indirection is to make it + // possible for the application to preallocate static io_uring_sqe + // entries and then replay them when needed. + const size_sqes = p.sq_entries * @sizeOf(Sqe); const mmap_sqes = try posix.mmap( null, size_sqes, posix.PROT.READ | posix.PROT.WRITE, .{ .TYPE = .SHARED, .POPULATE = true }, fd, - linux.IORING_OFF_SQES, + constants.OFF_SQES, ); errdefer posix.munmap(mmap_sqes); assert(mmap_sqes.len == size_sqes); const array: [*]u32 = @ptrCast(@alignCast(&mmap[p.sq_off.array])); - const sqes: [*]linux.io_uring_sqe = @ptrCast(@alignCast(&mmap_sqes[0])); - // We expect the kernel copies p.sq_entries to the u32 pointed to by p.sq_off.ring_entries, - // see https://github.com/torvalds/linux/blob/v5.8/fs/io_uring.c#L7843-L7844. + const sqes: [*]Sqe = @ptrCast(@alignCast(&mmap_sqes[0])); + // We expect the kernel copies p.sq_entries to the u32 pointed to by + // p.sq_off.ring_entries, See https://github.com/torvalds/linux/blob/v5.8/fs/io_uring.c#L7843-L7844. 
assert(p.sq_entries == @as(*u32, @ptrCast(@alignCast(&mmap[p.sq_off.ring_entries]))).*); - return SubmissionQueue{ + return .{ .head = @ptrCast(@alignCast(&mmap[p.sq_off.head])), .tail = @ptrCast(@alignCast(&mmap[p.sq_off.tail])), .mask = @as(*u32, @ptrCast(@alignCast(&mmap[p.sq_off.ring_mask]))).*, @@ -1563,26 +3352,34 @@ pub const SubmissionQueue = struct { }; } - pub fn deinit(self: *SubmissionQueue) void { + pub fn deinit(self: *Sq) void { posix.munmap(self.mmap_sqes); posix.munmap(self.mmap); } }; -pub const CompletionQueue = struct { +/// matches `io_uring_cq` in liburing +pub const Cq = struct { head: *u32, tail: *u32, mask: u32, overflow: *u32, - cqes: []linux.io_uring_cqe, + cqes: []Cqe, + + /// cq_ring.flags + pub const Flags = packed struct(u32) { + /// disable eventfd notifications + eventfd_disabled: bool = false, + _: u31 = 0, + }; - pub fn init(fd: posix.fd_t, p: linux.io_uring_params, sq: SubmissionQueue) !CompletionQueue { + pub fn init(fd: posix.fd_t, p: Params, sq: Sq) !Cq { assert(fd >= 0); - assert((p.features & linux.IORING_FEAT_SINGLE_MMAP) != 0); + assert(p.features.single_mmap); const mmap = sq.mmap; - const cqes: [*]linux.io_uring_cqe = @ptrCast(@alignCast(&mmap[p.cq_off.cqes])); + const cqes: [*]Cqe = @ptrCast(@alignCast(&mmap[p.cq_off.cqes])); assert(p.cq_entries == @as(*u32, @ptrCast(@alignCast(&mmap[p.cq_off.ring_entries]))).*); - return CompletionQueue{ + return .{ .head = @ptrCast(@alignCast(&mmap[p.cq_off.head])), .tail = @ptrCast(@alignCast(&mmap[p.cq_off.tail])), .mask = @as(*u32, @ptrCast(@alignCast(&mmap[p.cq_off.ring_mask]))).*, @@ -1591,10 +3388,11 @@ pub const CompletionQueue = struct { }; } - pub fn deinit(self: *CompletionQueue) void { + pub fn deinit(self: *Cq) void { _ = self; // A no-op since we now share the mmap with the submission queue. - // Here for symmetry with the submission queue, and for any future feature support. + // Here for symmetry with the submission queue, and for any future + // feature support. } }; @@ -1609,20 +3407,19 @@ pub const CompletionQueue = struct { /// ready to receive data, a buffer is picked automatically and the resulting /// CQE will contain the buffer ID in `cqe.buffer_id()`. Use `get` method to get /// buffer for buffer ID identified by CQE. Once the application has processed -/// the buffer, it may hand ownership back to the kernel, by calling `put` +/// the buffer, it may hand ownership back to the kernel, by calling `put()` /// allowing the cycle to repeat. /// /// Depending on the rate of arrival of data, it is possible that a given buffer /// group will run out of buffers before those in CQEs can be put back to the /// kernel. If this happens, a `cqe.err()` will have ENOBUFS as the error value. -/// pub const BufferGroup = struct { /// Parent ring for which this group is registered. ring: *IoUring, /// Pointer to the memory shared by the kernel. /// `buffers_count` of `io_uring_buf` structures are shared by the kernel. /// First `io_uring_buf` is overlaid by `io_uring_buf_ring` struct. - br: *align(page_size_min) linux.io_uring_buf_ring, + br: *align(page_size_min) BufferRing, /// Contiguous block of memory of size (buffers_count * buffer_size). buffers: []u8, /// Size of each buffer in buffers. 
@@ -1646,20 +3443,19 @@ pub const BufferGroup = struct {
         const heads = try allocator.alloc(u32, buffers_count);
         errdefer allocator.free(heads);
 
-        const br = try setup_buf_ring(ring.fd, buffers_count, group_id, .{ .inc = true });
-        buf_ring_init(br);
+        const br = try ring.init_buffer_ring(buffers_count, group_id, .{ .iou_pbuf_ring_inc = true });
 
-        const mask = buf_ring_mask(buffers_count);
+        const mask = br.mask(buffers_count);
         var i: u16 = 0;
         while (i < buffers_count) : (i += 1) {
             const pos = buffer_size * i;
             const buf = buffers[pos .. pos + buffer_size];
             heads[i] = 0;
-            buf_ring_add(br, buf, i, mask, i);
+            br.add(buf, i, mask, i);
         }
-        buf_ring_advance(br, buffers_count);
+        br.advance(buffers_count);
 
-        return BufferGroup{
+        return .{
             .ring = ring,
             .group_id = group_id,
             .br = br,
@@ -1670,217 +3466,1102 @@ pub const BufferGroup = struct {
-    pub fn deinit(self: *BufferGroup, allocator: mem.Allocator) void {
-        free_buf_ring(self.ring.fd, self.br, self.buffers_count, self.group_id);
-        allocator.free(self.buffers);
-        allocator.free(self.heads);
+    pub fn deinit(self: *BufferGroup, allocator: mem.Allocator) void {
+        self.br.deinit(self.ring, self.buffers_count, self.group_id);
+        allocator.free(self.buffers);
+        allocator.free(self.heads);
+    }
+
+    /// Prepare multishot read operation which will select buffer from this
+    /// group.
+    pub fn read_multishot(
+        self: *BufferGroup,
+        user_data: u64,
+        fd: posix.fd_t,
+        nbytes: u32,
+        offset: u64,
+    ) !*Sqe {
+        var sqe = try self.ring.get_sqe();
+        sqe.prep_rw(.read_multishot, fd, undefined, nbytes, offset);
+        sqe.flags.buffer_select = true;
+        sqe.buf_index = self.group_id;
+        sqe.user_data = user_data;
+        return sqe;
+    }
+
+    /// Prepare recv operation which will select buffer from this group.
+    pub fn recv(
+        self: *BufferGroup,
+        user_data: u64,
+        fd: posix.fd_t,
+        flags: linux.Msg,
+    ) !*Sqe {
+        var sqe = try self.ring.get_sqe();
+        sqe.prep_rw(.recv, fd, 0, 0, 0);
+        sqe.rw_flags = @bitCast(flags);
+        sqe.flags.buffer_select = true;
+        sqe.buf_index = self.group_id;
+        sqe.user_data = user_data;
+        return sqe;
+    }
+
+    /// Prepare multishot recv operation which will select buffer from this
+    /// group.
+    pub fn recv_multishot(
+        self: *BufferGroup,
+        user_data: u64,
+        fd: posix.fd_t,
+        flags: linux.Msg,
+    ) !*Sqe {
+        var sqe = try self.recv(user_data, fd, flags);
+        sqe.ioprio.send_recv.recv_multishot = true;
+        return sqe;
+    }
+
+    // Get buffer by id.
+    fn get_by_id(self: *BufferGroup, buffer_id: u16) []u8 {
+        const pos = self.buffer_size * buffer_id;
+        return self.buffers[pos .. pos + self.buffer_size][self.heads[buffer_id]..];
+    }
+
+    /// Get buffer by CQE.
+    pub fn get(self: *BufferGroup, cqe: Cqe) ![]u8 {
+        const buffer_id = try cqe.buffer_id();
+        const used_len: usize = @intCast(cqe.res);
+        return self.get_by_id(buffer_id)[0..used_len];
+    }
+
+    /// Release buffer from CQE to the kernel.
+    pub fn put(self: *BufferGroup, cqe: Cqe) !void {
+        const buffer_id = try cqe.buffer_id();
+        if (cqe.flags.f_buf_more) {
+            // Incremental consumption active, the kernel will write to this
+            // buffer again.
+            const used_len: u32 = @intCast(cqe.res);
+            // Track what part of the buffer is used.
+            self.heads[buffer_id] += used_len;
+            return;
+        }
+        self.heads[buffer_id] = 0;
+
+        // Release buffer to the kernel.
+        const mask = self.br.mask(self.buffers_count);
+        self.br.add(self.get_by_id(buffer_id), buffer_id, mask, 0);
+        self.br.advance(1);
+    }
+};
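End to end, the group is intended to be used roughly as in this sketch (the `init` parameter order is assumed from the fields above; `ring`, `allocator`, `fd`, and `cqe` come from surrounding code):

    var group = try BufferGroup.init(&ring, allocator, 1, 4096, 16);
    defer group.deinit(allocator);
    _ = try group.recv_multishot(0x30, fd, .{});
    _ = try ring.submit();
    // Per completion: borrow the selected buffer, consume it, hand it back.
    const data = try group.get(cqe);
    defer group.put(cqe) catch {};
    _ = data;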
+
+/// Used to select how the read should be handled.
+pub const ReadBuffer = union(enum) {
+    /// io_uring will read directly into this buffer
+    buffer: []u8,
+    /// io_uring will read directly into these buffers using readv.
+    iovecs: []const posix.iovec,
+    /// io_uring will select a buffer that has previously been provided with
+    /// `provide_buffers`.
+    /// The buffer group `group_id` must contain at least one buffer for the
+    /// read to work.
+    /// `len` controls the number of bytes to read into the selected buffer.
+    buffer_selection: struct {
+        group_id: u16,
+        len: usize,
+    },
+};
+
+/// Used to select how the write should be handled.
+pub const WriteBuffer = union(enum) {
+    /// io_uring will write data from this buffer into fd.
+    buffer: []const u8,
+    /// io_uring will write data from iovecs into fd using pwritev.
+    iovecs: []const posix.iovec_const,
+};
+
+/// Used to select how get/setxattr should be handled.
+pub const XattrSource = union(enum) {
+    /// Get/Set xattr associated with the given path in the filesystem
+    path: []const u8,
+    /// Get/Set xattr for the opened file referenced by this fd
+    fd: linux.fd_t,
+};
+
+/// Used to select how the recv call should be handled.
+pub const RecvBuffer = union(enum) {
+    /// io_uring will recv directly into this buffer
+    buffer: []u8,
+    /// io_uring will select a buffer that has previously been provided with
+    /// `provide_buffers`.
+    /// The buffer group `group_id` must contain at least one buffer for the
+    /// recv call to work.
+    /// `len` controls the number of bytes to read into the selected buffer.
+    buffer_selection: struct {
+        group_id: u16,
+        len: usize,
+    },
+};
+
+/// Filled with the offsets for `mmap(2)`.
+/// matches `io_sqring_offsets` in liburing
+pub const SqOffsets = extern struct {
+    /// offset of ring head
+    head: u32,
+    /// offset of ring tail
+    tail: u32,
+    /// ring mask value
+    ring_mask: u32,
+    /// entries in ring
+    ring_entries: u32,
+    /// ring flags index
+    flags: u32,
+    /// number of sqes not submitted
+    dropped: u32,
+    /// sqe index array
+    array: u32,
+    resv1: u32,
+    user_addr: u64,
+};
+
+/// matches `io_cqring_offsets` in liburing
+pub const CqOffsets = extern struct {
+    head: u32,
+    tail: u32,
+    ring_mask: u32,
+    ring_entries: u32,
+    overflow: u32,
+    cqes: u32,
+    flags: u32, // flags index
+    resv: u32,
+    user_addr: u64,
+};
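Before relying on newer opcodes described by these structures, a capability check can be sketched with the probe API from earlier in this file (`.send_zc` as an `Op` tag name is an assumption):

    var probe = try ring.get_probe();
    if (!probe.is_supported(.send_zc)) {
        // Fall back to a plain .send submission on older kernels.
    }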
+/// Passed in for `io_uring_setup(2)`. Copied back with updated info on success.
+/// matches `io_uring_params` in liburing
+pub const Params = extern struct {
+    sq_entries: u32,
+    cq_entries: u32,
+    flags: uflags.Setup,
+    sq_thread_cpu: u32,
+    sq_thread_idle: u32,
+    features: uflags.Features,
+    wq_fd: u32,
+    resv: [3]u32,
+    sq_off: SqOffsets,
+    cq_off: CqOffsets,
+};
+
+// COMMIT: remove the deprecated io_uring_files_update struct
+// (deprecated, see struct io_uring_rsrc_update)
+
+// COMMIT: add new io_uring_region_desc struct
+/// matches `io_uring_region_desc` in liburing
+pub const RegionDesc = extern struct {
+    user_addr: u64,
+    size: u64,
+    flags: Flags,
+    id: u32,
+    mmap_offset: u64,
+    __resv: [4]u64,
+
+    // COMMIT: new constant
+    pub const Flags = packed struct(u32) {
+        /// initialise with user provided memory pointed to by user_addr
+        type_user: bool = false,
+        _: u31 = 0,
+    };
+};
+
+// COMMIT: add new io_uring_mem_region_reg struct
+/// matches `io_uring_mem_region_reg` in liburing
+pub const MemRegionRegister = extern struct {
+    /// struct io_uring_region_desc (RegionDesc in Zig)
+    region_uptr: u64,
+    flags: Flags,
+    __resv: [2]u64,
+
+    pub const Flags = packed struct(u64) {
+        /// expose the region as registered wait arguments
+        reg_wait_arg: bool = false,
+        _: u63 = 0,
+    };
+};
+
+/// matches `io_uring_rsrc_register` in liburing
+pub const RsrcRegister = extern struct {
+    nr: u32,
+    flags: Flags,
+    resv2: u64,
+    data: u64,
+    tags: u64,
+
+    pub const Flags = packed struct(u32) {
+        /// Register a fully sparse file space, rather than pass in an array of
+        /// all -1 file descriptors.
+        register_sparse: bool = false,
+        _: u31 = 0,
+    };
+};
+
+/// matches `io_uring_rsrc_update` in liburing
+pub const RsrcUpdate = extern struct {
+    offset: u32,
+    resv: u32,
+    data: u64,
+};
+
+/// matches `io_uring_rsrc_update2` in liburing
+pub const RsrcUpdate2 = extern struct {
+    offset: u32,
+    resv: u32,
+    data: u64,
+    tags: u64,
+    nr: u32,
+    resv2: u32,
+};
+
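+// Example (sketch): checking opcode support via the `Probe`/`ProbeOp` types
+// defined just below. How the probe gets populated (the REGISTER_PROBE round
+// trip) is elided here; `probe` is assumed to have been filled in by it.
+//
+//     var probe: Probe = undefined; // populated by a REGISTER_PROBE call
+//     // ... issue the register call ...
+//     if (probe.is_supported(.recv_zc)) {
+//         // safe to submit IORING_OP_RECV_ZC on this kernel
+//     }
+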
+/// matches `io_uring_probe_op` in liburing
+pub const ProbeOp = extern struct {
+    op: Op,
+    resv: u8,
+    flags: Flags,
+    resv2: u32,
+
+    pub const Flags = packed struct(u16) {
+        op_supported: bool = false,
+        _: u15 = 0,
+    };
+
+    pub fn is_supported(self: ProbeOp) bool {
+        return self.flags.op_supported;
+    }
+};
+
+/// matches `io_uring_probe` in liburing
+pub const Probe = extern struct {
+    /// Last opcode supported
+    last_op: Op,
+    /// Length of ops[] array below
+    ops_len: u8,
+    resv: u16,
+    resv2: [3]u32,
+    ops: [256]ProbeOp,
+
+    /// Is the operation supported on the running kernel.
+    pub fn is_supported(self: *const Probe, op: Op) bool {
+        const i = @intFromEnum(op);
+        if (i > @intFromEnum(self.last_op) or i >= self.ops_len)
+            return false;
+        return self.ops[i].is_supported();
+    }
+};
+
+// COMMIT: fix definition of io_uring_restriction
+// RegisterOp is actually u8
+/// matches `io_uring_restriction` in liburing
+pub const Restriction = extern struct {
+    opcode: RestrictionOp,
+    arg: extern union {
+        /// IORING_RESTRICTION_REGISTER_OP
+        register_op: RegisterOp,
+        /// IORING_RESTRICTION_SQE_OP
+        sqe_op: Op,
+        /// IORING_RESTRICTION_SQE_FLAGS_*
+        sqe_flags: Sqe.IoSqe,
+    },
+    resv: u8,
+    resv2: [3]u32,
+};
+
+// COMMIT: add new struct type
+/// matches `io_uring_clock_register` in liburing
+pub const ClockRegister = extern struct {
+    clockid: u32,
+    __resv: [3]u32,
+};
+
+// COMMIT: add new struct type
+/// matches `io_uring_clone_buffers` in liburing
+pub const CloneBuffers = extern struct {
+    src_fd: u32,
+    flags: Flags,
+    src_off: u32,
+    dst_off: u32,
+    nr: u32,
+    pad: [3]u32,
+
+    // COMMIT: new flags
+    pub const Flags = packed struct(u32) {
+        register_src_registered: bool = false,
+        register_dst_replace: bool = false,
+        _: u30 = 0,
+    };
+};
+
+/// matches `io_uring_buf` in liburing
+pub const Buffer = extern struct {
+    addr: u64,
+    len: u32,
+    bid: u16,
+    resv: u16,
+};
+
+/// matches `io_uring_buf_ring` in liburing
+pub const BufferRing = extern struct {
+    resv1: u64,
+    resv2: u32,
+    resv3: u16,
+    tail: u16,
+
+    /// Initialises `br` so that it is ready to be used.
+    /// matches `io_uring_buf_ring_init` in liburing
+    fn init(br: *align(page_size_min) BufferRing) void {
+        br.tail = 0;
     }
 
-    // Prepare recv operation which will select buffer from this group.
-    pub fn recv(self: *BufferGroup, user_data: u64, fd: posix.fd_t, flags: u32) !*linux.io_uring_sqe {
-        var sqe = try self.ring.get_sqe();
-        sqe.prep_rw(.RECV, fd, 0, 0, 0);
-        sqe.rw_flags = flags;
-        sqe.flags |= linux.IOSQE_BUFFER_SELECT;
-        sqe.buf_index = self.group_id;
-        sqe.user_data = user_data;
-        return sqe;
+    /// Unregisters a previously registered shared buffer ring, returned from
+    /// `IoUring.init_buffer_ring`.
+    pub fn deinit(br: *align(page_size_min) BufferRing, uring: *IoUring, entries: u32, group_id: u16) void {
+        uring.unregister_buffer_ring(group_id) catch {};
+        var mmap: []align(page_size_min) u8 = undefined;
+        mmap.ptr = @ptrCast(br);
+        mmap.len = entries * @sizeOf(Buffer);
+        posix.munmap(mmap);
     }
 
-    // Prepare multishot recv operation which will select buffer from this group.
-    pub fn recv_multishot(self: *BufferGroup, user_data: u64, fd: posix.fd_t, flags: u32) !*linux.io_uring_sqe {
-        var sqe = try self.recv(user_data, fd, flags);
-        sqe.ioprio |= linux.IORING_RECV_MULTISHOT;
-        return sqe;
+    /// Calculates the appropriate size mask for a buffer ring.
+    /// `entries` is the ring entries as specified in io_uring_register_buf_ring
+    pub fn mask(_: *align(page_size_min) BufferRing, entries: u16) u16 {
+        return entries - 1;
     }
 
-    // Get buffer by id.
-    fn get_by_id(self: *BufferGroup, buffer_id: u16) []u8 {
-        const pos = self.buffer_size * buffer_id;
-        return self.buffers[pos .. pos + self.buffer_size][self.heads[buffer_id]..];
     }
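+    // Example (sketch): the add/advance publication protocol, assuming `br`
+    // was obtained from `IoUring.init_buffer_ring` and `bufs` is a slice of
+    // equally sized buffers; names are illustrative.
+    //
+    //     const m = br.mask(@intCast(bufs.len));
+    //     for (bufs, 0..) |b, i| br.add(b, @intCast(i), m, @intCast(i));
+    //     br.advance(@intCast(bufs.len)); // publish all entries at once
+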
+    /// Assigns `buffer` to the `br` buffer ring.
+    /// `buffer_id` is the identifier which will be returned in the CQE.
+    /// `buffer_offset` is the offset to insert at from the current tail.
+    /// If just one buffer is provided before the ring tail is committed with
+    /// advance then offset should be 0.
+    /// If buffers are provided in a loop before being committed, the offset must
+    /// be incremented by one for each buffer added.
+    pub fn add(
+        br: *align(page_size_min) BufferRing,
+        buffer: []u8,
+        buffer_id: u16,
+        buffer_mask: u16,
+        buffer_offset: u16,
+    ) void {
+        const bufs: [*]Buffer = @ptrCast(br);
+        const buf: *Buffer = &bufs[(br.tail +% buffer_offset) & buffer_mask];
+
+        buf.addr = @intFromPtr(buffer.ptr);
+        buf.len = @intCast(buffer.len);
+        buf.bid = buffer_id;
     }
 
-    // Get buffer by CQE.
-    pub fn get(self: *BufferGroup, cqe: linux.io_uring_cqe) ![]u8 {
-        const buffer_id = try cqe.buffer_id();
-        const used_len = @as(usize, @intCast(cqe.res));
-        return self.get_by_id(buffer_id)[0..used_len];
+    /// Make `count` new buffers visible to the kernel. Called after
+    /// `io_uring_buf_ring_add` has been called `count` times to fill in new
+    /// buffers.
+    pub fn advance(br: *align(page_size_min) BufferRing, count: u16) void {
+        const tail: u16 = br.tail +% count;
+        @atomicStore(u16, &br.tail, tail, .release);
     }
+};
 
-    // Release buffer from CQE to the kernel.
-    pub fn put(self: *BufferGroup, cqe: linux.io_uring_cqe) !void {
-        const buffer_id = try cqe.buffer_id();
-        if (cqe.flags & linux.IORING_CQE_F_BUF_MORE == linux.IORING_CQE_F_BUF_MORE) {
-            // Incremental consumption active, kernel will write to the this buffer again
-            const used_len = @as(u32, @intCast(cqe.res));
-            // Track what part of the buffer is used
-            self.heads[buffer_id] += used_len;
-            return;
-        }
-        self.heads[buffer_id] = 0;
+/// argument for IORING_(UN)REGISTER_PBUF_RING
+/// matches `io_uring_buf_reg` in liburing
+pub const BufferRegister = extern struct {
+    ring_addr: u64,
+    ring_entries: u32,
+    bgid: u16,
+    flags: Flags,
+    resv: [3]u64,
+
+    // COMMIT: new IORING_REGISTER_PBUF_RING flags
+    /// Flags for IORING_REGISTER_PBUF_RING.
+    pub const Flags = packed struct(u16) {
+        /// IOU_PBUF_RING_MMAP:
+        /// If set, kernel will allocate the memory for the ring.
+        /// The application must not set a ring_addr in struct io_uring_buf_reg
+        /// instead it must subsequently call mmap(2) with the offset set
+        /// as: IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) to get
+        /// a virtual mapping for the ring.
+        iou_pbuf_ring_mmap: bool = false,
+        /// IOU_PBUF_RING_INC:
+        /// If set, buffers consumed from this buffer ring can be
+        /// consumed incrementally. Normally one (or more) buffers
+        /// are fully consumed. With incremental consumptions, it's
+        /// feasible to register big ranges of buffers, and each
+        /// use of it will consume only as much as it needs. This
+        /// requires that both the kernel and application keep
+        /// track of where the current read/recv index is at.
+        iou_pbuf_ring_inc: bool = false,
+        _: u14 = 0,
+    };
+};
 
-        // Release buffer to the kernel.
-        const mask = buf_ring_mask(self.buffers_count);
-        buf_ring_add(self.br, self.get_by_id(buffer_id), buffer_id, mask, 0);
-        buf_ring_advance(self.br, 1);
-    }
+/// argument for IORING_REGISTER_PBUF_STATUS
+/// matches `io_uring_buf_status` in liburing
+pub const BufferStatus = extern struct {
+    /// input
+    buf_group: u32,
+    /// output
+    head: u32,
+    resv: [8]u32,
 };
 
-/// Registers a shared buffer ring to be used with provided buffers.
-/// `entries` number of `io_uring_buf` structures is mem mapped and shared by kernel.
-/// `fd` is IO_Uring.fd for which the provided buffer ring is being registered.
-/// `entries` is the number of entries requested in the buffer ring, must be power of 2.
-/// `group_id` is the chosen buffer group ID, unique in IO_Uring. -pub fn setup_buf_ring( - fd: posix.fd_t, - entries: u16, - group_id: u16, - flags: linux.io_uring_buf_reg.Flags, -) !*align(page_size_min) linux.io_uring_buf_ring { - if (entries == 0 or entries > 1 << 15) return error.EntriesNotInRange; - if (!std.math.isPowerOfTwo(entries)) return error.EntriesNotPowerOfTwo; +/// argument for IORING_(UN)REGISTER_NAPI +/// matches `io_uring_napi` in liburing +pub const Napi = extern struct { + busy_poll_to: u32, + prefer_busy_poll: u8, + pad: [3]u8, + resv: u64, +}; - const mmap_size = @as(usize, entries) * @sizeOf(linux.io_uring_buf); - const mmap = try posix.mmap( - null, - mmap_size, - posix.PROT.READ | posix.PROT.WRITE, - .{ .TYPE = .PRIVATE, .ANONYMOUS = true }, - -1, - 0, - ); - errdefer posix.munmap(mmap); - assert(mmap.len == mmap_size); +// COMMIT: new struct type +/// Argument for io_uring_enter(2) with IORING_GETEVENTS | IORING_ENTER_EXT_ARG_REG +/// set, where the actual argument is an index into a previously registered +/// fixed wait region described by the below structure. +/// matches `io_uring_reg_wait` in liburing +pub const RegisterWait = extern struct { + ts: linux.kernel_timespec, + min_wait_usec: u32, + flags: Flags, + sigmask: u64, + sigmask_sz: u32, + pad: [3]u32, + pad2: [2]u64, + + // COMMIT: new constant + pub const Flags = packed struct(u32) { + reg_wait_ts: bool = false, + _: u31 = 0, + }; +}; - const br: *align(page_size_min) linux.io_uring_buf_ring = @ptrCast(mmap.ptr); - try register_buf_ring(fd, @intFromPtr(br), entries, group_id, flags); - return br; -} +/// Argument for `io_uring_enter(2)` with IORING_GETEVENTS | +/// IORING_ENTER_EXT_ARG +/// matches `io_uring_getevents_arg` in liburing +pub const GetEventsArg = extern struct { + sigmask: u64, + sigmask_sz: u32, + pad: u32, + ts: u64, +}; -fn register_buf_ring( - fd: posix.fd_t, +// COMMIT: fix type definition of io_uring_sync_cancel_reg +/// Argument for IORING_REGISTER_SYNC_CANCEL +/// matches `io_uring_sync_cancel_reg` in liburing +pub const SyncCancelRegister = extern struct { addr: u64, - entries: u32, - group_id: u16, - flags: linux.io_uring_buf_reg.Flags, -) !void { - var reg = mem.zeroInit(linux.io_uring_buf_reg, .{ - .ring_addr = addr, - .ring_entries = entries, - .bgid = group_id, - .flags = flags, - }); - var res = linux.io_uring_register(fd, .REGISTER_PBUF_RING, @as(*const anyopaque, @ptrCast(®)), 1); - if (linux.E.init(res) == .INVAL and reg.flags.inc) { - // Retry without incremental buffer consumption. - // It is available since kernel 6.12. returns INVAL on older. 
- reg.flags.inc = false; - res = linux.io_uring_register(fd, .REGISTER_PBUF_RING, @as(*const anyopaque, @ptrCast(®)), 1); - } - try handle_register_buf_ring_result(res); -} + fd: i32, + flags: uflags.AsyncCancel, + timeout: linux.kernel_timespec, + opcode: Op, + pad: [7]u8, + pad2: [4]u64, +}; -fn unregister_buf_ring(fd: posix.fd_t, group_id: u16) !void { - var reg = mem.zeroInit(linux.io_uring_buf_reg, .{ - .bgid = group_id, - }); - const res = linux.io_uring_register( - fd, - .UNREGISTER_PBUF_RING, - @as(*const anyopaque, @ptrCast(®)), - 1, - ); - try handle_register_buf_ring_result(res); -} +/// Argument for IORING_REGISTER_FILE_ALLOC_RANGE +/// The range is specified as [off, off + len) +/// matches `io_uring_file_index_range` in liburing +pub const FileIndexRange = extern struct { + off: u32, + len: u32, + resv: u64, +}; -fn handle_register_buf_ring_result(res: usize) !void { - switch (linux.E.init(res)) { - .SUCCESS => {}, - .INVAL => return error.ArgumentsInvalid, - else => |errno| return posix.unexpectedErrno(errno), - } -} +/// matches `io_uring_recvmsg_out` in liburing +pub const RecvMsgOut = extern struct { + namelen: u32, + controllen: u32, + payloadlen: u32, + flags: linux.Msg, +}; -// Unregisters a previously registered shared buffer ring, returned from io_uring_setup_buf_ring. -pub fn free_buf_ring(fd: posix.fd_t, br: *align(page_size_min) linux.io_uring_buf_ring, entries: u32, group_id: u16) void { - unregister_buf_ring(fd, group_id) catch {}; - var mmap: []align(page_size_min) u8 = undefined; - mmap.ptr = @ptrCast(br); - mmap.len = entries * @sizeOf(linux.io_uring_buf); - posix.munmap(mmap); -} +/// Zero copy receive refill queue entry +/// matches `io_uring_zcrx_rqe` in liburing +pub const ZcrxRqe = extern struct { + off: u64, + len: u32, + __pad: u32, +}; -/// Initialises `br` so that it is ready to be used. -pub fn buf_ring_init(br: *linux.io_uring_buf_ring) void { - br.tail = 0; -} +/// matches `io_uring_zcrx_cqe` in liburing +pub const ZcrxCqe = extern struct { + off: u64, + __pad: u64, +}; -/// Calculates the appropriate size mask for a buffer ring. -/// `entries` is the ring entries as specified in io_uring_register_buf_ring. -pub fn buf_ring_mask(entries: u16) u16 { - return entries - 1; -} +/// matches `io_uring_zcrx_offsets` in liburing +pub const ZcrxOffsets = extern struct { + head: u32, + tail: u32, + rqes: u32, + __resv2: u32, + __resv: [2]u64, +}; -/// Assigns `buffer` with the `br` buffer ring. -/// `buffer_id` is identifier which will be returned in the CQE. -/// `buffer_offset` is the offset to insert at from the current tail. -/// If just one buffer is provided before the ring tail is committed with advance then offset should be 0. -/// If buffers are provided in a loop before being committed, the offset must be incremented by one for each buffer added. 
-pub fn buf_ring_add(
-    br: *linux.io_uring_buf_ring,
-    buffer: []u8,
-    buffer_id: u16,
-    mask: u16,
-    buffer_offset: u16,
-) void {
-    const bufs: [*]linux.io_uring_buf = @ptrCast(br);
-    const buf: *linux.io_uring_buf = &bufs[(br.tail +% buffer_offset) & mask];
+/// matches `io_uring_zcrx_area_reg` in liburing
+pub const ZcrxAreaRegister = extern struct {
+    addr: u64,
+    len: u64,
+    rq_area_token: u64,
+    flags: Flags,
+    dmabuf_fd: u32,
+    __resv2: [2]u64,
+
+    pub const Flags = packed struct(u32) {
+        dmabuf: bool = false,
+        _: u31 = 0,
+    };
+};
 
-    buf.addr = @intFromPtr(buffer.ptr);
-    buf.len = @intCast(buffer.len);
-    buf.bid = buffer_id;
-}
+/// Argument for IORING_REGISTER_ZCRX_IFQ
+/// matches `io_uring_zcrx_ifq_reg` in liburing
+pub const ZcrxIfqRegister = extern struct {
+    if_idx: u32,
+    if_rxq: u32,
+    rq_entries: u32,
+    // FIXME: the exact semantics of these flags are still unclear
+    flags: u32,
+    /// pointer to struct io_uring_zcrx_area_reg
+    area_ptr: u64,
+    /// struct io_uring_region_desc
+    region_ptr: u64,
+    offsets: ZcrxOffsets, // the kernel fills in the offsets
+    zcrx_id: u32,
+    __resv2: u32,
+    __resv: [3]u64,
+};
 
-/// Make `count` new buffers visible to the kernel. Called after
-/// `io_uring_buf_ring_add` has been called `count` times to fill in new buffers.
-pub fn buf_ring_advance(br: *linux.io_uring_buf_ring, count: u16) void {
-    const tail: u16 = br.tail +% count;
-    @atomicStore(u16, &br.tail, tail, .release);
-}
 
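+// Example (sketch): the kernel-allocated ring variant. With
+// `iou_pbuf_ring_mmap` set in `BufferRegister.Flags` (see above), the
+// application skips its own allocation and maps the kernel's ring using the
+// magic offsets defined just below; `bgid` and `size` are illustrative.
+//
+//     const off = constants.OFF_PBUF_RING | (@as(u64, bgid) << constants.OFF_PBUF_SHIFT);
+//     const ring_mem = try posix.mmap(null, size, posix.PROT.READ | posix.PROT.WRITE,
+//         .{ .TYPE = .SHARED, .POPULATE = true }, ring.fd, off);
+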
+// COMMIT: move IoUring constants to Constants
+pub const constants = struct {
+    /// If sqe.file_index (splice_fd_in in Zig Struct) is set to this for
+    /// opcodes that instantiate a new direct descriptor (like
+    /// openat/openat2/accept), then io_uring will allocate an available
+    /// direct descriptor instead of having the application pass one in. The
+    /// picked direct descriptor will be returned in cqe.res, or -ENFILE
+    /// if the space is full.
+    pub const FILE_INDEX_ALLOC = math.maxInt(u32);
+
+    pub const CMD_MASK = 1 << 0;
+
+    pub const TIMEOUT_CLOCK_MASK = ((1 << 2) | (1 << 3));
+    pub const TIMEOUT_UPDATE_MASK = ((1 << 1) | (1 << 4));
+
+    pub const CQE_BUFFER_SHIFT = 16;
+
+    /// cqe.res for IORING_CQE_F_NOTIF if IORING_SEND_ZC_REPORT_USAGE was
+    /// requested. It should be treated as a flag; all other bits of cqe.res
+    /// should be treated as reserved!
+    pub const NOTIF_USAGE_ZC_COPIED = (1 << 31);
+
+    // Magic offsets for the application to mmap the data it needs
+    pub const OFF_SQ_RING = 0;
+    pub const OFF_CQ_RING = 0x8000000;
+    pub const OFF_SQES = 0x10000000;
+    // COMMIT: new magic constants
+    pub const OFF_PBUF_RING = 0x80000000;
+    pub const OFF_PBUF_SHIFT = 16;
+    pub const OFF_MMAP_MASK = 0xf8000000;
+
+    /// Skip updating fd indexes set to this value in the fd table
+    pub const REGISTER_FILES_SKIP = -2;
+
+    // COMMIT: new TX Timestamp definition
+    /// SOCKET_URING_OP_TX_TIMESTAMP definitions
+    pub const TIMESTAMP_HW_SHIFT = 16;
+    /// The cqe.flags bit from which the timestamp type is stored
+    pub const TIMESTAMP_TYPE_SHIFT = (TIMESTAMP_HW_SHIFT + 1);
+    /// The cqe.flags flag signifying whether it's a hardware timestamp
+    pub const CQE_F_TSTAMP_HW = (1 << TIMESTAMP_HW_SHIFT);
+
+    /// The bit from which area id is encoded into offsets
+    pub const ZCRX_AREA_SHIFT = 48;
+    pub const ZCRX_AREA_MASK = (~((1 << ZCRX_AREA_SHIFT) - 1));
+
+    // Flag added to the opcode to use a registered ring fd
+    pub const REGISTER_USE_REGISTERED_RING = 1 << 31;
+};
+
+// COMMIT: move IoUring flags to Flags struct
+pub const uflags = struct {
+    /// io_uring_setup() flags
+    pub const Setup = packed struct(u32) {
+        /// io_context is polled
+        iopoll: bool = false,
+        /// SQ poll thread
+        sqpoll: bool = false,
+        /// sq_thread_cpu is valid
+        sq_aff: bool = false,
+        /// app defines CQ size
+        cqsize: bool = false,
+        /// clamp SQ/CQ ring sizes
+        clamp: bool = false,
+        /// attach to existing wq
+        attach_wq: bool = false,
+        /// start with ring disabled
+        r_disabled: bool = false,
+        /// continue submit on error
+        submit_all: bool = false,
+        /// Cooperative task running. When requests complete, they often require
+        /// forcing the submitter to transition to the kernel to complete. If
+        /// this flag is set, work will be done when the task transitions
+        /// anyway, rather than force an inter-processor interrupt reschedule.
+        /// This avoids interrupting a task running in userspace, and saves an
+        /// IPI.
+        coop_taskrun: bool = false,
+        /// If COOP_TASKRUN is set, get notified if task work is available for
+        /// running and a kernel transition would be needed to run it. This sets
+        /// IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
+        taskrun_flag: bool = false,
+        /// SQEs are 128 byte
+        sqe128: bool = false,
+        /// CQEs are 32 byte
+        cqe32: bool = false,
+        /// Only one task is allowed to submit requests
+        single_issuer: bool = false,
+        /// Defer running task work to get events.
+        /// Rather than running bits of task work whenever the task transitions
+        /// try to do it just before it is needed.
+        defer_taskrun: bool = false,
+        /// Application provides the memory for the rings
+        no_mmap: bool = false,
+        /// Register the ring fd in itself for use with
+        /// IORING_REGISTER_USE_REGISTERED_RING; return a registered fd index
+        /// rather than an fd.
+        registered_fd_only: bool = false,
+        /// Removes indirection through the SQ index array.
+        no_sqarray: bool = false,
+        // COMMIT: new setup flags
+        /// Use hybrid poll in iopoll process
+        hybrid_iopoll: bool = false,
+        /// Allow both 16b and 32b CQEs. If a 32b CQE is posted, it will have
+        /// IORING_CQE_F_32 set in cqe.flags.
+        cqe_mixed: bool = false,
+        _20: u13 = 0,
+    };
+
+    /// sqe.uring_cmd_flags (rw_flags in the Zig struct)
+    /// top 8 bits aren't available for userspace
+    /// use registered buffer; pass this flag along with setting sqe.buf_index.
+ pub const Cmd = packed struct(u32) { + cmd_fixed: bool = false, + _2: u31 = 0, + }; + + /// sqe.fsync_flags (rw_flags in the Zig struct) + pub const Fsync = packed struct(u32) { + datasync: bool = false, + _2: u31 = 0, + }; + + /// sqe.timeout_flags + pub const Timeout = packed struct(u32) { + timeout_abs: bool = false, + /// Available since Linux 5.11 + timeout_update: bool = false, + /// Available since Linux 5.15 + timeout_boottime: bool = false, + /// Available since Linux 5.15 + timeout_realtime: bool = false, + /// Available since Linux 5.15 + link_timeout_update: bool = false, + /// Available since Linux 5.16 + timeout_etime_success: bool = false, + // COMMIT: new Timeout Flag + // TODO: add when it became available + timeout_multishot: bool = false, + _8: u25 = 0, + }; + + /// sqe.splice_flags (rw_flags in Zig Struct) + /// extends splice(2) flags + pub const Splice = packed struct(u32) { + _1: u31 = 0, + /// the last bit of __u32 + f_fd_in_fixed: bool = false, + }; + + /// POLL_ADD flags. Note that since sqe.poll_events (rw_flags in Zig Struct) + /// is the flag space, the command flags for POLL_ADD are stored in sqe.len. + pub const Poll = packed struct(u32) { + /// IORING_POLL_ADD_MULTI + /// Multishot poll. Sets IORING_CQE_F_MORE if the poll handler will + /// continue to report CQEs on behalf of the same SQE. + add_multi: bool = false, + // TODO: verify this doc comment is valid for the 2 flags below + /// IORING_POLL_UPDATE + /// Update existing poll request, matching sqe.addr as the old user_data + /// field. + update_events: bool = false, + /// IORING_POLL_UPDATE + /// Update existing poll request, matching sqe.addr as the old user_data + /// field. + update_user_data: bool = false, + /// IORING_POLL_LEVEL + /// Level triggered poll. + add_level: bool = false, + _5: u28 = 0, + }; + + /// ASYNC_CANCEL flags. + pub const AsyncCancel = packed struct(u32) { + /// IORING_ASYNC_CANCEL_ALL + /// Cancel all requests that match the given key + cancel_all: bool = false, + /// IORING_ASYNC_CANCEL_FD + /// Key off 'fd' for cancelation rather than the request 'user_data' + cancel_fd: bool = false, + /// IORING_ASYNC_CANCEL_ANY + /// Match any request + cancel_any: bool = false, + /// IORING_ASYNC_CANCEL_FD_FIXED + /// 'fd' passed in is a fixed descriptor + cancel_fd_fixed: bool = false, + // COMMIT: new AsyncCancel Flags + /// IORING_ASYNC_CANCEL_USERDATA + /// Match on user_data, default for no other key + cancel_userdata: bool = false, + /// IORING_ASYNC_CANCEL_OP + /// Match request based on opcode + cancel_op: bool = false, + _7: u26 = 0, + }; + + /// IORING_OP_MSG_RING flags (sqe.msg_ring_flags or sqe.rw_flags in Zig Struct) + pub const MsgRing = packed struct(u32) { + /// IORING_MSG_RING_CQE_SKIP Don't post a CQE to the target ring. + /// Not applicable for IORING_MSG_DATA, obviously. 
+        cqe_skip: bool = false,
+        /// Pass through the flags from sqe.file_index to cqe.flags
+        flags_pass: bool = false,
+        _3: u30 = 0,
+    };
+
+    // COMMIT: new flag
+    /// IORING_OP_FIXED_FD_INSTALL flags (sqe.install_fd_flags or sqe.rw_flags in Zig Struct)
+    pub const FixedFd = packed struct(u32) {
+        /// IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC
+        no_cloexec: bool = false,
+        _2: u31 = 0,
+    };
+
+    // COMMIT: new flags
+    /// IORING_OP_NOP flags (sqe.nop_flags or sqe.rw_flags in Zig Struct)
+    pub const Nop = packed struct(u32) {
+        /// IORING_NOP_INJECT_RESULT Inject result from sqe.result
+        inject_result: bool = false,
+        _2: u4 = 0,
+        cqe32: bool = false,
+        _7: u26 = 0,
+    };
+
+    /// io_uring_enter(2) flags
+    pub const Enter = packed struct(u32) {
+        getevents: bool = false,
+        sq_wakeup: bool = false,
+        sq_wait: bool = false,
+        ext_arg: bool = false,
+        registered_ring: bool = false,
+        // COMMIT: new flags
+        abs_timer: bool = false,
+        ext_arg_reg: bool = false,
+        no_iowait: bool = false,
+        _9: u24 = 0,
+
+        /// Ensure only the `Enter` flags that mirror `Init` flags are set
+        pub fn valid_init_flags(self: Enter) bool {
+            const valid_flags: u32 = @bitCast(Enter{ .registered_ring = true, .no_iowait = true });
+            const flags: u32 = @bitCast(self);
+            // check if any invalid flags are set
+            return (flags & ~valid_flags) == 0;
+        }
+
+        pub fn empty(flags: Enter) bool {
+            return @as(u32, @bitCast(flags)) == 0;
+        }
+    };
+
+    /// matches INT_FLAG_* in liburing
+    pub const Init = packed struct(u8) {
+        reg_reg_ring: bool = false,
+        app_mem: bool = false,
+        cq_enter: bool = false,
+        _4: u1 = 0,
+        /// matches `registered_ring` flag in `Enter`
+        reg_ring: bool = false,
+        _6: u2 = 0,
+        /// matches `no_iowait` flag in `Enter`
+        no_iowait: bool = false,
+
+        /// Return all valid `Enter` flags set in `Init`
+        pub fn enter_flags(self: Init) Enter {
+            const valid_flags: u8 = @bitCast(Init{ .reg_ring = true, .no_iowait = true });
+            const flags: u8 = @bitCast(self);
+            return @bitCast(@as(u32, @intCast(flags & valid_flags)));
+        }
+    };
+
+    /// io_uring_params.features flags
+    pub const Features = packed struct(u32) {
+        single_mmap: bool = false,
+        nodrop: bool = false,
+        submit_stable: bool = false,
+        rw_cur_pos: bool = false,
+        cur_personality: bool = false,
+        fast_poll: bool = false,
+        poll_32bits: bool = false,
+        sqpoll_nonfixed: bool = false,
+        ext_arg: bool = false,
+        native_workers: bool = false,
+        rsrc_tags: bool = false,
+        cqe_skip: bool = false,
+        linked_file: bool = false,
+        // COMMIT: add new feature flags
+        reg_reg_ring: bool = false,
+        recvsend_bundle: bool = false,
+        min_timeout: bool = false,
+        rw_attr: bool = false,
+        no_iowait: bool = false,
+        _19: u14 = 0,
+
+        pub fn empty(features: Features) bool {
+            return @as(u32, @bitCast(features)) == 0;
+        }
+    };
+};
+
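+// Example (sketch): how the typed flag structs above read at call sites,
+// replacing the old integer masks; the `init` signature shown here follows
+// the tests at the end of this file, and the flag choices are illustrative.
+//
+//     var ring = try IoUring.init(8, .{ .clamp = true, .single_issuer = true });
+//     defer ring.deinit();
+//     if (ring.features.nodrop) {
+//         // CQ overflow is handled differently on this kernel
+//     }
+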
+/// `io_uring_register(2)` opcodes and arguments
+/// matches `io_uring_register_op` in liburing
+pub const RegisterOp = enum(u8) {
+    register_buffers,
+    unregister_buffers,
+    register_files,
+    unregister_files,
+    register_eventfd,
+    unregister_eventfd,
+    register_files_update,
+    register_eventfd_async,
+    register_probe,
+    register_personality,
+    unregister_personality,
+    register_restrictions,
+    register_enable_rings,
+
+    // extended with tagging
+    register_files2,
+    register_files_update2,
+    register_buffers2,
+    register_buffers_update,
+
+    // set/clear io-wq thread affinities
+    register_iowq_aff,
+    unregister_iowq_aff,
+
+    // set/get max number of io-wq workers
+    register_iowq_max_workers,
+
+    // register/unregister io_uring fd with the ring
+    register_ring_fds,
+    unregister_ring_fds,
+
+    // register ring based provide buffer group
+    register_pbuf_ring,
+    unregister_pbuf_ring,
+
+    // sync cancelation API
+    register_sync_cancel,
+
+    // register a range of fixed file slots for automatic slot allocation
+    register_file_alloc_range,
+
+    // return status information for a buffer group
+    register_pbuf_status,
+
+    // set/clear busy poll settings
+    register_napi,
+    unregister_napi,
+
+    register_clock,
+
+    // clone registered buffers from source ring to current ring
+    register_clone_buffers,
+
+    // send MSG_RING without having a ring
+    register_send_msg_ring,
+
+    // register a netdev hw rx queue for zerocopy
+    register_zcrx_ifq,
+
+    // resize CQ ring
+    register_resize_rings,
+
+    register_mem_region,
+
+    // COMMIT: new register opcode
+    // query various aspects of io_uring, see linux/io_uring/query.h
+    register_query,
+
+    _,
+};
+
+/// io-wq worker categories
+/// matches `io_wq_type` in liburing
+pub const IoWqCategory = enum(u8) {
+    bound,
+    unbound,
+    _,
+};
+
+/// matches `io_uring_socket_op` in liburing
+pub const SocketOp = enum(u16) {
+    siocinq,
+    siocoutq,
+    getsockopt,
+    setsockopt,
+    // COMMIT: new socket op
+    tx_timestamp,
+    _,
+};
+
+/// io_uring_restriction.opcode values
+/// matches `io_uring_register_restriction_op` in liburing
+pub const RestrictionOp = enum(u16) {
+    /// Allow an io_uring_register(2) opcode
+    register_op = 0,
+    /// Allow an sqe opcode
+    sqe_op = 1,
+    /// Allow sqe flags
+    sqe_flags_allowed = 2,
+    /// Require sqe flags (these flags must be set on each submission)
+    sqe_flags_required = 3,
+
+    _,
+};
+
+/// IORING_OP_MSG_RING command types, stored in sqe.addr
+/// matches `io_uring_msg_ring_flags` in liburing
+pub const MsgRingCmd = enum {
+    /// pass sqe->len as 'res' and off as user_data
+    data,
+    /// send a registered fd to another ring
+    send_fd,
+};
+
+// COMMIT: move Op into IoUring
+/// matches `io_uring_op` in liburing
+pub const Op = enum(u8) {
+    nop,
+    readv,
+    writev,
+    fsync,
+    read_fixed,
+    write_fixed,
+    poll_add,
+    poll_remove,
+    sync_file_range,
+    sendmsg,
+    recvmsg,
+    timeout,
+    timeout_remove,
+    accept,
+    async_cancel,
+    link_timeout,
+    connect,
+    fallocate,
+    openat,
+    close,
+    files_update,
+    statx,
+    read,
+    write,
+    fadvise,
+    madvise,
+    send,
+    recv,
+    epoll_ctl,
+    openat2,
+    splice,
+    provide_buffers,
+    remove_buffers,
+    tee,
+    shutdown,
+    renameat,
+    unlinkat,
+    mkdirat,
+    symlinkat,
+    linkat,
+    msg_ring,
+    fsetxattr,
+    setxattr,
+    fgetxattr,
+    getxattr,
+    socket,
+    uring_cmd,
+    send_zc,
+    sendmsg_zc,
+    read_multishot,
+    waitid,
+    futex_wait,
+    futex_wake,
+    futex_waitv,
+    fixed_fd_install,
+    ftruncate,
+    bind,
+    listen,
+    recv_zc,
+    // COMMIT: new OPs
+    // TODO: to be implemented
+    epoll_wait,
+    readv_fixed,
+    writev_fixed,
+    pipe,
+
+    _,
+};
+
+// TODO: these types no longer appear in liburing or the current kernel
+// headers; they should probably be removed
+pub const io_uring_notification_slot = extern struct {
+    tag: u64,
+    resv: [3]u64,
+};
+pub const io_uring_notification_register = extern struct {
+    nr_slots: u32,
+    resv: u32,
+    resv2: u64,
+    data: u64,
+    resv3: u64,
+};
 
 test "structs/offsets/entries" {
     if (!is_linux) return error.SkipZigTest;
 
-    try testing.expectEqual(@as(usize, 120), @sizeOf(linux.io_uring_params));
-    try testing.expectEqual(@as(usize, 64), @sizeOf(linux.io_uring_sqe));
-    try testing.expectEqual(@as(usize, 16), @sizeOf(linux.io_uring_cqe));
+    try testing.expectEqual(120, @sizeOf(Params));
+    try testing.expectEqual(64, @sizeOf(Sqe));
+    try testing.expectEqual(16, @sizeOf(Cqe));
 
-    try
testing.expectEqual(0, linux.IORING_OFF_SQ_RING); - try testing.expectEqual(0x8000000, linux.IORING_OFF_CQ_RING); - try testing.expectEqual(0x10000000, linux.IORING_OFF_SQES); + try testing.expectEqual(0, constants.OFF_SQ_RING); + try testing.expectEqual(0x8000000, constants.OFF_CQ_RING); + try testing.expectEqual(0x10000000, constants.OFF_SQES); - try testing.expectError(error.EntriesZero, IoUring.init(0, 0)); - try testing.expectError(error.EntriesNotPowerOfTwo, IoUring.init(3, 0)); + try testing.expectError(error.EntriesZero, IoUring.init(0, .{})); + try testing.expectError(error.EntriesNotPowerOfTwo, IoUring.init(3, .{})); } test "nop" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(1, 0) catch |err| switch (err) { + var ring = IoUring.init(1, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, }; defer { ring.deinit(); - testing.expectEqual(@as(posix.fd_t, -1), ring.fd) catch @panic("test failed"); + testing.expectEqual(-1, ring.fd) catch @panic("test failed"); } const sqe = try ring.nop(0xaaaaaaaa); - try testing.expectEqual(linux.io_uring_sqe{ - .opcode = .NOP, - .flags = 0, - .ioprio = 0, + try testing.expectEqual(Sqe{ + .opcode = .nop, + .flags = .{}, + .ioprio = .init_empty(), .fd = 0, .off = 0, .addr = 0, @@ -1894,46 +4575,46 @@ test "nop" { .resv = 0, }, sqe.*); - try testing.expectEqual(@as(u32, 0), ring.sq.sqe_head); - try testing.expectEqual(@as(u32, 1), ring.sq.sqe_tail); - try testing.expectEqual(@as(u32, 0), ring.sq.tail.*); - try testing.expectEqual(@as(u32, 0), ring.cq.head.*); - try testing.expectEqual(@as(u32, 1), ring.sq_ready()); - try testing.expectEqual(@as(u32, 0), ring.cq_ready()); - - try testing.expectEqual(@as(u32, 1), try ring.submit()); - try testing.expectEqual(@as(u32, 1), ring.sq.sqe_head); - try testing.expectEqual(@as(u32, 1), ring.sq.sqe_tail); - try testing.expectEqual(@as(u32, 1), ring.sq.tail.*); - try testing.expectEqual(@as(u32, 0), ring.cq.head.*); - try testing.expectEqual(@as(u32, 0), ring.sq_ready()); - - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(0, ring.sq.sqe_head); + try testing.expectEqual(1, ring.sq.sqe_tail); + try testing.expectEqual(0, ring.sq.tail.*); + try testing.expectEqual(0, ring.cq.head.*); + try testing.expectEqual(1, ring.sq_ready()); + try testing.expectEqual(0, ring.cq_ready()); + + try testing.expectEqual(1, try ring.submit()); + try testing.expectEqual(1, ring.sq.sqe_head); + try testing.expectEqual(1, ring.sq.sqe_tail); + try testing.expectEqual(1, ring.sq.tail.*); + try testing.expectEqual(0, ring.cq.head.*); + try testing.expectEqual(0, ring.sq_ready()); + + try testing.expectEqual(Cqe{ .user_data = 0xaaaaaaaa, .res = 0, - .flags = 0, + .flags = .{}, }, try ring.copy_cqe()); - try testing.expectEqual(@as(u32, 1), ring.cq.head.*); - try testing.expectEqual(@as(u32, 0), ring.cq_ready()); + try testing.expectEqual(1, ring.cq.head.*); + try testing.expectEqual(0, ring.cq_ready()); const sqe_barrier = try ring.nop(0xbbbbbbbb); - sqe_barrier.flags |= linux.IOSQE_IO_DRAIN; - try testing.expectEqual(@as(u32, 1), try ring.submit()); - try testing.expectEqual(linux.io_uring_cqe{ + sqe_barrier.flags.io_drain = true; + try testing.expectEqual(1, try ring.submit()); + try testing.expectEqual(Cqe{ .user_data = 0xbbbbbbbb, .res = 0, - .flags = 0, + .flags = .{}, }, try ring.copy_cqe()); - try testing.expectEqual(@as(u32, 2), ring.sq.sqe_head); - try testing.expectEqual(@as(u32, 2), 
ring.sq.sqe_tail); - try testing.expectEqual(@as(u32, 2), ring.sq.tail.*); - try testing.expectEqual(@as(u32, 2), ring.cq.head.*); + try testing.expectEqual(2, ring.sq.sqe_head); + try testing.expectEqual(2, ring.sq.sqe_tail); + try testing.expectEqual(2, ring.sq.tail.*); + try testing.expectEqual(2, ring.cq.head.*); } test "readv" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(1, 0) catch |err| switch (err) { + var ring = IoUring.init(1, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -1957,15 +4638,15 @@ test "readv" { var buffer = [_]u8{42} ** 128; var iovecs = [_]posix.iovec{posix.iovec{ .base = &buffer, .len = buffer.len }}; const sqe = try ring.read(0xcccccccc, fd_index, .{ .iovecs = iovecs[0..] }, 0); - try testing.expectEqual(linux.IORING_OP.READV, sqe.opcode); - sqe.flags |= linux.IOSQE_FIXED_FILE; + try testing.expectEqual(Op.readv, sqe.opcode); + sqe.flags.fixed_file = true; try testing.expectError(error.SubmissionQueueFull, ring.nop(0)); - try testing.expectEqual(@as(u32, 1), try ring.submit()); - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(1, try ring.submit()); + try testing.expectEqual(Cqe{ .user_data = 0xcccccccc, .res = buffer.len, - .flags = 0, + .flags = .{}, }, try ring.copy_cqe()); try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer.len), buffer[0..]); @@ -1975,7 +4656,7 @@ test "readv" { test "writev/fsync/readv" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(4, 0) catch |err| switch (err) { + var ring = IoUring.init(4, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -1999,45 +4680,45 @@ test "writev/fsync/readv" { posix.iovec{ .base = &buffer_read, .len = buffer_read.len }, }; - const sqe_writev = try ring.writev(0xdddddddd, fd, iovecs_write[0..], 17); - try testing.expectEqual(linux.IORING_OP.WRITEV, sqe_writev.opcode); - try testing.expectEqual(@as(u64, 17), sqe_writev.off); - sqe_writev.flags |= linux.IOSQE_IO_LINK; + const sqe_writev = try ring.write(0xdddddddd, fd, .{ .iovecs = iovecs_write[0..] }, 17); + try testing.expectEqual(Op.writev, sqe_writev.opcode); + try testing.expectEqual(17, sqe_writev.off); + sqe_writev.link_next(); - const sqe_fsync = try ring.fsync(0xeeeeeeee, fd, 0); - try testing.expectEqual(linux.IORING_OP.FSYNC, sqe_fsync.opcode); + const sqe_fsync = try ring.fsync(0xeeeeeeee, fd, .{}); + try testing.expectEqual(.fsync, sqe_fsync.opcode); try testing.expectEqual(fd, sqe_fsync.fd); - sqe_fsync.flags |= linux.IOSQE_IO_LINK; + sqe_fsync.link_next(); const sqe_readv = try ring.read(0xffffffff, fd, .{ .iovecs = iovecs_read[0..] 
}, 17); - try testing.expectEqual(linux.IORING_OP.READV, sqe_readv.opcode); - try testing.expectEqual(@as(u64, 17), sqe_readv.off); + try testing.expectEqual(Op.readv, sqe_readv.opcode); + try testing.expectEqual(17, sqe_readv.off); - try testing.expectEqual(@as(u32, 3), ring.sq_ready()); - try testing.expectEqual(@as(u32, 3), try ring.submit_and_wait(3)); - try testing.expectEqual(@as(u32, 0), ring.sq_ready()); - try testing.expectEqual(@as(u32, 3), ring.cq_ready()); + try testing.expectEqual(3, ring.sq_ready()); + try testing.expectEqual(3, try ring.submit_and_wait(3)); + try testing.expectEqual(0, ring.sq_ready()); + try testing.expectEqual(3, ring.cq_ready()); - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0xdddddddd, .res = buffer_write.len, - .flags = 0, + .flags = .{}, }, try ring.copy_cqe()); - try testing.expectEqual(@as(u32, 2), ring.cq_ready()); + try testing.expectEqual(2, ring.cq_ready()); - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0xeeeeeeee, .res = 0, - .flags = 0, + .flags = .{}, }, try ring.copy_cqe()); - try testing.expectEqual(@as(u32, 1), ring.cq_ready()); + try testing.expectEqual(1, ring.cq_ready()); - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0xffffffff, .res = buffer_read.len, - .flags = 0, + .flags = .{}, }, try ring.copy_cqe()); - try testing.expectEqual(@as(u32, 0), ring.cq_ready()); + try testing.expectEqual(0, ring.cq_ready()); try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]); } @@ -2045,7 +4726,7 @@ test "writev/fsync/readv" { test "write/read" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(2, 0) catch |err| switch (err) { + var ring = IoUring.init(2, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -2061,14 +4742,14 @@ test "write/read" { const buffer_write = [_]u8{97} ** 20; var buffer_read = [_]u8{98} ** 20; - const sqe_write = try ring.write(0x11111111, fd, buffer_write[0..], 10); - try testing.expectEqual(linux.IORING_OP.WRITE, sqe_write.opcode); - try testing.expectEqual(@as(u64, 10), sqe_write.off); - sqe_write.flags |= linux.IOSQE_IO_LINK; + const sqe_write = try ring.write(0x11111111, fd, .{ .buffer = buffer_write[0..] }, 10); + try testing.expectEqual(Op.write, sqe_write.opcode); + try testing.expectEqual(10, sqe_write.off); + sqe_write.flags.io_link = true; const sqe_read = try ring.read(0x22222222, fd, .{ .buffer = buffer_read[0..] 
}, 10); - try testing.expectEqual(linux.IORING_OP.READ, sqe_read.opcode); - try testing.expectEqual(@as(u64, 10), sqe_read.off); - try testing.expectEqual(@as(u32, 2), try ring.submit()); + try testing.expectEqual(Op.read, sqe_read.opcode); + try testing.expectEqual(10, sqe_read.off); + try testing.expectEqual(2, try ring.submit()); const cqe_write = try ring.copy_cqe(); const cqe_read = try ring.copy_cqe(); @@ -2076,15 +4757,15 @@ test "write/read" { // https://lwn.net/Articles/809820/ if (cqe_write.err() == .INVAL) return error.SkipZigTest; if (cqe_read.err() == .INVAL) return error.SkipZigTest; - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0x11111111, .res = buffer_write.len, - .flags = 0, + .flags = .{}, }, cqe_write); - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0x22222222, .res = buffer_read.len, - .flags = 0, + .flags = .{}, }, cqe_read); try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]); } @@ -2092,7 +4773,7 @@ test "write/read" { test "splice/read" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(4, 0) catch |err| switch (err) { + var ring = IoUring.init(4, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -2115,24 +4796,24 @@ test "splice/read" { _ = try file_src.write(&buffer_write); const fds = try posix.pipe(); - const pipe_offset: u64 = std.math.maxInt(u64); + const pipe_offset: u64 = math.maxInt(u64); const sqe_splice_to_pipe = try ring.splice(0x11111111, fd_src, 0, fds[1], pipe_offset, buffer_write.len); - try testing.expectEqual(linux.IORING_OP.SPLICE, sqe_splice_to_pipe.opcode); - try testing.expectEqual(@as(u64, 0), sqe_splice_to_pipe.addr); + try testing.expectEqual(Op.splice, sqe_splice_to_pipe.opcode); + try testing.expectEqual(0, sqe_splice_to_pipe.addr); try testing.expectEqual(pipe_offset, sqe_splice_to_pipe.off); - sqe_splice_to_pipe.flags |= linux.IOSQE_IO_LINK; + sqe_splice_to_pipe.link_next(); const sqe_splice_from_pipe = try ring.splice(0x22222222, fds[0], pipe_offset, fd_dst, 10, buffer_write.len); - try testing.expectEqual(linux.IORING_OP.SPLICE, sqe_splice_from_pipe.opcode); + try testing.expectEqual(Op.splice, sqe_splice_from_pipe.opcode); try testing.expectEqual(pipe_offset, sqe_splice_from_pipe.addr); - try testing.expectEqual(@as(u64, 10), sqe_splice_from_pipe.off); - sqe_splice_from_pipe.flags |= linux.IOSQE_IO_LINK; + try testing.expectEqual(10, sqe_splice_from_pipe.off); + sqe_splice_from_pipe.link_next(); const sqe_read = try ring.read(0x33333333, fd_dst, .{ .buffer = buffer_read[0..] 
}, 10); - try testing.expectEqual(linux.IORING_OP.READ, sqe_read.opcode); - try testing.expectEqual(@as(u64, 10), sqe_read.off); - try testing.expectEqual(@as(u32, 3), try ring.submit()); + try testing.expectEqual(Op.read, sqe_read.opcode); + try testing.expectEqual(10, sqe_read.off); + try testing.expectEqual(3, try ring.submit()); const cqe_splice_to_pipe = try ring.copy_cqe(); const cqe_splice_from_pipe = try ring.copy_cqe(); @@ -2142,20 +4823,20 @@ test "splice/read" { if (cqe_splice_to_pipe.err() == .INVAL) return error.SkipZigTest; if (cqe_splice_from_pipe.err() == .INVAL) return error.SkipZigTest; if (cqe_read.err() == .INVAL) return error.SkipZigTest; - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0x11111111, .res = buffer_write.len, - .flags = 0, + .flags = .{}, }, cqe_splice_to_pipe); - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0x22222222, .res = buffer_write.len, - .flags = 0, + .flags = .{}, }, cqe_splice_from_pipe); - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0x33333333, .res = buffer_read.len, - .flags = 0, + .flags = .{}, }, cqe_read); try testing.expectEqualSlices(u8, buffer_write[0..], buffer_read[0..]); } @@ -2163,7 +4844,7 @@ test "splice/read" { test "write_fixed/read_fixed" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(2, 0) catch |err| switch (err) { + var ring = IoUring.init(2, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -2195,29 +4876,29 @@ test "write_fixed/read_fixed" { else => |e| return e, }; - const sqe_write = try ring.write_fixed(0x45454545, fd, &buffers[0], 3, 0); - try testing.expectEqual(linux.IORING_OP.WRITE_FIXED, sqe_write.opcode); - try testing.expectEqual(@as(u64, 3), sqe_write.off); - sqe_write.flags |= linux.IOSQE_IO_LINK; + const sqe_write = try ring.write_fixed(0x45454545, fd, .{ .buffer = raw_buffers[0][0..] }, 3, 0); + try testing.expectEqual(Op.write_fixed, sqe_write.opcode); + try testing.expectEqual(3, sqe_write.off); + sqe_write.link_next(); - const sqe_read = try ring.read_fixed(0x12121212, fd, &buffers[1], 0, 1); - try testing.expectEqual(linux.IORING_OP.READ_FIXED, sqe_read.opcode); - try testing.expectEqual(@as(u64, 0), sqe_read.off); + const sqe_read = try ring.read_fixed(0x12121212, fd, .{ .buffer = raw_buffers[1][0..] 
}, 0, 1); + try testing.expectEqual(Op.read_fixed, sqe_read.opcode); + try testing.expectEqual(0, sqe_read.off); - try testing.expectEqual(@as(u32, 2), try ring.submit()); + try testing.expectEqual(2, try ring.submit()); const cqe_write = try ring.copy_cqe(); const cqe_read = try ring.copy_cqe(); - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0x45454545, - .res = @as(i32, @intCast(buffers[0].len)), - .flags = 0, + .res = @intCast(buffers[0].len), + .flags = .{}, }, cqe_write); - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0x12121212, - .res = @as(i32, @intCast(buffers[1].len)), - .flags = 0, + .res = @intCast(buffers[1].len), + .flags = .{}, }, cqe_read); try testing.expectEqualSlices(u8, "\x00\x00\x00", buffers[1].base[0..3]); @@ -2228,7 +4909,7 @@ test "write_fixed/read_fixed" { test "openat" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(1, 0) catch |err| switch (err) { + var ring = IoUring.init(1, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -2250,10 +4931,10 @@ test "openat" { const flags: linux.O = .{ .CLOEXEC = true, .ACCMODE = .RDWR, .CREAT = true }; const mode: posix.mode_t = 0o666; const sqe_openat = try ring.openat(0x33333333, tmp.dir.fd, path, flags, mode); - try testing.expectEqual(linux.io_uring_sqe{ - .opcode = .OPENAT, - .flags = 0, - .ioprio = 0, + try testing.expectEqual(Sqe{ + .opcode = .openat, + .flags = .{}, + .ioprio = .init_empty(), .fd = tmp.dir.fd, .off = 0, .addr = path_addr, @@ -2266,15 +4947,15 @@ test "openat" { .addr3 = 0, .resv = 0, }, sqe_openat.*); - try testing.expectEqual(@as(u32, 1), try ring.submit()); + try testing.expectEqual(1, try ring.submit()); const cqe_openat = try ring.copy_cqe(); - try testing.expectEqual(@as(u64, 0x33333333), cqe_openat.user_data); + try testing.expectEqual(0x33333333, cqe_openat.user_data); if (cqe_openat.err() == .INVAL) return error.SkipZigTest; if (cqe_openat.err() == .BADF) return error.SkipZigTest; if (cqe_openat.res <= 0) std.debug.print("\ncqe_openat.res={}\n", .{cqe_openat.res}); try testing.expect(cqe_openat.res > 0); - try testing.expectEqual(@as(u32, 0), cqe_openat.flags); + try testing.expectEqual(@as(Cqe.Flags, .{}), cqe_openat.flags); posix.close(cqe_openat.res); } @@ -2282,7 +4963,7 @@ test "openat" { test "close" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(1, 0) catch |err| switch (err) { + var ring = IoUring.init(1, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -2297,23 +4978,23 @@ test "close" { errdefer file.close(); const sqe_close = try ring.close(0x44444444, file.handle); - try testing.expectEqual(linux.IORING_OP.CLOSE, sqe_close.opcode); + try testing.expectEqual(Op.close, sqe_close.opcode); try testing.expectEqual(file.handle, sqe_close.fd); - try testing.expectEqual(@as(u32, 1), try ring.submit()); + try testing.expectEqual(1, try ring.submit()); const cqe_close = try ring.copy_cqe(); if (cqe_close.err() == .INVAL) return error.SkipZigTest; - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0x44444444, .res = 0, - .flags = 0, + .flags = .{}, }, cqe_close); } test "accept/connect/send/recv" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(16, 0) catch |err| switch (err) { + var ring = 
IoUring.init(16, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -2326,26 +5007,28 @@ test "accept/connect/send/recv" { const buffer_send = [_]u8{ 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }; var buffer_recv = [_]u8{ 0, 1, 0, 1, 0 }; - const sqe_send = try ring.send(0xeeeeeeee, socket_test_harness.client, buffer_send[0..], 0); - sqe_send.flags |= linux.IOSQE_IO_LINK; - _ = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, 0); - try testing.expectEqual(@as(u32, 2), try ring.submit()); + const sqe_send = try ring.send(0xeeeeeeee, socket_test_harness.client, buffer_send[0..], .{}); + sqe_send.link_next(); + + _ = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, .{}); + try testing.expectEqual(2, try ring.submit()); const cqe_send = try ring.copy_cqe(); if (cqe_send.err() == .INVAL) return error.SkipZigTest; - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0xeeeeeeee, .res = buffer_send.len, - .flags = 0, + .flags = .{}, }, cqe_send); const cqe_recv = try ring.copy_cqe(); if (cqe_recv.err() == .INVAL) return error.SkipZigTest; - try testing.expectEqual(linux.io_uring_cqe{ + try testing.expectEqual(Cqe{ .user_data = 0xffffffff, .res = buffer_recv.len, - // ignore IORING_CQE_F_SOCK_NONEMPTY since it is only set on some systems - .flags = cqe_recv.flags & linux.IORING_CQE_F_SOCK_NONEMPTY, + // Only check IORING_CQE_F_SOCK_NONEMPTY flag, as other flags are + // system-dependent + .flags = .{ .f_sock_nonempty = cqe_recv.flags.f_sock_nonempty }, }, cqe_recv); try testing.expectEqualSlices(u8, buffer_send[0..buffer_recv.len], buffer_recv[0..]); @@ -2354,7 +5037,7 @@ test "accept/connect/send/recv" { test "sendmsg/recvmsg" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(2, 0) catch |err| switch (err) { + var ring = IoUring.init(2, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -2365,8 +5048,8 @@ test "sendmsg/recvmsg" { const server = try posix.socket(address_server.any.family, posix.SOCK.DGRAM, 0); defer posix.close(server); - try posix.setsockopt(server, posix.SOL.SOCKET, posix.SO.REUSEPORT, &mem.toBytes(@as(c_int, 1))); - try posix.setsockopt(server, posix.SOL.SOCKET, posix.SO.REUSEADDR, &mem.toBytes(@as(c_int, 1))); + try posix.setsockopt(server, posix.SOL.SOCKET, posix.SO.REUSEPORT, &mem.toBytes(@as(u32, 1))); + try posix.setsockopt(server, posix.SOL.SOCKET, posix.SO.REUSEADDR, &mem.toBytes(@as(u32, 1))); try posix.bind(server, &address_server.any, address_server.getOsSockLen()); // set address_server to the OS-chosen IP/port. 
@@ -2389,9 +5072,9 @@ test "sendmsg/recvmsg" {
         .controllen = 0,
         .flags = 0,
     };
-    const sqe_sendmsg = try ring.sendmsg(0x11111111, client, &msg_send, 0);
-    sqe_sendmsg.flags |= linux.IOSQE_IO_LINK;
-    try testing.expectEqual(linux.IORING_OP.SENDMSG, sqe_sendmsg.opcode);
+    const sqe_sendmsg = try ring.sendmsg(0x11111111, client, &msg_send, .{});
+    sqe_sendmsg.flags.io_link = true;
+    try testing.expectEqual(Op.sendmsg, sqe_sendmsg.opcode);
     try testing.expectEqual(client, sqe_sendmsg.fd);
 
     var buffer_recv = [_]u8{0} ** 128;
@@ -2409,30 +5092,30 @@ test "sendmsg/recvmsg" {
         .controllen = 0,
         .flags = 0,
     };
-    const sqe_recvmsg = try ring.recvmsg(0x22222222, server, &msg_recv, 0);
-    try testing.expectEqual(linux.IORING_OP.RECVMSG, sqe_recvmsg.opcode);
+    const sqe_recvmsg = try ring.recvmsg(0x22222222, server, &msg_recv, .{});
+    try testing.expectEqual(Op.recvmsg, sqe_recvmsg.opcode);
     try testing.expectEqual(server, sqe_recvmsg.fd);
 
-    try testing.expectEqual(@as(u32, 2), ring.sq_ready());
-    try testing.expectEqual(@as(u32, 2), try ring.submit_and_wait(2));
-    try testing.expectEqual(@as(u32, 0), ring.sq_ready());
-    try testing.expectEqual(@as(u32, 2), ring.cq_ready());
+    try testing.expectEqual(2, ring.sq_ready());
+    try testing.expectEqual(2, try ring.submit_and_wait(2));
+    try testing.expectEqual(0, ring.sq_ready());
+    try testing.expectEqual(2, ring.cq_ready());
 
     const cqe_sendmsg = try ring.copy_cqe();
     if (cqe_sendmsg.res == -@as(i32, @intFromEnum(linux.E.INVAL))) return error.SkipZigTest;
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x11111111,
         .res = buffer_send.len,
-        .flags = 0,
+        .flags = .{},
     }, cqe_sendmsg);
 
     const cqe_recvmsg = try ring.copy_cqe();
     if (cqe_recvmsg.res == -@as(i32, @intFromEnum(linux.E.INVAL))) return error.SkipZigTest;
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x22222222,
         .res = buffer_recv.len,
         // ignore IORING_CQE_F_SOCK_NONEMPTY since it is set non-deterministically
-        .flags = cqe_recvmsg.flags & linux.IORING_CQE_F_SOCK_NONEMPTY,
+        .flags = .{ .f_sock_nonempty = cqe_recvmsg.flags.f_sock_nonempty },
     }, cqe_recvmsg);
 
     try testing.expectEqualSlices(u8, buffer_send[0..buffer_recv.len], buffer_recv[0..]);
@@ -2441,7 +5124,7 @@ test "timeout (after a relative time)" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -2453,26 +5136,26 @@ test "timeout (after a relative time)" {
     const ts: linux.kernel_timespec = .{ .sec = 0, .nsec = ms * 1000000 };
 
     const started = std.time.milliTimestamp();
-    const sqe = try ring.timeout(0x55555555, &ts, 0, 0);
-    try testing.expectEqual(linux.IORING_OP.TIMEOUT, sqe.opcode);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    const sqe = try ring.timeout(0x55555555, &ts, 0, .{});
+    try testing.expectEqual(Op.timeout, sqe.opcode);
+    try testing.expectEqual(1, try ring.submit());
 
     const cqe = try ring.copy_cqe();
     const stopped = std.time.milliTimestamp();
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x55555555,
         .res = -@as(i32, @intFromEnum(linux.E.TIME)),
-        .flags = 0,
+        .flags = .{},
     }, cqe);
 
     // Tests should not depend on timings: skip test if outside margin.
-    if (!std.math.approxEqAbs(f64, ms, @as(f64, @floatFromInt(stopped - started)), margin)) return error.SkipZigTest;
+    if (!math.approxEqAbs(f64, ms, @floatFromInt(stopped - started), margin)) return error.SkipZigTest;
 }
 
 test "timeout (after a number of completions)" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+    var ring = IoUring.init(2, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -2481,31 +5164,31 @@ test "timeout (after a number of completions)" {
     const ts: linux.kernel_timespec = .{ .sec = 3, .nsec = 0 };
     const count_completions: u64 = 1;
-    const sqe_timeout = try ring.timeout(0x66666666, &ts, count_completions, 0);
-    try testing.expectEqual(linux.IORING_OP.TIMEOUT, sqe_timeout.opcode);
+    const sqe_timeout = try ring.timeout(0x66666666, &ts, count_completions, .{});
+    try testing.expectEqual(Op.timeout, sqe_timeout.opcode);
     try testing.expectEqual(count_completions, sqe_timeout.off);
     _ = try ring.nop(0x77777777);
-    try testing.expectEqual(@as(u32, 2), try ring.submit());
+    try testing.expectEqual(2, try ring.submit());
 
     const cqe_nop = try ring.copy_cqe();
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x77777777,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe_nop);
 
     const cqe_timeout = try ring.copy_cqe();
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x66666666,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe_timeout);
 }
 
 test "timeout_remove" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+    var ring = IoUring.init(2, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -2513,22 +5196,22 @@ test "timeout_remove" {
     defer ring.deinit();
 
     const ts: linux.kernel_timespec = .{ .sec = 3, .nsec = 0 };
-    const sqe_timeout = try ring.timeout(0x88888888, &ts, 0, 0);
-    try testing.expectEqual(linux.IORING_OP.TIMEOUT, sqe_timeout.opcode);
-    try testing.expectEqual(@as(u64, 0x88888888), sqe_timeout.user_data);
+    const sqe_timeout = try ring.timeout(0x88888888, &ts, 0, .{});
+    try testing.expectEqual(Op.timeout, sqe_timeout.opcode);
+    try testing.expectEqual(0x88888888, sqe_timeout.user_data);
 
-    const sqe_timeout_remove = try ring.timeout_remove(0x99999999, 0x88888888, 0);
-    try testing.expectEqual(linux.IORING_OP.TIMEOUT_REMOVE, sqe_timeout_remove.opcode);
-    try testing.expectEqual(@as(u64, 0x88888888), sqe_timeout_remove.addr);
-    try testing.expectEqual(@as(u64, 0x99999999), sqe_timeout_remove.user_data);
+    const sqe_timeout_remove = try ring.timeout_remove(0x99999999, 0x88888888, .{});
+    try testing.expectEqual(Op.timeout_remove, sqe_timeout_remove.opcode);
+    try testing.expectEqual(0x88888888, sqe_timeout_remove.addr);
+    try testing.expectEqual(0x99999999, sqe_timeout_remove.user_data);
 
-    try testing.expectEqual(@as(u32, 2), try ring.submit());
+    try testing.expectEqual(2, try ring.submit());
 
     // The order in which the CQEs arrive is not clearly documented and it changed with kernel 5.18:
     // * kernel 5.10 gives user data 0x88888888 first, 0x99999999 second
     // * kernel 5.18 gives user data 0x99999999 first, 0x88888888 second
-    var cqes: [2]linux.io_uring_cqe = undefined;
+    var cqes: [2]Cqe = undefined;
     cqes[0] = try ring.copy_cqe();
     cqes[1] = try ring.copy_cqe();
@@ -2539,7 +5222,7 @@ test "timeout_remove" {
         // We don't want to skip this test for newer kernels.
         if (cqe.user_data == 0x99999999 and
             cqe.err() == .BADF and
-            (ring.features & linux.IORING_FEAT_RW_CUR_POS) == 0)
+            (!ring.features.rw_cur_pos))
         {
             return error.SkipZigTest;
         }
@@ -2547,16 +5230,16 @@ test "timeout_remove" {
         try testing.expect(cqe.user_data == 0x88888888 or cqe.user_data == 0x99999999);
 
         if (cqe.user_data == 0x88888888) {
-            try testing.expectEqual(linux.io_uring_cqe{
+            try testing.expectEqual(Cqe{
                 .user_data = 0x88888888,
                 .res = -@as(i32, @intFromEnum(linux.E.CANCELED)),
-                .flags = 0,
+                .flags = .{},
             }, cqe);
         } else if (cqe.user_data == 0x99999999) {
-            try testing.expectEqual(linux.io_uring_cqe{
+            try testing.expectEqual(Cqe{
                 .user_data = 0x99999999,
                 .res = 0,
-                .flags = 0,
+                .flags = .{},
             }, cqe);
         }
     }
@@ -2565,7 +5248,7 @@ test "timeout_remove" {
 
 test "accept/connect/recv/link_timeout" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+    var ring = IoUring.init(16, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -2577,14 +5260,14 @@ test "accept/connect/recv/link_timeout" {
 
     var buffer_recv = [_]u8{ 0, 1, 0, 1, 0 };
 
-    const sqe_recv = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, 0);
-    sqe_recv.flags |= linux.IOSQE_IO_LINK;
+    const sqe_recv = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, .{});
+    sqe_recv.link_next();
 
     const ts = linux.kernel_timespec{ .sec = 0, .nsec = 1000000 };
-    _ = try ring.link_timeout(0x22222222, &ts, 0);
+    _ = try ring.link_timeout(0x22222222, &ts, .{});
 
     const nr_wait = try ring.submit();
-    try testing.expectEqual(@as(u32, 2), nr_wait);
+    try testing.expectEqual(2, nr_wait);
 
     var i: usize = 0;
     while (i < nr_wait) : (i += 1) {
@@ -2614,7 +5297,7 @@ test "accept/connect/recv/link_timeout" {
 
 test "fallocate" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -2628,13 +5311,13 @@ test "fallocate" {
     const file = try tmp.dir.createFile(path, .{ .truncate = true, .mode = 0o666 });
     defer file.close();
 
-    try testing.expectEqual(@as(u64, 0), (try file.stat()).size);
+    try testing.expectEqual(0, (try file.stat()).size);
 
     const len: u64 = 65536;
     const sqe = try ring.fallocate(0xaaaaaaaa, file.handle, 0, 0, len);
-    try testing.expectEqual(linux.IORING_OP.FALLOCATE, sqe.opcode);
+    try testing.expectEqual(Op.fallocate, sqe.opcode);
     try testing.expectEqual(file.handle, sqe.fd);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(1, try ring.submit());
 
     const cqe = try ring.copy_cqe();
     switch (cqe.err()) {
@@ -2648,10 +5331,10 @@ test "fallocate" {
         .OPNOTSUPP => return error.SkipZigTest,
         else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
     }
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0xaaaaaaaa,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe);
 
     try testing.expectEqual(len, (try file.stat()).size);
@@ -2660,7 +5343,7 @@ test "statx" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -2673,7 +5356,7 @@ test "statx" {
     const file = try tmp.dir.createFile(path, .{ .truncate = true, .mode = 0o666 });
     defer file.close();
 
-    try testing.expectEqual(@as(u64, 0), (try file.stat()).size);
+    try testing.expectEqual(0, (try file.stat()).size);
 
     try file.writeAll("foobar");
 
@@ -2682,13 +5365,13 @@ test "statx" {
         0xaaaaaaaa,
         tmp.dir.fd,
         path,
-        0,
-        linux.STATX_SIZE,
+        .{},
+        .{ .size = true },
         &buf,
     );
-    try testing.expectEqual(linux.IORING_OP.STATX, sqe.opcode);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(Op.statx, sqe.opcode);
+    try testing.expectEqual(tmp.dir.fd, sqe.fd);
+    try testing.expectEqual(1, try ring.submit());
 
     const cqe = try ring.copy_cqe();
     switch (cqe.err()) {
@@ -2704,20 +5387,20 @@ test "statx" {
         .BADF => return error.SkipZigTest,
         else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
     }
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0xaaaaaaaa,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe);
 
-    try testing.expect(buf.mask & linux.STATX_SIZE == linux.STATX_SIZE);
-    try testing.expectEqual(@as(u64, 6), buf.size);
+    try testing.expect(buf.mask.size);
+    try testing.expectEqual(6, buf.size);
 }
 
 test "accept/connect/recv/cancel" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+    var ring = IoUring.init(16, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -2729,14 +5412,14 @@ test "accept/connect/recv/cancel" {
 
     var buffer_recv = [_]u8{ 0, 1, 0, 1, 0 };
 
-    _ = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, 0);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    _ = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, .{});
+    try testing.expectEqual(1, try ring.submit());
 
-    const sqe_cancel = try ring.cancel(0x99999999, 0xffffffff, 0);
-    try testing.expectEqual(linux.IORING_OP.ASYNC_CANCEL, sqe_cancel.opcode);
-    try testing.expectEqual(@as(u64, 0xffffffff), sqe_cancel.addr);
-    try testing.expectEqual(@as(u64, 0x99999999), sqe_cancel.user_data);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    const sqe_cancel = try ring.cancel(0x99999999, 0xffffffff, .{});
+    try testing.expectEqual(Op.async_cancel, sqe_cancel.opcode);
+    try testing.expectEqual(0xffffffff, sqe_cancel.addr);
+    try testing.expectEqual(0x99999999, sqe_cancel.user_data);
+    try testing.expectEqual(1, try ring.submit());
 
     var cqe_recv = try ring.copy_cqe();
     if (cqe_recv.err() == .INVAL) return error.SkipZigTest;
@@ -2751,23 +5434,23 @@ test "accept/connect/recv/cancel" {
         cqe_cancel = a;
     }
 
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0xffffffff,
         .res = -@as(i32, @intFromEnum(linux.E.CANCELED)),
-        .flags = 0,
+        .flags = .{},
     }, cqe_recv);
 
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x99999999,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe_cancel);
 }
 
 test "register_files_update" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -2802,14 +5485,14 @@ test "register_files_update" {
     var buffer = [_]u8{42} ** 128;
     {
         const sqe = try ring.read(0xcccccccc, fd_index, .{ .buffer = &buffer }, 0);
-        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
-        sqe.flags |= linux.IOSQE_FIXED_FILE;
+        try testing.expectEqual(Op.read, sqe.opcode);
+        sqe.set_flags(.{ .fixed_file = true });
 
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
-        try testing.expectEqual(linux.io_uring_cqe{
+        try testing.expectEqual(1, try ring.submit());
+        try testing.expectEqual(Cqe{
             .user_data = 0xcccccccc,
             .res = buffer.len,
-            .flags = 0,
+            .flags = .{},
         }, try ring.copy_cqe());
         try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer.len), buffer[0..]);
     }
@@ -2823,14 +5506,14 @@ test "register_files_update" {
     {
         // Next read should still work since fd_index in the registered file descriptors hasn't been updated yet.
         const sqe = try ring.read(0xcccccccc, fd_index, .{ .buffer = &buffer }, 0);
-        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
-        sqe.flags |= linux.IOSQE_FIXED_FILE;
+        try testing.expectEqual(Op.read, sqe.opcode);
+        sqe.set_flags(.{ .fixed_file = true });
 
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
-        try testing.expectEqual(linux.io_uring_cqe{
+        try testing.expectEqual(1, try ring.submit());
+        try testing.expectEqual(Cqe{
             .user_data = 0xcccccccc,
             .res = buffer.len,
-            .flags = 0,
+            .flags = .{},
         }, try ring.copy_cqe());
         try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer.len), buffer[0..]);
     }
@@ -2840,10 +5523,10 @@ test "register_files_update" {
     {
         // Now this should fail since both fds are sparse (-1)
         const sqe = try ring.read(0xcccccccc, fd_index, .{ .buffer = &buffer }, 0);
-        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
-        sqe.flags |= linux.IOSQE_FIXED_FILE;
+        try testing.expectEqual(Op.read, sqe.opcode);
+        sqe.set_flags(.{ .fixed_file = true });
 
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(1, try ring.submit());
         const cqe = try ring.copy_cqe();
         try testing.expectEqual(linux.E.BADF, cqe.err());
     }
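As the register_files_update hunks above show, a fixed-file table decouples submission from raw descriptors: a request names a slot index, must carry the fixed_file flag, and swapping a slot only affects requests prepared after the update lands. A hedged sketch of that flow; `ring` plus the two open descriptors `a` and `b` are hypothetical:

    var fds = [_]posix.fd_t{ a, b };
    try ring.register_files(fds[0..]); // slots 0 and 1 now refer to a and b
    var buf: [64]u8 = undefined;
    const sqe = try ring.read(0x1, 0, .{ .buffer = &buf }, 0); // fd field holds slot index 0
    sqe.set_flags(.{ .fixed_file = true }); // interpret fd as a table slot, not a real fd
    _ = try ring.submit();
    // Point slot 0 at b instead; only later submissions observe the change.
    fds[0] = b;
    try ring.register_files_update(0, fds[0..1]);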
@@ -2854,7 +5537,7 @@ test "register_files_update" {
 
 test "shutdown" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+    var ring = IoUring.init(16, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -2865,9 +5548,10 @@ test "shutdown" {
 
     // Socket bound, expect shutdown to work
     {
-        const server = try posix.socket(address.any.family, posix.SOCK.STREAM | posix.SOCK.CLOEXEC, 0);
+        // TODO: update posix later to use Typed Flags
+        const server = try posix.socket(address.any.family, @as(u32, @bitCast(linux.Sock{ .type = .stream, .flags = .{ .cloexec = true } })), 0);
         defer posix.close(server);
-        try posix.setsockopt(server, posix.SOL.SOCKET, posix.SO.REUSEADDR, &mem.toBytes(@as(c_int, 1)));
+        try posix.setsockopt(server, posix.SOL.SOCKET, posix.SO.REUSEADDR, &mem.toBytes(@as(u32, 1)));
         try posix.bind(server, &address.any, address.getOsSockLen());
         try posix.listen(server, 1);
@@ -2875,11 +5559,11 @@ test "shutdown" {
         var slen: posix.socklen_t = address.getOsSockLen();
         try posix.getsockname(server, &address.any, &slen);
 
-        const shutdown_sqe = try ring.shutdown(0x445445445, server, linux.SHUT.RD);
-        try testing.expectEqual(linux.IORING_OP.SHUTDOWN, shutdown_sqe.opcode);
-        try testing.expectEqual(@as(i32, server), shutdown_sqe.fd);
+        const shutdown_sqe = try ring.shutdown(0x445445445, server, .rd);
+        try testing.expectEqual(Op.shutdown, shutdown_sqe.opcode);
+        try testing.expectEqual(server, shutdown_sqe.fd);
 
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -2889,10 +5573,10 @@ test "shutdown" {
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
 
-        try testing.expectEqual(linux.io_uring_cqe{
+        try testing.expectEqual(Cqe{
             .user_data = 0x445445445,
             .res = 0,
-            .flags = 0,
+            .flags = .{},
         }, cqe);
     }
 
@@ -2901,16 +5585,16 @@ test "shutdown" {
         const server = try posix.socket(address.any.family, posix.SOCK.STREAM | posix.SOCK.CLOEXEC, 0);
         defer posix.close(server);
 
-        const shutdown_sqe = ring.shutdown(0x445445445, server, linux.SHUT.RD) catch |err| switch (err) {
+        const shutdown_sqe = ring.shutdown(0x445445445, server, .rd) catch |err| switch (err) {
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         };
-        try testing.expectEqual(linux.IORING_OP.SHUTDOWN, shutdown_sqe.opcode);
-        try testing.expectEqual(@as(i32, server), shutdown_sqe.fd);
+        try testing.expectEqual(Op.shutdown, shutdown_sqe.opcode);
+        try testing.expectEqual(server, shutdown_sqe.fd);
 
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
-        try testing.expectEqual(@as(u64, 0x445445445), cqe.user_data);
+        try testing.expectEqual(0x445445445, cqe.user_data);
         try testing.expectEqual(linux.E.NOTCONN, cqe.err());
     }
 }
@@ -2918,7 +5602,7 @@ test "shutdown" {
 
 test "renameat" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -2945,12 +5629,12 @@ test "renameat" {
         old_path,
         tmp.dir.fd,
         new_path,
-        0,
+        .{},
     );
-    try testing.expectEqual(linux.IORING_OP.RENAMEAT, sqe.opcode);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), @as(i32, @bitCast(sqe.len)));
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(Op.renameat, sqe.opcode);
+    try testing.expectEqual(tmp.dir.fd, sqe.fd);
+    try testing.expectEqual(tmp.dir.fd, @as(i32, @intCast(sqe.len)));
+    try testing.expectEqual(1, try ring.submit());
 
     const cqe = try ring.copy_cqe();
     switch (cqe.err()) {
@@ -2959,10 +5643,10 @@ test "renameat" {
         .BADF, .INVAL => return error.SkipZigTest,
         else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
     }
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x12121212,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe);
 
     // Validate that the old file doesn't exist anymore
@@ -2987,7 +5671,7 @@ test "renameat" {
 
 test "unlinkat" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3010,11 +5694,11 @@ test "unlinkat" {
         0x12121212,
         tmp.dir.fd,
         path,
-        0,
+        .{},
     );
-    try testing.expectEqual(linux.IORING_OP.UNLINKAT, sqe.opcode);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(Op.unlinkat, sqe.opcode);
+    try testing.expectEqual(tmp.dir.fd, sqe.fd);
+    try testing.expectEqual(1, try ring.submit());
 
     const cqe = try ring.copy_cqe();
     switch (cqe.err()) {
@@ -3023,10 +5707,10 @@ test "unlinkat" {
         .BADF, .INVAL => return error.SkipZigTest,
         else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
     }
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x12121212,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe);
 
     // Validate that the file doesn't exist anymore
@@ -3039,7 +5723,7 @@ test "unlinkat" {
 
 test "mkdirat" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3059,9 +5743,9 @@ test "mkdirat" {
         path,
         0o0755,
     );
-    try testing.expectEqual(linux.IORING_OP.MKDIRAT, sqe.opcode);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(Op.mkdirat, sqe.opcode);
+    try testing.expectEqual(tmp.dir.fd, sqe.fd);
+    try testing.expectEqual(1, try ring.submit());
 
     const cqe = try ring.copy_cqe();
     switch (cqe.err()) {
@@ -3070,10 +5754,10 @@ test "mkdirat" {
         .BADF, .INVAL => return error.SkipZigTest,
         else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
     }
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x12121212,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe);
 
     // Validate that the directory exists
@@ -3083,7 +5767,7 @@ test "symlinkat" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3107,9 +5791,9 @@ test "symlinkat" {
         tmp.dir.fd,
         link_path,
     );
-    try testing.expectEqual(linux.IORING_OP.SYMLINKAT, sqe.opcode);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(Op.symlinkat, sqe.opcode);
+    try testing.expectEqual(tmp.dir.fd, sqe.fd);
+    try testing.expectEqual(1, try ring.submit());
 
     const cqe = try ring.copy_cqe();
     switch (cqe.err()) {
@@ -3118,10 +5802,10 @@ test "symlinkat" {
         .BADF, .INVAL => return error.SkipZigTest,
         else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
     }
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x12121212,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe);
 
     // Validate that the symlink exists
@@ -3131,7 +5815,7 @@ test "linkat" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3158,12 +5842,12 @@ test "linkat" {
         first_path,
         tmp.dir.fd,
         second_path,
-        0,
+        .{},
     );
-    try testing.expectEqual(linux.IORING_OP.LINKAT, sqe.opcode);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), sqe.fd);
-    try testing.expectEqual(@as(i32, tmp.dir.fd), @as(i32, @bitCast(sqe.len)));
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(Op.linkat, sqe.opcode);
+    try testing.expectEqual(tmp.dir.fd, sqe.fd);
+    try testing.expectEqual(tmp.dir.fd, @as(i32, @intCast(sqe.len)));
+    try testing.expectEqual(1, try ring.submit());
 
     const cqe = try ring.copy_cqe();
     switch (cqe.err()) {
@@ -3172,10 +5856,10 @@ test "linkat" {
         .BADF, .INVAL => return error.SkipZigTest,
         else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
     }
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0x12121212,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe);
 
     // Validate the second file
@@ -3190,7 +5874,7 @@ test "linkat" {
 
 test "provide_buffers: read" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3210,12 +5894,12 @@ test "provide_buffers: read" {
 
     // Provide 4 buffers
     {
-        const sqe = try ring.provide_buffers(0xcccccccc, @as([*]u8, @ptrCast(&buffers)), buffer_len, buffers.len, group_id, buffer_id);
-        try testing.expectEqual(linux.IORING_OP.PROVIDE_BUFFERS, sqe.opcode);
+        const sqe = try ring.provide_buffers(0xcccccccc, @ptrCast(&buffers), buffer_len, buffers.len, group_id, buffer_id);
+        try testing.expectEqual(Op.provide_buffers, sqe.opcode);
         try testing.expectEqual(@as(i32, buffers.len), sqe.fd);
-        try testing.expectEqual(@as(u32, buffers[0].len), sqe.len);
-        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(buffers[0].len, sqe.len);
+        try testing.expectEqual(group_id, sqe.buf_index);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3224,7 +5908,7 @@ test "provide_buffers: read" {
             .SUCCESS => {},
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
-        try testing.expectEqual(@as(u64, 0xcccccccc), cqe.user_data);
+        try testing.expectEqual(0xcccccccc, cqe.user_data);
     }
 
     // Do 4 reads which should consume all buffers
@@ -3232,12 +5916,12 @@ test "provide_buffers: read" {
     var i: usize = 0;
     while (i < buffers.len) : (i += 1) {
         const sqe = try ring.read(0xdededede, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
-        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
-        try testing.expectEqual(@as(i32, fd), sqe.fd);
-        try testing.expectEqual(@as(u64, 0), sqe.addr);
-        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
-        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(Op.read, sqe.opcode);
+        try testing.expectEqual(fd, sqe.fd);
+        try testing.expectEqual(0, sqe.addr);
+        try testing.expectEqual(buffer_len, sqe.len);
+        try testing.expectEqual(group_id, sqe.buf_index);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3245,25 +5929,26 @@ test "provide_buffers: read" {
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
 
-        try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
-        const used_buffer_id = cqe.flags >> 16;
+        try testing.expect(cqe.flags.f_buffer);
+        const used_buffer_id = try cqe.buffer_id();
         try testing.expect(used_buffer_id >= 0 and used_buffer_id <= 3);
-        try testing.expectEqual(@as(i32, buffer_len), cqe.res);
+        try testing.expectEqual(buffer_len, cqe.res);
 
-        try testing.expectEqual(@as(u64, 0xdededede), cqe.user_data);
-        try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer_len), buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))]);
+        try testing.expectEqual(0xdededede, cqe.user_data);
+        const empty: [buffer_len]u8 = @splat(0);
+        try testing.expectEqualSlices(u8, empty[0..], buffers[used_buffer_id][0..@intCast(cqe.res)]);
     }
 
     // This read should fail
     {
         const sqe = try ring.read(0xdfdfdfdf, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
-        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
-        try testing.expectEqual(@as(i32, fd), sqe.fd);
-        try testing.expectEqual(@as(u64, 0), sqe.addr);
-        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
-        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(Op.read, sqe.opcode);
+        try testing.expectEqual(fd, sqe.fd);
+        try testing.expectEqual(0, sqe.addr);
+        try testing.expectEqual(buffer_len, sqe.len);
+        try testing.expectEqual(group_id, sqe.buf_index);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3272,7 +5957,7 @@ test "provide_buffers: read" {
             .SUCCESS => std.debug.panic("unexpected success", .{}),
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
-        try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
+        try testing.expectEqual(0xdfdfdfdf, cqe.user_data);
     }
 
     // Provide 1 buffer again
@@ -3283,8 +5968,8 @@ test "provide_buffers: read" {
     const reprovided_buffer_id = 2;
     {
-        _ = try ring.provide_buffers(0xabababab, @as([*]u8, @ptrCast(&buffers[reprovided_buffer_id])), buffer_len, 1, group_id, reprovided_buffer_id);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        _ = try ring.provide_buffers(0xabababab, @ptrCast(&buffers[reprovided_buffer_id]), buffer_len, 1, group_id, reprovided_buffer_id);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3297,12 +5982,12 @@ test "provide_buffers: read" {
 
     {
         const sqe = try ring.read(0xdfdfdfdf, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
-        try testing.expectEqual(linux.IORING_OP.READ, sqe.opcode);
-        try testing.expectEqual(@as(i32, fd), sqe.fd);
-        try testing.expectEqual(@as(u64, 0), sqe.addr);
-        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
-        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(Op.read, sqe.opcode);
+        try testing.expectEqual(fd, sqe.fd);
+        try testing.expectEqual(0, sqe.addr);
+        try testing.expectEqual(buffer_len, sqe.len);
+        try testing.expectEqual(group_id, sqe.buf_index);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3310,19 +5995,20 @@ test "provide_buffers: read" {
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
 
-        try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
-        const used_buffer_id = cqe.flags >> 16;
+        try testing.expect(cqe.flags.f_buffer);
+        const used_buffer_id = try cqe.buffer_id();
         try testing.expectEqual(used_buffer_id, reprovided_buffer_id);
-        try testing.expectEqual(@as(i32, buffer_len), cqe.res);
-        try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
-        try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer_len), buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))]);
+        try testing.expectEqual(buffer_len, cqe.res);
+        try testing.expectEqual(0xdfdfdfdf, cqe.user_data);
+        const empty: [buffer_len]u8 = @splat(0);
+        try testing.expectEqualSlices(u8, empty[0..], buffers[used_buffer_id][0..@intCast(cqe.res)]);
     }
 }
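The provide_buffers tests above all follow one cycle: register a pool for a group, submit I/O that names only the group, read the chosen slot out of the CQE, and re-provide the slot once its contents are consumed. A compact sketch of that cycle; `ring` and the readable `fd` are assumed, and the group id is arbitrary:

    const group_id: u16 = 1;
    var pool: [4][32]u8 = undefined;
    _ = try ring.provide_buffers(0x1, @ptrCast(&pool), 32, pool.len, group_id, 0);
    _ = try ring.submit();
    _ = try ring.copy_cqe(); // completion of provide_buffers itself
    // No buffer is named up front; the kernel picks a free slot from the group.
    _ = try ring.read(0x2, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = 32 } }, 0);
    _ = try ring.submit();
    const cqe = try ring.copy_cqe();
    if (cqe.flags.f_buffer) {
        const slot = try cqe.buffer_id(); // which pool slot holds the data
        const bytes = pool[slot][0..@intCast(cqe.res)];
        _ = bytes; // process, then provide_buffers again to recycle the slot
    }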
 
 test "remove_buffers" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3342,8 +6028,8 @@ test "remove_buffers" {
 
     // Provide 4 buffers
     {
-        _ = try ring.provide_buffers(0xcccccccc, @as([*]u8, @ptrCast(&buffers)), buffer_len, buffers.len, group_id, buffer_id);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        _ = try ring.provide_buffers(0xcccccccc, @ptrCast(&buffers), buffer_len, buffers.len, group_id, buffer_id);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3351,32 +6037,32 @@ test "remove_buffers" {
             .SUCCESS => {},
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
-        try testing.expectEqual(@as(u64, 0xcccccccc), cqe.user_data);
+        try testing.expectEqual(0xcccccccc, cqe.user_data);
     }
 
     // Remove 3 buffers
     {
         const sqe = try ring.remove_buffers(0xbababababa, 3, group_id);
-        try testing.expectEqual(linux.IORING_OP.REMOVE_BUFFERS, sqe.opcode);
-        try testing.expectEqual(@as(i32, 3), sqe.fd);
-        try testing.expectEqual(@as(u64, 0), sqe.addr);
-        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(Op.remove_buffers, sqe.opcode);
+        try testing.expectEqual(3, sqe.fd);
+        try testing.expectEqual(0, sqe.addr);
+        try testing.expectEqual(group_id, sqe.buf_index);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
             .SUCCESS => {},
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
-        try testing.expectEqual(@as(u64, 0xbababababa), cqe.user_data);
+        try testing.expectEqual(0xbababababa, cqe.user_data);
     }
 
     // This read should work
     {
         _ = try ring.read(0xdfdfdfdf, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3384,19 +6070,20 @@ test "remove_buffers" {
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
 
-        try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
-        const used_buffer_id = cqe.flags >> 16;
+        try testing.expect(cqe.flags.f_buffer);
+        const used_buffer_id = try cqe.buffer_id();
         try testing.expect(used_buffer_id >= 0 and used_buffer_id < 4);
-        try testing.expectEqual(@as(i32, buffer_len), cqe.res);
-        try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
-        try testing.expectEqualSlices(u8, &([_]u8{0} ** buffer_len), buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))]);
+        try testing.expectEqual(buffer_len, cqe.res);
+        try testing.expectEqual(0xdfdfdfdf, cqe.user_data);
+        const empty: [buffer_len]u8 = @splat(0);
+        try testing.expectEqualSlices(u8, empty[0..], buffers[used_buffer_id][0..@intCast(cqe.res)]);
     }
 
     // Final read should _not_ work
     {
         _ = try ring.read(0xdfdfdfdf, fd, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3411,7 +6098,7 @@ test "remove_buffers" {
 
 test "provide_buffers: accept/connect/send/recv" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+    var ring = IoUring.init(16, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3427,12 +6114,12 @@ test "provide_buffers: accept/connect/send/recv" {
 
     // Provide 4 buffers
     {
-        const sqe = try ring.provide_buffers(0xcccccccc, @as([*]u8, @ptrCast(&buffers)), buffer_len, buffers.len, group_id, buffer_id);
-        try testing.expectEqual(linux.IORING_OP.PROVIDE_BUFFERS, sqe.opcode);
+        const sqe = try ring.provide_buffers(0xcccccccc, @ptrCast(&buffers), buffer_len, buffers.len, group_id, buffer_id);
+        try testing.expectEqual(Op.provide_buffers, sqe.opcode);
         try testing.expectEqual(@as(i32, buffers.len), sqe.fd);
-        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
-        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(buffer_len, sqe.len);
+        try testing.expectEqual(group_id, sqe.buf_index);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3443,7 +6130,7 @@ test "provide_buffers: accept/connect/send/recv" {
             .SUCCESS => {},
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
-        try testing.expectEqual(@as(u64, 0xcccccccc), cqe.user_data);
+        try testing.expectEqual(0xcccccccc, cqe.user_data);
     }
 
     const socket_test_harness = try createSocketTestHarness(&ring);
@@ -3454,12 +6141,12 @@ test "provide_buffers: accept/connect/send/recv" {
     {
         var i: usize = 0;
         while (i < buffers.len) : (i += 1) {
-            _ = try ring.send(0xdeaddead, socket_test_harness.server, &([_]u8{'z'} ** buffer_len), 0);
-            try testing.expectEqual(@as(u32, 1), try ring.submit());
+            _ = try ring.send(0xdeaddead, socket_test_harness.server, &([_]u8{'z'} ** buffer_len), .{});
+            try testing.expectEqual(1, try ring.submit());
         }
 
-        var cqes: [4]linux.io_uring_cqe = undefined;
-        try testing.expectEqual(@as(u32, 4), try ring.copy_cqes(&cqes, 4));
+        var cqes: [4]Cqe = undefined;
+        try testing.expectEqual(4, try ring.copy_cqes(&cqes, 4));
     }
 
     // Do 4 recv which should consume all buffers
@@ -3469,15 +6156,15 @@ test "provide_buffers: accept/connect/send/recv" {
 
     var i: usize = 0;
     while (i < buffers.len) : (i += 1) {
-        const sqe = try ring.recv(0xdededede, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
-        try testing.expectEqual(linux.IORING_OP.RECV, sqe.opcode);
+        const sqe = try ring.recv(0xdededede, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, .{});
+        try testing.expectEqual(Op.recv, sqe.opcode);
         try testing.expectEqual(@as(i32, socket_test_harness.client), sqe.fd);
-        try testing.expectEqual(@as(u64, 0), sqe.addr);
-        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
-        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
-        try testing.expectEqual(@as(u32, 0), sqe.rw_flags);
-        try testing.expectEqual(@as(u32, linux.IOSQE_BUFFER_SELECT), sqe.flags);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(0, sqe.addr);
+        try testing.expectEqual(buffer_len, sqe.len);
+        try testing.expectEqual(group_id, sqe.buf_index);
+        try testing.expectEqual(0, sqe.rw_flags);
+        try testing.expectEqual(Sqe.IoSqe{ .buffer_select = true }, sqe.flags);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3485,28 +6172,28 @@ test "provide_buffers: accept/connect/send/recv" {
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
 
-        try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
-        const used_buffer_id = cqe.flags >> 16;
+        try testing.expect(cqe.flags.f_buffer);
+        const used_buffer_id = try cqe.buffer_id();
         try testing.expect(used_buffer_id >= 0 and used_buffer_id <= 3);
-        try testing.expectEqual(@as(i32, buffer_len), cqe.res);
+        try testing.expectEqual(buffer_len, cqe.res);
 
-        try testing.expectEqual(@as(u64, 0xdededede), cqe.user_data);
-        const buffer = buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))];
-        try testing.expectEqualSlices(u8, &([_]u8{'z'} ** buffer_len), buffer);
+        try testing.expectEqual(0xdededede, cqe.user_data);
+        const zzz: [buffer_len]u8 = @splat('z');
+        try testing.expectEqualSlices(u8, zzz[0..], buffers[used_buffer_id][0..@intCast(cqe.res)]);
     }
 
     // This recv should fail
     {
-        const sqe = try ring.recv(0xdfdfdfdf, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
-        try testing.expectEqual(linux.IORING_OP.RECV, sqe.opcode);
+        const sqe = try ring.recv(0xdfdfdfdf, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, .{});
+        try testing.expectEqual(Op.recv, sqe.opcode);
         try testing.expectEqual(@as(i32, socket_test_harness.client), sqe.fd);
-        try testing.expectEqual(@as(u64, 0), sqe.addr);
-        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
-        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
-        try testing.expectEqual(@as(u32, 0), sqe.rw_flags);
-        try testing.expectEqual(@as(u32, linux.IOSQE_BUFFER_SELECT), sqe.flags);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(0, sqe.addr);
+        try testing.expectEqual(buffer_len, sqe.len);
+        try testing.expectEqual(group_id, sqe.buf_index);
+        try testing.expectEqual(0, sqe.rw_flags);
+        try testing.expectEqual(Sqe.IoSqe{ .buffer_select = true }, sqe.flags);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3515,7 +6202,7 @@ test "provide_buffers: accept/connect/send/recv" {
             .SUCCESS => std.debug.panic("unexpected success", .{}),
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
        }
-        try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
+        try testing.expectEqual(0xdfdfdfdf, cqe.user_data);
     }
 
     // Provide 1 buffer again
@@ -3523,8 +6210,8 @@ test "provide_buffers: accept/connect/send/recv" {
     const reprovided_buffer_id = 2;
     {
-        _ = try ring.provide_buffers(0xabababab, @as([*]u8, @ptrCast(&buffers[reprovided_buffer_id])), buffer_len, 1, group_id, reprovided_buffer_id);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        _ = try ring.provide_buffers(0xabababab, @ptrCast(&buffers[reprovided_buffer_id]), buffer_len, 1, group_id, reprovided_buffer_id);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3536,8 +6223,8 @@ test "provide_buffers: accept/connect/send/recv" {
 
     // Redo 1 send on the server socket
     {
-        _ = try ring.send(0xdeaddead, socket_test_harness.server, &([_]u8{'w'} ** buffer_len), 0);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        _ = try ring.send(0xdeaddead, socket_test_harness.server, &([_]u8{'w'} ** buffer_len), .{});
+        try testing.expectEqual(1, try ring.submit());
 
         _ = try ring.copy_cqe();
     }
@@ -3548,15 +6235,15 @@ test "provide_buffers: accept/connect/send/recv" {
     @memset(mem.sliceAsBytes(&buffers), 1);
 
     {
-        const sqe = try ring.recv(0xdfdfdfdf, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, 0);
-        try testing.expectEqual(linux.IORING_OP.RECV, sqe.opcode);
+        const sqe = try ring.recv(0xdfdfdfdf, socket_test_harness.client, .{ .buffer_selection = .{ .group_id = group_id, .len = buffer_len } }, .{});
+        try testing.expectEqual(Op.recv, sqe.opcode);
         try testing.expectEqual(@as(i32, socket_test_harness.client), sqe.fd);
-        try testing.expectEqual(@as(u64, 0), sqe.addr);
-        try testing.expectEqual(@as(u32, buffer_len), sqe.len);
-        try testing.expectEqual(@as(u16, group_id), sqe.buf_index);
-        try testing.expectEqual(@as(u32, 0), sqe.rw_flags);
-        try testing.expectEqual(@as(u32, linux.IOSQE_BUFFER_SELECT), sqe.flags);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(0, sqe.addr);
+        try testing.expectEqual(buffer_len, sqe.len);
+        try testing.expectEqual(group_id, sqe.buf_index);
+        try testing.expectEqual(0, sqe.rw_flags);
+        try testing.expectEqual(Sqe.IoSqe{ .buffer_select = true }, sqe.flags);
+        try testing.expectEqual(1, try ring.submit());
 
         const cqe = try ring.copy_cqe();
         switch (cqe.err()) {
@@ -3564,13 +6251,13 @@ test "provide_buffers: accept/connect/send/recv" {
             else => |errno| std.debug.panic("unhandled errno: {}", .{errno}),
         }
 
-        try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER);
-        const used_buffer_id = cqe.flags >> 16;
+        try testing.expect(cqe.flags.f_buffer);
+        const used_buffer_id = try cqe.buffer_id();
         try testing.expectEqual(used_buffer_id, reprovided_buffer_id);
-        try testing.expectEqual(@as(i32, buffer_len), cqe.res);
-        try testing.expectEqual(@as(u64, 0xdfdfdfdf), cqe.user_data);
-        const buffer = buffers[used_buffer_id][0..@as(usize, @intCast(cqe.res))];
-        try testing.expectEqualSlices(u8, &([_]u8{'w'} ** buffer_len), buffer);
+        try testing.expectEqual(buffer_len, cqe.res);
+        try testing.expectEqual(0xdfdfdfdf, cqe.user_data);
+        const www: [buffer_len]u8 = @splat('w');
+        try testing.expectEqualSlices(u8, www[0..], buffers[used_buffer_id][0..@intCast(cqe.res)]);
     }
 }
@@ -3595,14 +6282,14 @@ fn createSocketTestHarness(ring: *IoUring) !SocketTestHarness {
     // Submit 1 accept
     var accept_addr: posix.sockaddr = undefined;
     var accept_addr_len: posix.socklen_t = @sizeOf(@TypeOf(accept_addr));
-    _ = try ring.accept(0xaaaaaaaa, listener_socket, &accept_addr, &accept_addr_len, 0);
+    _ = try ring.accept(0xaaaaaaaa, listener_socket, &accept_addr, &accept_addr_len, .{});
 
     // Create a TCP client socket
     const client = try posix.socket(address.any.family, posix.SOCK.STREAM | posix.SOCK.CLOEXEC, 0);
     errdefer posix.close(client);
     _ = try ring.connect(0xcccccccc, client, &address.any, address.getOsSockLen());
-    try testing.expectEqual(@as(u32, 2), try ring.submit());
+    try testing.expectEqual(2, try ring.submit());
 
     var cqe_accept = try ring.copy_cqe();
     if (cqe_accept.err() == .INVAL) return error.SkipZigTest;
@@ -3617,19 +6304,19 @@ fn createSocketTestHarness(ring: *IoUring) !SocketTestHarness {
         cqe_connect = a;
     }
 
-    try testing.expectEqual(@as(u64, 0xaaaaaaaa), cqe_accept.user_data);
+    try testing.expectEqual(0xaaaaaaaa, cqe_accept.user_data);
     if (cqe_accept.res <= 0) std.debug.print("\ncqe_accept.res={}\n", .{cqe_accept.res});
     try testing.expect(cqe_accept.res > 0);
-    try testing.expectEqual(@as(u32, 0), cqe_accept.flags);
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(@as(Cqe.Flags, .{}), cqe_accept.flags);
+    try testing.expectEqual(Cqe{
         .user_data = 0xcccccccc,
         .res = 0,
-        .flags = 0,
+        .flags = .{},
     }, cqe_connect);
 
     // All good
-    return SocketTestHarness{
+    return .{
         .listener = listener_socket,
         .server = cqe_accept.res,
         .client = client,
@@ -3641,7 +6328,7 @@ fn createListenerSocket(address: *net.Address) !posix.socket_t {
     const listener_socket = try posix.socket(address.any.family, posix.SOCK.STREAM | posix.SOCK.CLOEXEC, 0);
     errdefer posix.close(listener_socket);
 
-    try posix.setsockopt(listener_socket, posix.SOL.SOCKET, posix.SO.REUSEADDR, &mem.toBytes(@as(c_int, 1)));
+    try posix.setsockopt(listener_socket, posix.SOL.SOCKET, posix.SO.REUSEADDR, &mem.toBytes(@as(u32, 1)));
     try posix.bind(listener_socket, &address.any, address.getOsSockLen());
     try posix.listen(listener_socket, kernel_backlog);
@@ -3655,7 +6342,7 @@ fn createListenerSocket(address: *net.Address) !posix.socket_t {
 
 test "accept multishot" {
     if (!is_linux) return error.SkipZigTest;
 
-    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+    var ring = IoUring.init(16, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3670,8 +6357,8 @@ test "accept multishot" {
     var addr: posix.sockaddr = undefined;
     var addr_len: posix.socklen_t = @sizeOf(@TypeOf(addr));
     const userdata: u64 = 0xaaaaaaaa;
-    _ = try ring.accept_multishot(userdata, listener_socket, &addr, &addr_len, 0);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    _ = try ring.accept_multishot(userdata, listener_socket, &addr, &addr_len, .{});
+    try testing.expectEqual(1, try ring.submit());
 
     var nr: usize = 4; // number of clients to connect
     while (nr > 0) : (nr -= 1) {
@@ -3685,7 +6372,7 @@ test "accept multishot" {
         if (cqe.err() == .INVAL) return error.SkipZigTest;
         try testing.expect(cqe.res > 0);
         try testing.expect(cqe.user_data == userdata);
-        try testing.expect(cqe.flags & linux.IORING_CQE_F_MORE > 0); // more flag is set
+        try testing.expect(cqe.flags.f_more); // more flag is set
 
         posix.close(client);
     }
@@ -3694,7 +6381,7 @@ test "accept multishot" {
 
 test "accept/connect/send_zc/recv" {
     try skipKernelLessThan(.{ .major = 6, .minor = 0, .patch = 0 });
 
-    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+    var ring = IoUring.init(16, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3708,10 +6395,10 @@ test "accept/connect/send_zc/recv" {
     var buffer_recv = [_]u8{0} ** 10;
 
     // zero-copy send
-    const sqe_send = try ring.send_zc(0xeeeeeeee, socket_test_harness.client, buffer_send[0..], 0, 0);
-    sqe_send.flags |= linux.IOSQE_IO_LINK;
-    _ = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, 0);
-    try testing.expectEqual(@as(u32, 2), try ring.submit());
+    const sqe_send = try ring.send_zc(0xeeeeeeee, socket_test_harness.client, buffer_send[0..], .{}, .{});
+    sqe_send.link_next();
+    _ = try ring.recv(0xffffffff, socket_test_harness.server, .{ .buffer = buffer_recv[0..] }, .{});
+    try testing.expectEqual(2, try ring.submit());
 
     var cqe_send = try ring.copy_cqe();
     // First completion of zero-copy send.
@@ -3719,10 +6406,10 @@ test "accept/connect/send_zc/recv" {
     // will be a second completion event / notification for the
     // request, with the user_data field set to the same value.
     // buffer_send must be kept alive until the second cqe.
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0xeeeeeeee,
         .res = buffer_send.len,
-        .flags = linux.IORING_CQE_F_MORE,
+        .flags = .{ .f_more = true },
     }, cqe_send);
 
     cqe_send, const cqe_recv = brk: {
@@ -3731,26 +6418,26 @@ test "accept/connect/send_zc/recv" {
         break :brk if (cqe1.user_data == 0xeeeeeeee) .{ cqe1, cqe2 } else .{ cqe2, cqe1 };
     };
 
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0xffffffff,
         .res = buffer_recv.len,
-        .flags = cqe_recv.flags & linux.IORING_CQE_F_SOCK_NONEMPTY,
+        .flags = .{ .f_sock_nonempty = cqe_recv.flags.f_sock_nonempty },
     }, cqe_recv);
 
     try testing.expectEqualSlices(u8, buffer_send[0..buffer_recv.len], buffer_recv[0..]);
 
     // Second completion of zero-copy send.
     // IORING_CQE_F_NOTIF in flags signals that the kernel is done with send_buffer
-    try testing.expectEqual(linux.io_uring_cqe{
+    try testing.expectEqual(Cqe{
         .user_data = 0xeeeeeeee,
         .res = 0,
-        .flags = linux.IORING_CQE_F_NOTIF,
+        .flags = .{ .f_notif = true },
     }, cqe_send);
 }
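Since send_zc completes twice, user code has to keep the buffer alive until the f_notif completion, not just until the first CQE. One way to structure that is to count a zero-copy send as in flight until its notification drains, roughly like this; `ring`, the socket `sock`, and the payload are placeholders:

    const payload: []const u8 = "zero-copy me";
    _ = try ring.send_zc(0x1, sock, payload, .{}, .{});
    _ = try ring.submit();
    var in_flight: usize = 1;
    while (in_flight > 0) {
        const cqe = try ring.copy_cqe();
        if (cqe.flags.f_more) continue; // first CQE: res = bytes sent, buffer still in use
        if (cqe.flags.f_notif) in_flight -= 1; // second CQE: kernel is done with payload
    }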
 
 test "accept_direct" {
     try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3771,12 +6458,12 @@ test "accept_direct" {
 
     for (0..2) |_| {
         for (registered_fds, 0..) |_, i| {
-            var buffer_recv = [_]u8{0} ** 16;
+            var buffer_recv: [16]u8 = @splat(0);
             const buffer_send: []const u8 = data[0 .. data.len - i]; // make it different at each loop
 
             // submit accept, will choose registered fd and return index in cqe
-            _ = try ring.accept_direct(accept_userdata, listener_socket, null, null, 0);
-            try testing.expectEqual(@as(u32, 1), try ring.submit());
+            _ = try ring.accept_direct(accept_userdata, listener_socket, null, null, .{});
+            try testing.expectEqual(1, try ring.submit());
 
             // connect
             const client = try posix.socket(address.any.family, posix.SOCK.STREAM | posix.SOCK.CLOEXEC, 0);
@@ -3797,9 +6484,10 @@ test "accept_direct" {
             // Submit receive to fixed file returned by accept (fd_index).
             // Fd field is set to registered file index, returned by accept.
             // Flag linux.IOSQE_FIXED_FILE must be set.
-            const recv_sqe = try ring.recv(read_userdata, fd_index, .{ .buffer = &buffer_recv }, 0);
-            recv_sqe.flags |= linux.IOSQE_FIXED_FILE;
-            try testing.expectEqual(@as(u32, 1), try ring.submit());
+            const recv_sqe = try ring.recv(read_userdata, fd_index, .{ .buffer = &buffer_recv }, .{});
+            recv_sqe.set_flags(.{ .fixed_file = true });
+
+            try testing.expectEqual(1, try ring.submit());
 
             // accept receive
             const recv_cqe = try ring.copy_cqe();
@@ -3810,8 +6498,8 @@ test "accept_direct" {
 
     // no more available fds, accept will get NFILE error
     {
         // submit accept
-        _ = try ring.accept_direct(accept_userdata, listener_socket, null, null, 0);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        _ = try ring.accept_direct(accept_userdata, listener_socket, null, null, .{});
+        try testing.expectEqual(1, try ring.submit());
         // connect
         const client = try posix.socket(address.any.family, posix.SOCK.STREAM | posix.SOCK.CLOEXEC, 0);
         try posix.connect(client, &address.any, address.getOsSockLen());
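accept_direct is the registered-table variant of accept: cqe.res is a slot index rather than a process-level descriptor, so nothing appears in the fd table and every follow-up request on that socket must set fixed_file. A sketch of the round trip; `ring` and `listener` are assumed:

    var slots = [_]posix.fd_t{-1} ** 2; // sparse table: kernel fills a slot per accept
    try ring.register_files(slots[0..]);
    _ = try ring.accept_direct(0x1, listener, null, null, .{});
    _ = try ring.submit();
    const cqe = try ring.copy_cqe();
    const slot = cqe.res; // table index of the accepted socket
    var buf: [16]u8 = undefined;
    const recv_sqe = try ring.recv(0x2, slot, .{ .buffer = &buf }, .{});
    recv_sqe.set_flags(.{ .fixed_file = true }); // without this, slot is misread as an fd
    _ = try ring.submit();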
@@ -3830,7 +6518,7 @@ test "accept_multishot_direct" {
     try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3850,8 +6538,8 @@ test "accept_multishot_direct" {
 
     for (0..2) |_| {
         // submit multishot accept
         // Will choose registered fd and return index of the selected registered file in cqe.
-        _ = try ring.accept_multishot_direct(accept_userdata, listener_socket, null, null, 0);
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        _ = try ring.accept_multishot_direct(accept_userdata, listener_socket, null, null, .{});
+        try testing.expectEqual(1, try ring.submit());
 
         for (registered_fds) |_| {
             // connect
@@ -3864,7 +6552,7 @@ test "accept_multishot_direct" {
             const fd_index = cqe_accept.res;
             try testing.expect(fd_index < registered_fds.len);
             try testing.expect(cqe_accept.user_data == accept_userdata);
-            try testing.expect(cqe_accept.flags & linux.IORING_CQE_F_MORE > 0); // has more is set
+            try testing.expect(cqe_accept.flags.f_more); // has more is set
         }
         // No more available fds, accept will get NFILE error.
         // Multishot is terminated (more flag is not set).
@@ -3877,7 +6565,7 @@ test "accept_multishot_direct" {
         const cqe_accept = try ring.copy_cqe();
         try testing.expect(cqe_accept.user_data == accept_userdata);
         try testing.expectEqual(posix.E.NFILE, cqe_accept.err());
-        try testing.expect(cqe_accept.flags & linux.IORING_CQE_F_MORE == 0); // has more is not set
+        try testing.expect(!cqe_accept.flags.f_more); // has more is not set
     }
     // return file descriptors to kernel
     try ring.register_files_update(0, registered_fds[0..]);
@@ -3888,7 +6576,7 @@ test "socket" {
     try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
 
-    var ring = IoUring.init(1, 0) catch |err| switch (err) {
+    var ring = IoUring.init(1, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3896,8 +6584,8 @@ test "socket" {
     defer ring.deinit();
 
     // prepare, submit socket operation
-    _ = try ring.socket(0, linux.AF.INET, posix.SOCK.STREAM, 0, 0);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    _ = try ring.socket(0, .inet, .{ .type = .stream }, .default, 0);
+    try testing.expectEqual(1, try ring.submit());
 
     // test completion
     var cqe = try ring.copy_cqe();
@@ -3911,7 +6599,7 @@ test "socket_direct/socket_direct_alloc/close_direct" {
     try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
 
-    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+    var ring = IoUring.init(2, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -3922,29 +6610,29 @@ test "socket_direct/socket_direct_alloc/close_direct" {
     try ring.register_files(registered_fds[0..]);
 
     // create socket in registered file descriptor at index 0 (last param)
-    _ = try ring.socket_direct(0, linux.AF.INET, posix.SOCK.STREAM, 0, 0, 0);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    _ = try ring.socket_direct(0, .inet, .{ .type = .stream }, .default, 0, 0);
+    try testing.expectEqual(1, try ring.submit());
     var cqe_socket = try ring.copy_cqe();
     try testing.expectEqual(posix.E.SUCCESS, cqe_socket.err());
     try testing.expect(cqe_socket.res == 0);
 
     // create socket in registered file descriptor at index 1 (last param)
-    _ = try ring.socket_direct(0, linux.AF.INET, posix.SOCK.STREAM, 0, 0, 1);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    _ = try ring.socket_direct(0, .inet, .{ .type = .stream }, .default, 0, 1);
+    try testing.expectEqual(1, try ring.submit());
     cqe_socket = try ring.copy_cqe();
     try testing.expectEqual(posix.E.SUCCESS, cqe_socket.err());
     try testing.expect(cqe_socket.res == 0); // res is 0 when index is specified
 
     // create socket in kernel chosen file descriptor index (_alloc version)
     // completion res has index from registered files
-    _ = try ring.socket_direct_alloc(0, linux.AF.INET, posix.SOCK.STREAM, 0, 0);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    _ = try ring.socket_direct_alloc(0, .inet, .{ .type = .stream }, .default, 0);
+    try testing.expectEqual(1, try ring.submit());
     cqe_socket = try ring.copy_cqe();
     try testing.expectEqual(posix.E.SUCCESS, cqe_socket.err());
     try testing.expect(cqe_socket.res == 2); // returns registered file index
 
     // use sockets from registered_fds in connect operation
-    var address = try net.Address.parseIp4("127.0.0.1", 0);
+    var address: net.Address = try .parseIp4("127.0.0.1", 0);
     const listener_socket = try createListenerSocket(&address);
     defer posix.close(listener_socket);
     const accept_userdata: u64 = 0xaaaaaaaa;
@@ -3952,12 +6640,14 @@ test "socket_direct/socket_direct_alloc/close_direct" {
     const close_userdata: u64 = 0xcccccccc;
     for (registered_fds, 0..) |_, fd_index| {
         // prepare accept
-        _ = try ring.accept(accept_userdata, listener_socket, null, null, 0);
+        _ = try ring.accept(accept_userdata, listener_socket, null, null, .{});
         // prepare connect with fixed socket
         const connect_sqe = try ring.connect(connect_userdata, @intCast(fd_index), &address.any, address.getOsSockLen());
-        connect_sqe.flags |= linux.IOSQE_FIXED_FILE; // fd is fixed file index
+        // fd is fixed file index
+        connect_sqe.set_flags(.{ .fixed_file = true });
+
         // submit both
-        try testing.expectEqual(@as(u32, 2), try ring.submit());
+        try testing.expectEqual(2, try ring.submit());
         // get completions
         var cqe_connect = try ring.copy_cqe();
         var cqe_accept = try ring.copy_cqe();
@@ -3977,7 +6667,7 @@ test "socket_direct/socket_direct_alloc/close_direct" {
 
         // submit and test close_direct
         _ = try ring.close_direct(close_userdata, @intCast(fd_index));
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(1, try ring.submit());
         var cqe_close = try ring.copy_cqe();
         try testing.expect(cqe_close.user_data == close_userdata);
         try testing.expectEqual(posix.E.SUCCESS, cqe_close.err());
@@ -3989,7 +6679,7 @@ test "socket_direct/socket_direct_alloc/close_direct" {
 
 test "openat_direct/close_direct" {
     try skipKernelLessThan(.{ .major = 5, .minor = 19, .patch = 0 });
 
-    var ring = IoUring.init(2, 0) catch |err| switch (err) {
+    var ring = IoUring.init(2, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -4008,21 +6698,21 @@ test "openat_direct/close_direct" {
 
     // use registered file at index 0 (last param)
     _ = try ring.openat_direct(user_data, tmp.dir.fd, path, flags, mode, 0);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(1, try ring.submit());
     var cqe = try ring.copy_cqe();
     try testing.expectEqual(posix.E.SUCCESS, cqe.err());
     try testing.expect(cqe.res == 0);
 
     // use registered file at index 1
     _ = try ring.openat_direct(user_data, tmp.dir.fd, path, flags, mode, 1);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    try testing.expectEqual(1, try ring.submit());
     cqe = try ring.copy_cqe();
     try testing.expectEqual(posix.E.SUCCESS, cqe.err());
     try testing.expect(cqe.res == 0); // res is 0 when we specify index
 
     // let kernel choose registered file index
-    _ = try ring.openat_direct(user_data, tmp.dir.fd, path, flags, mode, linux.IORING_FILE_INDEX_ALLOC);
-    try testing.expectEqual(@as(u32, 1), try ring.submit());
+    _ = try ring.openat_direct(user_data, tmp.dir.fd, path, flags, mode, constants.FILE_INDEX_ALLOC);
+    try testing.expectEqual(1, try ring.submit());
     cqe = try ring.copy_cqe();
     try testing.expectEqual(posix.E.SUCCESS, cqe.err());
     try testing.expect(cqe.res == 2); // chosen index is in res
@@ -4030,7 +6720,7 @@ test "openat_direct/close_direct" {
 
     // close all open file descriptors
     for (registered_fds, 0..) |_, fd_index| {
         _ = try ring.close_direct(user_data, @intCast(fd_index));
-        try testing.expectEqual(@as(u32, 1), try ring.submit());
+        try testing.expectEqual(1, try ring.submit());
         var cqe_close = try ring.copy_cqe();
         try testing.expectEqual(posix.E.SUCCESS, cqe_close.err());
     }
@@ -4040,7 +6730,7 @@ test "waitid" {
     try skipKernelLessThan(.{ .major = 6, .minor = 7, .patch = 0 });
 
-    var ring = IoUring.init(16, 0) catch |err| switch (err) {
+    var ring = IoUring.init(16, .{}) catch |err| switch (err) {
         error.SystemOutdated => return error.SkipZigTest,
         error.PermissionDenied => return error.SkipZigTest,
         else => return err,
@@ -4053,7 +6743,7 @@ test "waitid" {
     }
 
     var siginfo: posix.siginfo_t = undefined;
-    _ = try ring.waitid(0, .PID, pid, &siginfo, posix.W.EXITED, 0);
+    _ = try ring.waitid(0, .PID, pid, &siginfo, .{ .exited = true }, 0);
 
     try testing.expectEqual(1, try ring.submit());
 
@@ -4075,14 +6765,26 @@ inline fn skipKernelLessThan(required: std.SemanticVersion) !void {
     }
 
     const release = mem.sliceTo(&uts.release, 0);
-    // Strips potential extra, as kernel version might not be semver compliant, example "6.8.9-300.fc40.x86_64"
-    const extra_index = std.mem.indexOfAny(u8, release, "-+");
-    const stripped = release[0..(extra_index orelse release.len)];
-    // Make sure the input don't rely on the extra we just stripped
+    // Make sure the input doesn't rely on the extra we are about to strip
     try testing.expect(required.pre == null and required.build == null);
+    const stripped = blk: {
+        // Strips potential extra, as kernel version might not be semver compliant, example "6.8.9-300.fc40.x86_64"
+        const extra_index = std.mem.findAny(u8, release, "-+");
+        const stripped = release[0..(extra_index orelse release.len)];
+
+        // The WSL kernel isn't semver compliant,
+        // e.g. for 6.6.87.2-microsoft-standard-WSL2 strip the extra .2 after 87
+        const wsl = "WSL2";
+        if (std.mem.eql(u8, release[release.len - wsl.len ..][0..wsl.len], wsl)) {
+            const wsl_stripped, _ = std.mem.cutScalarLast(u8, stripped, '.') orelse unreachable;
+            break :blk wsl_stripped;
+        }
+        break :blk stripped;
    };
     var current = try std.SemanticVersion.parse(stripped);
     current.pre = null; // don't check pre field
+
     if (required.order(current) == .gt) return error.SkipZigTest;
 }
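The reworked stripping logic is easiest to sanity-check against concrete release strings. The inputs below are hypothetical, and the call reuses the same std.mem helper the hunk above uses:

    // "6.8.9-300.fc40.x86_64"            => "6.8.9"  (cut at the first '-' or '+')
    // "6.6.87.2-microsoft-standard-WSL2" => "6.6.87" (WSL also drops the trailing ".2")
    // "6.1.0"                            => "6.1.0"  (already semver, unchanged)
    const release = "6.8.9-300.fc40.x86_64"; // hypothetical uname release
    const extra_index = std.mem.findAny(u8, release, "-+");
    const stripped = release[0..(extra_index orelse release.len)];
    try std.testing.expectEqualStrings("6.8.9", stripped);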
}, cqe_send); } // Server uses buffer group receive { // Submit recv operation, buffer will be chosen from buffer group - _ = try buf_grp.recv(2, fds.server, 0); + _ = try buf_grp.recv(2, fds.server, .{}); const submitted = try ring.submit(); try testing.expectEqual(1, submitted); @@ -4141,7 +6843,7 @@ test BufferGroup { try testing.expectEqual(2, cqe.user_data); // matches submitted user_data try testing.expect(cqe.res >= 0); // success try testing.expectEqual(posix.E.SUCCESS, cqe.err()); - try testing.expectEqual(data.len, @as(usize, @intCast(cqe.res))); // cqe.res holds received data len + try testing.expectEqual(@as(i32, data.len), cqe.res); // cqe.res holds received data len // Get buffer from pool const buf = try buf_grp.get(cqe); @@ -4154,7 +6856,7 @@ test "ring mapped buffers recv" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(16, 0) catch |err| switch (err) { + var ring = IoUring.init(16, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -4192,11 +6894,11 @@ test "ring mapped buffers recv" { const data = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe }; { const user_data = rnd.int(u64); - _ = try ring.send(user_data, fds.client, data[0..], 0); - try testing.expectEqual(@as(u32, 1), try ring.submit()); + _ = try ring.send(user_data, fds.client, data[0..], .{}); + try testing.expectEqual(1, try ring.submit()); const cqe_send = try ring.copy_cqe(); if (cqe_send.err() == .INVAL) return error.SkipZigTest; - try testing.expectEqual(linux.io_uring_cqe{ .user_data = user_data, .res = data.len, .flags = 0 }, cqe_send); + try testing.expectEqual(Cqe{ .user_data = user_data, .res = data.len, .flags = .{} }, cqe_send); } var pos: usize = 0; @@ -4215,13 +6917,13 @@ test "ring mapped buffers recv" { // 'no more buffers', until we put buffers to the kernel { const user_data = rnd.int(u64); - _ = try buf_grp.recv(user_data, fds.server, 0); - try testing.expectEqual(@as(u32, 1), try ring.submit()); + _ = try buf_grp.recv(user_data, fds.server, .{}); + try testing.expectEqual(1, try ring.submit()); const cqe = try ring.copy_cqe(); try testing.expectEqual(user_data, cqe.user_data); try testing.expect(cqe.res < 0); // fail try testing.expectEqual(posix.E.NOBUFS, cqe.err()); - try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == 0); // IORING_CQE_F_BUFFER flags is set on success only + try testing.expect(!cqe.flags.f_buffer); // IORING_CQE_F_BUFFER flag is set on success only try testing.expectError(error.NoBufferSelected, cqe.buffer_id()); } @@ -4243,7 +6945,7 @@ test "ring mapped buffers recv" { test "ring mapped buffers multishot recv" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(16, 0) catch |err| switch (err) { + var ring = IoUring.init(16, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -4281,28 +6983,28 @@ test "ring mapped buffers multishot recv" { const data = [_]u8{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf }; { const user_data = rnd.int(u64); - _ = try ring.send(user_data, fds.client, data[0..], 0); - try testing.expectEqual(@as(u32, 1), try ring.submit()); + _ = try ring.send(user_data, fds.client, data[0..], .{}); + try testing.expectEqual(1, try ring.submit()); const cqe_send = try ring.copy_cqe(); if (cqe_send.err() == .INVAL) return error.SkipZigTest; - try 
testing.expectEqual(linux.io_uring_cqe{ .user_data = user_data, .res = data.len, .flags = 0 }, cqe_send); + try testing.expectEqual(Cqe{ .user_data = user_data, .res = data.len, .flags = .{} }, cqe_send); } // start multishot recv var recv_user_data = rnd.int(u64); - _ = try buf_grp.recv_multishot(recv_user_data, fds.server, 0); - try testing.expectEqual(@as(u32, 1), try ring.submit()); // submit + _ = try buf_grp.recv_multishot(recv_user_data, fds.server, .{}); + try testing.expectEqual(1, try ring.submit()); // submit // server reads data into provided buffers // there are 2 buffers of size 4, so each read gets only chunk of data // we read four chunks of 4, 4, 4, 4 bytes each var chunk: []const u8 = data[0..buffer_size]; // first chunk const cqe1 = try expect_buf_grp_cqe(&ring, &buf_grp, recv_user_data, chunk); - try testing.expect(cqe1.flags & linux.IORING_CQE_F_MORE > 0); + try testing.expect(cqe1.flags.f_more); chunk = data[buffer_size .. buffer_size * 2]; // second chunk const cqe2 = try expect_buf_grp_cqe(&ring, &buf_grp, recv_user_data, chunk); - try testing.expect(cqe2.flags & linux.IORING_CQE_F_MORE > 0); + try testing.expect(cqe2.flags.f_more); // both buffers provided to the kernel are used so we get error // 'no more buffers', until we put buffers to the kernel @@ -4311,10 +7013,11 @@ test "ring mapped buffers multishot recv" { try testing.expectEqual(recv_user_data, cqe.user_data); try testing.expect(cqe.res < 0); // fail try testing.expectEqual(posix.E.NOBUFS, cqe.err()); - try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == 0); // IORING_CQE_F_BUFFER flags is set on success only + // The IORING_CQE_F_BUFFER flag is set on success only + try testing.expect(!cqe.flags.f_buffer); // has more is not set // indicates that multishot is finished - try testing.expect(cqe.flags & linux.IORING_CQE_F_MORE == 0); + try testing.expect(!cqe.flags.f_more); try testing.expectError(error.NoBufferSelected, cqe.buffer_id()); } @@ -4324,24 +7027,24 @@ test "ring mapped buffers multishot recv" { // restart multishot recv_user_data = rnd.int(u64); - _ = try buf_grp.recv_multishot(recv_user_data, fds.server, 0); - try testing.expectEqual(@as(u32, 1), try ring.submit()); // submit + _ = try buf_grp.recv_multishot(recv_user_data, fds.server, .{}); + try testing.expectEqual(1, try ring.submit()); // submit chunk = data[buffer_size * 2 .. 
buffer_size * 3]; // third chunk const cqe3 = try expect_buf_grp_cqe(&ring, &buf_grp, recv_user_data, chunk); - try testing.expect(cqe3.flags & linux.IORING_CQE_F_MORE > 0); + try testing.expect(cqe3.flags.f_more); try buf_grp.put(cqe3); chunk = data[buffer_size * 3 ..]; // last chunk const cqe4 = try expect_buf_grp_cqe(&ring, &buf_grp, recv_user_data, chunk); - try testing.expect(cqe4.flags & linux.IORING_CQE_F_MORE > 0); + try testing.expect(cqe4.flags.f_more); try buf_grp.put(cqe4); // cancel pending multishot recv operation { const cancel_user_data = rnd.int(u64); - _ = try ring.cancel(cancel_user_data, recv_user_data, 0); - try testing.expectEqual(@as(u32, 1), try ring.submit()); + _ = try ring.cancel(cancel_user_data, recv_user_data, .{}); + try testing.expectEqual(1, try ring.submit()); // expect completion of cancel operation and completion of recv operation var cqe_cancel = try ring.copy_cqe(); @@ -4374,7 +7077,7 @@ test "ring mapped buffers multishot recv" { try testing.expectEqual(recv_user_data, cqe_recv.user_data); try testing.expect(cqe_recv.res < 0); try testing.expect(cqe_recv.err() == .NOBUFS or cqe_recv.err() == .CANCELED); - try testing.expect(cqe_recv.flags & linux.IORING_CQE_F_MORE == 0); + try testing.expect(!cqe_recv.flags.f_more); } } } @@ -4385,18 +7088,18 @@ fn buf_grp_recv_submit_get_cqe( buf_grp: *BufferGroup, fd: posix.fd_t, user_data: u64, -) !linux.io_uring_cqe { +) !Cqe { // prepare and submit recv - const sqe = try buf_grp.recv(user_data, fd, 0); - try testing.expect(sqe.flags & linux.IOSQE_BUFFER_SELECT == linux.IOSQE_BUFFER_SELECT); + const sqe = try buf_grp.recv(user_data, fd, .{}); + try testing.expect(sqe.flags.buffer_select); try testing.expect(sqe.buf_index == buf_grp.group_id); - try testing.expectEqual(@as(u32, 1), try ring.submit()); // submit + try testing.expectEqual(1, try ring.submit()); // submit // get cqe, expect success const cqe = try ring.copy_cqe(); try testing.expectEqual(user_data, cqe.user_data); try testing.expect(cqe.res >= 0); // success try testing.expectEqual(posix.E.SUCCESS, cqe.err()); - try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER); // IORING_CQE_F_BUFFER flag is set + try testing.expect(cqe.flags.f_buffer); // IORING_CQE_F_BUFFER flag is set return cqe; } @@ -4406,18 +7109,18 @@ fn expect_buf_grp_cqe( buf_grp: *BufferGroup, user_data: u64, expected: []const u8, -) !linux.io_uring_cqe { +) !Cqe { // get cqe const cqe = try ring.copy_cqe(); try testing.expectEqual(user_data, cqe.user_data); try testing.expect(cqe.res >= 0); // success - try testing.expect(cqe.flags & linux.IORING_CQE_F_BUFFER == linux.IORING_CQE_F_BUFFER); // IORING_CQE_F_BUFFER flag is set - try testing.expectEqual(expected.len, @as(usize, @intCast(cqe.res))); + try testing.expect(cqe.flags.f_buffer); // IORING_CQE_F_BUFFER flag is set + try testing.expectEqual(@as(i32, @intCast(expected.len)), cqe.res); try testing.expectEqual(posix.E.SUCCESS, cqe.err()); // get buffer from pool const buffer_id = try cqe.buffer_id(); - const len = @as(usize, @intCast(cqe.res)); + const len: usize = @intCast(cqe.res); const buf = buf_grp.get_by_id(buffer_id)[0..len]; try testing.expectEqualSlices(u8, expected, buf); @@ -4427,7 +7130,7 @@ fn expect_buf_grp_cqe( test "copy_cqes with wrapping sq.cqes buffer" { if (!is_linux) return error.SkipZigTest; - var ring = IoUring.init(2, 0) catch |err| switch (err) { + var ring = IoUring.init(2, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, 
error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -4438,11 +7141,11 @@ test "copy_cqes with wrapping sq.cqes buffer" { try testing.expectEqual(4, ring.cq.cqes.len); // submit 2 entries, receive 2 completions - var cqes: [8]linux.io_uring_cqe = undefined; + var cqes: [8]Cqe = undefined; { for (0..2) |_| { const sqe = try ring.get_sqe(); - sqe.prep_timeout(&.{ .sec = 0, .nsec = 10000 }, 0, 0); + sqe.prep_timeout(&.{ .sec = 0, .nsec = 10000 }, 0, .{}); try testing.expect(try ring.submit() == 1); } var cqe_count: u32 = 0; @@ -4459,7 +7162,7 @@ test "copy_cqes with wrapping sq.cqes buffer" { for (1..1024) |i| { for (0..4) |_| { const sqe = try ring.get_sqe(); - sqe.prep_timeout(&.{ .sec = 0, .nsec = 10000 }, 0, 0); + sqe.prep_timeout(&.{ .sec = 0, .nsec = 10000 }, 0, .{}); try testing.expect(try ring.submit() == 1); } var cqe_count: u32 = 0; @@ -4472,7 +7175,7 @@ test "copy_cqes with wrapping sq.cqes buffer" { } test "bind/listen/connect" { - var ring = IoUring.init(4, 0) catch |err| switch (err) { + var ring = IoUring.init(4, .{}) catch |err| switch (err) { error.SystemOutdated => return error.SkipZigTest, error.PermissionDenied => return error.SkipZigTest, else => return err, @@ -4481,14 +7184,19 @@ test "bind/listen/connect" { const probe = ring.get_probe() catch return error.SkipZigTest; // LISTEN is higher required operation - if (!probe.is_supported(.LISTEN)) return error.SkipZigTest; - - var addr = net.Address.initIp4([4]u8{ 127, 0, 0, 1 }, 0); + if (!probe.is_supported(.listen)) return error.SkipZigTest; + + var addr: net.Address = .initIp4([4]u8{ 127, 0, 0, 1 }, 0); + // TODO: switch family to IpProto type + // const proto: linux.IpProto = switch (addr.any.family) { + // .unix => .default, + // else => .tcp, + // }; const proto: u32 = if (addr.any.family == linux.AF.UNIX) 0 else linux.IPPROTO.TCP; const listen_fd = brk: { // Create socket - _ = try ring.socket(1, addr.any.family, linux.SOCK.STREAM | linux.SOCK.CLOEXEC, proto, 0); + _ = try ring.socket(1, @enumFromInt(addr.any.family), .{ .type = .stream, .flags = .{ .cloexec = true } }, @enumFromInt(proto), 0); try testing.expectEqual(1, try ring.submit()); var cqe = try ring.copy_cqe(); try testing.expectEqual(1, cqe.user_data); @@ -4498,8 +7206,8 @@ test "bind/listen/connect" { // Prepare: set socket option * 2, bind, listen var optval: u32 = 1; - (try ring.setsockopt(2, listen_fd, linux.SOL.SOCKET, linux.SO.REUSEADDR, mem.asBytes(&optval))).link_next(); - (try ring.setsockopt(3, listen_fd, linux.SOL.SOCKET, linux.SO.REUSEPORT, mem.asBytes(&optval))).link_next(); + (try ring.setsockopt(2, listen_fd, .socket, .reuseaddr, mem.asBytes(&optval))).link_next(); + (try ring.setsockopt(3, listen_fd, .socket, .reuseport, mem.asBytes(&optval))).link_next(); (try ring.bind(4, listen_fd, &addr.any, addr.getOsSockLen(), 0)).link_next(); _ = try ring.listen(5, listen_fd, 1, 0); // Submit 4 operations @@ -4513,7 +7221,7 @@ test "bind/listen/connect" { // Check that socket option is set optval = 0; - _ = try ring.getsockopt(5, listen_fd, linux.SOL.SOCKET, linux.SO.REUSEADDR, mem.asBytes(&optval)); + _ = try ring.getsockopt(5, listen_fd, .socket, .reuseaddr, mem.asBytes(&optval)); try testing.expectEqual(1, try ring.submit()); cqe = try ring.copy_cqe(); try testing.expectEqual(5, cqe.user_data); @@ -4529,7 +7237,7 @@ test "bind/listen/connect" { const connect_fd = brk: { // Create connect socket - _ = try ring.socket(6, addr.any.family, linux.SOCK.STREAM | linux.SOCK.CLOEXEC, proto, 0); + _ = try ring.socket(6, 
@enumFromInt(addr.any.family), .{ .type = .stream, .flags = .{ .cloexec = true } }, @enumFromInt(proto), 0); try testing.expectEqual(1, try ring.submit()); const cqe = try ring.copy_cqe(); try testing.expectEqual(6, cqe.user_data); @@ -4541,7 +7249,7 @@ test "bind/listen/connect" { }; // Prepare accept/connect operations - _ = try ring.accept(7, listen_fd, null, null, 0); + _ = try ring.accept(7, listen_fd, null, null, .{}); _ = try ring.connect(8, connect_fd, &addr.any, addr.getOsSockLen()); try testing.expectEqual(2, try ring.submit()); // Get listener accepted socket @@ -4563,7 +7271,7 @@ test "bind/listen/connect" { // Shutdown and close all sockets for ([_]posix.socket_t{ connect_fd, accept_fd, listen_fd }) |fd| { - (try ring.shutdown(9, fd, posix.SHUT.RDWR)).link_next(); + (try ring.shutdown(9, fd, .rdwr)).link_next(); _ = try ring.close(10, fd); try testing.expectEqual(2, try ring.submit()); for (0..2) |i| { @@ -4579,20 +7287,20 @@ fn testSendRecv(ring: *IoUring, send_fd: posix.socket_t, recv_fd: posix.socket_t var buffer_recv: [buffer_send.len * 2]u8 = undefined; // 2 sends - _ = try ring.send(1, send_fd, buffer_send, linux.MSG.WAITALL); - _ = try ring.send(2, send_fd, buffer_send, linux.MSG.WAITALL); + _ = try ring.send(1, send_fd, buffer_send, .{ .waitall = true }); + _ = try ring.send(2, send_fd, buffer_send, .{ .waitall = true }); try testing.expectEqual(2, try ring.submit()); for (0..2) |i| { const cqe = try ring.copy_cqe(); try testing.expectEqual(1 + i, cqe.user_data); try testing.expectEqual(posix.E.SUCCESS, cqe.err()); - try testing.expectEqual(buffer_send.len, @as(usize, @intCast(cqe.res))); + try testing.expectEqual(@as(i32, buffer_send.len), cqe.res); } // receive var recv_len: usize = 0; while (recv_len < buffer_send.len * 2) { - _ = try ring.recv(3, recv_fd, .{ .buffer = buffer_recv[recv_len..] }, 0); + _ = try ring.recv(3, recv_fd, .{ .buffer = buffer_recv[recv_len..] }, .{}); try testing.expectEqual(1, try ring.submit()); const cqe = try ring.copy_cqe(); try testing.expectEqual(3, cqe.user_data); diff --git a/lib/std/os/linux/io_uring_sqe.zig b/lib/std/os/linux/io_uring_sqe.zig deleted file mode 100644 index 5658206a66a8..000000000000 --- a/lib/std/os/linux/io_uring_sqe.zig +++ /dev/null @@ -1,679 +0,0 @@ -//! Contains only the definition of `io_uring_sqe`. -//! Split into its own file to compartmentalize the initialization methods. 
- -const std = @import("../../std.zig"); -const linux = std.os.linux; - -pub const io_uring_sqe = extern struct { - opcode: linux.IORING_OP, - flags: u8, - ioprio: u16, - fd: i32, - off: u64, - addr: u64, - len: u32, - rw_flags: u32, - user_data: u64, - buf_index: u16, - personality: u16, - splice_fd_in: i32, - addr3: u64, - resv: u64, - - pub fn prep_nop(sqe: *linux.io_uring_sqe) void { - sqe.* = .{ - .opcode = .NOP, - .flags = 0, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = 0, - .len = 0, - .rw_flags = 0, - .user_data = 0, - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - } - - pub fn prep_fsync(sqe: *linux.io_uring_sqe, fd: linux.fd_t, flags: u32) void { - sqe.* = .{ - .opcode = .FSYNC, - .flags = 0, - .ioprio = 0, - .fd = fd, - .off = 0, - .addr = 0, - .len = 0, - .rw_flags = flags, - .user_data = 0, - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - } - - pub fn prep_rw( - sqe: *linux.io_uring_sqe, - op: linux.IORING_OP, - fd: linux.fd_t, - addr: u64, - len: usize, - offset: u64, - ) void { - sqe.* = .{ - .opcode = op, - .flags = 0, - .ioprio = 0, - .fd = fd, - .off = offset, - .addr = addr, - .len = @intCast(len), - .rw_flags = 0, - .user_data = 0, - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - } - - pub fn prep_read(sqe: *linux.io_uring_sqe, fd: linux.fd_t, buffer: []u8, offset: u64) void { - sqe.prep_rw(.READ, fd, @intFromPtr(buffer.ptr), buffer.len, offset); - } - - pub fn prep_write(sqe: *linux.io_uring_sqe, fd: linux.fd_t, buffer: []const u8, offset: u64) void { - sqe.prep_rw(.WRITE, fd, @intFromPtr(buffer.ptr), buffer.len, offset); - } - - pub fn prep_splice(sqe: *linux.io_uring_sqe, fd_in: linux.fd_t, off_in: u64, fd_out: linux.fd_t, off_out: u64, len: usize) void { - sqe.prep_rw(.SPLICE, fd_out, undefined, len, off_out); - sqe.addr = off_in; - sqe.splice_fd_in = fd_in; - } - - pub fn prep_readv( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - iovecs: []const std.posix.iovec, - offset: u64, - ) void { - sqe.prep_rw(.READV, fd, @intFromPtr(iovecs.ptr), iovecs.len, offset); - } - - pub fn prep_writev( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - iovecs: []const std.posix.iovec_const, - offset: u64, - ) void { - sqe.prep_rw(.WRITEV, fd, @intFromPtr(iovecs.ptr), iovecs.len, offset); - } - - pub fn prep_read_fixed(sqe: *linux.io_uring_sqe, fd: linux.fd_t, buffer: *std.posix.iovec, offset: u64, buffer_index: u16) void { - sqe.prep_rw(.READ_FIXED, fd, @intFromPtr(buffer.base), buffer.len, offset); - sqe.buf_index = buffer_index; - } - - pub fn prep_write_fixed(sqe: *linux.io_uring_sqe, fd: linux.fd_t, buffer: *std.posix.iovec, offset: u64, buffer_index: u16) void { - sqe.prep_rw(.WRITE_FIXED, fd, @intFromPtr(buffer.base), buffer.len, offset); - sqe.buf_index = buffer_index; - } - - pub fn prep_accept( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - addr: ?*linux.sockaddr, - addrlen: ?*linux.socklen_t, - flags: u32, - ) void { - // `addr` holds a pointer to `sockaddr`, and `addr2` holds a pointer to socklen_t`. - // `addr2` maps to `sqe.off` (u64) instead of `sqe.len` (which is only a u32). 
- sqe.prep_rw(.ACCEPT, fd, @intFromPtr(addr), 0, @intFromPtr(addrlen)); - sqe.rw_flags = flags; - } - - pub fn prep_accept_direct( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - addr: ?*linux.sockaddr, - addrlen: ?*linux.socklen_t, - flags: u32, - file_index: u32, - ) void { - prep_accept(sqe, fd, addr, addrlen, flags); - __io_uring_set_target_fixed_file(sqe, file_index); - } - - pub fn prep_multishot_accept_direct( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - addr: ?*linux.sockaddr, - addrlen: ?*linux.socklen_t, - flags: u32, - ) void { - prep_multishot_accept(sqe, fd, addr, addrlen, flags); - __io_uring_set_target_fixed_file(sqe, linux.IORING_FILE_INDEX_ALLOC); - } - - fn __io_uring_set_target_fixed_file(sqe: *linux.io_uring_sqe, file_index: u32) void { - const sqe_file_index: u32 = if (file_index == linux.IORING_FILE_INDEX_ALLOC) - linux.IORING_FILE_INDEX_ALLOC - else - // 0 means no fixed files, indexes should be encoded as "index + 1" - file_index + 1; - // This filed is overloaded in liburing: - // splice_fd_in: i32 - // sqe_file_index: u32 - sqe.splice_fd_in = @bitCast(sqe_file_index); - } - - pub fn prep_connect( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - addr: *const linux.sockaddr, - addrlen: linux.socklen_t, - ) void { - // `addrlen` maps to `sqe.off` (u64) instead of `sqe.len` (which is only a u32). - sqe.prep_rw(.CONNECT, fd, @intFromPtr(addr), 0, addrlen); - } - - pub fn prep_epoll_ctl( - sqe: *linux.io_uring_sqe, - epfd: linux.fd_t, - fd: linux.fd_t, - op: u32, - ev: ?*linux.epoll_event, - ) void { - sqe.prep_rw(.EPOLL_CTL, epfd, @intFromPtr(ev), op, @intCast(fd)); - } - - pub fn prep_recv(sqe: *linux.io_uring_sqe, fd: linux.fd_t, buffer: []u8, flags: u32) void { - sqe.prep_rw(.RECV, fd, @intFromPtr(buffer.ptr), buffer.len, 0); - sqe.rw_flags = flags; - } - - pub fn prep_recv_multishot( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - buffer: []u8, - flags: u32, - ) void { - sqe.prep_recv(fd, buffer, flags); - sqe.ioprio |= linux.IORING_RECV_MULTISHOT; - } - - pub fn prep_recvmsg( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - msg: *linux.msghdr, - flags: u32, - ) void { - sqe.prep_rw(.RECVMSG, fd, @intFromPtr(msg), 1, 0); - sqe.rw_flags = flags; - } - - pub fn prep_recvmsg_multishot( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - msg: *linux.msghdr, - flags: u32, - ) void { - sqe.prep_recvmsg(fd, msg, flags); - sqe.ioprio |= linux.IORING_RECV_MULTISHOT; - } - - pub fn prep_send(sqe: *linux.io_uring_sqe, fd: linux.fd_t, buffer: []const u8, flags: u32) void { - sqe.prep_rw(.SEND, fd, @intFromPtr(buffer.ptr), buffer.len, 0); - sqe.rw_flags = flags; - } - - pub fn prep_send_zc(sqe: *linux.io_uring_sqe, fd: linux.fd_t, buffer: []const u8, flags: u32, zc_flags: u16) void { - sqe.prep_rw(.SEND_ZC, fd, @intFromPtr(buffer.ptr), buffer.len, 0); - sqe.rw_flags = flags; - sqe.ioprio = zc_flags; - } - - pub fn prep_send_zc_fixed(sqe: *linux.io_uring_sqe, fd: linux.fd_t, buffer: []const u8, flags: u32, zc_flags: u16, buf_index: u16) void { - prep_send_zc(sqe, fd, buffer, flags, zc_flags); - sqe.ioprio |= linux.IORING_RECVSEND_FIXED_BUF; - sqe.buf_index = buf_index; - } - - pub fn prep_sendmsg_zc( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - msg: *const linux.msghdr_const, - flags: u32, - ) void { - prep_sendmsg(sqe, fd, msg, flags); - sqe.opcode = .SENDMSG_ZC; - } - - pub fn prep_sendmsg( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - msg: *const linux.msghdr_const, - flags: u32, - ) void { - sqe.prep_rw(.SENDMSG, fd, @intFromPtr(msg), 1, 0); - sqe.rw_flags = flags; - } - - 
pub fn prep_openat( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - path: [*:0]const u8, - flags: linux.O, - mode: linux.mode_t, - ) void { - sqe.prep_rw(.OPENAT, fd, @intFromPtr(path), mode, 0); - sqe.rw_flags = @bitCast(flags); - } - - pub fn prep_openat_direct( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - path: [*:0]const u8, - flags: linux.O, - mode: linux.mode_t, - file_index: u32, - ) void { - prep_openat(sqe, fd, path, flags, mode); - __io_uring_set_target_fixed_file(sqe, file_index); - } - - pub fn prep_close(sqe: *linux.io_uring_sqe, fd: linux.fd_t) void { - sqe.* = .{ - .opcode = .CLOSE, - .flags = 0, - .ioprio = 0, - .fd = fd, - .off = 0, - .addr = 0, - .len = 0, - .rw_flags = 0, - .user_data = 0, - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - } - - pub fn prep_close_direct(sqe: *linux.io_uring_sqe, file_index: u32) void { - prep_close(sqe, 0); - __io_uring_set_target_fixed_file(sqe, file_index); - } - - pub fn prep_timeout( - sqe: *linux.io_uring_sqe, - ts: *const linux.kernel_timespec, - count: u32, - flags: u32, - ) void { - sqe.prep_rw(.TIMEOUT, -1, @intFromPtr(ts), 1, count); - sqe.rw_flags = flags; - } - - pub fn prep_timeout_remove(sqe: *linux.io_uring_sqe, timeout_user_data: u64, flags: u32) void { - sqe.* = .{ - .opcode = .TIMEOUT_REMOVE, - .flags = 0, - .ioprio = 0, - .fd = -1, - .off = 0, - .addr = timeout_user_data, - .len = 0, - .rw_flags = flags, - .user_data = 0, - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - } - - pub fn prep_link_timeout( - sqe: *linux.io_uring_sqe, - ts: *const linux.kernel_timespec, - flags: u32, - ) void { - sqe.prep_rw(.LINK_TIMEOUT, -1, @intFromPtr(ts), 1, 0); - sqe.rw_flags = flags; - } - - pub fn prep_poll_add( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - poll_mask: u32, - ) void { - sqe.prep_rw(.POLL_ADD, fd, @intFromPtr(@as(?*anyopaque, null)), 0, 0); - // Poll masks previously used to comprise of 16 bits in the flags union of - // a SQE, but were then extended to comprise of 32 bits in order to make - // room for additional option flags. To ensure that the correct bits of - // poll masks are consistently and properly read across multiple kernel - // versions, poll masks are enforced to be little-endian. - // https://www.spinics.net/lists/io-uring/msg02848.html - sqe.rw_flags = std.mem.nativeToLittle(u32, poll_mask); - } - - pub fn prep_poll_remove( - sqe: *linux.io_uring_sqe, - target_user_data: u64, - ) void { - sqe.prep_rw(.POLL_REMOVE, -1, target_user_data, 0, 0); - } - - pub fn prep_poll_update( - sqe: *linux.io_uring_sqe, - old_user_data: u64, - new_user_data: u64, - poll_mask: u32, - flags: u32, - ) void { - sqe.prep_rw(.POLL_REMOVE, -1, old_user_data, flags, new_user_data); - // Poll masks previously used to comprise of 16 bits in the flags union of - // a SQE, but were then extended to comprise of 32 bits in order to make - // room for additional option flags. To ensure that the correct bits of - // poll masks are consistently and properly read across multiple kernel - // versions, poll masks are enforced to be little-endian. 
- // https://www.spinics.net/lists/io-uring/msg02848.html - sqe.rw_flags = std.mem.nativeToLittle(u32, poll_mask); - } - - pub fn prep_fallocate( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - mode: i32, - offset: u64, - len: u64, - ) void { - sqe.* = .{ - .opcode = .FALLOCATE, - .flags = 0, - .ioprio = 0, - .fd = fd, - .off = offset, - .addr = len, - .len = @intCast(mode), - .rw_flags = 0, - .user_data = 0, - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - } - - pub fn prep_statx( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - path: [*:0]const u8, - flags: u32, - mask: u32, - buf: *linux.Statx, - ) void { - sqe.prep_rw(.STATX, fd, @intFromPtr(path), mask, @intFromPtr(buf)); - sqe.rw_flags = flags; - } - - pub fn prep_cancel( - sqe: *linux.io_uring_sqe, - cancel_user_data: u64, - flags: u32, - ) void { - sqe.prep_rw(.ASYNC_CANCEL, -1, cancel_user_data, 0, 0); - sqe.rw_flags = flags; - } - - pub fn prep_cancel_fd( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - flags: u32, - ) void { - sqe.prep_rw(.ASYNC_CANCEL, fd, 0, 0, 0); - sqe.rw_flags = flags | linux.IORING_ASYNC_CANCEL_FD; - } - - pub fn prep_shutdown( - sqe: *linux.io_uring_sqe, - sockfd: linux.socket_t, - how: u32, - ) void { - sqe.prep_rw(.SHUTDOWN, sockfd, 0, how, 0); - } - - pub fn prep_renameat( - sqe: *linux.io_uring_sqe, - old_dir_fd: linux.fd_t, - old_path: [*:0]const u8, - new_dir_fd: linux.fd_t, - new_path: [*:0]const u8, - flags: u32, - ) void { - sqe.prep_rw( - .RENAMEAT, - old_dir_fd, - @intFromPtr(old_path), - 0, - @intFromPtr(new_path), - ); - sqe.len = @bitCast(new_dir_fd); - sqe.rw_flags = flags; - } - - pub fn prep_unlinkat( - sqe: *linux.io_uring_sqe, - dir_fd: linux.fd_t, - path: [*:0]const u8, - flags: u32, - ) void { - sqe.prep_rw(.UNLINKAT, dir_fd, @intFromPtr(path), 0, 0); - sqe.rw_flags = flags; - } - - pub fn prep_mkdirat( - sqe: *linux.io_uring_sqe, - dir_fd: linux.fd_t, - path: [*:0]const u8, - mode: linux.mode_t, - ) void { - sqe.prep_rw(.MKDIRAT, dir_fd, @intFromPtr(path), mode, 0); - } - - pub fn prep_symlinkat( - sqe: *linux.io_uring_sqe, - target: [*:0]const u8, - new_dir_fd: linux.fd_t, - link_path: [*:0]const u8, - ) void { - sqe.prep_rw( - .SYMLINKAT, - new_dir_fd, - @intFromPtr(target), - 0, - @intFromPtr(link_path), - ); - } - - pub fn prep_linkat( - sqe: *linux.io_uring_sqe, - old_dir_fd: linux.fd_t, - old_path: [*:0]const u8, - new_dir_fd: linux.fd_t, - new_path: [*:0]const u8, - flags: u32, - ) void { - sqe.prep_rw( - .LINKAT, - old_dir_fd, - @intFromPtr(old_path), - 0, - @intFromPtr(new_path), - ); - sqe.len = @bitCast(new_dir_fd); - sqe.rw_flags = flags; - } - - pub fn prep_files_update( - sqe: *linux.io_uring_sqe, - fds: []const linux.fd_t, - offset: u32, - ) void { - sqe.prep_rw(.FILES_UPDATE, -1, @intFromPtr(fds.ptr), fds.len, @intCast(offset)); - } - - pub fn prep_files_update_alloc( - sqe: *linux.io_uring_sqe, - fds: []linux.fd_t, - ) void { - sqe.prep_rw(.FILES_UPDATE, -1, @intFromPtr(fds.ptr), fds.len, linux.IORING_FILE_INDEX_ALLOC); - } - - pub fn prep_provide_buffers( - sqe: *linux.io_uring_sqe, - buffers: [*]u8, - buffer_len: usize, - num: usize, - group_id: usize, - buffer_id: usize, - ) void { - const ptr = @intFromPtr(buffers); - sqe.prep_rw(.PROVIDE_BUFFERS, @intCast(num), ptr, buffer_len, buffer_id); - sqe.buf_index = @intCast(group_id); - } - - pub fn prep_remove_buffers( - sqe: *linux.io_uring_sqe, - num: usize, - group_id: usize, - ) void { - sqe.prep_rw(.REMOVE_BUFFERS, @intCast(num), 0, 0, 0); - sqe.buf_index = 
@intCast(group_id); - } - - pub fn prep_multishot_accept( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - addr: ?*linux.sockaddr, - addrlen: ?*linux.socklen_t, - flags: u32, - ) void { - prep_accept(sqe, fd, addr, addrlen, flags); - sqe.ioprio |= linux.IORING_ACCEPT_MULTISHOT; - } - - pub fn prep_socket( - sqe: *linux.io_uring_sqe, - domain: u32, - socket_type: u32, - protocol: u32, - flags: u32, - ) void { - sqe.prep_rw(.SOCKET, @intCast(domain), 0, protocol, socket_type); - sqe.rw_flags = flags; - } - - pub fn prep_socket_direct( - sqe: *linux.io_uring_sqe, - domain: u32, - socket_type: u32, - protocol: u32, - flags: u32, - file_index: u32, - ) void { - prep_socket(sqe, domain, socket_type, protocol, flags); - __io_uring_set_target_fixed_file(sqe, file_index); - } - - pub fn prep_socket_direct_alloc( - sqe: *linux.io_uring_sqe, - domain: u32, - socket_type: u32, - protocol: u32, - flags: u32, - ) void { - prep_socket(sqe, domain, socket_type, protocol, flags); - __io_uring_set_target_fixed_file(sqe, linux.IORING_FILE_INDEX_ALLOC); - } - - pub fn prep_waitid( - sqe: *linux.io_uring_sqe, - id_type: linux.P, - id: i32, - infop: *linux.siginfo_t, - options: u32, - flags: u32, - ) void { - sqe.prep_rw(.WAITID, id, 0, @intFromEnum(id_type), @intFromPtr(infop)); - sqe.rw_flags = flags; - sqe.splice_fd_in = @bitCast(options); - } - - pub fn prep_bind( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - addr: *const linux.sockaddr, - addrlen: linux.socklen_t, - flags: u32, - ) void { - sqe.prep_rw(.BIND, fd, @intFromPtr(addr), 0, addrlen); - sqe.rw_flags = flags; - } - - pub fn prep_listen( - sqe: *linux.io_uring_sqe, - fd: linux.fd_t, - backlog: usize, - flags: u32, - ) void { - sqe.prep_rw(.LISTEN, fd, 0, backlog, 0); - sqe.rw_flags = flags; - } - - pub fn prep_cmd_sock( - sqe: *linux.io_uring_sqe, - cmd_op: linux.IO_URING_SOCKET_OP, - fd: linux.fd_t, - level: u32, - optname: u32, - optval: u64, - optlen: u32, - ) void { - sqe.prep_rw(.URING_CMD, fd, 0, 0, 0); - // off is overloaded with cmd_op, https://github.com/axboe/liburing/blob/e1003e496e66f9b0ae06674869795edf772d5500/src/include/liburing/io_uring.h#L39 - sqe.off = @intFromEnum(cmd_op); - // addr is overloaded, https://github.com/axboe/liburing/blob/e1003e496e66f9b0ae06674869795edf772d5500/src/include/liburing/io_uring.h#L46 - sqe.addr = @bitCast(packed struct { - level: u32, - optname: u32, - }{ - .level = level, - .optname = optname, - }); - // splice_fd_in if overloaded u32 -> i32 - sqe.splice_fd_in = @bitCast(optlen); - // addr3 is overloaded, https://github.com/axboe/liburing/blob/e1003e496e66f9b0ae06674869795edf772d5500/src/include/liburing/io_uring.h#L102 - sqe.addr3 = optval; - } - - pub fn set_flags(sqe: *linux.io_uring_sqe, flags: u8) void { - sqe.flags |= flags; - } - - /// This SQE forms a link with the next SQE in the submission ring. Next SQE - /// will not be started before this one completes. Forms a chain of SQEs. 
- pub fn link_next(sqe: *linux.io_uring_sqe) void { - sqe.flags |= linux.IOSQE_IO_LINK; - } -}; diff --git a/lib/std/os/linux/test.zig b/lib/std/os/linux/test.zig index e38687dbde10..a6ea1cdf9809 100644 --- a/lib/std/os/linux/test.zig +++ b/lib/std/os/linux/test.zig @@ -83,7 +83,7 @@ test "statx" { defer file.close(); var statx_buf: linux.Statx = undefined; - switch (linux.E.init(linux.statx(file.handle, "", linux.AT.EMPTY_PATH, linux.STATX_BASIC_STATS, &statx_buf))) { + switch (linux.E.init(linux.statx(file.handle, "", .{ .empty_path = true }, linux.Statx.Mask.basic_stats, &statx_buf))) { .SUCCESS => {}, else => unreachable, } @@ -91,7 +91,7 @@ test "statx" { if (builtin.cpu.arch == .riscv32 or builtin.cpu.arch.isLoongArch()) return error.SkipZigTest; // No fstatat, so the rest of the test is meaningless. var stat_buf: linux.Stat = undefined; - switch (linux.E.init(linux.fstatat(file.handle, "", &stat_buf, linux.AT.EMPTY_PATH))) { + switch (linux.E.init(linux.fstatat(file.handle, "", &stat_buf, @as(u32, @bitCast(linux.At{ .empty_path = true }))))) { .SUCCESS => {}, else => unreachable, } diff --git a/lib/std/process/Child.zig b/lib/std/process/Child.zig index 50157d52d956..ffe51acdc056 100644 --- a/lib/std/process/Child.zig +++ b/lib/std/process/Child.zig @@ -514,14 +514,15 @@ fn cleanupStreams(self: *ChildProcess) void { } fn statusToTerm(status: u32) Term { - return if (posix.W.IFEXITED(status)) - Term{ .Exited = posix.W.EXITSTATUS(status) } - else if (posix.W.IFSIGNALED(status)) - Term{ .Signal = posix.W.TERMSIG(status) } - else if (posix.W.IFSTOPPED(status)) - Term{ .Stopped = posix.W.STOPSIG(status) } + const w: posix.W = @bitCast(status); + return if (w.ifExited()) + .{ .Exited = w.exitStatus() } + else if (w.ifSignaled()) + .{ .Signal = w.termSig() } + else if (w.ifStopped()) + .{ .Stopped = w.stopSig() } else - Term{ .Unknown = status }; + .{ .Unknown = status }; } fn spawnPosix(self: *ChildProcess) SpawnError!void {
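Editor's note: the test churn above all follows one migration: raw integer flag words (`0`, `linux.MSG.WAITALL`, `IORING_CQE_F_*` bitmasks) become default-initialized packed structs with named fields. A minimal sketch of the before/after shape, using only calls that appear in this diff (`IoUring.init` taking a params struct, `prep_timeout` taking a flags struct, `Cqe.flags.f_more`), plus the usual `deinit`:

const std = @import("std");
const linux = std.os.linux;
const IoUring = linux.IoUring;

// Sketch only: submit one timeout and inspect the completion using the
// packed-struct flags introduced by this change set.
fn timeoutOnce() !void {
    var ring = try IoUring.init(4, .{}); // was: IoUring.init(4, 0)
    defer ring.deinit();

    const sqe = try ring.get_sqe();
    // was: sqe.prep_timeout(&ts, 0, 0) with a u32 flags word
    sqe.prep_timeout(&.{ .sec = 0, .nsec = 10_000 }, 0, .{});
    _ = try ring.submit();

    const cqe = try ring.copy_cqe();
    // was: cqe.flags & linux.IORING_CQE_F_MORE != 0
    if (cqe.flags.f_more) {
        // more completions will follow for this submission
    }
}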
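The release-string normalization in skipKernelLessThan leans on the newer std.mem helpers used in the diff (findAny, cutScalarLast). The same logic can be restated against long-standing std.mem names; this standalone sketch is an illustration, not the code the diff adds:

const std = @import("std");

fn normalizeRelease(release: []const u8) []const u8 {
    // Drop "-..." / "+..." extras: "6.8.9-300.fc40.x86_64" -> "6.8.9".
    const extra = std.mem.indexOfAny(u8, release, "-+") orelse release.len;
    var stripped = release[0..extra];
    // WSL kernels carry a fourth version component: "6.6.87.2-..." -> "6.6.87".
    if (std.mem.endsWith(u8, release, "WSL2")) {
        if (std.mem.lastIndexOfScalar(u8, stripped, '.')) |dot|
            stripped = stripped[0..dot];
    }
    return stripped;
}

test normalizeRelease {
    try std.testing.expectEqualStrings("6.8.9", normalizeRelease("6.8.9-300.fc40.x86_64"));
    try std.testing.expectEqualStrings("6.6.87", normalizeRelease("6.6.87.2-microsoft-standard-WSL2"));
    _ = try std.SemanticVersion.parse(normalizeRelease("6.8.9-300.fc40.x86_64"));
}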
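The deleted io_uring_sqe.zig carried one easy-to-miss convention worth restating: in `__io_uring_set_target_fixed_file`, a direct-descriptor slot N travels in the SQE as N + 1, because 0 means "no fixed file", while the alloc sentinel is passed through unchanged so the kernel picks a free slot. As a plain function:

// Restatement of the "index + 1" encoding from the deleted helper.
// `alloc_sentinel` stands in for linux.IORING_FILE_INDEX_ALLOC; it is a
// parameter here so this sketch does not assert its exact value.
fn encodeFixedFileSlot(file_index: u32, alloc_sentinel: u32) u32 {
    if (file_index == alloc_sentinel) return alloc_sentinel; // kernel allocates a slot
    return file_index + 1; // slot 0 travels as 1; raw 0 means "no fixed file"
}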
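In the fstatat test change, the new packed `At` struct still has to cross a `u32` ABI boundary, hence the `@bitCast` in both directions. A self-contained sketch of that round-trip with a stand-in struct (the real layout lives in std.os.linux; the bit positions below follow the classic AT_* values, e.g. AT_EMPTY_PATH = 0x1000):

const std = @import("std");

// Stand-in for the diff's linux.At packed struct, for illustration only.
const AtFlags = packed struct(u32) {
    _reserved: u8 = 0,
    symlink_nofollow: bool = false, // 0x100
    removedir: bool = false, // 0x200
    symlink_follow: bool = false, // 0x400
    no_automount: bool = false, // 0x800
    empty_path: bool = false, // 0x1000
    _unused: u19 = 0,
};

test "At flags round-trip through u32" {
    const raw: u32 = @bitCast(AtFlags{ .empty_path = true });
    try std.testing.expectEqual(@as(u32, 0x1000), raw);
    const back: AtFlags = @bitCast(raw);
    try std.testing.expect(back.empty_path and !back.no_automount);
}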
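Finally, the statusToTerm rewrite in Child.zig replaces four W.* macro calls with a single `@bitCast` into `posix.W` plus methods. The sketch below shows the underlying idea on a hypothetical WaitStatus type; the real field layout and names belong to std.posix, and the bit positions follow the traditional Unix wait-status encoding:

const std = @import("std");

// Hypothetical illustration of the @bitCast pattern, not std.posix.W itself.
const WaitStatus = packed struct(u32) {
    term_sig: u7, // low 7 bits: terminating signal, 0 if exited normally
    core_dump: bool, // 0x80
    exit_code: u8, // exit status, or the stop signal when stopped
    _unused: u16,

    fn ifExited(w: WaitStatus) bool {
        return w.term_sig == 0;
    }
    fn ifSignaled(w: WaitStatus) bool {
        return w.term_sig != 0 and w.term_sig != 0x7f;
    }
    fn ifStopped(w: WaitStatus) bool {
        return w.term_sig == 0x7f;
    }
};

test WaitStatus {
    const exited_7: WaitStatus = @bitCast(@as(u32, 7 << 8)); // exit(7)
    try std.testing.expect(exited_7.ifExited());
    try std.testing.expectEqual(7, exited_7.exit_code);

    const killed_9: WaitStatus = @bitCast(@as(u32, 9)); // terminated by SIGKILL
    try std.testing.expect(killed_9.ifSignaled());
    try std.testing.expect(!killed_9.ifStopped());
}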