-
Notifications
You must be signed in to change notification settings - Fork 717
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
journald: send large journal payloads through memfd (#1744)
See #1698: Properly write large payloads to journal.

I'd appreciate a very careful review; this cmsg stuff is nasty, and while it's well documented in `cmsg(3)` I had to fiddle a bit because the corresponding functions in libc aren't const and thus don't permit a direct allocation of the buffer as most `cmsg` C code around does.

Closes #1698

## Motivation

Linux limits the maximum amount of data permitted for a single Unix datagram; sending large payloads directly will fail.

## Solution

Follow systemd.io/JOURNAL_NATIVE_PROTOCOL/ and check for `EMSGSIZE` from `send()`; in this case write the payload to a memfd, seal it, and pass it on to journald via a corresponding SCM_RIGHTS control message.

Per discussion in #1698 this adds no dependency on `nix`, and instead implements fd forwarding directly with some bits of unsafe `libc` code.
- Loading branch information
Showing
5 changed files
with
167 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
//! memfd helpers. | ||
|
||
use libc::*; | ||
use std::fs::File; | ||
use std::io::Error; | ||
use std::io::Result; | ||
use std::os::raw::c_uint; | ||
use std::os::unix::prelude::{FromRawFd, RawFd}; | ||
|
||
fn create(flags: c_uint) -> Result<File> { | ||
let fd = unsafe { memfd_create("tracing-journald\0".as_ptr() as *const c_char, flags) }; | ||
if fd < 0 { | ||
Err(Error::last_os_error()) | ||
} else { | ||
Ok(unsafe { File::from_raw_fd(fd as RawFd) }) | ||
} | ||
} | ||
|
||
pub fn create_sealable() -> Result<File> { | ||
create(MFD_ALLOW_SEALING | MFD_CLOEXEC) | ||
} | ||
|
||
pub fn seal_fully(fd: RawFd) -> Result<()> { | ||
let all_seals = F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_SEAL; | ||
let result = unsafe { fcntl(fd, F_ADD_SEALS, all_seals) }; | ||
if result < 0 { | ||
Err(Error::last_os_error()) | ||
} else { | ||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
//! socket helpers. | ||
|
||
use std::io::{Error, Result}; | ||
use std::mem::{size_of, zeroed}; | ||
use std::os::unix::net::UnixDatagram; | ||
use std::os::unix::prelude::{AsRawFd, RawFd}; | ||
use std::ptr; | ||
|
||
use libc::*; | ||
|
||
const CMSG_BUFSIZE: usize = 64; | ||
|
||
#[repr(C)] | ||
union AlignedBuffer<T: Copy + Clone> { | ||
buffer: T, | ||
align: cmsghdr, | ||
} | ||
|
||
fn assert_cmsg_bufsize() { | ||
let space_one_fd = unsafe { CMSG_SPACE(size_of::<RawFd>() as u32) }; | ||
assert!( | ||
space_one_fd <= CMSG_BUFSIZE as u32, | ||
"cmsghdr buffer too small (< {}) to hold a single fd", | ||
space_one_fd | ||
); | ||
} | ||
|
||
/// Checks that the fixed-size control message buffer has room for one fd.
#[cfg(test)]
#[test]
fn cmsg_buffer_size_for_one_fd() {
    assert_cmsg_bufsize();
}
|
||
pub fn send_one_fd(socket: &UnixDatagram, fd: RawFd) -> Result<usize> { | ||
assert_cmsg_bufsize(); | ||
|
||
let mut cmsg_buffer = AlignedBuffer { | ||
buffer: ([0u8; CMSG_BUFSIZE]), | ||
}; | ||
let mut msg: msghdr = unsafe { zeroed() }; | ||
|
||
// We send no data body with this message. | ||
msg.msg_iov = ptr::null_mut(); | ||
msg.msg_iovlen = 0; | ||
|
||
msg.msg_control = unsafe { cmsg_buffer.buffer.as_mut_ptr() as _ }; | ||
msg.msg_controllen = unsafe { CMSG_SPACE(size_of::<RawFd>() as _) as _ }; | ||
|
||
let mut cmsg: &mut cmsghdr = | ||
unsafe { CMSG_FIRSTHDR(&msg).as_mut() }.expect("Control message buffer exhausted"); | ||
|
||
cmsg.cmsg_level = SOL_SOCKET; | ||
cmsg.cmsg_type = SCM_RIGHTS; | ||
cmsg.cmsg_len = unsafe { CMSG_LEN(size_of::<RawFd>() as _) as _ }; | ||
|
||
unsafe { ptr::write(CMSG_DATA(cmsg) as *mut RawFd, fd) }; | ||
|
||
let result = unsafe { sendmsg(socket.as_raw_fd(), &msg, libc::MSG_NOSIGNAL) }; | ||
|
||
if result < 0 { | ||
Err(Error::last_os_error()) | ||
} else { | ||
// sendmsg returns the number of bytes written | ||
Ok(result as usize) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters