Skip to content

Commit

Permalink
daemon: Add socket activation via /run/rpm-ostreed.socket
Browse files Browse the repository at this point in the history
For historical reasons, the daemon ends up doing a lot of
initialization before even claiming the DBus name.  For example,
it calls `ostree_sysroot_load()`.  We also end up scanning
the RPM database, and actually parse all the GPG keys
in `/etc/pki/rpm-gpg` etc.

This causes two related problems:

- By doing all this work before claiming the bus name, we
  race against the (pretty low) DBus service timeout of 25s.
- If something hard fails at initialization, the client can't
  easily see the error because it just appears in the systemd
  journal.  The client will just see a service timeout.

The solution to this is to adopt systemd socket activation,
which drops out DBus as an intermediary.  On daemon startup,
we now do the process-global initialization (like ostree
sysroot) and if that fails, the daemon just sticks around
(but without claiming the bus name), ready to return the
error message to each client.

After this patch:

```
$ systemctl stop rpm-ostreed
$ umount /boot
$ rpm-ostree status
error: Couldn't start daemon: Error setting up sysroot: loading sysroot: Unexpected state: /run/ostree-booted found, but no /boot/loader directory
```
  • Loading branch information
cgwalters committed Jul 28, 2022
1 parent 6698a8c commit 43a5e9f
Show file tree
Hide file tree
Showing 12 changed files with 274 additions and 25 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ lto = "thin"
# Note: If you add a feature here, you also probably want to update utils.rs:get_features()
fedora-integration = []
rhsm = ["libdnf-sys/rhsm"]
# Enable hard requirement on `rpm-ostreed.socket`; requires https://bugzilla.redhat.com/show_bug.cgi?id=2110012
client-socket = []
bin-unit-tests = []
# ASAN+UBSAN
sanitizers = []
Expand Down
10 changes: 7 additions & 3 deletions Makefile-daemon.am
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,18 @@ systemdunit_service_in_files = \
$(NULL)

systemdunit_service_files = $(systemdunit_service_in_files:.service.in=.service)
systemdunit_timer_files = \
systemdunit_other_files = \
$(srcdir)/src/daemon/rpm-ostreed-automatic.timer \
$(srcdir)/src/daemon/rpm-ostree-countme.timer \
$(NULL)

if CLIENT_SOCKET
systemdunit_other_files += $(srcdir)/src/daemon/rpm-ostreed.socket
endif

systemdunit_DATA = \
$(systemdunit_service_files) \
$(systemdunit_timer_files) \
$(systemdunit_other_files) \
$(NULL)

systemdunitdir = $(prefix)/lib/systemd/system/
Expand Down Expand Up @@ -110,7 +114,7 @@ EXTRA_DIST += \
$(sysconf_DATA) \
$(service_in_files) \
$(systemdunit_service_in_files) \
$(systemdunit_timer_files) \
$(systemdunit_other_files) \
$(NULL)

CLEANFILES += \
Expand Down
7 changes: 7 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ AC_ARG_ENABLE(featuresrs,
[enable_featuresrs=])
AC_SUBST([RUST_FEATURES], $enable_featuresrs)

AC_ARG_ENABLE(client-socket,
AS_HELP_STRING([--enable-client-socket],
[(default: no)]),,
[enable_client_socket=no])
AS_IF([test x$enable_client_socket = xyes], [enable_featuresrs="$enable_featuresrs client-socket"])
AM_CONDITIONAL(CLIENT_SOCKET, [echo $enable_featuresrs | grep -q 'client-socket'])

# Initialize libtool
LT_PREREQ([2.2.4])
LT_INIT([disable-static])
Expand Down
9 changes: 8 additions & 1 deletion packaging/rpm-ostree.spec.in
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ BuildRequires: rust
%bcond_with rhsm
%endif

# https://bugzilla.redhat.com/show_bug.cgi?id=2110012
%if 0%{?fedora} >= 37
%bcond_without client_socket
%else
%bcond_with client_socket
%endif

# RHEL (8,9) doesn't ship zchunk today. Keep this in sync
# with libdnf: https://gitlab.com/redhat/centos-stream/rpms/libdnf/-/blob/762f631e36d1e42c63a794882269d26c156b68c1/libdnf.spec#L45
%if 0%{?rhel}
Expand Down Expand Up @@ -179,7 +186,7 @@ env NOCONFIGURE=1 ./autogen.sh
export RUSTFLAGS="%{build_rustflags}"
%endif
%configure --disable-silent-rules --enable-gtk-doc %{?rpmdb_default} %{?with_sanitizers:--enable-sanitizers} %{?with_bin_unit_tests:--enable-bin-unit-tests} \
%{?with_rhsm:--enable-featuresrs=rhsm}
%{?with_rhsm:--enable-featuresrs=rhsm} %{?with_client_socket:--enable-client-socket}

%make_build

Expand Down
36 changes: 35 additions & 1 deletion rust/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,26 @@ pub(crate) fn client_handle_fd_argument(
}
}

/// Connect to the client socket and ensure the daemon is initialized;
/// this avoids DBus and ensures that we get any early startup errors
/// returned cleanly.
#[cfg(feature = "client-socket")]
fn start_daemon_via_socket() -> Result<()> {
let address = "/run/rpm-ostree/client.sock";
tracing::debug!("Starting daemon via {address}");
let s = std::os::unix::net::UnixStream::connect(address)
.with_context(|| anyhow!("Failed to connect to {}", address))?;
let mut s = std::io::BufReader::new(s);
let mut r = String::new();
s.read_to_string(&mut r)
.context("Reading from client socket")?;
if r.is_empty() {
Ok(())
} else {
Err(anyhow!("{r}").into())
}
}

/// Explicitly ensure the daemon is started via systemd, if possible.
///
/// This works around bugs from DBus activation, see
Expand All @@ -164,12 +184,14 @@ pub(crate) fn client_handle_fd_argument(
/// to systemd directly and use its client tools to scrape errors.
///
/// What we really should do probably is use native socket activation.
pub(crate) fn client_start_daemon() -> CxxResult<()> {
#[cfg(not(feature = "client-socket"))]
fn start_daemon_via_systemctl() -> Result<()> {
let service = "rpm-ostreed.service";
// Assume non-root can't use systemd right now.
if rustix::process::getuid().as_raw() != 0 {
return Ok(());
}

// Unfortunately, RHEL8 systemd will count "systemctl start"
// invocations against the restart limit, so query the status
// first.
Expand All @@ -196,6 +218,18 @@ pub(crate) fn client_start_daemon() -> CxxResult<()> {
Ok(())
}

pub(crate) fn client_start_daemon() -> CxxResult<()> {
// systemctl and socket paths only work for root right now; in the future
// the socket may be opened up.
if rustix::process::getuid().as_raw() != 0 {
return Ok(());
}
#[cfg(feature = "client-socket")]
return start_daemon_via_socket().map_err(Into::into);
#[cfg(not(feature = "client-socket"))]
return start_daemon_via_systemctl().map_err(Into::into);
}

/// Convert the GVariant parameters from the DownloadProgress DBus API to a human-readable English string.
pub(crate) fn client_render_download_progress(progress: &crate::ffi::GVariant) -> String {
let progress = progress
Expand Down
134 changes: 131 additions & 3 deletions rust/src/daemon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,23 @@ use crate::cxxrsutil::*;
use crate::ffi::{
OverrideReplacementSource, OverrideReplacementType, ParsedRevision, ParsedRevisionKind,
};
use anyhow::{anyhow, format_err, Result};
use anyhow::{anyhow, format_err, Context, Result};
use cap_std::fs::Dir;
use cap_std_ext::dirext::CapStdExtDirExt;
use cap_std_ext::{cap_std, rustix};
use fn_error_context::context;
use glib::prelude::*;
use once_cell::sync::Lazy;
use ostree_ext::{gio, glib, ostree};
use rustix::fd::BorrowedFd;
use rustix::fd::{BorrowedFd, FromRawFd};
use rustix::fs::MetadataExt;
use std::collections::{BTreeMap, BTreeSet};
use std::io::Read;
use std::io::{Read, Write};
use std::os::unix::fs::PermissionsExt;
use std::path::Path;
use std::sync::Mutex;
use tokio::net::{UnixListener, UnixStream};
use tokio::sync::oneshot::{Receiver, Sender};

const RPM_OSTREED_COMMIT_VERIFICATION_CACHE: &str = "rpm-ostree/gpgcheck-cache";

Expand Down Expand Up @@ -131,6 +135,130 @@ fn deployment_populate_variant_origin(
Ok(())
}

async fn send_ok_result_to_client(_client: UnixStream) {
// On success we close the stream without writing anything,
// which acknowledges successful startup to the client.
// In the future we may actually implement a protocol here, so this
// is stubbed out as a full async fn in preparation for that.
tracing::debug!("Acknowleged client");
}

static SHUTDOWN_SIGNAL: Lazy<Mutex<Option<Sender<()>>>> = Lazy::new(|| Mutex::new(None));

async fn process_clients_with_ok(listener: UnixListener, mut cancel: Receiver<()>) {
tracing::debug!("Processing clients...");
loop {
tokio::select! {
_ = &mut cancel => {
tracing::debug!("Got cancellation event");
return
}
r = listener.accept() => {
match r {
Ok((stream, _addr)) => {
send_ok_result_to_client(stream).await;
},
Err(e) => {
tracing::debug!("failed to accept client: {e}")
}
}
}
}
}
}

/// Ensure all asynchronous tasks in this Rust half of the daemon code are stopped.
/// Called from C++.
pub(crate) fn daemon_terminate() {
let chan = (*SHUTDOWN_SIGNAL).lock().unwrap().take().unwrap();
let _ = chan.send(());
}

fn process_one_client(listener: std::os::unix::net::UnixListener, e: anyhow::Error) -> Result<()> {
let mut incoming = match listener.incoming().next() {
Some(r) => r?,
None => {
anyhow::bail!("Expected to find client socket from activation");
}
};

let buf = format!("{e}");
incoming.write_all(buf.as_bytes())?;

todo!()
}

/// Perform initialization steps required by systemd service activation.
///
/// This ensures that the system is running under systemd, then receives the
/// socket-FD for main IPC logic, and notifies systemd about ready-state.
pub(crate) fn daemon_main(debug: bool) -> Result<()> {
let handle = tokio::runtime::Handle::current();
let _tokio_guard = handle.enter();
use std::os::unix::net::UnixListener as StdUnixListener;
if !systemd::daemon::booted()? {
return Err(anyhow!("not running as a systemd service"));
}

let init_res: Result<()> = crate::ffi::daemon_init_inner(debug).map_err(|e| e.into());
tracing::debug!("Initialization result: {init_res:?}");

let mut fds = systemd::daemon::listen_fds(false)?.iter();
let listener = match fds.next() {
None => {
// If started directly via `systemctl start` or DBus activation, we
// directly propagate the error back to our exit code.
init_res?;
tracing::debug!("Initializing directly (not socket activated)");
cfg!(feature = "client-socket")
.then(|| StdUnixListener::bind("/run/rpm-ostree/client.sock"))
.transpose()
.context("Binding to socket")?
}
Some(fd) => {
if fds.next().is_some() {
return Err(anyhow!("Expected exactly 1 fd from systemd activation"));
}
tracing::debug!("Initializing from socket activation; fd={fd}");
let listener = unsafe { StdUnixListener::from_raw_fd(fd) };

match init_res {
Ok(_) => Some(listener),
Err(e) => {
let err_copy = anyhow!("{e}");
tracing::debug!("Reporting initialization error: {e}");
match process_one_client(listener, err_copy) {
Ok(()) => {
tracing::debug!("Acknowleged initial client");
}
Err(e) => {
tracing::debug!("Caught error while processing client {e}");
}
}
return Err(e);
}
}
}
};

if let Some(listener) = listener {
let (shutdown_send, shutdown_recv) = tokio::sync::oneshot::channel();
(*SHUTDOWN_SIGNAL).lock().unwrap().replace(shutdown_send);

let listener = UnixListener::from_std(listener)?;

// On success, we spawn a helper task that just responds with
// sucess to clients that connect via the socket. In the future,
// perhaps we'll expose an API here.
tracing::debug!("Spawning acknowledgement task");
tokio::task::spawn(async { process_clients_with_ok(listener, shutdown_recv).await });
}

tracing::debug!("Entering daemon mainloop");
// And now, enter the main loop.
Ok(crate::ffi::daemon_main_inner()?)
}

/// Serialize information about the given deployment into the `dict`;
/// this will be exposed via DBus and is hence public API.
pub(crate) fn deployment_populate_variant(
Expand Down
8 changes: 8 additions & 0 deletions rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ pub mod ffi {

// daemon.rs
extern "Rust" {
fn daemon_main(debug: bool) -> Result<()>;
fn daemon_terminate();
fn daemon_sanitycheck_environment(sysroot: &OstreeSysroot) -> Result<()>;
fn deployment_generate_id(deployment: &OstreeDeployment) -> String;
fn deployment_populate_variant(
Expand Down Expand Up @@ -768,6 +770,12 @@ pub mod ffi {
fn c_unit_tests() -> Result<()>;
}

unsafe extern "C++" {
include!("rpmostreed-daemon.hpp");
fn daemon_init_inner(debug: bool) -> Result<()>;
fn daemon_main_inner() -> Result<()>;
}

unsafe extern "C++" {
include!("rpmostree-clientlib.h");
fn client_require_root() -> Result<()>;
Expand Down
4 changes: 3 additions & 1 deletion rust/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,9 @@ fn inner_main() -> Result<i32> {
.enable_all()
.build()
.context("Failed to build tokio runtime")?;
runtime.block_on(dispatch_multicall(callname, args))
let r = runtime.block_on(dispatch_multicall(callname, args));
tracing::debug!("Exiting inner main with result: {r:?}");
r
}

fn print_error(e: anyhow::Error) {
Expand Down
Loading

0 comments on commit 43a5e9f

Please sign in to comment.