Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Documentation: Add info about cap_sys_ptrace and ptrace_scope #1

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,27 @@ task:
build_script: |
make -C scripts/ci vagrant-fedora-rawhide

task:
name: Vagrant Fedora based test (non-root)
environment:
HOME: "/root"
CIRRUS_WORKING_DIR: "/tmp/criu"

compute_engine_instance:
image_project: cirrus-images
image: family/docker-kvm
platform: linux
cpu: 4
memory: 16G
nested_virtualization: true

setup_script: |
scripts/ci/apt-install make gcc pkg-config git perl-modules iproute2 kmod wget cpu-checker
sudo kvm-ok
ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
build_script: |
make -C scripts/ci vagrant-fedora-non-root

task:
name: CentOS Stream 8 based test
environment:
Expand Down
32 changes: 32 additions & 0 deletions Documentation/criu.txt
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ not compatible with *--external* *dev*.
notification message contains a file descriptor for
the master pty

*--unprivileged*::
This option tells *criu* to accept the limitations when running
as non-root. Running as non-root requires *criu* to have at least
*CAP_SYS_ADMIN* or *CAP_CHECKPOINT_RESTORE*. For details about running
*criu* as non-root please consult the *NON-ROOT* section.

*-V*, *--version*::
Print program version and exit.

Expand Down Expand Up @@ -877,6 +883,32 @@ configuration file will overwrite all other configuration file settings
or RPC options. *This can lead to undesired behavior of criu and
should only be used carefully.*

NON-ROOT
--------
*criu* can be used as non-root with either the *CAP_SYS_ADMIN* capability
or with the *CAP_CHECKPOINT_RESTORE* capability introduced in Linux kernel 5.9.
*CAP_CHECKPOINT_RESTORE* is the minimum that is required.

*criu* also needs either *CAP_SYS_PTRACE* or a value of 0 in
*/proc/sys/kernel/yama/ptrace_scope* (see *ptrace*(2)) to be able to interrupt
the process for dumping.

Running *criu* as non-root has many limitations and depending on the process
to checkpoint and restore it may not be possible.

In addition to *CAP_CHECKPOINT_RESTORE* it is possible to give *criu* additional
capabilities to enable additional features in non-root mode.

Currently *criu* can benefit from the following additional capabilities:

- *CAP_NET_ADMIN*
- *CAP_SYS_CHROOT*
- *CAP_SETUID*
- *CAP_SYS_RESOURCE*

Independent of the capabilities it is always necessary to use "*--unprivileged*" to
accept *criu*'s limitations in non-root mode.

EXAMPLES
--------
To checkpoint a program with pid of *1234* and write all image files into
Expand Down
6 changes: 6 additions & 0 deletions criu/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,9 @@ int dump_task_cgroup(struct pstree_item *item, u32 *cg_id, struct parasite_dump_
unsigned int n_ctls = 0;
struct cg_set *cs;

if (opts.unprivileged)
return 0;

if (item)
pid = item->pid->real;
else
Expand Down Expand Up @@ -989,6 +992,9 @@ int dump_cgroups(void)
CgroupEntry cg = CGROUP_ENTRY__INIT;
int ret = -1;

if (opts.unprivileged)
return 0;

BUG_ON(!criu_cgset || !root_cgset);

/*
Expand Down
1 change: 1 addition & 0 deletions criu/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd,
{ "lsm-mount-context", required_argument, 0, 1099 },
{ "network-lock", required_argument, 0, 1100 },
BOOL_OPT("mntns-compat-mode", &opts.mntns_compat_mode),
BOOL_OPT("unprivileged", &opts.unprivileged),
{},
};

Expand Down
71 changes: 44 additions & 27 deletions criu/cr-check.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <sys/prctl.h>
#include <sched.h>
#include <sys/mount.h>
#include <sys/utsname.h>

#include "../soccr/soccr.h"

Expand Down Expand Up @@ -515,6 +516,14 @@ static int check_ipc(void)
{
int ret;

/*
* Since kernel 5.16 sem_next_id can be accessed via CAP_CHECKPOINT_RESTORE, however
* for non-root users access() runs with an empty set of caps and will therefore always
* fail.
*/
if (opts.uid)
return 0;

ret = access("/proc/sys/kernel/sem_next_id", R_OK | W_OK);
if (!ret)
return 0;
Expand Down Expand Up @@ -1039,10 +1048,14 @@ static int check_tcp(void)
}

val = 1;
ret = setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val));
if (ret < 0) {
pr_perror("Can't turn TCP repair mode ON");
goto out;
if (!opts.unprivileged || has_cap_net_admin(opts.cap_eff)) {
ret = setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val));
if (ret < 0) {
pr_perror("Can't turn TCP repair mode ON");
goto out;
}
} else {
pr_info("Not checking for TCP repair mode. Please set CAP_NET_ADMIN\n");
}

optlen = sizeof(val);
Expand Down Expand Up @@ -1394,9 +1407,6 @@ int cr_check(void)
struct ns_id *ns;
int ret = 0;

if (!is_root_user())
return -1;

root_item = alloc_pstree_item();
if (root_item == NULL)
return -1;
Expand Down Expand Up @@ -1666,36 +1676,43 @@ static int pr_set_dumpable(int value)

int check_caps(void)
{
struct proc_status_creds creds;
int exit_code = -1;

if (parse_pid_status(PROC_SELF, &creds.s, NULL))
/* Read out effective capabilities and store in opts.cap_eff. */
if (set_opts_cap_eff())
goto out;

memcpy(&opts.cap_eff, &creds.cap_eff, sizeof(u32) * PROC_CAP_SIZE);

/*
* No matter if running as root or not. CRIU always needs
* at least these capabilities.
*/
if (!has_cap_checkpoint_restore(opts.cap_eff))
goto out;

/* For some things we need to know if we are running as root. */
opts.uid = geteuid();

if (opts.uid) {
/*
* At his point we know we are running as non-root with the necessary
* capabilities available. Now we have to make the process dumpable
* so that /proc/self is not owned by root.
*/
if (pr_set_dumpable(1))
return -1;
if (!opts.uid) {
/* CRIU is running as root. No further checks are necessary. */
return 0;
}

exit_code = 0;
out:
if (exit_code) {
pr_msg("CRIU needs to have the CAP_SYS_ADMIN or the CAP_CHECKPOINT_RESTORE capability: \n");
pr_msg("setcap cap_checkpoint_restore+eip %s\n", opts.argv_0);
if (!opts.unprivileged) {
pr_msg("Running as non-root requires '--unprivileged'\n");
pr_msg("Please consult the documentation for limitations when running as non-root\n");
return -1;
}

return exit_code;
/*
* At this point we know we are running as non-root with the necessary
* capabilities available. Now we have to make the process dumpable
* so that /proc/self is not owned by root.
*/
if (pr_set_dumpable(1))
return -1;

return 0;
out:
pr_msg("CRIU needs to have the CAP_SYS_ADMIN or the CAP_CHECKPOINT_RESTORE capability: \n");
pr_msg("setcap cap_checkpoint_restore+eip %s\n", opts.argv_0);

return -1;
}
3 changes: 3 additions & 0 deletions criu/cr-restore.c
Original file line number Diff line number Diff line change
Expand Up @@ -1809,6 +1809,9 @@ static int restore_task_with_children(void *_arg)
goto err;
}

if (set_opts_cap_eff())
goto err;

/* Wait prepare_userns */
if (restore_finish_ns_stage(CR_STATE_ROOT_TASK, CR_STATE_PREPARE_NAMESPACES) < 0)
goto err;
Expand Down
7 changes: 7 additions & 0 deletions criu/cr-service.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <sys/stat.h>
#include <arpa/inet.h>
#include <sched.h>
#include <sys/prctl.h>

#include "version.h"
#include "crtools.h"
Expand Down Expand Up @@ -409,6 +410,12 @@ static int setup_opts_from_req(int sk, CriuOpts *req)
pr_debug("Would overwrite RPC settings with values from %s\n", req->config_file);
}

if (req->has_unprivileged)
opts.unprivileged = req->unprivileged;

if (check_caps())
return 1;

if (kerndat_init())
return 1;

Expand Down
5 changes: 5 additions & 0 deletions criu/crtools.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ int main(int argc, char *argv[], char *envp[])
return cr_service_work(atoi(argv[optind + 1]));
}

if (check_caps())
return 1;

if (opts.imgs_dir == NULL)
SET_CHAR_OPTS(imgs_dir, ".");

Expand Down Expand Up @@ -414,6 +417,8 @@ int main(int argc, char *argv[], char *envp[])
" --network-lock METHOD\n"
" network locking/unlocking method; argument\n"
" can be 'nftables' or 'iptables' (default).\n"
" --unprivileged accept limitations when running as non-root\n"
" consult documentation for further details\n"
"\n"
"* External resources support:\n"
" --external RES dump objects from this list as external resources:\n"
Expand Down
16 changes: 14 additions & 2 deletions criu/fdstore.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include "rst-malloc.h"
#include "log.h"
#include "util.h"
#include "cr_options.h"
#include "util-caps.h"

/* clang-format off */
static struct fdstore_desc {
Expand All @@ -27,6 +29,8 @@ int fdstore_init(void)
uint32_t buf[2] = { INT_MAX / 2, INT_MAX / 2 };
struct sockaddr_un addr;
unsigned int addrlen;
int rcv_opt_name;
int snd_opt_name;
struct stat st;
int sk, ret;

Expand All @@ -49,8 +53,16 @@ int fdstore_init(void)
return -1;
}

if (setsockopt(sk, SOL_SOCKET, SO_SNDBUFFORCE, &buf[0], sizeof(buf[0])) < 0 ||
setsockopt(sk, SOL_SOCKET, SO_RCVBUFFORCE, &buf[1], sizeof(buf[1])) < 0) {
if (!opts.unprivileged || has_cap_net_admin(opts.cap_eff)) {
rcv_opt_name = SO_RCVBUFFORCE;
snd_opt_name = SO_SNDBUFFORCE;
} else {
rcv_opt_name = SO_RCVBUF;
snd_opt_name = SO_SNDBUF;
}

if (setsockopt(sk, SOL_SOCKET, snd_opt_name, &buf[0], sizeof(buf[0])) < 0 ||
setsockopt(sk, SOL_SOCKET, rcv_opt_name, &buf[1], sizeof(buf[1])) < 0) {
pr_perror("Unable to set SO_SNDBUFFORCE/SO_RCVBUFFORCE");
close(sk);
return -1;
Expand Down
34 changes: 28 additions & 6 deletions criu/files.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include "image.h"
#include "common/list.h"
#include "rst-malloc.h"
#include "util-pie.h"
#include "util-caps.h"
#include "common/lock.h"
#include "sockets.h"
#include "pstree.h"
Expand Down Expand Up @@ -1346,6 +1346,24 @@ static int fchroot(int fd)
return chroot(".");
}

/*
 * Compare the directory behind @saved_root with the current root of the
 * calling process, looked up through /proc/self/root.
 *
 * Returns 1 when the inode or device numbers differ, 0 when both match,
 * and -1 (after logging the error) when either directory cannot be
 * stat'ed.
 */
static int need_chroot(int saved_root)
{
	struct stat want_st, have_st;

	if (fstat(saved_root, &want_st) == -1) {
		pr_perror("Failed to stat saved root dir");
		return -1;
	}

	if (stat("/proc/self/root", &have_st) == -1) {
		pr_perror("Failed to stat current root dir");
		return -1;
	}

	/* Both the inode and the device must match to be the same directory. */
	if (want_st.st_ino != have_st.st_ino)
		return 1;

	return want_st.st_dev != have_st.st_dev;
}

int restore_fs(struct pstree_item *me)
{
int dd_root = -1, dd_cwd = -1, ret, err = -1;
Expand All @@ -1372,11 +1390,15 @@ int restore_fs(struct pstree_item *me)
* Now do chroot/chdir. Chroot goes first as it calls chdir into
* dd_root so we'd need to fix chdir after it anyway.
*/

ret = fchroot(dd_root);
if (ret < 0) {
pr_perror("Can't change root");
goto out;
if (need_chroot(dd_root)) {
ret = fchroot(dd_root);
if (ret < 0) {
pr_perror("Can't change root");
goto out;
}
}
else {
pr_info("Skipping chroot, appears not needed\n");
}

ret = fchdir(dd_cwd);
Expand Down
3 changes: 2 additions & 1 deletion criu/image.c
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,8 @@ int prepare_inventory(InventoryEntry *he)
if (get_task_ids(&crt.i))
return -1;

he->has_root_cg_set = true;
if (!opts.unprivileged)
he->has_root_cg_set = true;
if (dump_task_cgroup(NULL, &he->root_cg_set, NULL))
return -1;

Expand Down
11 changes: 9 additions & 2 deletions criu/include/cr_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define __CR_OPTIONS_H__

#include <stdbool.h>
#include <sys/capability.h>
#include "common/config.h"
#include "common/list.h"
#include "int.h"
Expand Down Expand Up @@ -223,8 +224,14 @@ struct cr_options {
* CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN
*/
uid_t uid;
/* This contains the value from /proc/pid/status: CapEff */
u32 cap_eff[CR_CAP_SIZE];
/* This contains the value from capget()->effective */
u32 cap_eff[_LINUX_CAPABILITY_U32S_3];
/*
* If CRIU should be running as non-root with the help of
* CAP_CHECKPOINT_RESTORE or CAP_SYS_ADMIN the user should
* explicitly request it as it comes with many limitations.
*/
int unprivileged;
};

extern struct cr_options opts;
Expand Down
Loading