Skip to content

Commit c40c046

Browse files
xzpeterJuan Quintela
authored andcommitted
util/userfaultfd: Support /dev/userfaultfd
Teach QEMU to use /dev/userfaultfd when it exists, and fall back to the system call if the device is either not there or cannot be opened with the current permissions. First, as long as the app has permission to access /dev/userfaultfd, it always has the ability to trap kernel faults, which is what QEMU mostly wants. Second, in some contexts (e.g. containers) the userfaultfd syscall can be forbidden, so /dev/userfaultfd can be the major way to use postcopy in a restricted environment with a strict seccomp setup. Signed-off-by: Peter Xu <peterx@redhat.com> Reviewed-by: Juan Quintela <quintela@redhat.com> Signed-off-by: Juan Quintela <quintela@redhat.com>
1 parent 93e0932 commit c40c046

File tree

2 files changed

+33
-0
lines changed

2 files changed

+33
-0
lines changed

util/trace-events

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz
9393
qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
9494

9595
#userfaultfd.c
96+
uffd_detect_open_mode(int mode) "%d"
9697
uffd_query_features_nosys(int err) "errno: %i"
9798
uffd_query_features_api_failed(int err) "errno: %i"
9899
uffd_create_fd_nosys(int err) "errno: %i"

util/userfaultfd.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,42 @@
1818
#include <poll.h>
1919
#include <sys/syscall.h>
2020
#include <sys/ioctl.h>
21+
#include <fcntl.h>
22+
23+
/*
 * Strategy uffd_open() uses to obtain a new userfaultfd descriptor.
 * The choice is made once, on the first call, and cached in a static.
 */
typedef enum {
24+
UFFD_UNINITIALIZED = 0, /* zero value of the static: not yet detected */
25+
UFFD_USE_DEV_PATH, /* /dev/userfaultfd opened OK: use USERFAULTFD_IOC_NEW */
26+
UFFD_USE_SYSCALL, /* opening the device failed: use the userfaultfd syscall */
27+
} uffd_open_mode;
2128

2229
int uffd_open(int flags)
2330
{
2431
#if defined(__NR_userfaultfd)
32+
static uffd_open_mode open_mode;
33+
static int uffd_dev;
34+
35+
/* Detect how to create the uffd descriptor on the first call */
36+
if (open_mode == UFFD_UNINITIALIZED) {
37+
/*
38+
* Make /dev/userfaultfd the default approach because it has better
39+
* permission controls, meanwhile allows kernel faults without any
40+
* privilege requirement (e.g. CAP_SYS_PTRACE).
41+
*/
42+
uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
43+
if (uffd_dev >= 0) {
44+
open_mode = UFFD_USE_DEV_PATH;
45+
} else {
46+
/* Fallback to the system call */
47+
open_mode = UFFD_USE_SYSCALL;
48+
}
49+
trace_uffd_detect_open_mode(open_mode);
50+
}
51+
52+
if (open_mode == UFFD_USE_DEV_PATH) {
53+
assert(uffd_dev >= 0);
54+
return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
55+
}
56+
2557
return syscall(__NR_userfaultfd, flags);
2658
#else
2759
return -EINVAL;

0 commit comments

Comments
 (0)