From ef51ff533109bccae4a3ec112cfa63a3cb84fb9b Mon Sep 17 00:00:00 2001 From: prakritigoyal19 Date: Sun, 7 Jun 2020 09:22:53 +0530 Subject: [PATCH 001/121] Add flog to CRIU Change made through this commit: - Include copy of flog as a seperate tree. - Modify the makefile to add and compile flog code. Signed-off-by: prakritigoyal19 --- Makefile | 12 ++- flog/Makefile | 29 ++++++ flog/built-in.S | 4 + flog/include/compiler.h | 71 +++++++++++++ flog/include/flog.h | 9 ++ flog/include/log.h | 17 ++++ flog/include/types.h | 16 +++ flog/include/uapi/flog.h | 149 +++++++++++++++++++++++++++ flog/include/util.h | 37 +++++++ flog/src/Makefile | 5 + flog/src/flog.c | 215 +++++++++++++++++++++++++++++++++++++++ flog/src/main.c | 170 +++++++++++++++++++++++++++++++ flog/tests/test00 | 22 ++++ 13 files changed, 755 insertions(+), 1 deletion(-) create mode 100644 flog/Makefile create mode 100644 flog/built-in.S create mode 100644 flog/include/compiler.h create mode 100644 flog/include/flog.h create mode 100644 flog/include/log.h create mode 100644 flog/include/types.h create mode 100644 flog/include/uapi/flog.h create mode 100644 flog/include/util.h create mode 100644 flog/src/Makefile create mode 100644 flog/src/flog.c create mode 100644 flog/src/main.c create mode 100755 flog/tests/test00 diff --git a/Makefile b/Makefile index 08761efed6..a1e3977f8f 100644 --- a/Makefile +++ b/Makefile @@ -147,7 +147,7 @@ HOSTCFLAGS += $(WARNINGS) $(DEFINES) -iquote include/ export AFLAGS CFLAGS USERCLFAGS HOSTCFLAGS # Default target -all: criu lib crit +all: flog criu lib crit .PHONY: all # @@ -233,6 +233,15 @@ soccr/built-in.o: $(CONFIG_HEADER) .FORCE $(SOCCR_A): |soccr/built-in.o criu-deps += $(SOCCR_A) +#flog gets used by criu, build it earlier + +flogMakefile: ; +flog%: + $(Q) $(MAKE) $(build)=flog $@ +flog: + $(Q) $(MAKE) $(build)=flog all +.PHONY: flog + # # CRIU building done in own directory # with slightly different rules so we @@ -275,6 +284,7 @@ lib: crit clean mrproper: $(Q) $(MAKE) 
$(build)=images $@ + $(Q) $(MAKE) $(build)=flog $@ $(Q) $(MAKE) $(build)=criu $@ $(Q) $(MAKE) $(build)=soccr $@ $(Q) $(MAKE) $(build)=lib $@ diff --git a/flog/Makefile b/flog/Makefile new file mode 100644 index 0000000000..12255af719 --- /dev/null +++ b/flog/Makefile @@ -0,0 +1,29 @@ +OPTS=-ggdb3 -Wall -Werror +export OPTS + +CFLAGS += -iquote include +CFLAGS += -iquote flog/include +CFLAGS += -iquote flog/include/uapi + +include $(__nmk_dir)msg.mk + +$(eval $(call gen-built-in,src)) + +flog: + $(Q) $(MAKE) $(build)=$(obj)/src all +.PHONY: flog + +clean-flog: + $(call msg-gen, $@) + $(Q) $(MAKE) $(build)=$(obj)/src clean + $(Q) $(RM) built-in.o +.PHONY: clean-flog + +clean: clean-flog +mrproper: clean + +test: + ./tests/test00 + +all-y += flog + diff --git a/flog/built-in.S b/flog/built-in.S new file mode 100644 index 0000000000..26627d0544 --- /dev/null +++ b/flog/built-in.S @@ -0,0 +1,4 @@ +SECTIONS +{ + .rodata : { _rodata_start = . ; *(.rodata*) ; _rodata_end = . ;} +} diff --git a/flog/include/compiler.h b/flog/include/compiler.h new file mode 100644 index 0000000000..3e56eb0e64 --- /dev/null +++ b/flog/include/compiler.h @@ -0,0 +1,71 @@ +#ifndef __COMPILER_H__ +#define __COMPILER_H__ + +/* + * Various definitions for success build, + * picked from various places, mostly from + * the linux kernel. + */ + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +#define __stringify_1(x...) #x +#define __stringify(x...) 
__stringify_1(x) + +#define NORETURN __attribute__((__noreturn__)) +#define __packed __attribute__((__packed__)) +#define __used __attribute__((__used__)) +#define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) + +#define __section(S) __attribute__ ((__section__(#S))) + +#ifndef __always_inline +# define __always_inline inline __attribute__((always_inline)) +#endif + +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) + +#ifndef always_inline +# define always_inline __always_inline +#endif + +#ifndef noinline +# define noinline __attribute__((noinline)) +#endif + +#define __aligned(x) __attribute__((aligned(x))) + +#ifndef offsetof +# define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +#define barrier() asm volatile("" ::: "memory") + +#define container_of(ptr, type, member) ({ \ + const typeof( ((type *)0)->member ) *__mptr = (ptr); \ + (type *)( (char *)__mptr - offsetof(type,member) );}) + +#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) +#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) + +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? 
_max1 : _max2; }) + +#define is_log2(v) (((v) & ((v) - 1)) == 0) + +#endif /* __COMPILER_H__ */ diff --git a/flog/include/flog.h b/flog/include/flog.h new file mode 100644 index 0000000000..f00c20541f --- /dev/null +++ b/flog/include/flog.h @@ -0,0 +1,9 @@ +#ifndef __FLOG_H__ +#define __FLOG_H__ + +#include +#include + +#include "uapi/flog.h" + +#endif /* __FLOG_H__ */ diff --git a/flog/include/log.h b/flog/include/log.h new file mode 100644 index 0000000000..1a165ea9fb --- /dev/null +++ b/flog/include/log.h @@ -0,0 +1,17 @@ +#ifndef __LOG_H__ +#define __LOG_H__ + +#include + +#define pr_out(fmt, ...) fprintf(stdout, fmt, ##__VA_ARGS__) + +#if 1 +# define pr_debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +#else +# define pr_debug(fmt, ...) +#endif + +#define pr_err(fmt, ...) fprintf(stderr, "Error (%s:%d): "fmt, __FILE__, __LINE__, ##__VA_ARGS__) +#define pr_perror(fmt, ...) fprintf(stderr, "Error (%s:%d): "fmt "%m\n", __FILE__, __LINE__, ##__VA_ARGS__) + +#endif /* __LOG_H__ */ diff --git a/flog/include/types.h b/flog/include/types.h new file mode 100644 index 0000000000..0e15bfbff5 --- /dev/null +++ b/flog/include/types.h @@ -0,0 +1,16 @@ +#ifndef __FLOG_TYPES_H__ +#define __FLOG_TYPES_H__ + +#include +#include + +typedef uint64_t u64; +typedef int64_t s64; +typedef uint32_t u32; +typedef int32_t s32; +typedef uint16_t u16; +typedef int16_t s16; +typedef uint8_t u8; +typedef int8_t s8; + +#endif /* __FLOG_TYPES_H__ */ diff --git a/flog/include/uapi/flog.h b/flog/include/uapi/flog.h new file mode 100644 index 0000000000..2d879110fc --- /dev/null +++ b/flog/include/uapi/flog.h @@ -0,0 +1,149 @@ +#ifndef __UAPI_FLOG_H__ +#define __UAPI_FLOG_H__ + +#include +#include +#include + +/* + * We work with up to 32 arguments in macros here. + * If more provided -- behaviour is undefined. + */ + +/* + * By Laurent Deniau at https://groups.google.com/forum/#!topic/comp.std.c/d-6Mj5Lko_s + */ +#define FLOG_PP_NARG_(...) 
FLOG_PP_ARG_N(__VA_ARGS__) +#define FLOG_PP_NARG(...) FLOG_PP_NARG_(1, ##__VA_ARGS__, FLOG_PP_RSEQ_N()) + +#define FLOG_PP_ARG_N( _0, _1, _2, _3, _4, \ + _5, _6, _7, _8, _9, \ + _10,_11,_12,_13,_14, \ + _15,_16,_17,_18,_19, \ + _20,_21,_22,_23,_24, \ + _25,_26,_27,_28,_29, \ + _30,_31, N, ...) N + +#define FLOG_PP_RSEQ_N() \ + 31, 30, 29, 28, 27, \ + 26, 25, 24, 23, 22, \ + 21, 20, 19, 18, 17, \ + 16, 15, 14, 13, 12, \ + 11, 10, 9, 8, 7, \ + 6, 5, 4, 3, 2, \ + 1, 0 + +#define FLOG_GENMASK_0(N, x) 0 +#define FLOG_GENMASK_1(N, op, x, ...) (op(N, 0, x)) +#define FLOG_GENMASK_2(N, op, x, ...) ((op(N, 1, x)) | FLOG_GENMASK_1(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_3(N, op, x, ...) ((op(N, 2, x)) | FLOG_GENMASK_2(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_4(N, op, x, ...) ((op(N, 3, x)) | FLOG_GENMASK_3(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_5(N, op, x, ...) ((op(N, 4, x)) | FLOG_GENMASK_4(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_6(N, op, x, ...) ((op(N, 5, x)) | FLOG_GENMASK_5(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_7(N, op, x, ...) ((op(N, 6, x)) | FLOG_GENMASK_6(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_8(N, op, x, ...) ((op(N, 7, x)) | FLOG_GENMASK_7(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_9(N, op, x, ...) ((op(N, 8, x)) | FLOG_GENMASK_8(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_10(N, op, x, ...) ((op(N, 9, x)) | FLOG_GENMASK_9(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_11(N, op, x, ...) ((op(N, 10, x)) | FLOG_GENMASK_10(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_12(N, op, x, ...) ((op(N, 11, x)) | FLOG_GENMASK_11(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_13(N, op, x, ...) ((op(N, 12, x)) | FLOG_GENMASK_12(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_14(N, op, x, ...) ((op(N, 13, x)) | FLOG_GENMASK_13(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_15(N, op, x, ...) ((op(N, 14, x)) | FLOG_GENMASK_14(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_16(N, op, x, ...) ((op(N, 15, x)) | FLOG_GENMASK_15(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_17(N, op, x, ...) 
((op(N, 16, x)) | FLOG_GENMASK_16(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_18(N, op, x, ...) ((op(N, 17, x)) | FLOG_GENMASK_17(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_19(N, op, x, ...) ((op(N, 18, x)) | FLOG_GENMASK_18(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_20(N, op, x, ...) ((op(N, 19, x)) | FLOG_GENMASK_19(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_21(N, op, x, ...) ((op(N, 20, x)) | FLOG_GENMASK_20(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_22(N, op, x, ...) ((op(N, 21, x)) | FLOG_GENMASK_21(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_23(N, op, x, ...) ((op(N, 22, x)) | FLOG_GENMASK_22(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_24(N, op, x, ...) ((op(N, 23, x)) | FLOG_GENMASK_23(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_25(N, op, x, ...) ((op(N, 24, x)) | FLOG_GENMASK_24(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_26(N, op, x, ...) ((op(N, 25, x)) | FLOG_GENMASK_25(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_27(N, op, x, ...) ((op(N, 26, x)) | FLOG_GENMASK_26(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_28(N, op, x, ...) ((op(N, 27, x)) | FLOG_GENMASK_27(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_29(N, op, x, ...) ((op(N, 28, x)) | FLOG_GENMASK_28(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_30(N, op, x, ...) ((op(N, 29, x)) | FLOG_GENMASK_29(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_31(N, op, x, ...) ((op(N, 30, x)) | FLOG_GENMASK_30(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_32(N, op, x, ...) ((op(N, 31, x)) | FLOG_GENMASK_31(N, op, __VA_ARGS__)) + +#define FLOG_CONCAT(arg1, arg2) FLOG_CONCAT1(arg1, arg2) +#define FLOG_CONCAT1(arg1, arg2) FLOG_CONCAT2(arg1, arg2) +#define FLOG_CONCAT2(arg1, arg2) arg1##arg2 + +#define FLOG_GENMASK_(N, op, ...) FLOG_CONCAT(FLOG_GENMASK_, N)(N, op, ##__VA_ARGS__) +#define FLOG_GENMASK(op, ...) FLOG_GENMASK_(FLOG_PP_NARG(__VA_ARGS__), op, ##__VA_ARGS__) + +#define flog_genbit(ord, n, v, ...) 
\ + _Generic((v), \ + \ + /* Basic types */ \ + char: 0, \ + signed char: 0, \ + unsigned char: 0, \ + signed short int: 0, \ + unsigned short int: 0, \ + signed int: 0, \ + unsigned int: 0, \ + signed long: 0, \ + unsigned long: 0, \ + signed long long: 0, \ + unsigned long long: 0, \ + \ + /* Not used for a while */ \ + /* float: 12, */ \ + /* double: 13, */ \ + /* long double: 14, */ \ + \ + /* Basic poniters */ \ + char *: (1u << (ord - n - 1)), \ + signed char *: (1u << (ord - n - 1)), \ + unsigned char *: (1u << (ord - n - 1)), \ + signed short int *: 0, \ + unsigned short int *: 0, \ + signed int *: 0, \ + unsigned int *: 0, \ + signed long *: 0, \ + unsigned long *: 0, \ + signed long long *: 0, \ + unsigned long long *: 0, \ + void *: 0, \ + \ + /* Const basic pointers */ \ + const char *: (1u << (ord - n - 1)), \ + const signed char *: (1u << (ord - n - 1)), \ + const unsigned char *: (1u << (ord - n - 1)), \ + const signed short int *: 0, \ + const unsigned short int *: 0, \ + const signed int *: 0, \ + const unsigned int *: 0, \ + const signed long *: 0, \ + const unsigned long *: 0, \ + const signed long long *: 0, \ + const unsigned long long *: 0, \ + const void *: 0, \ + \ + /* Systypes and pointers */ \ + default: -1) + +typedef struct { + unsigned int magic; + unsigned int size; + unsigned int nargs; + unsigned int mask; + long fmt; + long args[0]; +} flog_msg_t; + +extern int flog_encode_msg(int fdout, unsigned int nargs, unsigned int mask, const char *format, ...); +void flog_decode_msg(int fdout, const char *format, ...); +extern int flog_decode_all(int fdin, int fdout); + +#define flog_encode(fdout, fmt, ...) 
\ + flog_encode_msg(fdout, FLOG_PP_NARG(__VA_ARGS__), \ + FLOG_GENMASK(flog_genbit, ##__VA_ARGS__), fmt, ##__VA_ARGS__) + +int flog_map_buf(int fdout); +int flog_close(int fdout); + +#endif /* __UAPI_FLOG_H__ */ diff --git a/flog/include/util.h b/flog/include/util.h new file mode 100644 index 0000000000..17a4d77997 --- /dev/null +++ b/flog/include/util.h @@ -0,0 +1,37 @@ +#ifndef __UTIL_H__ +#define __UTIL_H__ + +#include +#include + +#include "log.h" +#include "types.h" + +#define __xalloc(op, size, ...) \ + ({ \ + void *___p = op(__VA_ARGS__); \ + ___p; \ + }) + +#define xstrdup(str) __xalloc(strdup, strlen(str) + 1, str) +#define xmalloc(size) __xalloc(malloc, size, size) +#define xzalloc(size) __xalloc(calloc, size, 1, size) +#define xrealloc(p, size) __xalloc(realloc, size, p, size) + +#define xfree(p) do { if (p) free(p); } while (0) + +#define xrealloc_safe(pptr, size) \ + ({ \ + int __ret = -ENOMEM; \ + void *new = xrealloc(*pptr, size); \ + if (new) { \ + *pptr = new; \ + __ret = 0; \ + } \ + __ret; \ + }) + +#define memzero_p(p) memset(p, 0, sizeof(*p)) +#define memzero(p, size) memset(p, 0, size) + +#endif /* __UTIL_H__ */ diff --git a/flog/src/Makefile b/flog/src/Makefile new file mode 100644 index 0000000000..ee73ea7252 --- /dev/null +++ b/flog/src/Makefile @@ -0,0 +1,5 @@ +ccflags-y += -DCONFIG_X86_64 -iquote ./include $(OPTS) +ldflags-y += -r + +#obj-y += main.o +obj-y += flog.o diff --git a/flog/src/flog.c b/flog/src/flog.c new file mode 100644 index 0000000000..533625de61 --- /dev/null +++ b/flog/src/flog.c @@ -0,0 +1,215 @@ +#include +#include +#include +#include +#include +#include +#include + +//#include + +#include "uapi/flog.h" +#include "util.h" + +#define MAGIC 0xABCDABCD + +#define BUF_SIZE (1<<20) +static char _mbuf[BUF_SIZE]; +static char *mbuf = _mbuf; +static char *fbuf; +static uint64_t fsize; +static uint64_t mbuf_size = sizeof(_mbuf); + +/*int flog_decode_all(int fdin, int fdout) +{ + flog_msg_t *m = (void *)mbuf; + ffi_type 
*args[34] = { + [0] = &ffi_type_sint, + [1] = &ffi_type_pointer, + [2 ... 33] = &ffi_type_slong + }; + void *values[34]; + ffi_cif cif; + ffi_arg rc; + size_t i, ret; + char *fmt; + + values[0] = (void *)&fdout; + + while (1) { + ret = read(fdin, mbuf, sizeof(m)); + if (ret == 0) + break; + if (ret < 0) { + fprintf(stderr, "Unable to read a message: %m"); + return -1; + } + if (m->magic != MAGIC) { + fprintf(stderr, "The log file was not properly closed\n"); + break; + } + ret = m->size - sizeof(m); + if (m->size > mbuf_size) { + fprintf(stderr, "The buffer is too small"); + return -1; + } + if (read(fdin, mbuf + sizeof(m), ret) != ret) { + fprintf(stderr, "Unable to read a message: %m"); + return -1; + } + + fmt = mbuf + m->fmt; + values[1] = &fmt; + + for (i = 0; i < m->nargs; i++) { + values[i + 2] = (void *)&m->args[i]; + if (m->mask & (1u << i)) { + m->args[i] = (long)(mbuf + m->args[i]); + } + } + + if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, m->nargs + 2, + &ffi_type_sint, args) == FFI_OK) + ffi_call(&cif, FFI_FN(dprintf), &rc, values); + } + return 0; +}*/ + +static int flog_enqueue(flog_msg_t *m) +{ + if (write(1, m, m->size) != m->size) { + fprintf(stderr, "Unable to write a message\n"); + return -1; + } + return 0; +} + +/*extern char *rodata_start; +extern char *rodata_end; +*/ +/* Pre-allocate a buffer in a file and map it into memory. */ +int flog_map_buf(int fdout) +{ + uint64_t off = 0; + void *addr; + + /* + * Two buffers are mmaped into memory. A new one is mapped when a first + * one is completly filled. 
+ */ + if (fbuf && (mbuf - fbuf < BUF_SIZE)) + return 0; + + if (fbuf) { + if (munmap(fbuf, BUF_SIZE * 2)) { + fprintf(stderr, "Unable to unmap a buffer: %m"); + return -1; + } + off = mbuf - fbuf - BUF_SIZE; + fbuf = NULL; + } + + if (fsize == 0) + fsize += BUF_SIZE; + fsize += BUF_SIZE; + + if (ftruncate(fdout, fsize)) { + fprintf(stderr, "Unable to truncate a file: %m"); + return -1; + } + + if (!fbuf) + addr = mmap(NULL, BUF_SIZE * 2, PROT_WRITE | PROT_READ, + MAP_FILE | MAP_SHARED, fdout, fsize - 2 * BUF_SIZE); + else + addr = mremap(fbuf + BUF_SIZE, BUF_SIZE, + BUF_SIZE * 2, MREMAP_FIXED, fbuf); + if (addr == MAP_FAILED) { + fprintf(stderr, "Unable to map a buffer: %m"); + return -1; + } + + fbuf = addr; + mbuf = fbuf + off; + mbuf_size = 2 * BUF_SIZE; + + return 0; +} + +int flog_close(int fdout) +{ + if (mbuf == _mbuf) + return 0; + + munmap(fbuf, BUF_SIZE * 2); + + if (ftruncate(fdout, fsize - 2 * BUF_SIZE + mbuf - fbuf)) { + fprintf(stderr, "Unable to truncate a file: %m"); + return -1; + } + return 0; +} + +int flog_encode_msg(int fdout, unsigned int nargs, unsigned int mask, const char *format, ...) +{ + flog_msg_t *m; + va_list argptr; + char *str_start, *p; + size_t i; + + if (mbuf != _mbuf && flog_map_buf(fdout)) + return -1; + + m = (void *) mbuf; + + m->nargs = nargs; + m->mask = mask; + + str_start = (void *)m->args + sizeof(m->args[0]) * nargs; + p = memccpy(str_start, format, 0, mbuf_size - (str_start - mbuf)); + if (p == NULL) { + fprintf(stderr, "No memory for string argument\n"); + return -1; + } + m->fmt = str_start - mbuf; + str_start = p; + + va_start(argptr, format); + for (i = 0; i < nargs; i++) { + m->args[i] = (long)va_arg(argptr, long); + /* + * If we got a string, we should either + * reference it when in rodata, or make + * a copy (FIXME implement rodata refs). 
+ */ + if (mask & (1u << i)) { + p = memccpy(str_start, (void *)m->args[i], 0, mbuf_size - (str_start - mbuf)); + if (p == NULL) { + fprintf(stderr, "No memory for string argument\n"); + return -1; + } + m->args[i] = str_start - mbuf; + str_start = p; + } + } + va_end(argptr); + m->size = str_start - mbuf; + + /* + * A magic is required to know where we stop writing into a log file, + * if it was not properly closed. The file is mapped into memory, so a + * space in the file is allocated in advance and at the end it can have + * some unused tail. + */ + m->magic = MAGIC; + + m->size = roundup(m->size, 8); + if (mbuf == _mbuf) { + if (flog_enqueue(m)) + return -1; + } else { + mbuf += m->size; + mbuf_size -= m->size; + } + return 0; +} diff --git a/flog/src/main.c b/flog/src/main.c new file mode 100644 index 0000000000..c84e774781 --- /dev/null +++ b/flog/src/main.c @@ -0,0 +1,170 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include "flog.h" + +extern char _rodata_start, _rodata_end; +char *rodata_start = &_rodata_start; +char *rodata_end = &_rodata_end; + +enum { + MODE_BINARY, + MODE_FPRINTF, + MODE_SPRINTF, + MODE_DPRINTF, +}; + +int main(int argc, char *argv[]) +{ + static const char str1[] = "String1 String1"; + static const char str2[] = "string2 string2 string2"; + int fdout = STDOUT_FILENO; + bool use_decoder = false; + int mode = MODE_BINARY; + size_t niter = 100; + int opt, idx; + size_t i; + + static const char short_opts[] = "m:o:di:h"; + static struct option long_opts[] = { + { "mode", required_argument, 0, 'm' }, + { "output", required_argument, 0, 'o' }, + { "decode", no_argument, 0, 'd' }, + { "iter", required_argument, 0, 'i' }, + { "help", no_argument, 0, 'h' }, + { }, + }; + + while (1) { + idx = -1; + opt = getopt_long(argc, argv, short_opts, long_opts, &idx); + if (opt == -1) + break; + + switch (opt) { + case 'm': + if (strcmp(optarg, "binary") == 0) { + mode = MODE_BINARY; + } else if (strcmp(optarg, 
"fprintf") == 0) { + mode = MODE_FPRINTF; + } else if (strcmp(optarg, "sprintf") == 0) { + mode = MODE_SPRINTF; + } else if (strcmp(optarg, "dprintf") == 0) { + mode = MODE_DPRINTF; + } else + goto usage; + break; + case 'o': + if (strcmp(optarg, "stdout") == 0) { + fdout = fileno(stdout); + } else if (strcmp(optarg, "stderr") == 0) { + fdout = fileno(stderr); + } else { + fdout = open(optarg, O_RDWR | O_CREAT | O_TRUNC, 0644); + if (fdout < 0) { + fprintf(stderr, "Can't open %s: %s\n", + optarg, strerror(errno)); + exit(1); + } + } + break; + case 'i': + niter = atoi(optarg); + break; + case 'd': + use_decoder = true; + break; + case 'h': + default: + goto usage; + } + } + + switch (mode) { + case MODE_BINARY: + if (use_decoder) + return flog_decode_all(STDIN_FILENO, fdout); + + if (fdout != STDOUT_FILENO && flog_map_buf(fdout)) + return 1; + for (i = 0; i < niter; i++) + if (flog_encode(fdout, "Some message %s %s %c %li %d %lu\n", + str1, str2, 'c', (long)-4, (short)2, + (unsigned long)2)) + return 1; + if (flog_close(fdout)) + return 1; + break; + case MODE_DPRINTF: + { + for (i = 0; i < niter; i++) { + dprintf(fdout, "Some message %s %s %c %li %d %lu\n", + str1, str2, 'c', (long)-4, (short)2, + (unsigned long)2); + } + break; + } + case MODE_FPRINTF: + { + FILE *f = fdopen(fdout, "w"); + + for (i = 0; i < niter; i++) { + fprintf(f, "Some message %s %s %c %li %d %lu\n", + str1, str2, 'c', (long)-4, (short)2, + (unsigned long)2); + fflush(f); + } + fclose(f); + break; + } + case MODE_SPRINTF: + { + static char buf[4096]; + + for (i = 0; i < niter; i++) { + sprintf(buf, "Some message %s %s %c %li %d %lu\n", + str1, str2, 'c', (long)-4, (short)2, + (unsigned long)2); + } + break; + } + default: + return 1; + } + + return 0; +usage: + fprintf(stderr, + "flog [--mode binary|dprintf] [--output stdout|stderr|filename] [--decode] [--iter number]\n" + "\n" + + "Examples:\n" + "\n" + + " - run 100000 iterations of instant message processing (immediate dprintf calls)\n" + 
"\n" + " flog -m dprintf -i 100000\n" + "\n" + + " - run 100000 iterations in binary mode without processing (queue messages only)\n" + "\n" + " flog -i 100000\n" + "\n" + + " - run 100000 iterations in binary mode with decoding after\n" + "\n" + " flog -i 100000 -d\n" + "\n" + + " - run 100000 iterations in binary mode with decoding after, writting results into 'out' file\n" + "\n" + " flog -i 100000 -d -o out\n" + "\n"); + return 1; +} diff --git a/flog/tests/test00 b/flog/tests/test00 new file mode 100755 index 0000000000..a7937e4a18 --- /dev/null +++ b/flog/tests/test00 @@ -0,0 +1,22 @@ +#!/bin/sh + +set -e -x + +echo Map a log file into memory +time ./flog run -i 1000000 -o /tmp/flog.raw.map +echo Write into a log file +time ./flog run -i 1000000 > /tmp/flog.raw +echo Use fprintf +time ./flog run -m fprintf -i 1000000 -o /tmp/flog.fprintf.txt +echo Use dprintf +time ./flog run -m dprintf -i 1000000 -o /tmp/flog.dprintf.txt +echo Use sprintf +time ./flog run -m sprintf -i 1000000 + +time ./flog run -d < /tmp/flog.raw > /tmp/flog.raw.txt +cmp /tmp/flog.raw.txt /tmp/flog.fprintf.txt + +time ./flog run -d < /tmp/flog.raw.map > /tmp/flog.raw.map.txt +cmp /tmp/flog.raw.map.txt /tmp/flog.fprintf.txt + +cmp /tmp/flog.dprintf.txt /tmp/flog.fprintf.txt From b5fcd70cc974466867d4bb3cb78beb95dd7a2329 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 28 Sep 2020 07:04:00 +0000 Subject: [PATCH 002/121] flog: Missing varargs init or cleanup (VARARGS) CID 302713 (#1 of 1): Missing varargs init or cleanup (VARARGS) va_end was not called for argptr. 
Signed-off-by: Adrian Reber --- flog/src/flog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/flog/src/flog.c b/flog/src/flog.c index 533625de61..40cce3fedc 100644 --- a/flog/src/flog.c +++ b/flog/src/flog.c @@ -186,6 +186,7 @@ int flog_encode_msg(int fdout, unsigned int nargs, unsigned int mask, const char p = memccpy(str_start, (void *)m->args[i], 0, mbuf_size - (str_start - mbuf)); if (p == NULL) { fprintf(stderr, "No memory for string argument\n"); + va_end(argptr); return -1; } m->args[i] = str_start - mbuf; From 1a4721f4d4a63ff1314c94eedcf96c2b359f9615 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 4 Aug 2021 07:27:07 +0000 Subject: [PATCH 003/121] Run 'make indent' on 'flog/' Separate commit for easier criu-dev <-> master transfer. Acked-by: Mike Rapoport Signed-off-by: Adrian Reber --- flog/include/compiler.h | 88 +++++++++++++++------------- flog/include/log.h | 10 ++-- flog/include/types.h | 16 +++--- flog/include/uapi/flog.h | 120 ++++++++++++++++++--------------------- flog/include/util.h | 52 +++++++++-------- flog/src/flog.c | 11 ++-- flog/src/main.c | 37 +++++------- 7 files changed, 161 insertions(+), 173 deletions(-) diff --git a/flog/include/compiler.h b/flog/include/compiler.h index 3e56eb0e64..80264ec631 100644 --- a/flog/include/compiler.h +++ b/flog/include/compiler.h @@ -8,64 +8,70 @@ */ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)])) -#define __stringify_1(x...) #x -#define __stringify(x...) __stringify_1(x) +#define __stringify_1(x...) #x +#define __stringify(x...) 
__stringify_1(x) -#define NORETURN __attribute__((__noreturn__)) -#define __packed __attribute__((__packed__)) -#define __used __attribute__((__used__)) -#define __maybe_unused __attribute__((unused)) -#define __always_unused __attribute__((unused)) +#define NORETURN __attribute__((__noreturn__)) +#define __packed __attribute__((__packed__)) +#define __used __attribute__((__used__)) +#define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) -#define __section(S) __attribute__ ((__section__(#S))) +#define __section(S) __attribute__((__section__(#S))) #ifndef __always_inline -# define __always_inline inline __attribute__((always_inline)) +#define __always_inline inline __attribute__((always_inline)) #endif -#define likely(x) __builtin_expect(!!(x), 1) -#define unlikely(x) __builtin_expect(!!(x), 0) +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) #ifndef always_inline -# define always_inline __always_inline +#define always_inline __always_inline #endif #ifndef noinline -# define noinline __attribute__((noinline)) +#define noinline __attribute__((noinline)) #endif -#define __aligned(x) __attribute__((aligned(x))) +#define __aligned(x) __attribute__((aligned(x))) #ifndef offsetof -# define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#define offsetof(TYPE, MEMBER) ((size_t) & ((TYPE *)0)->MEMBER) #endif -#define barrier() asm volatile("" ::: "memory") - -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) - -#define __round_mask(x, y) ((__typeof__(x))((y) - 1)) -#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) -#define round_down(x, y) ((x) & ~__round_mask(x, y)) -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) -#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1)) - -#define min(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 
== &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) - -#define max(x, y) ({ \ - typeof(x) _max1 = (x); \ - typeof(y) _max2 = (y); \ - (void) (&_max1 == &_max2); \ - _max1 > _max2 ? _max1 : _max2; }) - -#define is_log2(v) (((v) & ((v) - 1)) == 0) +#define barrier() asm volatile("" ::: "memory") + +#define container_of(ptr, type, member) \ + ({ \ + const typeof(((type *)0)->member) *__mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); \ + }) + +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y)) + 1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) +#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) +#define ALIGN(x, a) (((x) + (a)-1) & ~((a)-1)) + +#define min(x, y) \ + ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void)(&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; \ + }) + +#define max(x, y) \ + ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void)(&_max1 == &_max2); \ + _max1 > _max2 ? _max1 : _max2; \ + }) + +#define is_log2(v) (((v) & ((v)-1)) == 0) #endif /* __COMPILER_H__ */ diff --git a/flog/include/log.h b/flog/include/log.h index 1a165ea9fb..8aafe44b75 100644 --- a/flog/include/log.h +++ b/flog/include/log.h @@ -3,15 +3,15 @@ #include -#define pr_out(fmt, ...) fprintf(stdout, fmt, ##__VA_ARGS__) +#define pr_out(fmt, ...) fprintf(stdout, fmt, ##__VA_ARGS__) #if 1 -# define pr_debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) #else -# define pr_debug(fmt, ...) +#define pr_debug(fmt, ...) #endif -#define pr_err(fmt, ...) fprintf(stderr, "Error (%s:%d): "fmt, __FILE__, __LINE__, ##__VA_ARGS__) -#define pr_perror(fmt, ...) fprintf(stderr, "Error (%s:%d): "fmt "%m\n", __FILE__, __LINE__, ##__VA_ARGS__) +#define pr_err(fmt, ...) fprintf(stderr, "Error (%s:%d): " fmt, __FILE__, __LINE__, ##__VA_ARGS__) +#define pr_perror(fmt, ...) 
fprintf(stderr, "Error (%s:%d): " fmt "%m\n", __FILE__, __LINE__, ##__VA_ARGS__) #endif /* __LOG_H__ */ diff --git a/flog/include/types.h b/flog/include/types.h index 0e15bfbff5..07c992968b 100644 --- a/flog/include/types.h +++ b/flog/include/types.h @@ -4,13 +4,13 @@ #include #include -typedef uint64_t u64; -typedef int64_t s64; -typedef uint32_t u32; -typedef int32_t s32; -typedef uint16_t u16; -typedef int16_t s16; -typedef uint8_t u8; -typedef int8_t s8; +typedef uint64_t u64; +typedef int64_t s64; +typedef uint32_t u32; +typedef int32_t s32; +typedef uint16_t u16; +typedef int16_t s16; +typedef uint8_t u8; +typedef int8_t s8; #endif /* __FLOG_TYPES_H__ */ diff --git a/flog/include/uapi/flog.h b/flog/include/uapi/flog.h index 2d879110fc..6061f4556a 100644 --- a/flog/include/uapi/flog.h +++ b/flog/include/uapi/flog.h @@ -13,68 +13,59 @@ /* * By Laurent Deniau at https://groups.google.com/forum/#!topic/comp.std.c/d-6Mj5Lko_s */ -#define FLOG_PP_NARG_(...) FLOG_PP_ARG_N(__VA_ARGS__) -#define FLOG_PP_NARG(...) FLOG_PP_NARG_(1, ##__VA_ARGS__, FLOG_PP_RSEQ_N()) +#define FLOG_PP_NARG_(...) FLOG_PP_ARG_N(__VA_ARGS__) +#define FLOG_PP_NARG(...) FLOG_PP_NARG_(1, ##__VA_ARGS__, FLOG_PP_RSEQ_N()) -#define FLOG_PP_ARG_N( _0, _1, _2, _3, _4, \ - _5, _6, _7, _8, _9, \ - _10,_11,_12,_13,_14, \ - _15,_16,_17,_18,_19, \ - _20,_21,_22,_23,_24, \ - _25,_26,_27,_28,_29, \ - _30,_31, N, ...) N +#define FLOG_PP_ARG_N(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, \ + _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, N, ...) \ + N -#define FLOG_PP_RSEQ_N() \ - 31, 30, 29, 28, 27, \ - 26, 25, 24, 23, 22, \ - 21, 20, 19, 18, 17, \ - 16, 15, 14, 13, 12, \ - 11, 10, 9, 8, 7, \ - 6, 5, 4, 3, 2, \ - 1, 0 +#define FLOG_PP_RSEQ_N() \ + 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, \ + 2, 1, 0 -#define FLOG_GENMASK_0(N, x) 0 -#define FLOG_GENMASK_1(N, op, x, ...) 
(op(N, 0, x)) -#define FLOG_GENMASK_2(N, op, x, ...) ((op(N, 1, x)) | FLOG_GENMASK_1(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_3(N, op, x, ...) ((op(N, 2, x)) | FLOG_GENMASK_2(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_4(N, op, x, ...) ((op(N, 3, x)) | FLOG_GENMASK_3(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_5(N, op, x, ...) ((op(N, 4, x)) | FLOG_GENMASK_4(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_6(N, op, x, ...) ((op(N, 5, x)) | FLOG_GENMASK_5(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_7(N, op, x, ...) ((op(N, 6, x)) | FLOG_GENMASK_6(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_8(N, op, x, ...) ((op(N, 7, x)) | FLOG_GENMASK_7(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_9(N, op, x, ...) ((op(N, 8, x)) | FLOG_GENMASK_8(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_10(N, op, x, ...) ((op(N, 9, x)) | FLOG_GENMASK_9(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_11(N, op, x, ...) ((op(N, 10, x)) | FLOG_GENMASK_10(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_12(N, op, x, ...) ((op(N, 11, x)) | FLOG_GENMASK_11(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_13(N, op, x, ...) ((op(N, 12, x)) | FLOG_GENMASK_12(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_14(N, op, x, ...) ((op(N, 13, x)) | FLOG_GENMASK_13(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_15(N, op, x, ...) ((op(N, 14, x)) | FLOG_GENMASK_14(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_16(N, op, x, ...) ((op(N, 15, x)) | FLOG_GENMASK_15(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_17(N, op, x, ...) ((op(N, 16, x)) | FLOG_GENMASK_16(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_18(N, op, x, ...) ((op(N, 17, x)) | FLOG_GENMASK_17(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_19(N, op, x, ...) ((op(N, 18, x)) | FLOG_GENMASK_18(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_20(N, op, x, ...) ((op(N, 19, x)) | FLOG_GENMASK_19(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_21(N, op, x, ...) ((op(N, 20, x)) | FLOG_GENMASK_20(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_22(N, op, x, ...) 
((op(N, 21, x)) | FLOG_GENMASK_21(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_23(N, op, x, ...) ((op(N, 22, x)) | FLOG_GENMASK_22(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_24(N, op, x, ...) ((op(N, 23, x)) | FLOG_GENMASK_23(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_25(N, op, x, ...) ((op(N, 24, x)) | FLOG_GENMASK_24(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_26(N, op, x, ...) ((op(N, 25, x)) | FLOG_GENMASK_25(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_27(N, op, x, ...) ((op(N, 26, x)) | FLOG_GENMASK_26(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_28(N, op, x, ...) ((op(N, 27, x)) | FLOG_GENMASK_27(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_29(N, op, x, ...) ((op(N, 28, x)) | FLOG_GENMASK_28(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_30(N, op, x, ...) ((op(N, 29, x)) | FLOG_GENMASK_29(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_31(N, op, x, ...) ((op(N, 30, x)) | FLOG_GENMASK_30(N, op, __VA_ARGS__)) -#define FLOG_GENMASK_32(N, op, x, ...) ((op(N, 31, x)) | FLOG_GENMASK_31(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_0(N, x) 0 +#define FLOG_GENMASK_1(N, op, x, ...) (op(N, 0, x)) +#define FLOG_GENMASK_2(N, op, x, ...) ((op(N, 1, x)) | FLOG_GENMASK_1(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_3(N, op, x, ...) ((op(N, 2, x)) | FLOG_GENMASK_2(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_4(N, op, x, ...) ((op(N, 3, x)) | FLOG_GENMASK_3(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_5(N, op, x, ...) ((op(N, 4, x)) | FLOG_GENMASK_4(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_6(N, op, x, ...) ((op(N, 5, x)) | FLOG_GENMASK_5(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_7(N, op, x, ...) ((op(N, 6, x)) | FLOG_GENMASK_6(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_8(N, op, x, ...) ((op(N, 7, x)) | FLOG_GENMASK_7(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_9(N, op, x, ...) ((op(N, 8, x)) | FLOG_GENMASK_8(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_10(N, op, x, ...) ((op(N, 9, x)) | FLOG_GENMASK_9(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_11(N, op, x, ...) 
((op(N, 10, x)) | FLOG_GENMASK_10(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_12(N, op, x, ...) ((op(N, 11, x)) | FLOG_GENMASK_11(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_13(N, op, x, ...) ((op(N, 12, x)) | FLOG_GENMASK_12(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_14(N, op, x, ...) ((op(N, 13, x)) | FLOG_GENMASK_13(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_15(N, op, x, ...) ((op(N, 14, x)) | FLOG_GENMASK_14(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_16(N, op, x, ...) ((op(N, 15, x)) | FLOG_GENMASK_15(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_17(N, op, x, ...) ((op(N, 16, x)) | FLOG_GENMASK_16(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_18(N, op, x, ...) ((op(N, 17, x)) | FLOG_GENMASK_17(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_19(N, op, x, ...) ((op(N, 18, x)) | FLOG_GENMASK_18(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_20(N, op, x, ...) ((op(N, 19, x)) | FLOG_GENMASK_19(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_21(N, op, x, ...) ((op(N, 20, x)) | FLOG_GENMASK_20(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_22(N, op, x, ...) ((op(N, 21, x)) | FLOG_GENMASK_21(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_23(N, op, x, ...) ((op(N, 22, x)) | FLOG_GENMASK_22(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_24(N, op, x, ...) ((op(N, 23, x)) | FLOG_GENMASK_23(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_25(N, op, x, ...) ((op(N, 24, x)) | FLOG_GENMASK_24(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_26(N, op, x, ...) ((op(N, 25, x)) | FLOG_GENMASK_25(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_27(N, op, x, ...) ((op(N, 26, x)) | FLOG_GENMASK_26(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_28(N, op, x, ...) ((op(N, 27, x)) | FLOG_GENMASK_27(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_29(N, op, x, ...) ((op(N, 28, x)) | FLOG_GENMASK_28(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_30(N, op, x, ...) ((op(N, 29, x)) | FLOG_GENMASK_29(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_31(N, op, x, ...) ((op(N, 30, x)) | FLOG_GENMASK_30(N, op, __VA_ARGS__)) +#define FLOG_GENMASK_32(N, op, x, ...) 
((op(N, 31, x)) | FLOG_GENMASK_31(N, op, __VA_ARGS__)) -#define FLOG_CONCAT(arg1, arg2) FLOG_CONCAT1(arg1, arg2) -#define FLOG_CONCAT1(arg1, arg2) FLOG_CONCAT2(arg1, arg2) -#define FLOG_CONCAT2(arg1, arg2) arg1##arg2 +#define FLOG_CONCAT(arg1, arg2) FLOG_CONCAT1(arg1, arg2) +#define FLOG_CONCAT1(arg1, arg2) FLOG_CONCAT2(arg1, arg2) +#define FLOG_CONCAT2(arg1, arg2) arg1##arg2 -#define FLOG_GENMASK_(N, op, ...) FLOG_CONCAT(FLOG_GENMASK_, N)(N, op, ##__VA_ARGS__) -#define FLOG_GENMASK(op, ...) FLOG_GENMASK_(FLOG_PP_NARG(__VA_ARGS__), op, ##__VA_ARGS__) +#define FLOG_GENMASK_(N, op, ...) FLOG_CONCAT(FLOG_GENMASK_, N)(N, op, ##__VA_ARGS__) +#define FLOG_GENMASK(op, ...) FLOG_GENMASK_(FLOG_PP_NARG(__VA_ARGS__), op, ##__VA_ARGS__) -#define flog_genbit(ord, n, v, ...) \ +#define flog_genbit(ord, n, v, ...) \ _Generic((v), \ \ /* Basic types */ \ @@ -127,21 +118,20 @@ default: -1) typedef struct { - unsigned int magic; - unsigned int size; - unsigned int nargs; - unsigned int mask; - long fmt; - long args[0]; + unsigned int magic; + unsigned int size; + unsigned int nargs; + unsigned int mask; + long fmt; + long args[0]; } flog_msg_t; extern int flog_encode_msg(int fdout, unsigned int nargs, unsigned int mask, const char *format, ...); void flog_decode_msg(int fdout, const char *format, ...); extern int flog_decode_all(int fdin, int fdout); -#define flog_encode(fdout, fmt, ...) \ - flog_encode_msg(fdout, FLOG_PP_NARG(__VA_ARGS__), \ - FLOG_GENMASK(flog_genbit, ##__VA_ARGS__), fmt, ##__VA_ARGS__) +#define flog_encode(fdout, fmt, ...) \ + flog_encode_msg(fdout, FLOG_PP_NARG(__VA_ARGS__), FLOG_GENMASK(flog_genbit, ##__VA_ARGS__), fmt, ##__VA_ARGS__) int flog_map_buf(int fdout); int flog_close(int fdout); diff --git a/flog/include/util.h b/flog/include/util.h index 17a4d77997..7b1edb6885 100644 --- a/flog/include/util.h +++ b/flog/include/util.h @@ -7,31 +7,35 @@ #include "log.h" #include "types.h" -#define __xalloc(op, size, ...) 
\ - ({ \ - void *___p = op(__VA_ARGS__); \ - ___p; \ +#define __xalloc(op, size, ...) \ + ({ \ + void *___p = op(__VA_ARGS__); \ + ___p; \ }) -#define xstrdup(str) __xalloc(strdup, strlen(str) + 1, str) -#define xmalloc(size) __xalloc(malloc, size, size) -#define xzalloc(size) __xalloc(calloc, size, 1, size) -#define xrealloc(p, size) __xalloc(realloc, size, p, size) - -#define xfree(p) do { if (p) free(p); } while (0) - -#define xrealloc_safe(pptr, size) \ - ({ \ - int __ret = -ENOMEM; \ - void *new = xrealloc(*pptr, size); \ - if (new) { \ - *pptr = new; \ - __ret = 0; \ - } \ - __ret; \ - }) - -#define memzero_p(p) memset(p, 0, sizeof(*p)) -#define memzero(p, size) memset(p, 0, size) +#define xstrdup(str) __xalloc(strdup, strlen(str) + 1, str) +#define xmalloc(size) __xalloc(malloc, size, size) +#define xzalloc(size) __xalloc(calloc, size, 1, size) +#define xrealloc(p, size) __xalloc(realloc, size, p, size) + +#define xfree(p) \ + do { \ + if (p) \ + free(p); \ + } while (0) + +#define xrealloc_safe(pptr, size) \ + ({ \ + int __ret = -ENOMEM; \ + void *new = xrealloc(*pptr, size); \ + if (new) { \ + *pptr = new; \ + __ret = 0; \ + } \ + __ret; \ + }) + +#define memzero_p(p) memset(p, 0, sizeof(*p)) +#define memzero(p, size) memset(p, 0, size) #endif /* __UTIL_H__ */ diff --git a/flog/src/flog.c b/flog/src/flog.c index 40cce3fedc..8f11a36cbf 100644 --- a/flog/src/flog.c +++ b/flog/src/flog.c @@ -13,7 +13,7 @@ #define MAGIC 0xABCDABCD -#define BUF_SIZE (1<<20) +#define BUF_SIZE (1 << 20) static char _mbuf[BUF_SIZE]; static char *mbuf = _mbuf; static char *fbuf; @@ -119,11 +119,10 @@ int flog_map_buf(int fdout) } if (!fbuf) - addr = mmap(NULL, BUF_SIZE * 2, PROT_WRITE | PROT_READ, - MAP_FILE | MAP_SHARED, fdout, fsize - 2 * BUF_SIZE); + addr = mmap(NULL, BUF_SIZE * 2, PROT_WRITE | PROT_READ, MAP_FILE | MAP_SHARED, fdout, + fsize - 2 * BUF_SIZE); else - addr = mremap(fbuf + BUF_SIZE, BUF_SIZE, - BUF_SIZE * 2, MREMAP_FIXED, fbuf); + addr = mremap(fbuf + BUF_SIZE, 
BUF_SIZE, BUF_SIZE * 2, MREMAP_FIXED, fbuf); if (addr == MAP_FAILED) { fprintf(stderr, "Unable to map a buffer: %m"); return -1; @@ -160,7 +159,7 @@ int flog_encode_msg(int fdout, unsigned int nargs, unsigned int mask, const char if (mbuf != _mbuf && flog_map_buf(fdout)) return -1; - m = (void *) mbuf; + m = (void *)mbuf; m->nargs = nargs; m->mask = mask; diff --git a/flog/src/main.c b/flog/src/main.c index c84e774781..fc5d64ebd2 100644 --- a/flog/src/main.c +++ b/flog/src/main.c @@ -33,12 +33,9 @@ int main(int argc, char *argv[]) static const char short_opts[] = "m:o:di:h"; static struct option long_opts[] = { - { "mode", required_argument, 0, 'm' }, - { "output", required_argument, 0, 'o' }, - { "decode", no_argument, 0, 'd' }, - { "iter", required_argument, 0, 'i' }, - { "help", no_argument, 0, 'h' }, - { }, + { "mode", required_argument, 0, 'm' }, { "output", required_argument, 0, 'o' }, + { "decode", no_argument, 0, 'd' }, { "iter", required_argument, 0, 'i' }, + { "help", no_argument, 0, 'h' }, {}, }; while (1) { @@ -68,8 +65,7 @@ int main(int argc, char *argv[]) } else { fdout = open(optarg, O_RDWR | O_CREAT | O_TRUNC, 0644); if (fdout < 0) { - fprintf(stderr, "Can't open %s: %s\n", - optarg, strerror(errno)); + fprintf(stderr, "Can't open %s: %s\n", optarg, strerror(errno)); exit(1); } } @@ -94,42 +90,35 @@ int main(int argc, char *argv[]) if (fdout != STDOUT_FILENO && flog_map_buf(fdout)) return 1; for (i = 0; i < niter; i++) - if (flog_encode(fdout, "Some message %s %s %c %li %d %lu\n", - str1, str2, 'c', (long)-4, (short)2, - (unsigned long)2)) + if (flog_encode(fdout, "Some message %s %s %c %li %d %lu\n", str1, str2, 'c', (long)-4, + (short)2, (unsigned long)2)) return 1; if (flog_close(fdout)) return 1; - break; - case MODE_DPRINTF: - { + break; + case MODE_DPRINTF: { for (i = 0; i < niter; i++) { - dprintf(fdout, "Some message %s %s %c %li %d %lu\n", - str1, str2, 'c', (long)-4, (short)2, + dprintf(fdout, "Some message %s %s %c %li %d %lu\n", str1, 
str2, 'c', (long)-4, (short)2, (unsigned long)2); } break; } - case MODE_FPRINTF: - { + case MODE_FPRINTF: { FILE *f = fdopen(fdout, "w"); for (i = 0; i < niter; i++) { - fprintf(f, "Some message %s %s %c %li %d %lu\n", - str1, str2, 'c', (long)-4, (short)2, + fprintf(f, "Some message %s %s %c %li %d %lu\n", str1, str2, 'c', (long)-4, (short)2, (unsigned long)2); fflush(f); } fclose(f); break; } - case MODE_SPRINTF: - { + case MODE_SPRINTF: { static char buf[4096]; for (i = 0; i < niter; i++) { - sprintf(buf, "Some message %s %s %c %li %d %lu\n", - str1, str2, 'c', (long)-4, (short)2, + sprintf(buf, "Some message %s %s %c %li %d %lu\n", str1, str2, 'c', (long)-4, (short)2, (unsigned long)2); } break; From 93d977ccacdc6dc01c0ef8c6a27a5c8896ceaaf9 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 20 Sep 2021 13:50:08 +0100 Subject: [PATCH 004/121] criu(8): add --external net option Support for external net namespaces has been introduced with commit c2b21fbf (criu: add support for external net namespaces). Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 690f61e14a..3c4c1eaf87 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -242,6 +242,12 @@ In other words, do not use it unless really needed. Tell *criu* that one end of a pair of UNIX sockets (created by *socketpair*(2)) with the given _id_ is OK to be disconnected. +*--external* **net[**__inode__**]:**__name__:: + Mark a network namespace as external and do not include it in the + checkpoint. The label 'name' can be used with *--inherit-fd* during + restore to specify a file descriptor to a preconfigured network + namespace. + *--external* **pid[**__inode__**]:**__name__:: Mark a PID namespace as external. This can be later used to restore a process into an existing PID namespace. 
The label 'name' can be From 32055ece2d1eeb7e5ea7e8345c728e117ae4b737 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Thu, 15 Apr 2021 12:04:45 -0400 Subject: [PATCH 005/121] criu: Introduce new device file plugin hooks Currently CRIU cannot handle Checkpoint Restore operations when a device file is involved in a process, however, CRIU allows flexible extensions via special plugins but still, for certain complex devices such as a GPU, the existing hooks are not sufficient. This introduces a few new hooks that will be used to support Checkpoint Restore operation with AMD GPU devices and potentially to other similar devices too. - HANDLE_DEVICE_VMA - UPDATE_VMA_MAP - RESUME_DEVICES_LATE *HANDLE_DEVICE_VMA: Hook to detect a suitable plugin to handle device file VMA with PF | IO mappings. *UPDATE_VMA_MAP: Hook to handle VMAs during a device file restore. When restoring VMAs for the device files, criu runs sys_mmap in the pie restore context but the offsets and file path within a device file may change during restore operation so it needs to be adjusted properly. *RESUME_DEVICES_LATE: Hook to do some special handling in late restore phase. During criu restore phase when a device is getting restored with the help of a plugin, some device specific operations might need to be delayed until criu finalizes the VMA placements in address space of the target process. But by the time criu finalizes this, it's too late since pie phase is over and control is back to criu master process. This hook allows an external trigger to each resuming task to check whether it has a device specific operation pending such as issuing an ioctl call. Since this is called from criu master process context, supply the pid of the target process and give a chance to each plugin registered to run device specific operation if the target pid is valid. A future patch will add consumers for these plugin hooks to support AMD GPUs.
Signed-off-by: Rajneesh Bhardwaj --- criu/cr-restore.c | 23 +++++++++++++++++++ criu/files-reg.c | 17 ++++++++++++++ criu/include/criu-plugin.h | 16 +++++++++++++ criu/plugin.c | 3 +++ criu/proc_parse.c | 47 +++++++++++++++++++++++++++++++++----- 5 files changed, 100 insertions(+), 6 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 9d2d957f85..ed62cc5a28 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -2388,6 +2388,29 @@ static int restore_root_task(struct pstree_item *init) pr_err("Unable to flush breakpoints\n"); finalize_restore(); + /* + * Some external devices such as GPUs might need a very late + * trigger to kick-off some events, memory notifiers and for + * restarting the previously restored queues during criu restore + * stage. This is needed since criu pie code may shuffle VMAs + * around so things such as registering MMU notifiers (for GPU + * mapped memory) could be done sanely once the pie code hands + * over the control to master process. + */ + for_each_pstree_item(item) { + pr_info("Run late stage hook from criu master for external devices\n"); + ret = run_plugins(RESUME_DEVICES_LATE, item->pid->real); + /* + * This may not really be an error. Only certain plugin hooks + * (if available) will return success such as amdgpu_plugin that + * validates the pid of the resuming tasks in the kernel mode. + * Most of the times, it'll be -ENOTSUP and in few cases, it + * might actually be a true error code but that would be also + * captured in the plugin so no need to print the error here. 
+ */ + if (ret < 0) + pr_debug("restore late stage hook for external plugin failed\n"); + } ret = run_scripts(ACT_PRE_RESUME); if (ret) diff --git a/criu/files-reg.c b/criu/files-reg.c index ee54d1d7d3..2b0347575c 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -2267,6 +2267,23 @@ static int open_filemap(int pid, struct vma_area *vma) BUG_ON((vma->vmfd == NULL) || !vma->e->has_fdflags); flags = vma->e->fdflags; + /* update the new device file page offsets and file paths set during restore */ + if (vma->e->status & VMA_UNSUPP) { + uint64_t new_pgoff; + char new_path[PATH_MAX]; + int ret; + + struct reg_file_info *rfi = container_of(vma->vmfd, struct reg_file_info, d); + ret = run_plugins(UPDATE_VMA_MAP, rfi->rfe->name, new_path, vma->e->start, vma->e->pgoff, &new_pgoff); + if (ret == 1) { + pr_info("New mmap %#016" PRIx64 "->%#016" PRIx64 " path %s\n", vma->e->pgoff, new_pgoff, + new_path); + vma->e->pgoff = new_pgoff; + rfi->path = xstrdup(new_path); + pr_debug("Updated rfi->path %s\n", rfi->path); + } + } + if (ctx.flags != flags || ctx.desc != vma->vmfd) { if (vma->e->status & VMA_AREA_MEMFD) ret = memfd_open(vma->vmfd, &flags); diff --git a/criu/include/criu-plugin.h b/criu/include/criu-plugin.h index 897666ecdd..0bc7a4255a 100644 --- a/criu/include/criu-plugin.h +++ b/criu/include/criu-plugin.h @@ -22,6 +22,8 @@ #include #include +#include +#include #define CRIU_PLUGIN_GEN_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c)) #define CRIU_PLUGIN_VERSION_MAJOR 0 @@ -48,6 +50,12 @@ enum { CR_PLUGIN_HOOK__DUMP_EXT_LINK = 6, + CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA = 7, + + CR_PLUGIN_HOOK__UPDATE_VMA_MAP = 8, + + CR_PLUGIN_HOOK__RESUME_DEVICES_LATE = 9, + CR_PLUGIN_HOOK__MAX }; @@ -60,6 +68,10 @@ DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_FILE, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_MOUNT, char *mountpoint, int id); DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESTORE_EXT_MOUNT, int id, char *mountpoint, char *old_root, int *is_file); 
DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__DUMP_EXT_LINK, int index, int type, char *kind); +DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, int fd, const struct stat *stat); +DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, const char *old_path, char *new_path, const uint64_t addr, + const uint64_t old_pgoff, uint64_t *new_pgoff); +DECLARE_PLUGIN_HOOK_ARGS(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, int pid); enum { CR_PLUGIN_STAGE__DUMP, @@ -130,5 +142,9 @@ typedef int(cr_plugin_restore_file_t)(int id); typedef int(cr_plugin_dump_ext_mount_t)(char *mountpoint, int id); typedef int(cr_plugin_restore_ext_mount_t)(int id, char *mountpoint, char *old_root, int *is_file); typedef int(cr_plugin_dump_ext_link_t)(int index, int type, char *kind); +typedef int(cr_plugin_handle_device_vma_t)(int fd, const struct stat *stat); +typedef int(cr_plugin_update_vma_map_t)(const char *old_path, char *new_path, const uint64_t addr, + const uint64_t old_pgoff, uint64_t *new_pgoff); +typedef int(cr_plugin_resume_devices_late_t)(int pid); #endif /* __CRIU_PLUGIN_H__ */ diff --git a/criu/plugin.c b/criu/plugin.c index 3fe03c7cd7..f3fea28566 100644 --- a/criu/plugin.c +++ b/criu/plugin.c @@ -54,6 +54,9 @@ static cr_plugin_desc_t *cr_gen_plugin_desc(void *h, char *path) __assign_hook(DUMP_EXT_MOUNT, "cr_plugin_dump_ext_mount"); __assign_hook(RESTORE_EXT_MOUNT, "cr_plugin_restore_ext_mount"); __assign_hook(DUMP_EXT_LINK, "cr_plugin_dump_ext_link"); + __assign_hook(HANDLE_DEVICE_VMA, "cr_plugin_handle_device_vma"); + __assign_hook(UPDATE_VMA_MAP, "cr_plugin_update_vma_map"); + __assign_hook(RESUME_DEVICES_LATE, "cr_plugin_resume_devices_late"); #undef __assign_hook diff --git a/criu/proc_parse.c b/criu/proc_parse.c index f3491e7817..8a9ce3a37f 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -45,6 +45,7 @@ #include "protobuf.h" #include "images/fdinfo.pb-c.h" #include "images/mnt.pb-c.h" +#include "plugin.h" #include @@ -103,6 +104,19 @@ bool is_vma_range_fmt(char 
*line) return __is_vma_range_fmt(line); } +bool handle_vma_plugin(int *fd, struct stat *stat) +{ + int ret; + + ret = run_plugins(HANDLE_DEVICE_VMA, *fd, stat); + if (ret < 0) { + pr_perror("handle_device_vma plugin failed"); + return false; + } + + return true; +} + static void __parse_vmflags(char *buf, u32 *flags, u64 *madv, int *io_pf) { char *tok; @@ -188,6 +202,7 @@ struct vma_file_info { int dev_min; unsigned long ino; struct vma_area *vma; + bool has_device_plugin; }; static inline int vfi_equal(struct vma_file_info *a, struct vma_file_info *b) @@ -577,11 +592,17 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat } else if (*vm_file_fd >= 0) { struct stat *st_buf = vma_area->vmst; - if (S_ISREG(st_buf->st_mode)) + if (S_ISREG(st_buf->st_mode)) { /* regular file mapping -- supported */; - else if (S_ISCHR(st_buf->st_mode) && (st_buf->st_rdev == DEVZERO)) + pr_debug("Found regular file mapping, OK\n"); + } else if (S_ISCHR(st_buf->st_mode) && (st_buf->st_rdev == DEVZERO)) { /* devzero mapping -- also makes sense */; - else { + pr_debug("Found devzero mapping, OK\n"); + } else if (handle_vma_plugin(vm_file_fd, st_buf)) { + pr_info("Found device file mapping, plugin is available\n"); + vfi->has_device_plugin = true; + } else { + /* non-regular mapping with no supporting plugin */ pr_err("Can't handle non-regular mapping on %d's map %" PRIx64 "\n", pid, vma_area->e->start); goto err; } @@ -646,9 +667,23 @@ static int vma_list_add(struct vma_area *vma_area, struct vm_area_list *vma_area struct vma_file_info *vfi, struct vma_file_info *prev_vfi) { if (vma_area->e->status & VMA_UNSUPP) { - pr_err("Unsupported mapping found %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start, - vma_area->e->end); - return -1; + if (vfi->has_device_plugin) { + /* Unsupported VMAs that provide special plugins for + * backup can be treated as regular VMAs and criu + * should only save their metadata in the dump files. 
+ * There can be several special backup plugins hooks + * that might run at different stages during checkpoint + * and restore. + */ + pr_debug("Device file mapping %016" PRIx64 "-%016" PRIx64 " " + "must be supported via device plugins\n", + vma_area->e->start, vma_area->e->end); + + } else { + pr_err("Unsupported mapping found %016" PRIx64 "-%016" PRIx64 "\n", vma_area->e->start, + vma_area->e->end); + return -1; + } } /* Add a guard page only if here is enough space for it */ From 3470e3d012a2706463cd77afce0a33f3581c2721 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Thu, 15 Jul 2021 01:34:08 -0400 Subject: [PATCH 006/121] criu/plugin: Implement dummy amdgpu plugin hooks This is just a placeholder dummy plugin and will be replaced by a proper plugin that implements support for AMD GPU devices. This just facilitates the initial pull request and CI build test trigger for early code review of CRIU specific changes. Future PRs will bring in more support for amdgpu_plugin to enable CRIU with AMD ROCm. 
Signed-off-by: Rajneesh Bhardwaj --- Makefile | 12 ++++++++++-- Makefile.install | 8 +++++++- plugins/amdgpu/Makefile | 13 +++++++++++++ plugins/amdgpu/dummy_plugin.c | 36 +++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 plugins/amdgpu/Makefile create mode 100644 plugins/amdgpu/dummy_plugin.c diff --git a/Makefile b/Makefile index a1e3977f8f..e361e8a8f6 100644 --- a/Makefile +++ b/Makefile @@ -294,15 +294,19 @@ clean mrproper: $(Q) $(MAKE) $(build)=crit $@ .PHONY: clean mrproper +clean-dummy_amdgpu_plugin: + $(Q) $(MAKE) -C plugins/amdgpu clean +.PHONY: clean dummy_amdgpu_plugin + clean-top: $(Q) $(MAKE) -C Documentation clean $(Q) $(MAKE) $(build)=test/compel clean $(Q) $(RM) .gitid .PHONY: clean-top -clean: clean-top +clean: clean-top clean-dummy_amdgpu_plugin -mrproper-top: clean-top +mrproper-top: clean-top clean-dummy_amdgpu_plugin $(Q) $(RM) $(CONFIG_HEADER) $(Q) $(RM) $(VERSION_HEADER) $(Q) $(RM) $(COMPEL_VERSION_HEADER) @@ -330,6 +334,10 @@ test: zdtm $(Q) $(MAKE) -C test .PHONY: test +dummy_amdgpu_plugin: + $(Q) $(MAKE) -C plugins/amdgpu all +.PHONY: dummy_amdgpu_plugin + # # Generating tar requires tag matched CRIU_VERSION. # If not found then simply use GIT's describe with diff --git a/Makefile.install b/Makefile.install index 3987bcc6fb..52e8c06dad 100644 --- a/Makefile.install +++ b/Makefile.install @@ -7,6 +7,7 @@ MANDIR ?= $(PREFIX)/share/man INCLUDEDIR ?= $(PREFIX)/include LIBEXECDIR ?= $(PREFIX)/libexec RUNDIR ?= /run +PLUGINDIR ?= /var/lib/criu # # For recent Debian/Ubuntu with multiarch support. 
@@ -26,7 +27,7 @@ endif LIBDIR ?= $(PREFIX)/lib export PREFIX BINDIR SBINDIR MANDIR RUNDIR -export LIBDIR INCLUDEDIR LIBEXECDIR +export LIBDIR INCLUDEDIR LIBEXECDIR PLUGINDIR install-man: $(Q) $(MAKE) -C Documentation install @@ -40,6 +41,10 @@ install-criu: criu $(Q) $(MAKE) $(build)=criu install .PHONY: install-criu +install-dummy_amdgpu_plugin: dummy_amdgpu_plugin + $(Q) $(MAKE) -C plugins/amdgpu install +.PHONY: install-dummy_amdgpu_plugin + install-compel: $(compel-install-targets) $(Q) $(MAKE) $(build)=compel install $(Q) $(MAKE) $(build)=compel/plugins install @@ -54,4 +59,5 @@ uninstall: $(Q) $(MAKE) $(build)=criu $@ $(Q) $(MAKE) $(build)=compel $@ $(Q) $(MAKE) $(build)=compel/plugins $@ + $(Q) $(MAKE) -C plugins/amdgpu $@ .PHONY: uninstall diff --git a/plugins/amdgpu/Makefile b/plugins/amdgpu/Makefile new file mode 100644 index 0000000000..45a9ec611f --- /dev/null +++ b/plugins/amdgpu/Makefile @@ -0,0 +1,13 @@ +all: dummy_plugin.so + +dummy_plugin.so: dummy_plugin.c + gcc -g -Werror -D _GNU_SOURCE -Wall -shared -nostartfiles dummy_plugin.c -o dummy_plugin.so -iquote ../../../criu/include -iquote ../../criu/include -fPIC + +clean: + $(Q) $(RM) dummy_plugin.so +install: + $(Q) mkdir -p $(PLUGINDIR) + $(Q) install -m 644 dummy_plugin.so $(PLUGINDIR) + +uninstall: + $(Q) $(RM) $(PLUGINDIR)/dummy_plugin.so diff --git a/plugins/amdgpu/dummy_plugin.c b/plugins/amdgpu/dummy_plugin.c new file mode 100644 index 0000000000..8722760950 --- /dev/null +++ b/plugins/amdgpu/dummy_plugin.c @@ -0,0 +1,36 @@ +#include + +#include "criu-log.h" +#include "criu-plugin.h" + +int dummy_plugin_handle_device_vma(int fd, const struct stat *stat) +{ + pr_info("dummy_plugin: Inside %s for fd = %d\n", __func__, fd); + /* let criu report failure for the unsupported mapping */ + return -ENOTSUP; +} +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__HANDLE_DEVICE_VMA, dummy_plugin_handle_device_vma) + +int dummy_plugin_resume_devices_late(int target_pid) +{ + pr_info("dummy_plugin: Inside %s for 
target pid = %d\n", __func__, target_pid); + return -ENOTSUP; +} +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, dummy_plugin_resume_devices_late) + +/* + * return 0 if no match found + * return -1 for error or -ENOTSUP. + * return 1 if vmap map must be adjusted. + */ +int dummy_plugin_update_vmamap(const char *old_path, char *new_path, const uint64_t addr, const uint64_t old_offset, + uint64_t *new_offset) +{ + uint64_t temp = 100; + + *new_offset = temp; + pr_info("dummy_plugin: old_pgoff= 0x%lu new_pgoff = 0x%lx old_path = %s new_path = %s addr = 0x%lu\n", + old_offset, *new_offset, old_path, new_path, addr); + return -ENOTSUP; +} +CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__UPDATE_VMA_MAP, dummy_plugin_update_vmamap) From 951f8bc4db77ed71a7929c8a0a6c754a5dbb8919 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 27 Apr 2021 19:08:57 -0400 Subject: [PATCH 007/121] criu/files: Don't cache fd ids for device files Restore operation fails when we perform CR operation of multiple independent proceses that have device files because criu caches the ids for the device files with same mnt_ids, inode pair. This change ensures that even in case of a cached id found for a device, a unique subid is generated and returned which is used for dumping. 
Suggested-by: Andrei Vagin Signed-off-by: Rajneesh Bhardwaj --- criu/file-ids.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/criu/file-ids.c b/criu/file-ids.c index 1b9d688882..772bd92cf0 100644 --- a/criu/file-ids.c +++ b/criu/file-ids.c @@ -77,8 +77,14 @@ int fd_id_generate_special(struct fd_parms *p, u32 *id) fi = fd_id_cache_lookup(p); if (fi) { - *id = fi->id; - return 0; + if (p->stat.st_mode & (S_IFCHR | S_IFBLK)) { + /* Don't cache the id for mapped devices */ + *id = fd_tree.subid++; + return 1; + } else { + *id = fi->id; + return 0; + } } } From de3a7112b8e44993e077ffe03e4e4aa38bd20325 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 12 Aug 2021 11:05:09 +0000 Subject: [PATCH 008/121] tcp: Skip restoring TCP state when dumping with --tcp-close Since commit e42f5e0 ("tcp: allow to specify --tcp-close on dump"), --tcp-close option can be used when checkpointing. This option skips checkpointing established socket's state (including once established but now closed socket). However, when restoring, we still try to restore closed socket's state. As a result, a non-existent protobuf image is opened. This commit skips TCP_CLOSE socket when restoring established TCP connection and removes the redundant check for TCP_LISTEN socket as TCP_LISTEN socket cannot reach this function. 
Suggested-by: Andrei Vagin Suggested-by: Radostin Stoyanov Signed-off-by: Bui Quang Minh --- criu/sk-tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/sk-tcp.c b/criu/sk-tcp.c index 0afecd2d60..96d5d13bf6 100644 --- a/criu/sk-tcp.c +++ b/criu/sk-tcp.c @@ -451,7 +451,7 @@ int restore_one_tcp(int fd, struct inet_sk_info *ii) pr_info("Restoring TCP connection\n"); - if (opts.tcp_close && ii->ie->state != TCP_LISTEN && ii->ie->state != TCP_CLOSE) { + if (opts.tcp_close) { if (shutdown(fd, SHUT_RDWR) && errno != ENOTCONN) { pr_perror("Unable to shutdown the socket id %x ino %x", ii->ie->id, ii->ie->ino); } From 40fc4f667fb614fef765489843aa038b61c31d2e Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Mon, 20 Sep 2021 20:57:03 +0700 Subject: [PATCH 009/121] zdtm: Dumping/restoring with --tcp-close on TCP_CLOSE socket Signed-off-by: Bui Quang Minh --- test/zdtm/static/Makefile | 1 + test/zdtm/static/socket-tcp-close2.c | 67 +++++++++++++++++++++++++ test/zdtm/static/socket-tcp-close2.desc | 1 + 3 files changed, 69 insertions(+) create mode 100644 test/zdtm/static/socket-tcp-close2.c create mode 100644 test/zdtm/static/socket-tcp-close2.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index c9e6589f07..b6aa621c7e 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -107,6 +107,7 @@ TST_NOFILE := \ socket-tcp4v6-closed \ socket-tcp-close0 \ socket-tcp-close1 \ + socket-tcp-close2 \ socket-dump-tcp-close \ socket-tcp-unconn \ socket-tcp6-unconn \ diff --git a/test/zdtm/static/socket-tcp-close2.c b/test/zdtm/static/socket-tcp-close2.c new file mode 100644 index 0000000000..697c99f39d --- /dev/null +++ b/test/zdtm/static/socket-tcp-close2.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check both dump and restore with tcp_close on TCP_CLOSE sockets"; +const char *test_author = "Bui Quang Minh "; + +static int port = 8880; + 
+int main(int argc, char **argv) +{ + int fd_s, fd, client; + char c; + + test_init(argc, argv); + signal(SIGPIPE, SIG_IGN); + + fd_s = tcp_init_server(AF_INET, &port); + if (fd_s < 0) { + pr_err("Server initializations failed\n"); + return 1; + } + + client = tcp_init_client(AF_INET, "localhost", port); + if (client < 0) { + pr_err("Client initializations failed\n"); + return 1; + } + + fd = tcp_accept_server(fd_s); + if (fd < 0) { + pr_err("Can't accept client\n"); + return 1; + } + close(fd_s); + + shutdown(client, SHUT_WR); + shutdown(fd, SHUT_WR); + + test_daemon(); + test_waitsig(); + + if (read(fd, &c, 1) != 0) { + fail("read server"); + return 1; + } + if (read(client, &c, 1) != 0) { + fail("read client"); + return 1; + } + if (write(client, &c, 1) != -1) { + fail("write client"); + return 1; + } + if (write(fd, &c, 1) != -1) { + fail("write server"); + return 1; + } + + pass(); + return 0; +} diff --git a/test/zdtm/static/socket-tcp-close2.desc b/test/zdtm/static/socket-tcp-close2.desc new file mode 100644 index 0000000000..c53a1f3153 --- /dev/null +++ b/test/zdtm/static/socket-tcp-close2.desc @@ -0,0 +1 @@ +{'opts': '--tcp-close', 'flags': 'reqrst '} From 4eb1a3da7d65ae798db324153d693adef13ef6ed Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Mon, 4 Oct 2021 20:38:34 +0700 Subject: [PATCH 010/121] criu(8): Add more detailed description about --tcp-close dump option The expected behavior of --tcp-close option when dumpping is to close all established tcp connections including connection that is once established but now closed. This adds an explicit description about that behavior. 
Signed-off-by: Bui Quang Minh --- Documentation/criu.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 3c4c1eaf87..4c6885fc9e 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -334,7 +334,8 @@ mount -t cgroup -o devices,freezer none devices,freezer Checkpoint established TCP connections. *--tcp-close*:: - Don't dump the state of, or block, established tcp connections. + Don't dump the state of, or block, established tcp connections + (including connections that were once established but are now closed). This is useful when tcp connections are not going to be restored. *--skip-in-flight*:: From 96acfa9af099f17a14d985ac5e9ee9769d9ebe2c Mon Sep 17 00:00:00 2001 From: Andrey Vyazovtsev Date: Thu, 26 Aug 2021 22:22:33 +0300 Subject: [PATCH 011/121] Add support for python3 in criu-coredump Resolve the following python3 portability issues: 1) Python 3 needs explicit relative import path. 2) Coredumps are binary data, not unicode strings. Use byte strings (b"" instead of "") and open files in binary format. 3) Some functions (for example: filter) return a list in python 2, but an iterator in python 3. Port code to a common subset of python 2 and python 3 using itertools. 4) Division operator / changed meaning in Python 3. Use explicit integer division (//) where appropriate.
Signed-off-by: Andrey Vyazovtsev --- coredump/criu-coredump | 4 +- coredump/criu_coredump/__init__.py | 4 +- coredump/criu_coredump/coredump.py | 68 ++++++++++++++++++------------ coredump/criu_coredump/elf.py | 2 +- 4 files changed, 45 insertions(+), 33 deletions(-) diff --git a/coredump/criu-coredump b/coredump/criu-coredump index 25c188c6bc..d3113d3724 100755 --- a/coredump/criu-coredump +++ b/coredump/criu-coredump @@ -1,4 +1,4 @@ -#!/usr/bin/env python2 +#!/usr/bin/env python import argparse import os @@ -10,7 +10,7 @@ def coredump(opts): for pid in cores: if opts['pid'] and pid != opts['pid']: continue - with open(os.path.realpath(opts['out'])+"/core."+str(pid), 'w+') as f: + with open(os.path.realpath(opts['out'])+"/core."+str(pid), 'wb+') as f: cores[pid].write(f) diff --git a/coredump/criu_coredump/__init__.py b/coredump/criu_coredump/__init__.py index 213af42ec6..7f947518e7 100644 --- a/coredump/criu_coredump/__init__.py +++ b/coredump/criu_coredump/__init__.py @@ -1,2 +1,2 @@ -from coredump import * -import elf +from .coredump import * +from . import elf diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index b37ef22913..d67c335c14 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -29,9 +29,14 @@ # 4) VMAs contents; # import io -import elf +import sys +from . 
import elf import ctypes from pycriu import images +try: + from itertools import ifilter as filter +except ImportError: + pass # Some memory-related constants PAGESIZE = 4096 @@ -88,7 +93,7 @@ def write(self, f): for note in self.notes: buf.write(note.nhdr) buf.write(note.owner) - buf.write("\0" * (8 - len(note.owner))) + buf.write(b"\0" * (8 - len(note.owner))) buf.write(note.data) offset = ctypes.sizeof(elf.Elf64_Ehdr()) @@ -136,7 +141,7 @@ def _img_open_and_strip(self, name, single=False, pid=None): path += "-" + str(pid) path += ".img" - with open(path) as f: + with open(path, 'rb') as f: img = images.load(f) if single: @@ -177,7 +182,7 @@ def write(self, coredumps_dir, pid=None): for p in self.coredumps: if pid and p != pid: continue - with open(coredumps_dir + "/" + "core." + str(p), 'w+') as f: + with open(coredumps_dir + "/" + "core." + str(p), 'wb+') as f: self.coredumps[p].write(f) def _gen_coredump(self, pid): @@ -295,7 +300,7 @@ def _gen_prpsinfo(self, pid): prpsinfo.pr_state = 3 # Don't even ask me why it is so, just borrowed from linux # source and made pr_state match. - prpsinfo.pr_sname = '.' if prpsinfo.pr_state > 5 else "RSDTZW" [ + prpsinfo.pr_sname = b'.' 
if prpsinfo.pr_state > 5 else b"RSDTZW" [ prpsinfo.pr_state] prpsinfo.pr_zomb = 1 if prpsinfo.pr_state == 4 else 0 prpsinfo.pr_nice = core["thread_core"][ @@ -307,8 +312,12 @@ def _gen_prpsinfo(self, pid): prpsinfo.pr_ppid = pstree["ppid"] prpsinfo.pr_pgrp = pstree["pgid"] prpsinfo.pr_sid = pstree["sid"] - prpsinfo.pr_fname = core["tc"]["comm"] prpsinfo.pr_psargs = self._gen_cmdline(pid) + if (sys.version_info > (3, 0)): + prpsinfo.pr_fname = core["tc"]["comm"].encode() + else: + prpsinfo.pr_fname = core["tc"]["comm"] + nhdr = elf.Elf64_Nhdr() nhdr.n_namesz = 5 @@ -317,7 +326,7 @@ def _gen_prpsinfo(self, pid): note = elf_note() note.data = prpsinfo - note.owner = "CORE" + note.owner = b"CORE" note.nhdr = nhdr return note @@ -375,7 +384,7 @@ def _gen_prstatus(self, pid, tid): note = elf_note() note.data = prstatus - note.owner = "CORE" + note.owner = b"CORE" note.nhdr = nhdr return note @@ -411,7 +420,7 @@ def _gen_fpregset(self, pid, tid): note = elf_note() note.data = fpregset - note.owner = "CORE" + note.owner = b"CORE" note.nhdr = nhdr return note @@ -452,7 +461,7 @@ def _gen_x86_xstate(self, pid, tid): note = elf_note() note.data = data - note.owner = "LINUX" + note.owner = b"LINUX" note.nhdr = nhdr return note @@ -472,7 +481,7 @@ def _gen_siginfo(self, pid, tid): note = elf_note() note.data = siginfo - note.owner = "CORE" + note.owner = b"CORE" note.nhdr = nhdr return note @@ -482,7 +491,7 @@ def _gen_auxv(self, pid): Generate NT_AUXV note for thread tid of process pid. 
""" mm = self.mms[pid] - num_auxv = len(mm["mm_saved_auxv"]) / 2 + num_auxv = len(mm["mm_saved_auxv"]) // 2 class elf_auxv(ctypes.Structure): _fields_ = [("auxv", elf.Elf64_auxv_t * num_auxv)] @@ -499,7 +508,7 @@ class elf_auxv(ctypes.Structure): note = elf_note() note.data = auxv - note.owner = "CORE" + note.owner = b"CORE" note.nhdr = nhdr return note @@ -523,10 +532,10 @@ class mmaped_file_info: continue shmid = vma["shmid"] - off = vma["pgoff"] / PAGESIZE + off = vma["pgoff"] // PAGESIZE files = self.reg_files - fname = filter(lambda x: x["id"] == shmid, files)[0]["name"] + fname = next(filter(lambda x: x["id"] == shmid, files))["name"] info = mmaped_file_info() info.start = vma["start"] @@ -569,7 +578,10 @@ class elf_files(ctypes.Structure): setattr(data, "start" + str(i), info.start) setattr(data, "end" + str(i), info.end) setattr(data, "file_ofs" + str(i), info.file_ofs) - setattr(data, "name" + str(i), info.name) + if (sys.version_info > (3, 0)): + setattr(data, "name" + str(i), info.name.encode()) + else: + setattr(data, "name" + str(i), info.name) nhdr = elf.Elf64_Nhdr() @@ -579,7 +591,7 @@ class elf_files(ctypes.Structure): note = elf_note() note.nhdr = nhdr - note.owner = "CORE" + note.owner = b"CORE" note.data = data return note @@ -644,7 +656,7 @@ def _get_page(self, pid, page_no): ppid = self.pstree[pid]["ppid"] return self._get_page(ppid, page_no) else: - with open(self._imgs_dir + "/pages-%s.img" % pages_id) as f: + with open(self._imgs_dir + "/pages-%s.img" % pages_id, 'rb') as f: f.seek(off * PAGESIZE) return f.read(PAGESIZE) @@ -657,16 +669,16 @@ def _gen_mem_chunk(self, pid, vma, size): f = None if size == 0: - return "" + return b"" if vma["status"] & status["VMA_AREA_VVAR"]: #FIXME this is what gdb does, as vvar vma # is not readable from userspace? - return "\0" * size + return b"\0" * size elif vma["status"] & status["VMA_AREA_VSYSCALL"]: #FIXME need to dump it with criu or read from # current process. 
- return "\0" * size + return b"\0" * size if vma["status"] & status["VMA_FILE_SHARED"] or \ vma["status"] & status["VMA_FILE_PRIVATE"]: @@ -675,9 +687,9 @@ def _gen_mem_chunk(self, pid, vma, size): off = vma["pgoff"] files = self.reg_files - fname = filter(lambda x: x["id"] == shmid, files)[0]["name"] + fname = next(filter(lambda x: x["id"] == shmid, files))["name"] - f = open(fname) + f = open(fname, 'rb') f.seek(off) start = vma["start"] @@ -699,10 +711,10 @@ def _gen_mem_chunk(self, pid, vma, size): # a file, and changed ones -- from pages.img. # Finally, if no page is found neither in pages.img nor # in file, hole in inserted -- a page filled with zeroes. - start_page = start / PAGESIZE - end_page = end / PAGESIZE + start_page = start // PAGESIZE + end_page = end // PAGESIZE - buf = "" + buf = b"" for page_no in range(start_page, end_page + 1): page = None @@ -720,7 +732,7 @@ def _gen_mem_chunk(self, pid, vma, size): if page is None: # Hole - page = PAGESIZE * "\0" + page = PAGESIZE * b"\0" # If it is a start or end page, we need to read # only part of it. @@ -762,7 +774,7 @@ def _gen_cmdline(self, pid): chunk = self._gen_mem_chunk(pid, vma, size) # Replace all '\0's with spaces. - return chunk.replace('\0', ' ') + return chunk.replace(b'\0', b' ') def _get_vma_dump_size(self, vma): """ diff --git a/coredump/criu_coredump/elf.py b/coredump/criu_coredump/elf.py index e65919e6b8..a670ae866b 100644 --- a/coredump/criu_coredump/elf.py +++ b/coredump/criu_coredump/elf.py @@ -368,7 +368,7 @@ class user_fpregs_struct(ctypes.Structure): # struct user_fpregs_struct # siginfo_t related constants. _SI_MAX_SIZE = 128 -_SI_PAD_SIZE = (_SI_MAX_SIZE / ctypes.sizeof(ctypes.c_int)) - 4 +_SI_PAD_SIZE = (_SI_MAX_SIZE // ctypes.sizeof(ctypes.c_int)) - 4 # /* kill(). 
*/ From db79777cf017751ec54d2534966b237927f23a15 Mon Sep 17 00:00:00 2001 From: AndreyVV-100 Date: Fri, 23 Jul 2021 12:48:03 +0300 Subject: [PATCH 012/121] Add new files for running criu-coredump via python 2 or 3 Previous commit added support for python3 in criu-coredump. For convenience, add two files (coredump-python2 and coredump-python3) that start criu-coredump with respective python version. Edit env.sh accordingly. Signed-off-by: Andrey Vyazovtsev --- coredump/coredump-python2 | 6 ++++++ coredump/coredump-python3 | 6 ++++++ coredump/{criu-coredump => coredump.py} | 1 - test/others/env.sh | 2 +- 4 files changed, 13 insertions(+), 2 deletions(-) create mode 100755 coredump/coredump-python2 create mode 100755 coredump/coredump-python3 rename coredump/{criu-coredump => coredump.py} (97%) diff --git a/coredump/coredump-python2 b/coredump/coredump-python2 new file mode 100755 index 0000000000..3a15c90a34 --- /dev/null +++ b/coredump/coredump-python2 @@ -0,0 +1,6 @@ +#!/usr/bin/env python2 + +import coredump + +if __name__ == '__main__': + coredump.main() diff --git a/coredump/coredump-python3 b/coredump/coredump-python3 new file mode 100755 index 0000000000..82ec6b8553 --- /dev/null +++ b/coredump/coredump-python3 @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 + +import coredump + +if __name__ == '__main__': + coredump.main() diff --git a/coredump/criu-coredump b/coredump/coredump.py similarity index 97% rename from coredump/criu-coredump rename to coredump/coredump.py index d3113d3724..56ba54083c 100755 --- a/coredump/criu-coredump +++ b/coredump/coredump.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python import argparse import os diff --git a/test/others/env.sh b/test/others/env.sh index b514e87d9e..e2f63eee35 100755 --- a/test/others/env.sh +++ b/test/others/env.sh @@ -13,5 +13,5 @@ fi #export PYTHON CRIT=$(readlink -f `dirname ${BASH_SOURCE[0]}`/../../crit/crit-"${PYTHON}") crit=$CRIT -CRIU_COREDUMP=$(readlink -f `dirname ${BASH_SOURCE[0]}`/../../criu-coredump/criu-coredump) 
+CRIU_COREDUMP=$(readlink -f `dirname ${BASH_SOURCE[0]}`/../../criu-coredump/coredump-"${PYTHON}") criu_coredump=$CRIU_COREDUMP From e3f910ff3e9e3326e1df5e6d8e97dbbd8dc4af94 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 21:31:12 +0100 Subject: [PATCH 013/121] coredump: remove unused import Signed-off-by: Radostin Stoyanov --- coredump/criu_coredump/__init__.py | 3 +-- scripts/flake8.cfg | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/coredump/criu_coredump/__init__.py b/coredump/criu_coredump/__init__.py index 7f947518e7..c1a437cf42 100644 --- a/coredump/criu_coredump/__init__.py +++ b/coredump/criu_coredump/__init__.py @@ -1,2 +1 @@ -from .coredump import * -from . import elf +from .coredump import coredump_generator diff --git a/scripts/flake8.cfg b/scripts/flake8.cfg index b6a5877299..bd4f95bb20 100644 --- a/scripts/flake8.cfg +++ b/scripts/flake8.cfg @@ -2,3 +2,5 @@ # E501 line too long # W504 line break after binary operator ignore = E501,W504 +# F401: imported but unused +per-file-ignores = __init__.py:F401 From 8b69c69dbec5e6b2a84083e609c3de4e15661c90 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 21:32:17 +0100 Subject: [PATCH 014/121] coredump: sort imports Signed-off-by: Radostin Stoyanov --- coredump/criu_coredump/coredump.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index d67c335c14..9b4aad5ff7 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -30,9 +30,12 @@ # import io import sys -from . import elf import ctypes + from pycriu import images +from . 
import elf + + try: from itertools import ifilter as filter except ImportError: From f0f0d7fd59774e1a1a695a49e429239ba4a5a469 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 21:34:00 +0100 Subject: [PATCH 015/121] coredump: convert indentation to spaces Signed-off-by: Radostin Stoyanov --- coredump/coredump-python2 | 2 +- coredump/coredump-python3 | 2 +- coredump/coredump.py | 58 ++++++++++++++++++++------------------- 3 files changed, 32 insertions(+), 30 deletions(-) diff --git a/coredump/coredump-python2 b/coredump/coredump-python2 index 3a15c90a34..564c05ce9f 100755 --- a/coredump/coredump-python2 +++ b/coredump/coredump-python2 @@ -3,4 +3,4 @@ import coredump if __name__ == '__main__': - coredump.main() + coredump.main() diff --git a/coredump/coredump-python3 b/coredump/coredump-python3 index 82ec6b8553..3032dbadf1 100755 --- a/coredump/coredump-python3 +++ b/coredump/coredump-python3 @@ -3,4 +3,4 @@ import coredump if __name__ == '__main__': - coredump.main() + coredump.main() diff --git a/coredump/coredump.py b/coredump/coredump.py index 56ba54083c..e63abf9515 100755 --- a/coredump/coredump.py +++ b/coredump/coredump.py @@ -3,37 +3,39 @@ import criu_coredump + def coredump(opts): - generator = criu_coredump.coredump_generator() - cores = generator(os.path.realpath(opts['in'])) - for pid in cores: - if opts['pid'] and pid != opts['pid']: - continue - with open(os.path.realpath(opts['out'])+"/core."+str(pid), 'wb+') as f: - cores[pid].write(f) + generator = criu_coredump.coredump_generator() + cores = generator(os.path.realpath(opts['in'])) + for pid in cores: + if opts['pid'] and pid != opts['pid']: + continue + with open(os.path.realpath(opts['out'])+"/core."+str(pid), 'wb+') as f: + cores[pid].write(f) def main(): - desc = 'CRIU core dump' - parser = argparse.ArgumentParser(description=desc, - formatter_class=argparse.RawTextHelpFormatter) - - parser.add_argument('-i', - '--in', - default = '.', - help = 'directory where to get images 
from') - parser.add_argument('-p', - '--pid', - type = int, - help = 'generate coredump for specific pid(all pids py default)') - parser.add_argument('-o', - '--out', - default = '.', - help = 'directory to write coredumps to') - - opts = vars(parser.parse_args()) - - coredump(opts) + desc = 'CRIU core dump' + parser = argparse.ArgumentParser(description=desc, + formatter_class=argparse.RawTextHelpFormatter) + + parser.add_argument('-i', + '--in', + default='.', + help='directory where to get images from') + parser.add_argument('-p', + '--pid', + type=int, + help='generate coredump for specific pid(all pids py default)') + parser.add_argument('-o', + '--out', + default='.', + help='directory to write coredumps to') + + opts = vars(parser.parse_args()) + + coredump(opts) + if __name__ == '__main__': - main() + main() From e108510070507deb370ef0c857eadc5a9e855a1c Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 21:36:10 +0100 Subject: [PATCH 016/121] python: replace equality with identity test PEP8 recommends for comparisons to singletons like None to always be done with 'is' or 'is not', never the equality operators. https://python.org/dev/peps/pep-0008/#programming-recommendations Signed-off-by: Radostin Stoyanov --- coredump/criu_coredump/coredump.py | 6 +++--- test/exhaustive/pipe.py | 12 ++++++------ test/exhaustive/unix.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index 9b4aad5ff7..a9a8bb27c2 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -725,10 +725,10 @@ def _gen_mem_chunk(self, pid, vma, size): # and choose appropriate. page_mem = self._get_page(pid, page_no) - if f != None: + if f is not None: page = f.read(PAGESIZE) - if page_mem != None: + if page_mem is not None: # Page from pages.img has higher priority # than one from maped file on disk. 
page = page_mem @@ -755,7 +755,7 @@ def _gen_mem_chunk(self, pid, vma, size): buf += page[n_skip:n_skip + n_read] # Don't forget to close file. - if f != None: + if f is not None: f.close() return buf diff --git a/test/exhaustive/pipe.py b/test/exhaustive/pipe.py index fdadc480c8..7f1c53d34b 100755 --- a/test/exhaustive/pipe.py +++ b/test/exhaustive/pipe.py @@ -75,7 +75,7 @@ def get_pipe_rw(pid, fd): def check_pipe_y(pid, fd, rw, inos): ino = get_pipe_ino(pid, fd) - if ino == None: + if ino is None: return 'missing ' if not inos.has_key(fd): inos[fd] = ino @@ -89,7 +89,7 @@ def check_pipe_y(pid, fd, rw, inos): def check_pipe_n(pid, fd): ino = get_pipe_ino(pid, fd) - if ino == None: + if ino is None: return None else: return 'present ' @@ -102,7 +102,7 @@ def check_pipe_end(kids, fd, comb, rw, inos): res = check_pipe_y(t_pid, fd, rw, inos) else: res = check_pipe_n(t_pid, fd) - if res != None: + if res is not None: return res + 'kid(%d)' % t_nr t_nr += 1 return None @@ -111,7 +111,7 @@ def check_pipe_end(kids, fd, comb, rw, inos): def check_pipe(kids, fds, comb, inos): for e in (0, 1): # 0 == R, 1 == W, see get_pipe_rw() res = check_pipe_end(kids, fds[e], comb[e], e, inos) - if res != None: + if res is not None: return res + 'end(%d)' % e return None @@ -124,7 +124,7 @@ def check_pipes(kids, pipes, comb): p_inos = {} for p_fds in pipes: res = check_pipe(kids, p_fds, comb[p_nr], p_inos) - if res != None: + if res is not None: return res + 'pipe(%d)' % p_nr p_nr += 1 @@ -182,7 +182,7 @@ def make_comb(comb, opts, status_pipe): if v == '0': print('\tCheck pipes') res = check_pipes(kids, pipes, comb) - if res == None: + if res is None: ex_code = 0 else: print('\tFAIL %s' % res) diff --git a/test/exhaustive/unix.py b/test/exhaustive/unix.py index 98dbbb7b0b..114bf957b9 100755 --- a/test/exhaustive/unix.py +++ b/test/exhaustive/unix.py @@ -304,7 +304,7 @@ def get_dgram_actions(self, st): for psk in st.sockets: if psk == self: continue - if psk.peer != None and psk.peer != 
self.sk_id: + if psk.peer is not None and psk.peer != self.sk_id: # Peer by someone else, can do nothing continue From 2f4e9c36f9686a101d6283b9c60e638175d6c817 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 21:38:06 +0100 Subject: [PATCH 017/121] coredump: drop unused variable Signed-off-by: Radostin Stoyanov --- coredump/criu_coredump/coredump.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index a9a8bb27c2..42c2fed19d 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -834,8 +834,6 @@ class vma_class: vmas = [] for vma in mm["vmas"]: - size = self._get_vma_dump_size(vma) - v = vma_class() v.filesz = self._get_vma_dump_size(vma) v.data = self._gen_mem_chunk(pid, vma, v.filesz) From 7833c71147357c37203328da9668f384fa08bf47 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 21:42:46 +0100 Subject: [PATCH 018/121] coredump: drop exec permission The shebang line in this file was removed in a previous commit and the file should be non-executable. 
Signed-off-by: Radostin Stoyanov --- coredump/coredump.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 coredump/coredump.py diff --git a/coredump/coredump.py b/coredump/coredump.py old mode 100755 new mode 100644 From 71c05419e343a31d2f1de3694cc05b2e71400efd Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 21:53:06 +0100 Subject: [PATCH 019/121] coredump: lint fix for block comments Block comment should start with '# ' https://www.flake8rules.com/rules/E265.html Inline comment should start with '# ' https://www.flake8rules.com/rules/E262.html Signed-off-by: Radostin Stoyanov --- coredump/criu_coredump/coredump.py | 11 +- coredump/criu_coredump/elf.py | 985 +++++++++++++++++------------ 2 files changed, 577 insertions(+), 419 deletions(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index 42c2fed19d..2748898001 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -223,7 +223,7 @@ def _gen_ehdr(self, pid, phdrs): ehdr.e_phoff = ctypes.sizeof(elf.Elf64_Ehdr()) ehdr.e_ehsize = ctypes.sizeof(elf.Elf64_Ehdr()) ehdr.e_phentsize = ctypes.sizeof(elf.Elf64_Phdr()) - #FIXME Case len(phdrs) > PN_XNUM should be handled properly. + # FIXME Case len(phdrs) > PN_XNUM should be handled properly. # See fs/binfmt_elf.c from linux kernel. ehdr.e_phnum = len(phdrs) @@ -346,7 +346,7 @@ def _gen_prstatus(self, pid, tid): ctypes.memset(ctypes.addressof(prstatus), 0, ctypes.sizeof(prstatus)) - #FIXME setting only some of the fields for now. Revisit later. + # FIXME setting only some of the fields for now. Revisit later. 
prstatus.pr_pid = tid prstatus.pr_ppid = pstree["ppid"] prstatus.pr_pgrp = pstree["pgid"] @@ -414,7 +414,6 @@ def _gen_fpregset(self, pid, tid): *regs["st_space"]) fpregset.xmm_space = (ctypes.c_uint * len(regs["xmm_space"]))( *regs["xmm_space"]) - #fpregset.padding = regs["padding"] unused nhdr = elf.Elf64_Nhdr() nhdr.n_namesz = 5 @@ -588,7 +587,7 @@ class elf_files(ctypes.Structure): nhdr = elf.Elf64_Nhdr() - nhdr.n_namesz = 5 #XXX strlen + 1 + nhdr.n_namesz = 5 # strlen + 1 nhdr.n_descsz = ctypes.sizeof(elf_files()) nhdr.n_type = elf.NT_FILE @@ -675,11 +674,11 @@ def _gen_mem_chunk(self, pid, vma, size): return b"" if vma["status"] & status["VMA_AREA_VVAR"]: - #FIXME this is what gdb does, as vvar vma + # FIXME this is what gdb does, as vvar vma # is not readable from userspace? return b"\0" * size elif vma["status"] & status["VMA_AREA_VSYSCALL"]: - #FIXME need to dump it with criu or read from + # FIXME need to dump it with criu or read from # current process. return b"\0" * size diff --git a/coredump/criu_coredump/elf.py b/coredump/criu_coredump/elf.py index a670ae866b..092b478575 100644 --- a/coredump/criu_coredump/elf.py +++ b/coredump/criu_coredump/elf.py @@ -16,16 +16,13 @@ ELFMAG0 = 0x7f # #define ELFMAG0 0x7f /* Magic number byte 0 */ EI_MAG1 = 1 # #define EI_MAG1 1 /* File identification byte 1 index */ -ELFMAG1 = ord( - 'E') # #define ELFMAG1 'E' /* Magic number byte 1 */ +ELFMAG1 = ord('E') # #define ELFMAG1 'E' /* Magic number byte 1 */ EI_MAG2 = 2 # #define EI_MAG2 2 /* File identification byte 2 index */ -ELFMAG2 = ord( - 'L') # #define ELFMAG2 'L' /* Magic number byte 2 */ +ELFMAG2 = ord('L') # #define ELFMAG2 'L' /* Magic number byte 2 */ EI_MAG3 = 3 # #define EI_MAG3 3 /* File identification byte 3 index */ -ELFMAG3 = ord( - 'F') # #define ELFMAG3 'F' /* Magic number byte 3 */ +ELFMAG3 = ord('F') # #define ELFMAG3 'F' /* Magic number byte 3 */ EI_CLASS = 4 # #define EI_CLASS 4 /* File class byte index */ @@ -48,22 +45,22 @@ class 
Elf64_Ehdr(ctypes.Structure): # typedef struct - _fields_ = [ # { + _fields_ = [ ("e_ident", - ctypes.c_ubyte * EI_NIDENT), # unsigned char e_ident[EI_NIDENT]; - ("e_type", Elf64_Half), # Elf64_Half e_type; - ("e_machine", Elf64_Half), # Elf64_Half e_machine; - ("e_version", Elf64_Word), # Elf64_Word e_version; - ("e_entry", Elf64_Addr), # Elf64_Addr e_entry; - ("e_phoff", Elf64_Off), # Elf64_Off e_phoff; - ("e_shoff", Elf64_Off), # Elf64_Off e_shoff; - ("e_flags", Elf64_Word), # Elf64_Word e_flags; - ("e_ehsize", Elf64_Half), # Elf64_Half e_ehsize; - ("e_phentsize", Elf64_Half), # Elf64_Half e_phentsize; - ("e_phnum", Elf64_Half), # Elf64_Half e_phnum; - ("e_shentsize", Elf64_Half), # Elf64_Half e_shentsize; - ("e_shnum", Elf64_Half), # Elf64_Half e_shnum; - ("e_shstrndx", Elf64_Half) # Elf64_Half e_shstrndx; + ctypes.c_ubyte * EI_NIDENT), # unsigned char e_ident[EI_NIDENT]; + ("e_type", Elf64_Half), # Elf64_Half e_type; + ("e_machine", Elf64_Half), # Elf64_Half e_machine; + ("e_version", Elf64_Word), # Elf64_Word e_version; + ("e_entry", Elf64_Addr), # Elf64_Addr e_entry; + ("e_phoff", Elf64_Off), # Elf64_Off e_phoff; + ("e_shoff", Elf64_Off), # Elf64_Off e_shoff; + ("e_flags", Elf64_Word), # Elf64_Word e_flags; + ("e_ehsize", Elf64_Half), # Elf64_Half e_ehsize; + ("e_phentsize", Elf64_Half), # Elf64_Half e_phentsize; + ("e_phnum", Elf64_Half), # Elf64_Half e_phnum; + ("e_shentsize", Elf64_Half), # Elf64_Half e_shentsize; + ("e_shnum", Elf64_Half), # Elf64_Half e_shnum; + ("e_shstrndx", Elf64_Half) # Elf64_Half e_shstrndx; ] # } Elf64_Ehdr; @@ -80,15 +77,15 @@ class Elf64_Ehdr(ctypes.Structure): # typedef struct class Elf64_Phdr(ctypes.Structure): # typedef struct - _fields_ = [ # { - ("p_type", Elf64_Word), # Elf64_Word p_type; - ("p_flags", Elf64_Word), # Elf64_Word p_flags; - ("p_offset", Elf64_Off), # Elf64_Off p_offset; - ("p_vaddr", Elf64_Addr), # Elf64_Addr p_vaddr; - ("p_paddr", Elf64_Addr), # Elf64_Addr p_paddr; - ("p_filesz", Elf64_Xword), # Elf64_Xword 
p_filesz; - ("p_memsz", Elf64_Xword), # Elf64_Xword p_memsz; - ("p_align", Elf64_Xword), # Elf64_Xword p_align; + _fields_ = [ + ("p_type", Elf64_Word), # Elf64_Word p_type; + ("p_flags", Elf64_Word), # Elf64_Word p_flags; + ("p_offset", Elf64_Off), # Elf64_Off p_offset; + ("p_vaddr", Elf64_Addr), # Elf64_Addr p_vaddr; + ("p_paddr", Elf64_Addr), # Elf64_Addr p_paddr; + ("p_filesz", Elf64_Xword), # Elf64_Xword p_filesz; + ("p_memsz", Elf64_Xword), # Elf64_Xword p_memsz; + ("p_align", Elf64_Xword), # Elf64_Xword p_align; ] # } Elf64_Phdr; @@ -100,78 +97,89 @@ class _Elf64_auxv_t_U(ctypes.Union): class Elf64_auxv_t(ctypes.Structure): # typedef struct - _fields_ = [ # { + _fields_ = [ ("a_type", - ctypes.c_uint64), # uint64_t a_type; /* Entry type */ - ("a_un", _Elf64_auxv_t_U) # union - # { - # uint64_t a_val; /* Integer value */ - # /* We use to have pointer elements added here. We cannot do that, - # though, since it does not work when using 32-bit definitions - # on 64-bit platforms and vice versa. */ - # } a_un; + ctypes.c_uint64), # uint64_t a_type; /* Entry type */ + ("a_un", _Elf64_auxv_t_U) # union + + # uint64_t a_val; /* Integer value */ + # /* We use to have pointer elements added here. We cannot do that, + # though, since it does not work when using 32-bit definitions + # on 64-bit platforms and vice versa. */ + # } a_un; ] # } Elf64_auxv_t; # Elf64_Nhdr related constants. 
-NT_PRSTATUS = 1 # #define NT_PRSTATUS 1 /* Contains copy of prstatus struct */ -NT_FPREGSET = 2 # #define NT_FPREGSET 2 /* Contains copy of fpregset struct */ -NT_PRPSINFO = 3 # #define NT_PRPSINFO 3 /* Contains copy of prpsinfo struct */ -NT_AUXV = 6 # #define NT_AUXV 6 /* Contains copy of auxv array */ -NT_SIGINFO = 0x53494749 # #define NT_SIGINFO 0x53494749 /* Contains copy of siginfo_t, -# size might increase */ -NT_FILE = 0x46494c45 # #define NT_FILE 0x46494c45 /* Contains information about mapped -# files */ -NT_X86_XSTATE = 0x202 # #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ +NT_PRSTATUS = 1 # #define NT_PRSTATUS 1 /* Contains copy of prstatus struct */ +NT_FPREGSET = 2 # #define NT_FPREGSET 2 /* Contains copy of fpregset struct */ +NT_PRPSINFO = 3 # #define NT_PRPSINFO 3 /* Contains copy of prpsinfo struct */ +NT_AUXV = 6 # #define NT_AUXV 6 /* Contains copy of auxv array */ +NT_SIGINFO = 0x53494749 # #define NT_SIGINFO 0x53494749 /* Contains copy of siginfo_t, size might increase */ +NT_FILE = 0x46494c45 # #define NT_FILE 0x46494c45 /* Contains information about mapped files */ +NT_X86_XSTATE = 0x202 # #define NT_X86_XSTATE 0x202 /* x86 extended state using xsave */ class Elf64_Nhdr(ctypes.Structure): # typedef struct - _fields_ = [ # { + _fields_ = [ ( "n_namesz", Elf64_Word - ), # Elf64_Word n_namesz; /* Length of the note's name. */ + ), # Elf64_Word n_namesz; /* Length of the note's name. */ ( "n_descsz", Elf64_Word - ), # Elf64_Word n_descsz; /* Length of the note's descriptor. */ + ), # Elf64_Word n_descsz; /* Length of the note's descriptor. */ ("n_type", Elf64_Word - ), # Elf64_Word n_type; /* Type of the note. */ + ), # Elf64_Word n_type; /* Type of the note. */ ] # } Elf64_Nhdr; # Elf64_Shdr related constants. 
-class Elf64_Shdr(ctypes.Structure): # typedef struct - _fields_ = [ # { +class Elf64_Shdr(ctypes.Structure): + _fields_ = [ ( + # Section name (string tbl index) "sh_name", Elf64_Word - ), # Elf64_Word sh_name; /* Section name (string tbl index) */ - ("sh_type", Elf64_Word - ), # Elf64_Word sh_type; /* Section type */ - ("sh_flags", Elf64_Xword - ), # Elf64_Xword sh_flags; /* Section flags */ + ), + ( + # Section type + "sh_type", Elf64_Word + ), + ( + # Section flags + "sh_flags", Elf64_Xword + ), ( + # Section virtual addr at execution "sh_addr", Elf64_Addr - ), # Elf64_Addr sh_addr; /* Section virtual addr at execution */ + ), ( + # Section file offset "sh_offset", Elf64_Off - ), # Elf64_Off sh_offset; /* Section file offset */ + ), ( + # Section size in bytes "sh_size", Elf64_Xword - ), # Elf64_Xword sh_size; /* Section size in bytes */ + ), ( + # Link to another section "sh_link", Elf64_Word - ), # Elf64_Word sh_link; /* Link to another section */ + ), ( + # Additional section information "sh_info", Elf64_Word - ), # Elf64_Word sh_info; /* Additional section information */ - ("sh_addralign", Elf64_Xword - ), # Elf64_Xword sh_addralign; /* Section alignment */ + ), ( + # Section alignment + "sh_addralign", Elf64_Xword + ), + ( + # Entry size if section holds table "sh_entsize", Elf64_Xword - ) # Elf64_Xword sh_entsize; /* Entry size if section holds table */ - ] # } Elf64_Shdr; + ) + ] # elf_prstatus related constants. @@ -179,188 +187,264 @@ class Elf64_Shdr(ctypes.Structure): # typedef struct # Signal info. class elf_siginfo(ctypes.Structure): # struct elf_siginfo - _fields_ = [ # { - ("si_signo", ctypes.c_int - ), # int si_signo; /* Signal number. */ - ("si_code", ctypes.c_int - ), # int si_code; /* Extra code. */ - ("si_errno", ctypes.c_int - ) # int si_errno; /* Errno. 
*/ - ] # }; + _fields_ = [ + ( + # Signal number + "si_signo", ctypes.c_int + ), + ( + # Extra code + "si_code", ctypes.c_int + ), + ( + # Errno + "si_errno", ctypes.c_int + ) + ] # A time value that is accurate to the nearest # microsecond but also has a range of years. class timeval(ctypes.Structure): # struct timeval - _fields_ = [ # { - ("tv_sec", - ctypes.c_long), # __time_t tv_sec; /* Seconds. */ - ("tv_usec", ctypes.c_long - ) # __suseconds_t tv_usec; /* Microseconds. */ - ] # }; + _fields_ = [ + ( + # __time_t tv_sec; /* Seconds. */ + "tv_sec", ctypes.c_long + ), + ( + # __suseconds_t tv_usec; /* Microseconds. */ + "tv_usec", ctypes.c_long + ) + ] class user_regs_struct(ctypes.Structure): # struct user_regs_struct - _fields_ = [ # { + _fields_ = [ ("r15", - ctypes.c_ulonglong), # __extension__ unsigned long long int r15; + ctypes.c_ulonglong), # __extension__ unsigned long long int r15; ("r14", - ctypes.c_ulonglong), # __extension__ unsigned long long int r14; + ctypes.c_ulonglong), # __extension__ unsigned long long int r14; ("r13", - ctypes.c_ulonglong), # __extension__ unsigned long long int r13; + ctypes.c_ulonglong), # __extension__ unsigned long long int r13; ("r12", - ctypes.c_ulonglong), # __extension__ unsigned long long int r12; + ctypes.c_ulonglong), # __extension__ unsigned long long int r12; ("rbp", - ctypes.c_ulonglong), # __extension__ unsigned long long int rbp; + ctypes.c_ulonglong), # __extension__ unsigned long long int rbp; ("rbx", - ctypes.c_ulonglong), # __extension__ unsigned long long int rbx; + ctypes.c_ulonglong), # __extension__ unsigned long long int rbx; ("r11", - ctypes.c_ulonglong), # __extension__ unsigned long long int r11; + ctypes.c_ulonglong), # __extension__ unsigned long long int r11; ("r10", - ctypes.c_ulonglong), # __extension__ unsigned long long int r10; + ctypes.c_ulonglong), # __extension__ unsigned long long int r10; ("r9", - ctypes.c_ulonglong), # __extension__ unsigned long long int r9; + ctypes.c_ulonglong), # 
__extension__ unsigned long long int r9; ("r8", - ctypes.c_ulonglong), # __extension__ unsigned long long int r8; + ctypes.c_ulonglong), # __extension__ unsigned long long int r8; ("rax", - ctypes.c_ulonglong), # __extension__ unsigned long long int rax; + ctypes.c_ulonglong), # __extension__ unsigned long long int rax; ("rcx", - ctypes.c_ulonglong), # __extension__ unsigned long long int rcx; + ctypes.c_ulonglong), # __extension__ unsigned long long int rcx; ("rdx", - ctypes.c_ulonglong), # __extension__ unsigned long long int rdx; + ctypes.c_ulonglong), # __extension__ unsigned long long int rdx; ("rsi", - ctypes.c_ulonglong), # __extension__ unsigned long long int rsi; + ctypes.c_ulonglong), # __extension__ unsigned long long int rsi; ("rdi", - ctypes.c_ulonglong), # __extension__ unsigned long long int rdi; + ctypes.c_ulonglong), # __extension__ unsigned long long int rdi; ("orig_rax", ctypes.c_ulonglong - ), # __extension__ unsigned long long int orig_rax; + ), # __extension__ unsigned long long int orig_rax; ("rip", - ctypes.c_ulonglong), # __extension__ unsigned long long int rip; + ctypes.c_ulonglong), # __extension__ unsigned long long int rip; ("cs", - ctypes.c_ulonglong), # __extension__ unsigned long long int cs; + ctypes.c_ulonglong), # __extension__ unsigned long long int cs; ("eflags", - ctypes.c_ulonglong), # __extension__ unsigned long long int eflags; + ctypes.c_ulonglong), # __extension__ unsigned long long int eflags; ("rsp", - ctypes.c_ulonglong), # __extension__ unsigned long long int rsp; + ctypes.c_ulonglong), # __extension__ unsigned long long int rsp; ("ss", - ctypes.c_ulonglong), # __extension__ unsigned long long int ss; + ctypes.c_ulonglong), # __extension__ unsigned long long int ss; ("fs_base", ctypes.c_ulonglong - ), # __extension__ unsigned long long int fs_base; + ), # __extension__ unsigned long long int fs_base; ("gs_base", ctypes.c_ulonglong - ), # __extension__ unsigned long long int gs_base; + ), # __extension__ unsigned long 
long int gs_base; ("ds", - ctypes.c_ulonglong), # __extension__ unsigned long long int ds; + ctypes.c_ulonglong), # __extension__ unsigned long long int ds; ("es", - ctypes.c_ulonglong), # __extension__ unsigned long long int es; + ctypes.c_ulonglong), # __extension__ unsigned long long int es; ("fs", - ctypes.c_ulonglong), # __extension__ unsigned long long int fs; + ctypes.c_ulonglong), # __extension__ unsigned long long int fs; ("gs", ctypes.c_ulonglong - ) # __extension__ unsigned long long int gs; - ] # }; + ) # __extension__ unsigned long long int gs; + ] -#elf_greg_t = ctypes.c_ulonglong -#ELF_NGREG = ctypes.sizeof(user_regs_struct)/ctypes.sizeof(elf_greg_t) -#elf_gregset_t = elf_greg_t*ELF_NGREG +# elf_greg_t = ctypes.c_ulonglong +# ELF_NGREG = ctypes.sizeof(user_regs_struct)/ctypes.sizeof(elf_greg_t) +# elf_gregset_t = elf_greg_t*ELF_NGREG elf_gregset_t = user_regs_struct class elf_prstatus(ctypes.Structure): # struct elf_prstatus - _fields_ = [ # { + _fields_ = [ ( + # Info associated with signal + # struct elf_siginfo pr_info; "pr_info", elf_siginfo - ), # struct elf_siginfo pr_info; /* Info associated with signal. */ - ("pr_cursig", ctypes.c_short - ), # short int pr_cursig; /* Current signal. */ + ), + ( + # Current signal + # short int pr_cursig; + "pr_cursig", ctypes.c_short + ), ( + # Set of pending signals + # unsigned long int pr_sigpend; "pr_sigpend", ctypes.c_ulong - ), # unsigned long int pr_sigpend; /* Set of pending signals. */ + ), ( + # Set of held signals + # unsigned long int pr_sighold; "pr_sighold", ctypes.c_ulong - ), # unsigned long int pr_sighold; /* Set of held signals. */ - ("pr_pid", ctypes.c_int), # __pid_t pr_pid; - ("pr_ppid", ctypes.c_int), # __pid_t pr_ppid; - ("pr_pgrp", ctypes.c_int), # __pid_t pr_pgrp; - ("pr_sid", ctypes.c_int), # __pid_t pr_sid; - ("pr_utime", - timeval), # struct timeval pr_utime; /* User time. */ - ("pr_stime", timeval - ), # struct timeval pr_stime; /* System time. 
*/ - ( + ), + ( + # Process ID + # __pid_t pr_pid; + "pr_pid", ctypes.c_int + ), + ( + # Parent process ID + # __pid_t pr_ppid; + "pr_ppid", ctypes.c_int + ), + ( + # Parent group ID + # __pid_t pr_pgrp; + "pr_pgrp", ctypes.c_int + ), + ( + # Parent session ID + # __pid_t pr_sid; + "pr_sid", ctypes.c_int + ), + ( + # User time + # struct timeval pr_utime; + "pr_utime", timeval + ), + ( + # System time + # struct timeval pr_stime; + "pr_stime", timeval + ), + ( + # Cumulative user time + # struct timeval pr_cutime; "pr_cutime", timeval - ), # struct timeval pr_cutime; /* Cumulative user time. */ + ), ( + # Cumulative system time + # struct timeval pr_cstime; "pr_cstime", timeval - ), # struct timeval pr_cstime; /* Cumulative system time. */ - ("pr_reg", elf_gregset_t - ), # elf_gregset_t pr_reg; /* GP registers. */ + ), ( + # GP registers + # elf_gregset_t pr_reg; + "pr_reg", elf_gregset_t + ), + ( + # True if math copro being used + # int pr_fpvalid; "pr_fpvalid", ctypes.c_int - ) # int pr_fpvalid; /* True if math copro being used. */ - ] # }; + ) + ] # elf_prpsinfo related constants. -ELF_PRARGSZ = 80 # #define ELF_PRARGSZ (80) /* Number of chars for args. */ +# Number of chars for args +# #define ELF_PRARGSZ (80) +ELF_PRARGSZ = 80 class elf_prpsinfo(ctypes.Structure): # struct elf_prpsinfo - _fields_ = [ # { + _fields_ = [ ( + # Numeric process state + # char pr_state; "pr_state", ctypes.c_byte - ), # char pr_state; /* Numeric process state. */ + ), ( + # Char for pr_state + # char pr_sname; "pr_sname", ctypes.c_char - ), # char pr_sname; /* Char for pr_state. */ - ("pr_zomb", ctypes.c_byte - ), # char pr_zomb; /* Zombie. */ - ("pr_nice", ctypes.c_byte - ), # char pr_nice; /* Nice val. */ - ("pr_flag", ctypes.c_ulong - ), # unsigned long int pr_flag; /* Flags. 
*/ - # #if __WORDSIZE == 32 - # unsigned short int pr_uid; - # unsigned short int pr_gid; - # #else - ("pr_uid", ctypes.c_uint), # unsigned int pr_uid; - ("pr_gid", ctypes.c_uint), # unsigned int pr_gid; - # #endif - ("pr_pid", ctypes.c_int), # int pr_pid, pr_ppid, pr_pgrp, pr_sid; + ), + ( + # Zombie + # char pr_zomb; + "pr_zomb", ctypes.c_byte + ), + ( + # Nice value + # char pr_nice; + "pr_nice", ctypes.c_byte + ), + ( + # Flags + # unsigned long int pr_flag; + "pr_flag", ctypes.c_ulong + ), + ( + # User ID + # unsigned int pr_uid; + "pr_uid", ctypes.c_uint + ), + ( + # Group ID + # unsigned int pr_gid; + "pr_gid", ctypes.c_uint + ), + ("pr_pid", ctypes.c_int), ("pr_ppid", ctypes.c_int), ("pr_pgrp", ctypes.c_int), ("pr_sid", ctypes.c_int), - # /* Lots missing */ + # /* Lots missing */ ( + # Filename of executable + # char pr_fname[16]; "pr_fname", ctypes.c_char * 16 - ), # char pr_fname[16]; /* Filename of executable. */ + ), ( + # Initial part of arg list + # char pr_psargs[ELF_PRARGSZ]; "pr_psargs", ctypes.c_char * ELF_PRARGSZ - ) # char pr_psargs[ELF_PRARGSZ]; /* Initial part of arg list. 
*/ - ] # }; + ) + ] class user_fpregs_struct(ctypes.Structure): # struct user_fpregs_struct - _fields_ = [ # { - ("cwd", ctypes.c_ushort), # unsigned short int cwd; - ("swd", ctypes.c_ushort), # unsigned short int swd; - ("ftw", ctypes.c_ushort), # unsigned short int ftw; - ("fop", ctypes.c_ushort), # unsigned short int fop; - ("rip", - ctypes.c_ulonglong), # __extension__ unsigned long long int rip; - ("rdp", - ctypes.c_ulonglong), # __extension__ unsigned long long int rdp; - ("mxcsr", ctypes.c_uint), # unsigned int mxcsr; - ("mxcr_mask", ctypes.c_uint), # unsigned int mxcr_mask; - ( - "st_space", ctypes.c_uint * 32 - ), # unsigned int st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ - ( - "xmm_space", ctypes.c_uint * 64 - ), # unsigned int xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ - ("padding", - ctypes.c_uint * 24), # unsigned int padding[24]; - ] # }; + _fields_ = [ + # unsigned short int cwd; + ("cwd", ctypes.c_ushort), + # unsigned short int swd; + ("swd", ctypes.c_ushort), + # unsigned short int ftw; + ("ftw", ctypes.c_ushort), + # unsigned short int fop; + ("fop", ctypes.c_ushort), + # __extension__ unsigned long long int rip; + ("rip", ctypes.c_ulonglong), + # __extension__ unsigned long long int rdp; + ("rdp", ctypes.c_ulonglong), + # unsigned int mxcsr; + ("mxcsr", ctypes.c_uint), + # unsigned int mxcr_mask; + ("mxcr_mask", ctypes.c_uint), + # unsigned int st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + ("st_space", ctypes.c_uint * 32), + # unsigned int xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */ + ("xmm_space", ctypes.c_uint * 64), + # unsigned int padding[24]; + ("padding", ctypes.c_uint * 24), + ] elf_fpregset_t = user_fpregs_struct @@ -371,315 +455,390 @@ class user_fpregs_struct(ctypes.Structure): # struct user_fpregs_struct _SI_PAD_SIZE = (_SI_MAX_SIZE // ctypes.sizeof(ctypes.c_int)) - 4 -# /* kill(). 
*/ -class _siginfo_t_U_kill(ctypes.Structure): # struct - _fields_ = [ # { - ("si_pid", ctypes.c_int - ), # __pid_t si_pid; /* Sending process ID. */ +# /* kill(). */ +class _siginfo_t_U_kill(ctypes.Structure): # struct + _fields_ = [ + ( + # Sending process ID + # __pid_t si_pid; + "si_pid", ctypes.c_int + ), ( + # Real user ID of sending process + # __uid_t si_uid; "si_uid", ctypes.c_uint - ) # __uid_t si_uid; /* Real user ID of sending process. */ - ] # } _kill; + ) + ] # } _kill; # Type for data associated with a signal. class sigval_t(ctypes.Union): # typedef union sigval - _fields_ = [ # { - ("sival_int", ctypes.c_int), # int sival_int; - ("sical_ptr", ctypes.c_void_p), # void *sival_ptr; - ] # } sigval_t; - - - # /* POSIX.1b timers. */ -class _siginfo_t_U_timer(ctypes.Structure): # struct - _fields_ = [ # { - ("si_tid", - ctypes.c_int), # int si_tid; /* Timer ID. */ - ("si_overrun", ctypes.c_int - ), # int si_overrun; /* Overrun count. */ - ("si_sigval", sigval_t - ) # sigval_t si_sigval; /* Signal value. */ - ] # } _timer; - - - # /* POSIX.1b signals. */ -class _siginfo_t_U_rt(ctypes.Structure): # struct - _fields_ = [ # { - ("si_pid", ctypes.c_int - ), # __pid_t si_pid; /* Sending process ID. */ + _fields_ = [ + ("sival_int", ctypes.c_int), # int sival_int; + ("sical_ptr", ctypes.c_void_p), # void *sival_ptr; + ] # } sigval_t; + + +# /* POSIX.1b timers. */ +class _siginfo_t_U_timer(ctypes.Structure): # struct + _fields_ = [ + ( + # Timer ID + # int si_tid; + "si_tid", ctypes.c_int + ), + ( + # Overrun count + # int si_overrun; + "si_overrun", ctypes.c_int + ), ( + # Signal value + # sigval_t si_sigval; + "si_sigval", sigval_t + ) + ] # } _timer; + + +# /* POSIX.1b signals. */ +class _siginfo_t_U_rt(ctypes.Structure): # struct + _fields_ = [ + ( + # Sending process ID + # __pid_t si_pid; + "si_pid", ctypes.c_int + ), + ( + # Real user ID of sending process + # __uid_t si_uid; "si_uid", ctypes.c_uint - ), # __uid_t si_uid; /* Real user ID of sending process. 
*/ - ("si_sigval", sigval_t - ) # sigval_t si_sigval; /* Signal value. */ - ] # } _rt; + ), + ( + # Signal value + # sigval_t si_sigval; + "si_sigval", sigval_t + ) + ] # } _rt; - # /* SIGCHLD. */ -class _siginfo_t_U_sigchld(ctypes.Structure): # struct - _fields_ = [ # { - ("si_pid", - ctypes.c_int), # __pid_t si_pid; /* Which child. */ +# /* SIGCHLD. */ +class _siginfo_t_U_sigchld(ctypes.Structure): # struct + _fields_ = [ + ( + # Which child + # __pid_t si_pid; + "si_pid", ctypes.c_int + ), ( + # Real user ID of sending process + # __uid_t si_uid; "si_uid", ctypes.c_uint - ), # __uid_t si_uid; /* Real user ID of sending process. */ - ("si_status", ctypes.c_int - ), # int si_status; /* Exit value or signal. */ - ("si_utime", ctypes.c_long), # __sigchld_clock_t si_utime; - ("si_stime", ctypes.c_long) # __sigchld_clock_t si_stime; - ] # } _sigchld; + ), + ( + # Exit value or signal + # int si_status; + "si_status", ctypes.c_int + ), + ( + # __sigchld_clock_t si_utime; + "si_utime", ctypes.c_long + ), + ( + # __sigchld_clock_t si_stime; + "si_stime", ctypes.c_long + ) + ] # } _sigchld; - # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ -class _siginfo_t_U_sigfault(ctypes.Structure): # struct - _fields_ = [ # { - ("si_addr", ctypes.c_void_p - ), # void *si_addr; /* Faulting insn/memory ref. */ +# /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ +class _siginfo_t_U_sigfault(ctypes.Structure): # struct + _fields_ = [ ( + # Faulting insn/memory ref + # void *si_addr; + "si_addr", ctypes.c_void_p + ), + ( + # Valid LSB of the reported address + # short int si_addr_lsb; "si_addr_lsb", ctypes.c_short - ) # short int si_addr_lsb; /* Valid LSB of the reported address. */ - ] # } _sigfault; + ) + ] # } _sigfault; - # /* SIGPOLL. */ -class _siginfo_t_U_sigpoll(ctypes.Structure): # struct - _fields_ = [ # { - ("si_band", ctypes.c_long - ), # long int si_band; /* Band event for SIGPOLL. */ - ("si_fd", ctypes.c_int) # int si_fd; - ] # } _sigpoll; +# /* SIGPOLL. 
*/ +class _siginfo_t_U_sigpoll(ctypes.Structure): # struct + _fields_ = [ + ( + # Band event for SIGPOLL + # long int si_band; + "si_band", ctypes.c_long + ), + ( + # int si_fd; + "si_fd", ctypes.c_int + ) + ] # } _sigpoll; - # /* SIGSYS. */ -class _siginfo_t_U_sigsys(ctypes.Structure): # struct - _fields_ = [ # { +# /* SIGSYS. */ +class _siginfo_t_U_sigsys(ctypes.Structure): # struct + _fields_ = [ ("_call_addr", ctypes.c_void_p - ), # void *_call_addr; /* Calling user insn. */ + ), # void *_call_addr; /* Calling user insn. */ ( "_syscall", ctypes.c_int - ), # int _syscall; /* Triggering system call number. */ + ), # int _syscall; /* Triggering system call number. */ ("_arch", ctypes.c_uint - ) # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ - ] # } _sigsys; + ) # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ + ] # } _sigsys; -class _siginfo_t_U(ctypes.Union): # union - _fields_ = [ # { +class _siginfo_t_U(ctypes.Union): # union + _fields_ = [ ("_pad", - ctypes.c_int * _SI_PAD_SIZE), # int _pad[__SI_PAD_SIZE]; - # - # /* kill(). */ - ("_kill", _siginfo_t_U_kill), # struct - # { - # __pid_t si_pid; /* Sending process ID. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # } _kill; - # - # /* POSIX.1b timers. */ - ("_timer", _siginfo_t_U_timer), # struct - # { - # int si_tid; /* Timer ID. */ - # int si_overrun; /* Overrun count. */ - # sigval_t si_sigval; /* Signal value. */ - # } _timer; - # - # /* POSIX.1b signals. */ - ("_rt", _siginfo_t_U_rt), # struct - # { - # __pid_t si_pid; /* Sending process ID. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # sigval_t si_sigval; /* Signal value. */ - # } _rt; - # - # /* SIGCHLD. */ - ("_sigchld", _siginfo_t_U_sigchld), # struct - # { - # __pid_t si_pid; /* Which child. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # int si_status; /* Exit value or signal. 
*/ - # __sigchld_clock_t si_utime; - # __sigchld_clock_t si_stime; - # } _sigchld; - # - # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ - ("_sigfault", _siginfo_t_U_sigfault), # struct - # { - # void *si_addr; /* Faulting insn/memory ref. */ - # short int si_addr_lsb; /* Valid LSB of the reported address. */ - # } _sigfault; - # - # /* SIGPOLL. */ - ("_sigpoll", _siginfo_t_U_sigpoll), # struct - # { - # long int si_band; /* Band event for SIGPOLL. */ - # int si_fd; - # } _sigpoll; - # - # /* SIGSYS. */ - ("_sigsys", _siginfo_t_U_sigpoll) # struct - # { - # void *_call_addr; /* Calling user insn. */ - # int _syscall; /* Triggering system call number. */ - # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ - # } _sigsys; - ] # } _sifields; + ctypes.c_int * _SI_PAD_SIZE), # int _pad[__SI_PAD_SIZE]; + + # /* kill(). */ + ("_kill", _siginfo_t_U_kill), # struct + + # __pid_t si_pid; /* Sending process ID. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # } _kill; + + # /* POSIX.1b timers. */ + ("_timer", _siginfo_t_U_timer), # struct + + # int si_tid; /* Timer ID. */ + # int si_overrun; /* Overrun count. */ + # sigval_t si_sigval; /* Signal value. */ + # } _timer; + + # /* POSIX.1b signals. */ + ("_rt", _siginfo_t_U_rt), # struct + + # __pid_t si_pid; /* Sending process ID. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # sigval_t si_sigval; /* Signal value. */ + # } _rt; + + # /* SIGCHLD. */ + ("_sigchld", _siginfo_t_U_sigchld), # struct + + # __pid_t si_pid; /* Which child. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # int si_status; /* Exit value or signal. */ + # __sigchld_clock_t si_utime; + # __sigchld_clock_t si_stime; + # } _sigchld; + + # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ + ("_sigfault", _siginfo_t_U_sigfault), # struct + + # void *si_addr; /* Faulting insn/memory ref. */ + # short int si_addr_lsb; /* Valid LSB of the reported address. */ + # } _sigfault; + + # /* SIGPOLL. 
*/ + ("_sigpoll", _siginfo_t_U_sigpoll), # struct + + # long int si_band; /* Band event for SIGPOLL. */ + # int si_fd; + # } _sigpoll; + + # /* SIGSYS. */ + ("_sigsys", _siginfo_t_U_sigpoll) # struct + + # void *_call_addr; /* Calling user insn. */ + # int _syscall; /* Triggering system call number. */ + # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ + # } _sigsys; + ] # } _sifields; class siginfo_t(ctypes.Structure): # typedef struct - _fields_ = [ # { - ("si_signo", ctypes.c_int - ), # int si_signo; /* Signal number. */ + _fields_ = [ + ( + # Signal number + # int si_signo; + "si_signo", ctypes.c_int + ), ( + # If non-zero, an errno value associated with + # int si_errno; "si_errno", ctypes.c_int - ), # int si_errno; /* If non-zero, an errno value associated with - # this signal, as defined in . */ - ("si_code", ctypes.c_int - ), # int si_code; /* Signal code. */ - # - ("_sifields", _siginfo_t_U) # union - # { - # int _pad[__SI_PAD_SIZE]; + ), + ( + # Signal code - this signal, as defined in + # int si_code; + "si_code", ctypes.c_int + ), + ( + # Union + "_sifields", _siginfo_t_U + ) + + # int _pad[__SI_PAD_SIZE]; # - # /* kill(). */ - # struct - # { - # __pid_t si_pid; /* Sending process ID. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # } _kill; + # /* kill(). */ + # struct + + # __pid_t si_pid; /* Sending process ID. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # } _kill; # - # /* POSIX.1b timers. */ - # struct - # { - # int si_tid; /* Timer ID. */ - # int si_overrun; /* Overrun count. */ - # sigval_t si_sigval; /* Signal value. */ - # } _timer; + # /* POSIX.1b timers. */ + # struct + + # int si_tid; /* Timer ID. */ + # int si_overrun; /* Overrun count. */ + # sigval_t si_sigval; /* Signal value. */ + # } _timer; # - # /* POSIX.1b signals. */ - # struct - # { - # __pid_t si_pid; /* Sending process ID. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # sigval_t si_sigval; /* Signal value. 
*/ - # } _rt; + # /* POSIX.1b signals. */ + # struct + + # __pid_t si_pid; /* Sending process ID. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # sigval_t si_sigval; /* Signal value. */ + # } _rt; # - # /* SIGCHLD. */ - # struct - # { - # __pid_t si_pid; /* Which child. */ - # __uid_t si_uid; /* Real user ID of sending process. */ - # int si_status; /* Exit value or signal. */ - # __sigchld_clock_t si_utime; - # __sigchld_clock_t si_stime; - # } _sigchld; + # /* SIGCHLD. */ + # struct + + # __pid_t si_pid; /* Which child. */ + # __uid_t si_uid; /* Real user ID of sending process. */ + # int si_status; /* Exit value or signal. */ + # __sigchld_clock_t si_utime; + # __sigchld_clock_t si_stime; + # } _sigchld; # - # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ - # struct - # { - # void *si_addr; /* Faulting insn/memory ref. */ - # short int si_addr_lsb; /* Valid LSB of the reported address. */ - # } _sigfault; + # /* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */ + # struct + + # void *si_addr; /* Faulting insn/memory ref. */ + # short int si_addr_lsb; /* Valid LSB of the reported address. */ + # } _sigfault; # - # /* SIGPOLL. */ - # struct - # { - # long int si_band; /* Band event for SIGPOLL. */ - # int si_fd; - # } _sigpoll; + # /* SIGPOLL. */ + # struct + + # long int si_band; /* Band event for SIGPOLL. */ + # int si_fd; + # } _sigpoll; # - # /* SIGSYS. */ - # struct - # { - # void *_call_addr; /* Calling user insn. */ - # int _syscall; /* Triggering system call number. */ - # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ - # } _sigsys; - # } _sifields; - ] # } siginfo_t __SI_ALIGNMENT; + # /* SIGSYS. */ + # struct + + # void *_call_addr; /* Calling user insn. */ + # int _syscall; /* Triggering system call number. */ + # unsigned int _arch; /* AUDIT_ARCH_* of syscall. */ + # } _sigsys; + # } _sifields; + ] # } siginfo_t __SI_ALIGNMENT; # xsave related. 
class ymmh_struct(ctypes.Structure): # struct ymmh_struct { - _fields_ = [("ymmh_space", 64 * ctypes.c_uint - ) # u32 ymmh_space[64]; - ] # } __packed; + _fields_ = [ + # u32 ymmh_space[64]; + ("ymmh_space", 64 * ctypes.c_uint) + ] # } __packed; class xsave_hdr_struct(ctypes.Structure): # struct xsave_hdr_struct { _fields_ = [ - ("xstate_bv", ctypes.c_ulonglong - ), # u64 xstate_bv; - ("reserved1", ctypes.c_ulonglong * - 2), # u64 reserved1[2]; - ("reserved2", ctypes.c_ulonglong * 5 - ) # u64 reserved2[5]; + # u64 xstate_bv; + ("xstate_bv", ctypes.c_ulonglong), + # u64 reserved1[2]; + ("reserved1", ctypes.c_ulonglong * 2), + # u64 reserved2[5]; + ("reserved2", ctypes.c_ulonglong * 5) ] # } __packed; class i387_fxsave_struct(ctypes.Structure): # struct i387_fxsave_struct { _fields_ = [ ( + # Control Word + # u16 cwd; "cwd", ctypes.c_ushort - ), # u16 cwd; /* Control Word */ + ), ( + # Status Word + # u16 swd; "swd", ctypes.c_ushort - ), # u16 swd; /* Status Word */ + ), ( + # Tag Word + # u16 twd; "twd", ctypes.c_ushort - ), # u16 twd; /* Tag Word */ + ), ( + # Last Instruction Opcode + # u16 fop; "fop", ctypes.c_ushort - ), # u16 fop; /* Last Instruction Opcode */ - # union { - # struct { + ), + # union { + # struct { ( + # Instruction Pointer + # u64 rip; "rip", ctypes.c_ulonglong - ), # u64 rip; /* Instruction Pointer */ + ), ( + # Data Pointer + # u64 rdp; "rdp", ctypes.c_ulonglong - ), # u64 rdp; /* Data Pointer */ - # }; - # struct { - # u32 fip; /* FPU IP Offset */ - # u32 fcs; /* FPU IP Selector */ - # u32 foo; /* FPU Operand Offset */ - # u32 fos; /* FPU Operand Selector */ - # }; - # }; + ), + + # struct { + # u32 fip; /* FPU IP Offset */ + # u32 fcs; /* FPU IP Selector */ + # u32 foo; /* FPU Operand Offset */ + # u32 fos; /* FPU Operand Selector */ + ( + # MXCSR Register State + # u32 mxcsr; "mxcsr", ctypes.c_uint - ), # u32 mxcsr; /* MXCSR Register State */ + ), ( + # MXCSR Mask + # u32 mxcsr_mask; "mxcsr_mask", ctypes.c_uint - ), # u32 mxcsr_mask; /* 
MXCSR Mask */ - # - # /* 8*16 bytes for each FP-reg = 128 bytes */ - ("st_space", ctypes.c_uint * 32 - ), # u32 st_space[32]; - # - # /* 16*16 bytes for each XMM-reg = 256 bytes */ - ("xmm_space", ctypes.c_uint * 64 - ), # u32 xmm_space[64]; - # - ("padding", ctypes.c_uint * 12 - ), # u32 padding[12]; - # - # union { - ("padding1", ctypes.c_uint * 12 - ) # u32 padding1[12]; - # u32 sw_reserved[12]; - # }; - # + ), + # 8*16 bytes for each FP-reg = 128 bytes + ( + # u32 st_space[32]; + "st_space", ctypes.c_uint * 32 + ), + # 16*16 bytes for each XMM-reg = 256 bytes + ( + # u32 xmm_space[64]; + "xmm_space", ctypes.c_uint * 64 + ), + ( + # u32 padding[12]; + "padding", ctypes.c_uint * 12 + ), + # union { + ( + # u32 padding1[12]; + "padding1", ctypes.c_uint * 12 + ) + # u32 sw_reserved[12]; ] # } __aligned(16); class elf_xsave_struct(ctypes.Structure): # struct xsave_struct { _fields_ = [ - ("i387", - i387_fxsave_struct), # struct i387_fxsave_struct i387; - ("xsave_hdr", xsave_hdr_struct - ), # struct xsave_hdr_struct xsave_hdr; - ("ymmh", ymmh_struct) # struct ymmh_struct ymmh; + # struct i387_fxsave_struct i387; + ("i387", i387_fxsave_struct), + # struct xsave_hdr_struct xsave_hdr; + ("xsave_hdr", xsave_hdr_struct), + # struct ymmh_struct ymmh; + ("ymmh", ymmh_struct) ] # } __aligned(FP_MIN_ALIGN_BYTES) __packed; From 4abfd7ea70d7c1d8f1ee4b87dcff0e121823b523 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 22:37:26 +0100 Subject: [PATCH 020/121] coredump: fix missing whitespace around operator Missing whitespace around arithmetic operator https://www.flake8rules.com/rules/E226.html Signed-off-by: Radostin Stoyanov --- coredump/coredump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coredump/coredump.py b/coredump/coredump.py index e63abf9515..5e63d21385 100644 --- a/coredump/coredump.py +++ b/coredump/coredump.py @@ -10,7 +10,7 @@ def coredump(opts): for pid in cores: if opts['pid'] and pid != opts['pid']: continue - with 
open(os.path.realpath(opts['out'])+"/core."+str(pid), 'wb+') as f: + with open(os.path.realpath(opts['out']) + "/core." + str(pid), 'wb+') as f: cores[pid].write(f) From c53231fe5e81412d27ada8bdd4a0dd78f19c3b21 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 22:39:50 +0100 Subject: [PATCH 021/121] coredump: fix too many blank lines https://www.flake8rules.com/rules/E303.html Signed-off-by: Radostin Stoyanov --- coredump/criu_coredump/coredump.py | 1 - 1 file changed, 1 deletion(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index 2748898001..a232132fd6 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -321,7 +321,6 @@ def _gen_prpsinfo(self, pid): else: prpsinfo.pr_fname = core["tc"]["comm"] - nhdr = elf.Elf64_Nhdr() nhdr.n_namesz = 5 nhdr.n_descsz = ctypes.sizeof(elf.elf_prpsinfo()) From 718d0039623a7371130cdb568b39ee9f968d31c4 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 22:42:19 +0100 Subject: [PATCH 022/121] coredump: fix comparison to true Comparison to true should be 'if cond is true:' or 'if cond:' https://www.flake8rules.com/rules/E712.html Signed-off-by: Radostin Stoyanov --- coredump/criu_coredump/coredump.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index a232132fd6..41d102db78 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -653,7 +653,7 @@ def _get_page(self, pid, page_no): if not found: continue - if "in_parent" in m and m["in_parent"] == True: + if "in_parent" in m and m["in_parent"]: ppid = self.pstree[pid]["ppid"] return self._get_page(ppid, page_no) else: From bf85f98b0cbdab968d539dbc7c363b596eb0e95e Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 22:47:59 +0100 Subject: [PATCH 023/121] coredump: lint fix visually indented line Continuation line 
over-indented for visual indent https://www.flake8rules.com/rules/E127.html Visually indented line with same indent as next logical line https://www.flake8rules.com/rules/E129.html Signed-off-by: Radostin Stoyanov --- coredump/criu_coredump/coredump.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/coredump/criu_coredump/coredump.py b/coredump/criu_coredump/coredump.py index 41d102db78..881c40b0a9 100644 --- a/coredump/criu_coredump/coredump.py +++ b/coredump/criu_coredump/coredump.py @@ -781,19 +781,19 @@ def _get_vma_dump_size(self, vma): """ Calculate amount of vma to put into core dump. """ - if vma["status"] & status["VMA_AREA_VVAR"] or \ - vma["status"] & status["VMA_AREA_VSYSCALL"] or \ - vma["status"] & status["VMA_AREA_VDSO"]: + if (vma["status"] & status["VMA_AREA_VVAR"] or + vma["status"] & status["VMA_AREA_VSYSCALL"] or + vma["status"] & status["VMA_AREA_VDSO"]): size = vma["end"] - vma["start"] elif vma["prot"] == 0: size = 0 - elif vma["prot"] & prot["PROT_READ"] and \ - vma["prot"] & prot["PROT_EXEC"]: + elif (vma["prot"] & prot["PROT_READ"] and + vma["prot"] & prot["PROT_EXEC"]): size = PAGESIZE - elif vma["status"] & status["VMA_ANON_SHARED"] or \ - vma["status"] & status["VMA_FILE_SHARED"] or \ - vma["status"] & status["VMA_ANON_PRIVATE"] or \ - vma["status"] & status["VMA_FILE_PRIVATE"]: + elif (vma["status"] & status["VMA_ANON_SHARED"] or + vma["status"] & status["VMA_FILE_SHARED"] or + vma["status"] & status["VMA_ANON_PRIVATE"] or + vma["status"] & status["VMA_FILE_PRIVATE"]): size = vma["end"] - vma["start"] else: size = 0 From e56c944db74f9c3b033274271f7a82fef6fed815 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 6 Sep 2021 00:20:58 +0100 Subject: [PATCH 024/121] test/coredump: fix shellcheck errors ShellCheck reports the following problems: SC2086: Double quote to prevent globbing and word splitting. SC2035: Use ./*glob* or -- *glob* so names with dashes won't become options. 
SC1091: Not following: ../env.sh was not specified as input (see shellcheck -x). Signed-off-by: Radostin Stoyanov --- test/others/criu-coredump/test.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/others/criu-coredump/test.sh b/test/others/criu-coredump/test.sh index 62d9f7edc4..dd774e298b 100755 --- a/test/others/criu-coredump/test.sh +++ b/test/others/criu-coredump/test.sh @@ -1,4 +1,8 @@ -source ../env.sh +#!/bin/bash + +set -x +# shellcheck disable=SC1091 +source ../env.sh || exit 1 function gen_imgs { PID=$(../loop) @@ -9,7 +13,7 @@ function gen_imgs { exit 1 fi - images_list=$(ls -1 *.img) + images_list=$(ls -1 ./*.img) if [ -z "$images_list" ]; then echo "Failed to generate images" exit 1 @@ -32,7 +36,7 @@ function run_test { for x in $cores do echo "=== try readelf $x" - readelf -a $x || exit $? + readelf -a "$x" || exit $? echo "=== done" done From ff7eea013205a1055b6f0e2300db46f6768c6d28 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 22:55:24 +0100 Subject: [PATCH 025/121] make: enable lint for coredump Signed-off-by: Radostin Stoyanov --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index e361e8a8f6..73b647d6d7 100644 --- a/Makefile +++ b/Makefile @@ -427,11 +427,13 @@ lint: flake8 --config=scripts/flake8.cfg test/others/rpc/config_file.py flake8 --config=scripts/flake8.cfg lib/py/images/pb2dict.py flake8 --config=scripts/flake8.cfg scripts/criu-ns + flake8 --config=scripts/flake8.cfg coredump/ shellcheck --version shellcheck scripts/*.sh shellcheck scripts/ci/*.sh scripts/ci/apt-install shellcheck test/others/crit/*.sh shellcheck test/others/libcriu/*.sh + shellcheck test/others/crit/*.sh test/others/criu-coredump/*.sh shellcheck test/others/config-file/*.sh # Do not append \n to pr_perror or fail ! 
git --no-pager grep -E '^\s*\<(pr_perror|fail)\>.*\\n"' From 4a67277e8f67e988aa14106cb69a14d8e3e65815 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 5 Sep 2021 23:06:56 +0100 Subject: [PATCH 026/121] ci: enable coredump tests Signed-off-by: Radostin Stoyanov --- scripts/ci/run-ci-tests.sh | 3 +++ test/others/env.sh | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh index 7c66e68023..51aa7db9ea 100755 --- a/scripts/ci/run-ci-tests.sh +++ b/scripts/ci/run-ci-tests.sh @@ -259,6 +259,9 @@ ip net add test # more crit testing make -C test/others/crit run +# coredump testing +make -C test/others/criu-coredump run + # libcriu testing make -C test/others/libcriu run diff --git a/test/others/env.sh b/test/others/env.sh index e2f63eee35..45066f760b 100755 --- a/test/others/env.sh +++ b/test/others/env.sh @@ -13,5 +13,5 @@ fi #export PYTHON CRIT=$(readlink -f `dirname ${BASH_SOURCE[0]}`/../../crit/crit-"${PYTHON}") crit=$CRIT -CRIU_COREDUMP=$(readlink -f `dirname ${BASH_SOURCE[0]}`/../../criu-coredump/coredump-"${PYTHON}") +CRIU_COREDUMP=$(readlink -f `dirname ${BASH_SOURCE[0]}`/../../coredump/coredump-"${PYTHON}") criu_coredump=$CRIU_COREDUMP From 8ee0ce3b87d8be2a294881e47e076e8095ea834b Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 21 Oct 2021 11:47:01 +0300 Subject: [PATCH 027/121] pie/restorer: remove excess hash printf specifier We use here "%#x" printf specifier in pie code, but sbuf_printf core pie printing function knows nothing about '#' specifier. More over simple "%x" in pie does same as "%#x" in stdio printf, see print_hex* functions add "0x" before hex numbers. We've got this error on vzt-cpt runs in Virtuozzo: (04.750271) pie: 158: Adjust id Error: Unknown printf format %# So to fix it we can just remove '#'. 
Fixes: ecd432fe2 ("timerfd: Implement c/r procedure") Signed-off-by: Pavel Tikhomirov --- criu/pie/restorer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 4304691bbe..0051452e47 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1013,7 +1013,7 @@ static int timerfd_arm(struct task_restore_args *args) t->val.it_value.tv_sec += (time_t)ts.tv_sec; - pr_debug("Adjust id %#x it_value(%llu, %llu) -> it_value(%llu, %llu)\n", t->id, + pr_debug("Adjust id %x it_value(%llu, %llu) -> it_value(%llu, %llu)\n", t->id, (unsigned long long)ts.tv_sec, (unsigned long long)ts.tv_nsec, (unsigned long long)t->val.it_value.tv_sec, (unsigned long long)t->val.it_value.tv_nsec); From 79c194313f14a5a902764ec653a611616125c256 Mon Sep 17 00:00:00 2001 From: "fu.lin" Date: Fri, 17 Sep 2021 17:16:48 +0800 Subject: [PATCH 028/121] tty: fix the null pointer of get_tty_driver v2: split error checking from index variable initialization v3: use PRIx64 for printing dev_t Signed-off-by: fu.lin Signed-off-by: Pavel Tikhomirov --- criu/tty.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/criu/tty.c b/criu/tty.c index 1598ad9562..1462193c5e 100644 --- a/criu/tty.c +++ b/criu/tty.c @@ -1977,6 +1977,12 @@ static int dump_one_tty(int lfd, u32 id, const struct fd_parms *p) pr_info("Dumping tty %d with id %#x\n", lfd, id); driver = get_tty_driver(p->stat.st_rdev, p->stat.st_dev); + if (driver == NULL) { + pr_err("Unable to find a tty driver (rdev %#" PRIx64 " dev %#" PRIx64 ")\n", p->stat.st_rdev, + p->stat.st_dev); + return -1; + } + if (driver->fd_get_index) index = driver->fd_get_index(lfd, p); else From 6dadc8bf0c996385531af6729361751b47b8926b Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 22 Oct 2021 17:56:37 +0300 Subject: [PATCH 029/121] util: use nftw in rmrf helper This simplifies the code by removing excess recursion and reusing standard function to walk over file-tree instead of opencoding it. 
This addresses problem mentioned in my review comment: https://github.com/checkpoint-restore/criu/pull/1495#discussion_r677554523 Fixes: 0db135ac4 ("util: add rm -rf function") Signed-off-by: Pavel Tikhomirov --- criu/apparmor.c | 2 +- criu/include/util.h | 4 ++-- criu/util.c | 46 ++++++++++++++------------------------------- 3 files changed, 17 insertions(+), 35 deletions(-) diff --git a/criu/apparmor.c b/criu/apparmor.c index 328fc606bb..f9ad796195 100644 --- a/criu/apparmor.c +++ b/criu/apparmor.c @@ -630,7 +630,7 @@ int suspend_aa(void) } ret = do_suspend(true); - if (rm_rf(policydir) < 0) + if (rmrf(policydir) < 0) pr_err("failed removing policy dir %s\n", policydir); return ret; diff --git a/criu/include/util.h b/criu/include/util.h index a2dac22335..19d378fc54 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -284,8 +284,8 @@ int setup_tcp_server(char *type, char *addr, unsigned short *port); int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk); int setup_tcp_client(char *hostname); -/* *dir should be writable and at least PATH_MAX long */ -int rm_rf(char *dir); +/* path should be writable and no more than PATH_MAX long */ +int rmrf(char *path); #define LAST_PID_PATH "sys/kernel/ns_last_pid" #define PID_MAX_PATH "sys/kernel/pid_max" diff --git a/criu/util.c b/criu/util.c index 06124c2205..414879971f 100644 --- a/criu/util.c +++ b/criu/util.c @@ -1,4 +1,4 @@ -#define _XOPEN_SOURCE +#define _XOPEN_SOURCE 500 #include #include @@ -26,6 +26,7 @@ #include #include #include +#include #include "linux/mount.h" @@ -1613,44 +1614,25 @@ ssize_t write_all(int fd, const void *buf, size_t size) return n; } -int rm_rf(char *target) +static int remove_one(const char *fpath, const struct stat *sb, int tflag, struct FTW *ftwbuf) { - int offset = strlen(target); - DIR *dir = NULL; - struct dirent *de; - int ret = -1; + int ret; - dir = opendir(target); - if (!dir) { - pr_perror("unable to open %s", target); + ret = remove(fpath); + if (ret) { + 
pr_perror("rmrf: unable to remove %s", fpath); return -1; } - while ((de = readdir(dir))) { - int n; - - if (dir_dots(de)) - continue; - - n = snprintf(target + offset, PATH_MAX - offset, "/%s", de->d_name); - if (n < 0 || n >= PATH_MAX) { - pr_err("snprintf failed\n"); - goto out; - } - - if (de->d_type == DT_DIR && rm_rf(target)) - goto out; + return 0; +} - if (remove(target) < 0) { - pr_perror("unable to remove %s", target); - goto out; - } - } +#define NFTW_FD_MAX 64 - ret = 0; -out: - target[offset] = 0; - return ret; +int rmrf(char *path) +{ + pr_debug("rmrf: removing %s\n", path); + return nftw(path, remove_one, NFTW_FD_MAX, FTW_DEPTH | FTW_PHYS); } __attribute__((returns_twice)) static pid_t raw_legacy_clone(unsigned long flags, int *pidfd) From d6d76e07bb7f4532bb49c1fce99f3fef7fcd87c6 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Mon, 18 Oct 2021 18:43:14 +0300 Subject: [PATCH 030/121] criu-ns: make pidns init first do setsid On criu-ns dump/restore/dump of a process which initially was not a session leader (with --shell-job option) we see sid == 0 for it and fail with something like: Error (criu/cr-dump.c:1333): A session leader of 41585(41585) is outside of its pid namespace Note: We should not dump processes with sid 0 (even with --shell-job) as on restore we can put such processes from multiple sessions into one, which is wrong. 
Fixes: #232 Signed-off-by: Pavel Tikhomirov --- scripts/criu-ns | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/criu-ns b/scripts/criu-ns index d76db3606d..72c0753e5e 100755 --- a/scripts/criu-ns +++ b/scripts/criu-ns @@ -102,6 +102,7 @@ def wrap_restore(): criu_pid = os.fork() if criu_pid == 0: + os.setsid() _mount_new_proc() run_criu(restore_args) From 62090e89b553a022bf564e76840dc67ea786ec65 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Thu, 28 Oct 2021 21:05:57 +0000 Subject: [PATCH 031/121] net: optimize restore_rule() to not open the CR_FD_RULE image file twice Previously, `open_image(CR_FD_RULE, O_RSTR, pid)` was called twice. Opening an image file twice is not allowed when streaming the image. This commit optimizes the code to only open the image file once. Also improved the error path in restore_ip_dump(). Signed-off-by: Nicolas Viennot --- criu/net.c | 51 ++++++++++++++++++--------------------------------- 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/criu/net.c b/criu/net.c index 7b45f0633f..02115c4de1 100644 --- a/criu/net.c +++ b/criu/net.c @@ -2250,12 +2250,12 @@ static int restore_ip_dump(int type, int pid, char *cmd) sockfd = img_raw_fd(img); if (sockfd < 0) { pr_err("Getting raw FD failed\n"); - return -1; + goto out_image; } tmp_file = tmpfile(); if (!tmp_file) { pr_perror("Failed to open tmpfile"); - return -1; + goto out_image; } while ((n = read(sockfd, buf, 1024)) > 0) { @@ -2264,25 +2264,34 @@ static int restore_ip_dump(int type, int pid, char *cmd) pr_perror("Failed to write to tmpfile " "[written: %d; total: %d]", written, n); - goto close; + goto out_tmp_file; } } if (fseek(tmp_file, 0, SEEK_SET)) { pr_perror("Failed to set file position to beginning of tmpfile"); - goto close; + goto out_tmp_file; } - if (img) { - ret = run_ip_tool(cmd, "restore", NULL, NULL, fileno(tmp_file), -1, 0); - close_image(img); + if (type == CR_FD_RULE) { + /* + * Delete 3 default rules to prevent duplicates. 
See kernel's + * function fib_default_rules_init() for the details. + */ + run_ip_tool("rule", "flush", NULL, NULL, -1, -1, 0); + run_ip_tool("rule", "delete", "table", "local", -1, -1, 0); } -close: + ret = run_ip_tool(cmd, "restore", NULL, NULL, fileno(tmp_file), -1, 0); + +out_tmp_file: if (fclose(tmp_file)) { pr_perror("Failed to close tmpfile"); } +out_image: + close_image(img); + return ret; } @@ -2304,31 +2313,7 @@ static inline int restore_route(int pid) static inline int restore_rule(int pid) { - struct cr_img *img; - int ret = 0; - - img = open_image(CR_FD_RULE, O_RSTR, pid); - if (!img) { - ret = -1; - goto out; - } - - if (empty_image(img)) - goto close; - - /* - * Delete 3 default rules to prevent duplicates. See kernel's - * function fib_default_rules_init() for the details. - */ - run_ip_tool("rule", "flush", NULL, NULL, -1, -1, 0); - run_ip_tool("rule", "delete", "table", "local", -1, -1, 0); - - if (restore_ip_dump(CR_FD_RULE, pid, "rule")) - ret = -1; -close: - close_image(img); -out: - return ret; + return restore_ip_dump(CR_FD_RULE, pid, "rule"); } /* From e11c742e018aeaf60f0e3abef33c557b4799fb9a Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 27 Oct 2021 07:27:22 +0000 Subject: [PATCH 032/121] ci: replace deprecated codecov bash uploader Replace deprecated codecov bash uploader with new version: https://about.codecov.io/blog/introducing-codecovs-new-uploader/ Signed-off-by: Adrian Reber --- .github/workflows/gcov-test.yml | 2 ++ Makefile | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gcov-test.yml b/.github/workflows/gcov-test.yml index f1b38e77e6..f782c5b9df 100644 --- a/.github/workflows/gcov-test.yml +++ b/.github/workflows/gcov-test.yml @@ -10,5 +10,7 @@ jobs: - uses: actions/checkout@v2 - name: Run Coverage Tests run: sudo -E make -C scripts/ci local GCOV=1 + - name: Run gcov + run: sudo -E find . 
-name '*gcda' -type f -print0 | sudo -E xargs --null --max-args 128 --max-procs 4 gcov - name: Run Coverage Analysis run: sudo -E make codecov diff --git a/Makefile b/Makefile index 73b647d6d7..d350126159 100644 --- a/Makefile +++ b/Makefile @@ -449,7 +449,9 @@ lint: codecov: SHELL := $(shell which bash) codecov: - bash <(curl -s https://codecov.io/bash) + curl -Os https://uploader.codecov.io/latest/linux/codecov + chmod +x codecov + ./codecov .PHONY: codecov fetch-clang-format: .FORCE From a86a66fc2cbb77cdf0df69f3589c325018261d67 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 4 Nov 2021 09:10:38 +0000 Subject: [PATCH 033/121] ci: fix userfaultfd test failures Newer kernels (5.11) require echo 1 > /proc/sys/vm/unprivileged_userfaultfd Without the 'echo 1' the kernel prints a message like this: uffd: Set unprivileged_userfaultfd sysctl knob to 1 if kernel faults must be handled without obtaining CAP_SYS_PTRACE capability Signed-off-by: Adrian Reber --- scripts/ci/run-ci-tests.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh index 51aa7db9ea..096f907fb0 100755 --- a/scripts/ci/run-ci-tests.sh +++ b/scripts/ci/run-ci-tests.sh @@ -197,6 +197,12 @@ fi # shellcheck disable=SC2086 ./test/zdtm.py run -a -p 2 --keep-going $ZDTM_OPTS +# Newer kernels are blocking access to userfaultfd: +# uffd: Set unprivileged_userfaultfd sysctl knob to 1 if kernel faults must be handled without obtaining CAP_SYS_PTRACE capability +if [ -e /proc/sys/vm/unprivileged_userfaultfd ]; then + echo 1 > /proc/sys/vm/unprivileged_userfaultfd +fi + LAZY_EXCLUDE="-x maps04 -x cmdlinenv00 -x maps007" LAZY_TESTS='.*(maps0|uffd-events|lazy-thp|futex|fork).*' From 07a2f0265d3d6eebec6b3a7538179f6b50da270b Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Thu, 4 Nov 2021 09:18:31 +0000 Subject: [PATCH 034/121] ci: use Fedora 34 for lint CI runs Fedora 35 comes with clang 13 which provides different results for clang-format than clang 
12 in Fedora 34. Signed-off-by: Adrian Reber --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 50b241e9f7..49eb6aaac3 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -6,7 +6,7 @@ jobs: build: runs-on: ubuntu-latest container: - image: registry.fedoraproject.org/fedora:latest + image: registry.fedoraproject.org/fedora:34 steps: - name: Install tools run: sudo dnf -y install git make python3-flake8 ShellCheck clang-tools-extra which findutils From b405b1b0142bea0eeec1437a867b1c0ef7a3afe1 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 29 Oct 2021 02:49:31 +0000 Subject: [PATCH 035/121] tests: improve the image streamer process control When exceptions are raised during testing, the image streamer process should be terminated as opposed to being left hanging. This could lead to the whole test suite to be left hanging as it waits for all child processes to exit. 
Signed-off-by: Nicolas Viennot --- test/zdtm.py | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 0a52e1b96c..fc7b8a1830 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1039,6 +1039,7 @@ def __init__(self, opts): self.__lazy_pages_p = None self.__page_server_p = None self.__dump_process = None + self.__img_streamer_process = None self.__tls = self.__tls_options() if opts['tls'] else [] self.__criu_bin = opts['criu_bin'] self.__crit_bin = opts['crit_bin'] @@ -1065,6 +1066,11 @@ def fini(self): self.__dump_process = None if ret: raise test_fail_exc("criu dump exited with %s" % ret) + if self.__img_streamer_process: + ret = self.wait_for_criu_image_streamer() + if ret: + raise test_fail_exc("criu-image-streamer exited with %s" % ret) + return def logs(self): @@ -1219,8 +1225,10 @@ def check_pages_counts(self): stent['pages_written']) if self.__stream: - p = self.spawn_criu_image_streamer("extract") - p.wait() + self.spawn_criu_image_streamer("extract") + ret = self.wait_for_criu_image_streamer() + if ret: + raise test_fail_exc("criu-image-streamer (extract) exited with %s" % ret) real_written = 0 for f in os.listdir(self.__ddir()): @@ -1262,6 +1270,8 @@ def spawn_criu_image_streamer(self, action): "--progress-fd {progress_fd}", action] + log = open(os.path.join(self.__ddir(), "img-streamer.log"), "w") + # * As we are using a shell pipe command, we want to use pipefail. # Otherwise, failures stay unnoticed. For this, we use bash as sh # doesn't support that feature. 
@@ -1270,7 +1280,9 @@ def spawn_criu_image_streamer(self, action): progress_fd=progress_w, images_dir=self.__ddir(), img_file=os.path.join(self.__ddir(), STREAMED_IMG_FILE_NAME) - )], close_fds=False) + )], stderr=log, close_fds=False) + + log.close() os.close(progress_w) progress = os.fdopen(progress_r, "r") @@ -1287,7 +1299,15 @@ def spawn_criu_image_streamer(self, action): raise test_fail_exc( "criu-image-streamer is not starting (exit_code=%d)" % p.wait()) - return p + progress.close() + + self.__img_streamer_process = p + + def wait_for_criu_image_streamer(self): + ret = self.__img_streamer_process.wait() + grep_errors(os.path.join(self.__ddir(), "img-streamer.log")) + self.__img_streamer_process = None + return ret def dump(self, action, opts=[]): self.__iter += 1 @@ -1319,7 +1339,7 @@ def dump(self, action, opts=[]): a_opts += self.__test.getdopts() if self.__stream: - streamer_p = self.spawn_criu_image_streamer("capture") + self.spawn_criu_image_streamer("capture") a_opts += ["--stream"] if self.__dedup: @@ -1347,9 +1367,9 @@ def dump(self, action, opts=[]): opts=a_opts + opts, nowait=nowait) if self.__stream: - ret = streamer_p.wait() + ret = self.wait_for_criu_image_streamer() if ret: - raise test_fail_exc("criu-image-streamer exited with %d" % ret) + raise test_fail_exc("criu-image-streamer (capture) exited with %d" % ret) if self.__mdedup and self.__iter > 1: self.__criu_act("dedup", opts=[]) @@ -1382,7 +1402,7 @@ def restore(self): r_opts += ['--action-script', os.getcwd() + '/empty-netns-prep.sh'] if self.__stream: - streamer_p = self.spawn_criu_image_streamer("serve") + self.spawn_criu_image_streamer("serve") r_opts += ["--stream"] if self.__dedup: @@ -1419,9 +1439,9 @@ def restore(self): self.__criu_act("restore", opts=r_opts + ["--restore-detached"]) if self.__stream: - ret = streamer_p.wait() + ret = self.wait_for_criu_image_streamer() if ret: - raise test_fail_exc("criu-image-streamer exited with %d" % ret) + raise 
test_fail_exc("criu-image-streamer (serve) exited with %d" % ret) self.show_stats("restore") @@ -1466,6 +1486,10 @@ def kill(self): print("criu dump exited with %s" % self.__dump_process.wait()) grep_errors(os.path.join(self.__ddir(), "dump.log")) self.__dump_process = None + if self.__img_streamer_process: + self.__img_streamer_process.terminate() + ret = self.wait_for_criu_image_streamer() + print("criu-image-streamer exited with %s" % ret) def try_run_hook(test, args): From d1e0bcf8130183cdcbd7e80dcbe18a5e7674f259 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 29 Jul 2021 14:21:37 +0300 Subject: [PATCH 036/121] sockets: don't call sk_setbufs asynchronously We want to also c/r socket buf locks (SO_BUF_LOCKS) which are also implicitly set by setsockopt(SO_{SND,RCV}BUF*), so we need to order these two properly. That's why we need to wait for sk_setbufs to finish. And there is not much point in setting buffer sizes asynchronously anyway. Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Pavel Tikhomirov --- criu/sockets.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/sockets.c b/criu/sockets.c index 9426b5b940..65aa829501 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -517,7 +517,7 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) pr_info("%d restore sndbuf %d rcv buf %d\n", sk, soe->so_sndbuf, soe->so_rcvbuf); /* setsockopt() multiplies the input values by 2 */ - ret |= userns_call(sk_setbufs, UNS_ASYNC, bufs, sizeof(bufs), sk); + ret |= userns_call(sk_setbufs, 0, bufs, sizeof(bufs), sk); if (soe->has_so_priority) { pr_debug("\trestore priority %d for socket\n", soe->so_priority); From 5e13d03743d7d99f292858669d13d0350938106e Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 29 Jul 2021 14:57:17 +0300 Subject: [PATCH 037/121] kerndat: check for set/getsockopt SO_BUF_LOCK availability This is a new kernel feature to let criu restore sockets with kernel auto-adjusted buffer sizes. 
Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Pavel Tikhomirov --- criu/cr-check.c | 10 ++++++++++ criu/include/kerndat.h | 1 + criu/include/sockets.h | 4 ++++ criu/kerndat.c | 33 +++++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+) diff --git a/criu/cr-check.c b/criu/cr-check.c index 3575fb3b36..3e268c4395 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -1372,6 +1372,14 @@ static int check_network_lock_nftables(void) return 0; } +static int check_sockopt_buf_lock(void) +{ + if (!kdat.has_sockopt_buf_lock) + return -1; + + return 0; +} + static int (*chk_feature)(void); /* @@ -1490,6 +1498,7 @@ int cr_check(void) ret |= check_ns_pid(); ret |= check_apparmor_stacking(); ret |= check_network_lock_nftables(); + ret |= check_sockopt_buf_lock(); } /* @@ -1602,6 +1611,7 @@ static struct feature_list feature_list[] = { { "ns_pid", check_ns_pid }, { "apparmor_stacking", check_apparmor_stacking }, { "network_lock_nftables", check_network_lock_nftables }, + { "sockopt_buf_lock", check_sockopt_buf_lock }, { NULL, NULL }, }; diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index 80bad7f11d..2ded7d1da7 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -74,6 +74,7 @@ struct kerndat_s { bool has_pidfd_getfd; bool has_nspid; bool has_nftables_concat; + bool has_sockopt_buf_lock; }; extern struct kerndat_s kdat; diff --git a/criu/include/sockets.h b/criu/include/sockets.h index 3e8f3d6019..399d38664c 100644 --- a/criu/include/sockets.h +++ b/criu/include/sockets.h @@ -123,4 +123,8 @@ extern const char *socket_proto_name(unsigned int proto, char *nm, size_t size); #define ___socket_family_name(family) __socket_info_helper(socket_family_name, family) #define ___socket_proto_name(proto) __socket_info_helper(socket_proto_name, proto) +#ifndef SO_BUF_LOCK +#define SO_BUF_LOCK 72 +#endif + #endif /* __CR_SOCKETS_H__ */ diff --git a/criu/kerndat.c b/criu/kerndat.c index 0e88ba43e2..9f6a6ec428 100644 --- a/criu/kerndat.c +++ 
b/criu/kerndat.c @@ -816,6 +816,35 @@ static int kerndat_x86_has_ptrace_fpu_xsave_bug(void) return 0; } +int kerndat_sockopt_buf_lock(void) +{ + int exit_code = -1; + socklen_t len; + u32 buf_lock; + int sock; + + sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sock < 0) { + pr_perror("Unable to create a socket"); + return -1; + } + + len = sizeof(buf_lock); + if (getsockopt(sock, SOL_SOCKET, SO_BUF_LOCK, &buf_lock, &len)) { + if (errno != ENOPROTOOPT) { + pr_perror("Unable to get SO_BUF_LOCK with getsockopt"); + goto err; + } + kdat.has_sockopt_buf_lock = false; + } else + kdat.has_sockopt_buf_lock = true; + + exit_code = 0; +err: + close(sock); + return exit_code; +} + #define KERNDAT_CACHE_FILE KDAT_RUNDIR "/criu.kdat" #define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR "/.criu.kdat" @@ -1359,6 +1388,10 @@ int kerndat_init(void) pr_err("kerndat_has_nftables_concat failed when initializing kerndat.\n"); ret = -1; } + if (!ret && kerndat_sockopt_buf_lock()) { + pr_err("kerndat_sockopt_buf_lock failed when initializing kerndat.\n"); + ret = -1; + } kerndat_lsm(); kerndat_mmap_min_addr(); From 9d02a38d6771a0eb449d5885eddcf74535f8ba8a Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 29 Jul 2021 11:27:13 +0300 Subject: [PATCH 038/121] sockets: c/r buffer size locks When one sets socket buffer sizes with setsockopt(SO_{SND,RCV}BUF*), kernel sets corresponding SOCK_SNDBUF_LOCK or SOCK_RCVBUF_LOCK flags on struct sock. It means that such a socket with explicitly changed buffer size can not be auto-adjusted by kernel (e.g. if there is free memory kernel can auto-increase default socket buffers to improve performance). (see tcp_fixup_rcvbuf() and tcp_sndbuf_expand()) CRIU is always changing buf sizes on restore, that means that all sockets receive lock flags on struct sock and become non-auto-adjusted after migration. In some cases it can decrease performance of network connections quite a lot. 
So let's c/r socket buf locks (SO_BUF_LOCKS), so that sockets for which auto-adjustment is available do not lose it. Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Pavel Tikhomirov --- criu/sockets.c | 8 ++++++++ images/sk-opts.proto | 2 ++ 2 files changed, 10 insertions(+) diff --git a/criu/sockets.c b/criu/sockets.c index 65aa829501..db772707b6 100644 --- a/criu/sockets.c +++ b/criu/sockets.c @@ -519,6 +519,10 @@ int restore_socket_opts(int sk, SkOptsEntry *soe) /* setsockopt() multiplies the input values by 2 */ ret |= userns_call(sk_setbufs, 0, bufs, sizeof(bufs), sk); + if (soe->has_so_buf_lock) { + pr_debug("\trestore buf_lock %d for socket\n", soe->so_buf_lock); + ret |= restore_opt(sk, SOL_SOCKET, SO_BUF_LOCK, &soe->so_buf_lock); + } if (soe->has_so_priority) { pr_debug("\trestore priority %d for socket\n", soe->so_priority); ret |= restore_opt(sk, SOL_SOCKET, SO_PRIORITY, &soe->so_priority); @@ -619,6 +623,10 @@ int dump_socket_opts(int sk, SkOptsEntry *soe) ret |= dump_opt(sk, SOL_SOCKET, SO_SNDBUF, &soe->so_sndbuf); ret |= dump_opt(sk, SOL_SOCKET, SO_RCVBUF, &soe->so_rcvbuf); + if (kdat.has_sockopt_buf_lock) { + soe->has_so_buf_lock = true; + ret |= dump_opt(sk, SOL_SOCKET, SO_BUF_LOCK, &soe->so_buf_lock); + } soe->has_so_priority = true; ret |= dump_opt(sk, SOL_SOCKET, SO_PRIORITY, &soe->so_priority); soe->has_so_rcvlowat = true; diff --git a/images/sk-opts.proto b/images/sk-opts.proto index 2377f6b629..1d24d47cc7 100644 --- a/images/sk-opts.proto +++ b/images/sk-opts.proto @@ -31,6 +31,8 @@ message sk_opts_entry { optional uint32 tcp_keepintvl = 22; optional uint32 so_oobinline = 23; optional uint32 so_linger = 24; + + optional uint32 so_buf_lock = 25; } enum sk_shutdown { From 579b8b02603d29adf664aab99f9df6cda33acfea Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 29 Jul 2021 16:16:12 +0300 Subject: [PATCH 039/121] zdtm: add test for socket buffer size locks Just set all possible values 0-3 and check if it persists. 
Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Pavel Tikhomirov --- test/zdtm/static/Makefile | 1 + test/zdtm/static/sock_opts02.c | 73 +++++++++++++++++++++++++++++++ test/zdtm/static/sock_opts02.desc | 1 + 3 files changed, 75 insertions(+) create mode 100644 test/zdtm/static/sock_opts02.c create mode 100644 test/zdtm/static/sock_opts02.desc diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index b6aa621c7e..3e1e0a498a 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -117,6 +117,7 @@ TST_NOFILE := \ socket-linger \ sock_opts00 \ sock_opts01 \ + sock_opts02 \ sk-unix-unconn \ ipc_namespace \ selfexe00 \ diff --git a/test/zdtm/static/sock_opts02.c b/test/zdtm/static/sock_opts02.c new file mode 100644 index 0000000000..21a81a28ff --- /dev/null +++ b/test/zdtm/static/sock_opts02.c @@ -0,0 +1,73 @@ +#include +#include +#include +#include +#include + +#include "zdtmtst.h" + +const char *test_doc = "Check that SO_BUF_LOCK option dumped"; +const char *test_author = "Pavel Tikhomirov "; + +#ifndef SO_BUF_LOCK +#define SO_BUF_LOCK 72 +#endif + +#define NSOCK 4 + +int main(int argc, char **argv) +{ + int sock[NSOCK]; + uint32_t val[NSOCK]; + int ret, i; + int exit_code = 1; + + test_init(argc, argv); + + for (i = 0; i < NSOCK; i++) { + sock[i] = -1; + val[i] = i; + } + + for (i = 0; i < NSOCK; i++) { + sock[i] = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sock[i] < 0) { + pr_perror("can't create socket %d", i); + goto err; + } + + ret = setsockopt(sock[i], SOL_SOCKET, SO_BUF_LOCK, &val[i], sizeof(val[i])); + if (ret < 0) { + pr_perror("can't set SO_BUF_LOCK (%u) on socket %d", val[i], i); + goto err; + } + } + + test_daemon(); + test_waitsig(); + + for (i = 0; i < NSOCK; i++) { + uint32_t tmp; + socklen_t len; + + len = sizeof(tmp); + ret = getsockopt(sock[i], SOL_SOCKET, SO_BUF_LOCK, &tmp, &len); + if (ret < 0) { + pr_perror("can't get SO_BUF_LOCK from socket %d", i); + goto err; + } + + if (tmp != val[i]) { + 
fail("SO_BUF_LOCK missmatch %u != %u", tmp, val[i]); + goto err; + } + } + + pass(); + exit_code = 0; +err: + for (i = 0; i < NSOCK; i++) + close(sock[i]); + + return exit_code; +} diff --git a/test/zdtm/static/sock_opts02.desc b/test/zdtm/static/sock_opts02.desc new file mode 100644 index 0000000000..37d3a63545 --- /dev/null +++ b/test/zdtm/static/sock_opts02.desc @@ -0,0 +1 @@ +{'flags': 'suid', 'feature': 'sockopt_buf_lock'} From b4cc85671010c1d99659e3ed90b3f03c415e5f8a Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 3 Aug 2021 13:05:32 +0300 Subject: [PATCH 040/121] zdtm: make sock_opts02 also check lock change by SO_*BUF* Reviewed-by: Alexander Mikhalitsyn Signed-off-by: Pavel Tikhomirov --- test/zdtm/static/sock_opts02.c | 63 +++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 9 deletions(-) diff --git a/test/zdtm/static/sock_opts02.c b/test/zdtm/static/sock_opts02.c index 21a81a28ff..7ea98744ac 100644 --- a/test/zdtm/static/sock_opts02.c +++ b/test/zdtm/static/sock_opts02.c @@ -13,32 +13,77 @@ const char *test_author = "Pavel Tikhomirov "; #define SO_BUF_LOCK 72 #endif -#define NSOCK 4 +#ifndef SOCK_SNDBUF_LOCK +#define SOCK_SNDBUF_LOCK 1 +#endif +#ifndef SOCK_RCVBUF_LOCK +#define SOCK_RCVBUF_LOCK 2 +#endif + +#define BUFSIZE 16384 + +struct sk_opt { + int type; + uint32_t val; + uint32_t lock; +} sk_opts[] = { { SO_BUF_LOCK, 0, 0 }, + { SO_BUF_LOCK, SOCK_SNDBUF_LOCK, SOCK_SNDBUF_LOCK }, + { SO_BUF_LOCK, SOCK_RCVBUF_LOCK, SOCK_RCVBUF_LOCK }, + { SO_BUF_LOCK, SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK, SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK }, + { SO_SNDBUF, BUFSIZE, SOCK_SNDBUF_LOCK }, + { SO_RCVBUF, BUFSIZE, SOCK_RCVBUF_LOCK } }; + +#define NSOCK ARRAY_SIZE(sk_opts) + +char *type_to_str(int type) +{ + switch (type) { + case SO_BUF_LOCK: + return "SO_BUF_LOCK"; + case SO_SNDBUFFORCE: + return "SO_SNDBUFFORCE"; + case SO_RCVBUFFORCE: + return "SO_RCVBUFFORCE"; + } + return NULL; +} int main(int argc, char **argv) { int sock[NSOCK]; - 
uint32_t val[NSOCK]; int ret, i; int exit_code = 1; test_init(argc, argv); - for (i = 0; i < NSOCK; i++) { + for (i = 0; i < NSOCK; i++) sock[i] = -1; - val[i] = i; - } for (i = 0; i < NSOCK; i++) { + uint32_t tmp; + socklen_t len; + sock[i] = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); if (sock[i] < 0) { pr_perror("can't create socket %d", i); goto err; } - ret = setsockopt(sock[i], SOL_SOCKET, SO_BUF_LOCK, &val[i], sizeof(val[i])); + ret = setsockopt(sock[i], SOL_SOCKET, sk_opts[i].type, &sk_opts[i].val, sizeof(sk_opts[i].val)); + if (ret < 0) { + pr_perror("can't set %s (%u) on socket %d", type_to_str(sk_opts[i].type), sk_opts[i].val, i); + goto err; + } + + len = sizeof(tmp); + ret = getsockopt(sock[i], SOL_SOCKET, SO_BUF_LOCK, &tmp, &len); if (ret < 0) { - pr_perror("can't set SO_BUF_LOCK (%u) on socket %d", val[i], i); + pr_perror("can't get SO_BUF_LOCK from socket %d", i); + goto err; + } + + if (tmp != sk_opts[i].lock) { + fail("SO_BUF_LOCK missmatch %u != %u", tmp, sk_opts[i].lock); goto err; } } @@ -57,8 +102,8 @@ int main(int argc, char **argv) goto err; } - if (tmp != val[i]) { - fail("SO_BUF_LOCK missmatch %u != %u", tmp, val[i]); + if (tmp != sk_opts[i].lock) { + fail("SO_BUF_LOCK missmatch %u != %u", tmp, sk_opts[i].lock); goto err; } } From 78723d0b5747134ef4b6fe2663fbcfc2b4737e89 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 28 Oct 2021 17:15:52 +0300 Subject: [PATCH 041/121] clang-format: enable AlignTrailingComments Code becomes much more human-readable after enabling it. 
Example 1: Before: ``` struct file_desc { u32 id; /* File id, unique */ struct hlist_node hash; /* Descriptor hashing and lookup */ struct list_head fd_info_head; /* Chain of fdinfo_list_entry-s with same ID and type but different pids */ struct file_desc_ops *ops; /* Associated operations */ struct list_head fake_master_list; /* To chain in the list of file_desc, which don't have a fle in a task, that having permissions */ }; ``` After: ``` struct file_desc { u32 id; /* File id, unique */ struct hlist_node hash; /* Descriptor hashing and lookup */ struct list_head fd_info_head; /* Chain of fdinfo_list_entry-s with same ID and type but different pids */ struct file_desc_ops *ops; /* Associated operations */ struct list_head fake_master_list; /* To chain in the list of file_desc, which don't * have a fle in a task, that having permissions */ }; ``` Example 2: Before: ``` enum fsconfig_command { FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ }; ``` After: ``` enum fsconfig_command { FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ FSCONFIG_CMD_CREATE = 6, /* Invoke superblock 
creation */ FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ }; ``` Example 3: Before: ``` ret = libnet_build_tcp(ntohs(sk->dst_addr->v4.sin_port), /* source port */ ntohs(sk->src_addr->v4.sin_port), /* destination port */ data->inq_seq, /* sequence number */ data->outq_seq - data->outq_len, /* acknowledgement num */ flags, /* control flags */ data->rcv_wnd, /* window size */ 0, /* checksum */ 10, /* urgent pointer */ LIBNET_TCP_H + 20, /* TCP packet size */ NULL, /* payload */ 0, /* payload size */ l, /* libnet handle */ 0); /* libnet id */ ``` After: ``` ret = libnet_build_tcp(ntohs(sk->dst_addr->v4.sin_port), /* source port */ ntohs(sk->src_addr->v4.sin_port), /* destination port */ data->inq_seq, /* sequence number */ data->outq_seq - data->outq_len, /* acknowledgement num */ flags, /* control flags */ data->rcv_wnd, /* window size */ 0, /* checksum */ 10, /* urgent pointer */ LIBNET_TCP_H + 20, /* TCP packet size */ NULL, /* payload */ 0, /* payload size */ l, /* libnet handle */ 0); /* libnet id */ ``` Example 4: Before: ``` static struct testcase __testcases[] = { { 2, 1, 2, 1, 2, 1 }, /* session00 */ { 4, 2, 4, 2, 4, 1 }, /* |\_session00 */ { 15, 4, 4, 4, 15, 1 }, /* | |\_session00 */ { 16, 4, 4, 4, 15, 1 }, /* | \_session00 */ { 17, 4, 4, 4, 17, 0 }, /* | |\_session00 */ { 18, 4, 4, 4, 17, 1 }, /* | \_session00 */ { 5, 2, 2, 2, 2, 1 }, /* |\_session00 */ { 8, 2, 8, 2, 8, 1 }, /* |\_session00 */ { 9, 8, 2, 2, 2, 1 }, /* | \_session00 */ { 10, 2, 10, 2, 10, 1 }, /* |\_session00 */ { 11, 10, 11, 2, 11, 1 }, /* | \_session00 */ { 12, 11, 2, 2, 2, 1 }, /* | \_session00 */ { 13, 2, 2, 2, 2, 0 }, /* \_session00 */ { 3, 13, 2, 2, 2, 1 }, /* session00 */ { 6, 2, 6, 2, 6, 0 }, /* \_session00 */ { 14, 6, 6, 6, 6, 1 }, /* session00 */ }; ``` After: ``` static struct testcase __testcases[] = { { 2, 1, 2, 1, 2, 1 }, /* session00 */ { 4, 2, 4, 2, 4, 1 }, /* |\_session00 */ { 15, 4, 4, 4, 15, 1 }, /* | |\_session00 */ { 16, 4, 4, 4, 15, 1 }, /* | 
\_session00 */ { 17, 4, 4, 4, 17, 0 }, /* | |\_session00 */ { 18, 4, 4, 4, 17, 1 }, /* | \_session00 */ { 5, 2, 2, 2, 2, 1 }, /* |\_session00 */ { 8, 2, 8, 2, 8, 1 }, /* |\_session00 */ { 9, 8, 2, 2, 2, 1 }, /* | \_session00 */ { 10, 2, 10, 2, 10, 1 }, /* |\_session00 */ { 11, 10, 11, 2, 11, 1 }, /* | \_session00 */ { 12, 11, 2, 2, 2, 1 }, /* | \_session00 */ { 13, 2, 2, 2, 2, 0 }, /* \_session00 */ { 3, 13, 2, 2, 2, 1 }, /* session00 */ { 6, 2, 6, 2, 6, 0 }, /* \_session00 */ { 14, 6, 6, 6, 6, 1 }, /* session00 */ }; ``` Signed-off-by: Pavel Tikhomirov --- .clang-format | 2 +- scripts/fetch-clang-format.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.clang-format b/.clang-format index dd4ade3703..96ba5909f4 100644 --- a/.clang-format +++ b/.clang-format @@ -15,7 +15,7 @@ AlignConsecutiveAssignments: false AlignConsecutiveDeclarations: false AlignEscapedNewlines: Left # Unknown to clang-format-4.0 AlignOperands: true -AlignTrailingComments: false +AlignTrailingComments: true AlignConsecutiveMacros: true AllowAllParametersOfDeclarationOnNextLine: false AllowShortBlocksOnASingleLine: false diff --git a/scripts/fetch-clang-format.sh b/scripts/fetch-clang-format.sh index c9006c518b..0e9545f2dc 100755 --- a/scripts/fetch-clang-format.sh +++ b/scripts/fetch-clang-format.sh @@ -12,4 +12,5 @@ curl -s "${URL}" | sed -e " s,Intended for clang-format >= 4,Intended for clang-format >= 11,g; s,ForEachMacros:,ForEachMacros:\n - 'for_each_pstree_item',g; s,\(AlignTrailingComments:.*\)$,\1\nAlignConsecutiveMacros: true,g; + s,AlignTrailingComments: false,AlignTrailingComments: true,g; " > .clang-format From 8e8870f6772d61089b98863fcc93e7968ea7a74c Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 28 Oct 2021 17:17:44 +0300 Subject: [PATCH 042/121] clang-format: do several manual comment fixups Automatic AlignTrailingComments fails to make those comments look right, so let's do it manually, so that they both satisfy AlignTrailingComments and also are 
human-readable. Signed-off-by: Pavel Tikhomirov --- criu/include/files.h | 10 +++++----- criu/include/page-pipe.h | 6 ++---- criu/include/pagemap.h | 19 ++++++++---------- criu/include/pipes.h | 4 ++-- criu/shmem.c | 3 ++- test/zdtm/static/ipc_namespace.c | 33 ++++++++++++++++---------------- 6 files changed, 36 insertions(+), 39 deletions(-) diff --git a/criu/include/files.h b/criu/include/files.h index 96face71ba..26ce1f42a5 100644 --- a/criu/include/files.h +++ b/criu/include/files.h @@ -121,12 +121,12 @@ unsigned int find_unused_fd(struct pstree_item *, int hint_fd); struct fdinfo_list_entry *find_used_fd(struct pstree_item *, int fd); struct file_desc { - u32 id; /* File id, unique */ - struct hlist_node hash; /* Descriptor hashing and lookup */ - struct list_head fd_info_head; /* Chain of fdinfo_list_entry-s with same ID and type but different pids */ - struct file_desc_ops *ops; /* Associated operations */ + u32 id; /* File id, unique */ + struct hlist_node hash; /* Descriptor hashing and lookup */ + struct list_head fd_info_head; /* Chain of fdinfo_list_entry-s with same ID and type but different pids */ + struct file_desc_ops *ops; /* Associated operations */ struct list_head fake_master_list; /* To chain in the list of file_desc, which don't - have a fle in a task, that having permissions */ + * have a fle in a task, that having permissions */ }; struct fdtype_ops { diff --git a/criu/include/page-pipe.h b/criu/include/page-pipe.h index a5f97678eb..0917869d97 100644 --- a/criu/include/page-pipe.h +++ b/criu/include/page-pipe.h @@ -130,10 +130,8 @@ struct page_pipe { unsigned int flags; /* PP_FOO flags below */ }; -#define PP_CHUNK_MODE \ - 0x1 /* Restrict the maximum buffer size of pipes - and dump memory for a few iterations */ -#define PP_OWN_IOVS 0x4 /* create_page_pipe allocated IOVs memory */ +#define PP_CHUNK_MODE 0x1 /* Restrict the maximum buffer size of pipes and dump memory for a few iterations */ +#define PP_OWN_IOVS 0x4 /* create_page_pipe 
allocated IOVs memory */ struct page_pipe *create_page_pipe(unsigned int nr_segs, struct iovec *iovs, unsigned flags); extern void destroy_page_pipe(struct page_pipe *p); diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h index c39c25d0cb..8c71805598 100644 --- a/criu/include/pagemap.h +++ b/criu/include/pagemap.h @@ -63,17 +63,14 @@ struct page_read { struct cr_img *pi; u32 pages_img_id; - PagemapEntry *pe; /* current pagemap we are on */ - struct page_read *parent; /* parent pagemap (if ->in_parent - pagemap is met in image, then - go to this guy for page, see - read_pagemap_page */ - unsigned long cvaddr; /* vaddr we are on */ - off_t pi_off; /* current offset in pages file */ - - struct iovec bunch; /* record consequent neighbour - iovecs to punch together */ - unsigned id; /* for logging */ + PagemapEntry *pe; /* current pagemap we are on */ + struct page_read *parent; /* parent pagemap (if ->in_parent pagemap is met in image, + * then go to this guy for page, see read_pagemap_page */ + unsigned long cvaddr; /* vaddr we are on */ + off_t pi_off; /* current offset in pages file */ + + struct iovec bunch; /* record consequent neighbour iovecs to punch together */ + unsigned id; /* for logging */ unsigned long img_id; /* pagemap image file ID */ PagemapEntry **pmes; diff --git a/criu/include/pipes.h b/criu/include/pipes.h index 6e6310e142..f442d7f65b 100644 --- a/criu/include/pipes.h +++ b/criu/include/pipes.h @@ -49,8 +49,8 @@ extern int restore_pipe_data(int img_type, int pfd, u32 id, struct pipe_data_rst struct pipe_info { PipeEntry *pe; struct list_head pipe_list; /* All pipe_info with the same pipe_id - * This is pure circular list without head */ - struct list_head list; /* global list of pipes */ + * This is pure circular list without head */ + struct list_head list; /* global list of pipes */ struct file_desc d; unsigned int create : 1, reopen : 1; }; diff --git a/criu/shmem.c b/criu/shmem.c index 1b83327ef2..bb48e436b2 100644 --- a/criu/shmem.c 
+++ b/criu/shmem.c @@ -85,7 +85,8 @@ struct shmem_info { int self_count; /* the number of regions, which belongs to "pid" */ }; - struct { /* For sysvipc restore */ + /* For sysvipc restore */ + struct { struct list_head att; /* list of shmem_sysv_att-s */ int want_write; }; diff --git a/test/zdtm/static/ipc_namespace.c b/test/zdtm/static/ipc_namespace.c index 98241d8163..4273951aed 100644 --- a/test/zdtm/static/ipc_namespace.c +++ b/test/zdtm/static/ipc_namespace.c @@ -19,27 +19,28 @@ extern int shmctl(int __shmid, int __cmd, struct shmid_ds *__buf); struct ipc_ids { int in_use; /* TODO: Check for 0 */ - // unsigned short seq; - // unsigned short seq_max; - // struct rw_semaphore rw_mutex; - // struct idr ipcs_idr; /* TODO */ + + // unsigned short seq; + // unsigned short seq_max; + // struct rw_semaphore rw_mutex; + // struct idr ipcs_idr; /* TODO */ }; struct ipc_ns { struct ipc_ids ids[3]; - int sem_ctls[4]; // + - int used_sems; // + - - int msg_ctlmax; // + - int msg_ctlmnb; // + - int msg_ctlmni; // + - int msg_bytes; // + - int msg_hdrs; // + - int auto_msgmni; // + - int msg_next_id; // + - int sem_next_id; // + - int shm_next_id; // + + int sem_ctls[4]; + int used_sems; + + int msg_ctlmax; + int msg_ctlmnb; + int msg_ctlmni; + int msg_bytes; + int msg_hdrs; + int auto_msgmni; + int msg_next_id; + int sem_next_id; + int shm_next_id; size_t shm_ctlmax; size_t shm_ctlall; From 94ce85cfcb0a780e3698c1d808725f25a02ad73f Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 28 Oct 2021 18:10:14 +0300 Subject: [PATCH 043/121] clang-format: do automatic comment fixups Result of `make indent` after enabling AlignTrailingComments. 
Signed-off-by: Pavel Tikhomirov --- compel/arch/aarch64/src/lib/infect.c | 2 +- compel/arch/arm/src/lib/infect.c | 2 +- compel/arch/mips/src/lib/include/ldsodefs.h | 10 +- .../mips/src/lib/include/uapi/asm/siginfo.h | 14 +- compel/arch/mips/src/lib/infect.c | 2 +- .../src/lib/include/uapi/asm/infect-types.h | 6 +- compel/arch/ppc64/src/lib/infect.c | 2 +- compel/arch/s390/src/lib/infect.c | 4 +- .../arch/x86/plugins/std/syscalls/syscall32.c | 6 +- .../arch/x86/src/lib/include/uapi/asm/cpu.h | 158 +++++++++--------- .../arch/x86/src/lib/include/uapi/asm/fpu.h | 16 +- compel/arch/x86/src/lib/infect.c | 4 +- compel/include/infect-priv.h | 2 +- compel/include/rpc-pie-priv.h | 2 +- compel/include/uapi/infect.h | 2 +- compel/include/uapi/loglevels.h | 6 +- compel/include/uapi/ptrace.h | 2 +- compel/src/lib/handle-elf.c | 2 +- criu/arch/ppc64/restorer.c | 6 +- criu/arch/ppc64/vdso-pie.c | 4 +- criu/arch/s390/restorer.c | 6 +- criu/arch/s390/vdso-pie.c | 6 +- criu/arch/x86/include/asm/restorer.h | 2 +- criu/arch/x86/sigaction_compat.c | 4 +- criu/cgroup.c | 4 +- criu/cr-check.c | 2 +- criu/cr-dump.c | 2 +- criu/files-reg.c | 4 +- criu/files.c | 4 +- criu/include/aio.h | 4 +- criu/include/autofs.h | 2 +- criu/include/bfd.h | 4 +- criu/include/file-lock.h | 8 +- criu/include/files.h | 4 +- criu/include/image-desc.h | 4 +- criu/include/inet_diag.h | 2 +- criu/include/kcmp.h | 4 +- criu/include/kerndat.h | 2 +- criu/include/linux/mount.h | 14 +- criu/include/mount.h | 12 +- criu/include/namespaces.h | 4 +- criu/include/page-pipe.h | 24 +-- criu/include/page-xfer.h | 2 +- criu/include/pagemap-cache.h | 12 +- criu/include/pstree.h | 4 +- criu/include/restorer.h | 4 +- criu/include/rst_info.h | 2 +- criu/include/servicefd.h | 6 +- criu/include/sk-inet.h | 2 +- criu/include/sysfs_parse.h | 4 +- criu/include/vma.h | 10 +- criu/irmap.c | 4 +- criu/mount.c | 2 +- criu/net.c | 4 +- criu/pagemap.c | 6 +- criu/shmem.c | 2 +- criu/sk-unix.c | 4 +- criu/uffd.c | 4 +- criu/util.c | 4 +- 
include/common/arch/ppc64/asm/bitops.h | 2 +- include/common/arch/x86/asm/bitops.h | 2 +- include/common/asm-generic/bitops.h | 2 +- soccr/soccr.c | 66 ++++---- soccr/soccr.h | 6 +- test/zdtm/static/aio01.c | 4 +- test/zdtm/static/auto_dev-ioctl.h | 2 +- test/zdtm/static/file_locks00.c | 18 +- test/zdtm/static/ipc_namespace.c | 8 +- test/zdtm/static/netns-dev.c | 140 ++++++++-------- test/zdtm/static/s390x_regs_check.c | 14 +- test/zdtm/static/session01.c | 30 ++-- test/zdtm/static/sigpending.c | 2 +- test/zdtm/transition/ptrace.c | 2 +- 73 files changed, 374 insertions(+), 374 deletions(-) diff --git a/compel/arch/aarch64/src/lib/infect.c b/compel/arch/aarch64/src/lib/infect.c index 7cfa637ebe..bd1ed0da35 100644 --- a/compel/arch/aarch64/src/lib/infect.c +++ b/compel/arch/aarch64/src/lib/infect.c @@ -19,7 +19,7 @@ unsigned __page_shift = 0; */ const char code_syscall[] = { 0x01, 0x00, 0x00, 0xd4, /* SVC #0 */ - 0x00, 0x00, 0x20, 0xd4 /* BRK #0 */ + 0x00, 0x00, 0x20, 0xd4 /* BRK #0 */ }; static const int code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long)); diff --git a/compel/arch/arm/src/lib/infect.c b/compel/arch/arm/src/lib/infect.c index 6715afdb3c..7700f52caf 100644 --- a/compel/arch/arm/src/lib/infect.c +++ b/compel/arch/arm/src/lib/infect.c @@ -18,7 +18,7 @@ */ const char code_syscall[] = { 0x00, 0x00, 0x00, 0xef, /* SVC #0 */ - 0xf0, 0x01, 0xf0, 0xe7 /* UDF #32 */ + 0xf0, 0x01, 0xf0, 0xe7 /* UDF #32 */ }; static const int code_syscall_aligned = round_up(sizeof(code_syscall), sizeof(long)); diff --git a/compel/arch/mips/src/lib/include/ldsodefs.h b/compel/arch/mips/src/lib/include/ldsodefs.h index 8cfde24962..97e79755d4 100644 --- a/compel/arch/mips/src/lib/include/ldsodefs.h +++ b/compel/arch/mips/src/lib/include/ldsodefs.h @@ -69,8 +69,8 @@ struct La_mips_64_retval; /* An entry in a 64 bit SHT_REL section. 
*/ typedef struct { - Elf32_Word r_sym; /* Symbol index */ - unsigned char r_ssym; /* Special symbol for 2nd relocation */ + Elf32_Word r_sym; /* Symbol index */ + unsigned char r_ssym; /* Special symbol for 2nd relocation */ unsigned char r_type3; /* 3rd relocation type */ unsigned char r_type2; /* 2nd relocation type */ unsigned char r_type1; /* 1st relocation type */ @@ -82,14 +82,14 @@ typedef union { } _Elf64_Mips_R_Info_union; typedef struct { - Elf64_Addr r_offset; /* Address */ + Elf64_Addr r_offset; /* Address */ _Elf64_Mips_R_Info_union r_info; /* Relocation type and symbol index */ } Elf64_Mips_Rel; typedef struct { - Elf64_Addr r_offset; /* Address */ + Elf64_Addr r_offset; /* Address */ _Elf64_Mips_R_Info_union r_info; /* Relocation type and symbol index */ - Elf64_Sxword r_addend; /* Addend */ + Elf64_Sxword r_addend; /* Addend */ } Elf64_Mips_Rela; #define ELF64_MIPS_R_SYM(i) ((__extension__(_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_sym) diff --git a/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h b/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h index 82ae6096b7..6db1ddbd30 100644 --- a/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h +++ b/compel/arch/mips/src/lib/include/uapi/asm/siginfo.h @@ -52,14 +52,14 @@ typedef struct siginfo { /* kill() */ struct { - __kernel_pid_t _pid; /* sender's pid */ + __kernel_pid_t _pid; /* sender's pid */ __ARCH_SI_UID_T _uid; /* sender's uid */ } _kill; /* POSIX.1b timers */ struct { __kernel_timer_t _tid; /* timer id */ - int _overrun; /* overrun count */ + int _overrun; /* overrun count */ char _pad[sizeof(__ARCH_SI_UID_T) - sizeof(int)]; sigval_t _sigval; /* same as below */ int _sys_private; /* not to be passed to user */ @@ -67,16 +67,16 @@ typedef struct siginfo { /* POSIX.1b signals */ struct { - __kernel_pid_t _pid; /* sender's pid */ + __kernel_pid_t _pid; /* sender's pid */ __ARCH_SI_UID_T _uid; /* sender's uid */ sigval_t _sigval; } _rt; /* SIGCHLD */ struct { - __kernel_pid_t _pid; /* 
which child */ + __kernel_pid_t _pid; /* which child */ __ARCH_SI_UID_T _uid; /* sender's uid */ - int _status; /* exit code */ + int _status; /* exit code */ __ARCH_SI_CLOCK_T _utime; __ARCH_SI_CLOCK_T _stime; } _sigchld; @@ -104,8 +104,8 @@ typedef struct siginfo { /* SIGSYS */ struct { - void *_call_addr; /* calling user insn */ - int _syscall; /* triggering system call number */ + void *_call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ unsigned int _arch; /* AUDIT_ARCH_* of syscall */ } _sigsys; } _sifields; diff --git a/compel/arch/mips/src/lib/infect.c b/compel/arch/mips/src/lib/infect.c index 68d0a27285..afa0f5ed5f 100644 --- a/compel/arch/mips/src/lib/infect.c +++ b/compel/arch/mips/src/lib/infect.c @@ -24,7 +24,7 @@ */ const char code_syscall[] = { 0x0c, 0x00, 0x00, 0x00, /* syscall */ - 0x0d, 0x00, 0x00, 0x00 /* break */ + 0x0d, 0x00, 0x00, 0x00 /* break */ }; /* 10-byte legacy floating point register */ diff --git a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h index fe6192e207..8cf8a135fb 100644 --- a/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h +++ b/compel/arch/ppc64/src/lib/include/uapi/asm/infect-types.h @@ -21,13 +21,13 @@ typedef struct { unsigned long xer; unsigned long ccr; unsigned long softe; /* Soft enabled/disabled */ - unsigned long trap; /* Reason for being here */ + unsigned long trap; /* Reason for being here */ /* * N.B. for critical exceptions on 4xx, the dar and dsisr * fields are overloaded to hold srr0 and srr1. 
*/ - unsigned long dar; /* Fault registers */ - unsigned long dsisr; /* on 4xx/Book-E used for ESR */ + unsigned long dar; /* Fault registers */ + unsigned long dsisr; /* on 4xx/Book-E used for ESR */ unsigned long result; /* Result of a system call */ } user_regs_struct_t; diff --git a/compel/arch/ppc64/src/lib/infect.c b/compel/arch/ppc64/src/lib/infect.c index fc174d0dd2..61cd6e9857 100644 --- a/compel/arch/ppc64/src/lib/infect.c +++ b/compel/arch/ppc64/src/lib/infect.c @@ -30,7 +30,7 @@ unsigned __page_shift = 0; */ const uint32_t code_syscall[] = { 0x44000002, /* sc */ - 0x0fe00000 /* twi 31,0,0 */ + 0x0fe00000 /* twi 31,0,0 */ }; static inline __always_unused void __check_code_syscall(void) diff --git a/compel/arch/s390/src/lib/infect.c b/compel/arch/s390/src/lib/infect.c index 77ace713a6..3cd25e71d8 100644 --- a/compel/arch/s390/src/lib/infect.c +++ b/compel/arch/s390/src/lib/infect.c @@ -624,8 +624,8 @@ enum kernel_ts_level { }; /* See arch/s390/include/asm/processor.h */ -#define TASK_SIZE_LEVEL_3 0x40000000000UL /* 4 TB */ -#define TASK_SIZE_LEVEL_4 0x20000000000000UL /* 8 PB */ +#define TASK_SIZE_LEVEL_3 0x40000000000UL /* 4 TB */ +#define TASK_SIZE_LEVEL_4 0x20000000000000UL /* 8 PB */ #define TASK_SIZE_LEVEL_5 0xffffffffffffefffUL /* 16 EB - 0x1000 */ /* diff --git a/compel/arch/x86/plugins/std/syscalls/syscall32.c b/compel/arch/x86/plugins/std/syscalls/syscall32.c index 0f2fec3ff8..d09fd38c71 100644 --- a/compel/arch/x86/plugins/std/syscalls/syscall32.c +++ b/compel/arch/x86/plugins/std/syscalls/syscall32.c @@ -1,9 +1,9 @@ #include "asm/types.h" #include "syscall-32.h" -#define SYS_SOCKET 1 /* sys_socket(2) */ -#define SYS_BIND 2 /* sys_bind(2) */ -#define SYS_CONNECT 3 /* sys_connect(2) */ +#define SYS_SOCKET 1 /* sys_socket(2) */ +#define SYS_BIND 2 /* sys_bind(2) */ +#define SYS_CONNECT 3 /* sys_connect(2) */ #define SYS_SENDTO 11 /* sys_sendto(2) */ #define SYS_RECVFROM 12 /* sys_recvfrom(2) */ #define SYS_SHUTDOWN 13 /* sys_shutdown(2) */ diff 
--git a/compel/arch/x86/src/lib/include/uapi/asm/cpu.h b/compel/arch/x86/src/lib/include/uapi/asm/cpu.h index 8d54516af4..63ff83dbeb 100644 --- a/compel/arch/x86/src/lib/include/uapi/asm/cpu.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/cpu.h @@ -43,16 +43,16 @@ enum cpuid_leafs { #define NCAPINTS_BITS (NCAPINTS * 32) /* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ -#define X86_FEATURE_FPU (0 * 32 + 0) /* Onboard FPU */ -#define X86_FEATURE_VME (0 * 32 + 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE (0 * 32 + 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE (0 * 32 + 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC (0 * 32 + 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR (0 * 32 + 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE (0 * 32 + 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE (0 * 32 + 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 (0 * 32 + 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC (0 * 32 + 9) /* Onboard APIC */ +#define X86_FEATURE_FPU (0 * 32 + 0) /* Onboard FPU */ +#define X86_FEATURE_VME (0 * 32 + 1) /* Virtual Mode Extensions */ +#define X86_FEATURE_DE (0 * 32 + 2) /* Debugging Extensions */ +#define X86_FEATURE_PSE (0 * 32 + 3) /* Page Size Extensions */ +#define X86_FEATURE_TSC (0 * 32 + 4) /* Time Stamp Counter */ +#define X86_FEATURE_MSR (0 * 32 + 5) /* Model-Specific Registers */ +#define X86_FEATURE_PAE (0 * 32 + 6) /* Physical Address Extensions */ +#define X86_FEATURE_MCE (0 * 32 + 7) /* Machine Check Exception */ +#define X86_FEATURE_CX8 (0 * 32 + 8) /* CMPXCHG8 instruction */ +#define X86_FEATURE_APIC (0 * 32 + 9) /* Onboard APIC */ #define X86_FEATURE_SEP (0 * 32 + 11) /* SYSENTER/SYSEXIT */ #define X86_FEATURE_MTRR (0 * 32 + 12) /* Memory Type Range Registers */ #define X86_FEATURE_PGE (0 * 32 + 13) /* Page Global Enable */ @@ -100,12 +100,12 @@ enum cpuid_leafs { #define X86_FEATURE_CENTAUR_MCR (3 * 32 + 3) /* Centaur MCRs (= MTRRs) 
*/ /* CPU types for specific tunings: */ -#define X86_FEATURE_K8 (3 * 32 + 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 (3 * 32 + 5) /* "" Athlon */ -#define X86_FEATURE_P3 (3 * 32 + 6) /* "" P3 */ -#define X86_FEATURE_P4 (3 * 32 + 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC (3 * 32 + 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP (3 * 32 + 9) /* SMP kernel running on UP */ +#define X86_FEATURE_K8 (3 * 32 + 4) /* "" Opteron, Athlon64 */ +#define X86_FEATURE_K7 (3 * 32 + 5) /* "" Athlon */ +#define X86_FEATURE_P3 (3 * 32 + 6) /* "" P3 */ +#define X86_FEATURE_P4 (3 * 32 + 7) /* "" P4 */ +#define X86_FEATURE_CONSTANT_TSC (3 * 32 + 8) /* TSC ticks at a constant rate */ +#define X86_FEATURE_UP (3 * 32 + 9) /* SMP kernel running on UP */ #define X86_FEATURE_ART (3 * 32 + 10) /* Always running timer (ART) */ #define X86_FEATURE_ARCH_PERFMON (3 * 32 + 11) /* Intel Architectural PerfMon */ #define X86_FEATURE_PEBS (3 * 32 + 12) /* Precise-Event Based Sampling */ @@ -129,16 +129,16 @@ enum cpuid_leafs { #define X86_FEATURE_TSC_KNOWN_FREQ (3 * 32 + 31) /* TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ -#define X86_FEATURE_XMM3 (4 * 32 + 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ (4 * 32 + 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 (4 * 32 + 2) /* 64-bit Debug Store */ -#define X86_FEATURE_MWAIT (4 * 32 + 3) /* "monitor" MONITOR/MWAIT support */ -#define X86_FEATURE_DSCPL (4 * 32 + 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ -#define X86_FEATURE_VMX (4 * 32 + 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX (4 * 32 + 6) /* Safer Mode eXtensions */ -#define X86_FEATURE_EST (4 * 32 + 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 (4 * 32 + 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 (4 * 32 + 9) /* Supplemental SSE-3 */ +#define X86_FEATURE_XMM3 (4 * 32 + 0) /* "pni" SSE-3 */ +#define X86_FEATURE_PCLMULQDQ (4 * 32 + 1) /* PCLMULQDQ 
instruction */ +#define X86_FEATURE_DTES64 (4 * 32 + 2) /* 64-bit Debug Store */ +#define X86_FEATURE_MWAIT (4 * 32 + 3) /* "monitor" MONITOR/MWAIT support */ +#define X86_FEATURE_DSCPL (4 * 32 + 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ +#define X86_FEATURE_VMX (4 * 32 + 5) /* Hardware virtualization */ +#define X86_FEATURE_SMX (4 * 32 + 6) /* Safer Mode eXtensions */ +#define X86_FEATURE_EST (4 * 32 + 7) /* Enhanced SpeedStep */ +#define X86_FEATURE_TM2 (4 * 32 + 8) /* Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 (4 * 32 + 9) /* Supplemental SSE-3 */ #define X86_FEATURE_CID (4 * 32 + 10) /* Context ID */ #define X86_FEATURE_SDBG (4 * 32 + 11) /* Silicon Debug */ #define X86_FEATURE_FMA (4 * 32 + 12) /* Fused multiply-add */ @@ -162,28 +162,28 @@ enum cpuid_leafs { #define X86_FEATURE_HYPERVISOR (4 * 32 + 31) /* Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ -#define X86_FEATURE_XSTORE (5 * 32 + 2) /* "rng" RNG present (xstore) */ -#define X86_FEATURE_XSTORE_EN (5 * 32 + 3) /* "rng_en" RNG enabled */ -#define X86_FEATURE_XCRYPT (5 * 32 + 6) /* "ace" on-CPU crypto (xcrypt) */ -#define X86_FEATURE_XCRYPT_EN (5 * 32 + 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 (5 * 32 + 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN (5 * 32 + 9) /* ACE v2 enabled */ +#define X86_FEATURE_XSTORE (5 * 32 + 2) /* "rng" RNG present (xstore) */ +#define X86_FEATURE_XSTORE_EN (5 * 32 + 3) /* "rng_en" RNG enabled */ +#define X86_FEATURE_XCRYPT (5 * 32 + 6) /* "ace" on-CPU crypto (xcrypt) */ +#define X86_FEATURE_XCRYPT_EN (5 * 32 + 7) /* "ace_en" on-CPU crypto enabled */ +#define X86_FEATURE_ACE2 (5 * 32 + 8) /* Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN (5 * 32 + 9) /* ACE v2 enabled */ #define X86_FEATURE_PHE (5 * 32 + 10) /* PadLock Hash Engine */ #define X86_FEATURE_PHE_EN (5 * 32 + 11) /* PHE enabled */ #define X86_FEATURE_PMM (5 * 32 + 12) /* 
PadLock Montgomery Multiplier */ #define X86_FEATURE_PMM_EN (5 * 32 + 13) /* PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ -#define X86_FEATURE_LAHF_LM (6 * 32 + 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY (6 * 32 + 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM (6 * 32 + 2) /* Secure Virtual Machine */ -#define X86_FEATURE_EXTAPIC (6 * 32 + 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY (6 * 32 + 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM (6 * 32 + 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A (6 * 32 + 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE (6 * 32 + 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH (6 * 32 + 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW (6 * 32 + 9) /* OS Visible Workaround */ +#define X86_FEATURE_LAHF_LM (6 * 32 + 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY (6 * 32 + 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_SVM (6 * 32 + 2) /* Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC (6 * 32 + 3) /* Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY (6 * 32 + 4) /* CR8 in 32-bit mode */ +#define X86_FEATURE_ABM (6 * 32 + 5) /* Advanced bit manipulation */ +#define X86_FEATURE_SSE4A (6 * 32 + 6) /* SSE-4A */ +#define X86_FEATURE_MISALIGNSSE (6 * 32 + 7) /* Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH (6 * 32 + 8) /* 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW (6 * 32 + 9) /* OS Visible Workaround */ #define X86_FEATURE_IBS (6 * 32 + 10) /* Instruction Based Sampling */ #define X86_FEATURE_XOP (6 * 32 + 11) /* extended AVX instructions */ #define X86_FEATURE_SKINIT (6 * 32 + 12) /* SKINIT/STGI instructions */ @@ -202,14 +202,14 @@ enum cpuid_leafs { #define X86_FEATURE_MWAITX (6 * 32 + 29) /* MWAIT extension (MONITORX/MWAITX instructions) */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ -#define 
X86_FEATURE_FSGSBASE (9 * 32 + 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ -#define X86_FEATURE_TSC_ADJUST (9 * 32 + 1) /* TSC adjustment MSR 0x3B */ -#define X86_FEATURE_BMI1 (9 * 32 + 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE (9 * 32 + 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 (9 * 32 + 5) /* AVX2 instructions */ -#define X86_FEATURE_SMEP (9 * 32 + 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 (9 * 32 + 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS (9 * 32 + 9) /* Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_FSGSBASE (9 * 32 + 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST (9 * 32 + 1) /* TSC adjustment MSR 0x3B */ +#define X86_FEATURE_BMI1 (9 * 32 + 3) /* 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE (9 * 32 + 4) /* Hardware Lock Elision */ +#define X86_FEATURE_AVX2 (9 * 32 + 5) /* AVX2 instructions */ +#define X86_FEATURE_SMEP (9 * 32 + 7) /* Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 (9 * 32 + 8) /* 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS (9 * 32 + 9) /* Enhanced REP MOVSB/STOSB instructions */ #define X86_FEATURE_INVPCID (9 * 32 + 10) /* Invalidate Processor Context ID */ #define X86_FEATURE_RTM (9 * 32 + 11) /* Restricted Transactional Memory */ #define X86_FEATURE_CQM (9 * 32 + 12) /* Cache QoS Monitoring */ @@ -238,14 +238,14 @@ enum cpuid_leafs { #define X86_FEATURE_XSAVES (10 * 32 + 3) /* XSAVES/XRSTORS instructions */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 11 */ -#define X86_FEATURE_PREFETCHWT1 (11 * 32 + 0) /* PREFETCHWT1 Intel® Xeon PhiTM only */ -#define X86_FEATURE_AVX512VBMI (11 * 32 + 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_UMIP (11 * 32 + 2) /* User Mode Instruction Protection */ -#define X86_FEATURE_PKU (11 * 32 + 3) /* Protection Keys for 
Userspace */ -#define X86_FEATURE_OSPKE (11 * 32 + 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_AVX512_VBMI2 (11 * 32 + 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_GFNI (11 * 32 + 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (11 * 32 + 9) /* Vector AES */ +#define X86_FEATURE_PREFETCHWT1 (11 * 32 + 0) /* PREFETCHWT1 Intel® Xeon PhiTM only */ +#define X86_FEATURE_AVX512VBMI (11 * 32 + 1) /* AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (11 * 32 + 2) /* User Mode Instruction Protection */ +#define X86_FEATURE_PKU (11 * 32 + 3) /* Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (11 * 32 + 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_AVX512_VBMI2 (11 * 32 + 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_GFNI (11 * 32 + 8) /* Galois Field New Instructions */ +#define X86_FEATURE_VAES (11 * 32 + 9) /* Vector AES */ #define X86_FEATURE_VPCLMULQDQ (11 * 32 + 10) /* Carry-Less Multiplication Double Quadword */ #define X86_FEATURE_AVX512_VNNI (11 * 32 + 11) /* Vector Neural Network Instructions */ #define X86_FEATURE_AVX512_BITALG (11 * 32 + 12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ @@ -261,35 +261,35 @@ enum cpuid_leafs { #define X86_FEATURE_CQM_MBM_LOCAL (12 * 32 + 2) /* LLC Local MBM monitoring */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ -#define X86_FEATURE_CLZERO (13 * 32 + 0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13 * 32 + 1) /* Instructions Retired Count */ -#define X86_FEATURE_XSAVEERPTR (13 * 32 + 2) /* Always save/restore FP error pointers */ +#define X86_FEATURE_CLZERO (13 * 32 + 0) /* CLZERO instruction */ +#define X86_FEATURE_IRPERF (13 * 32 + 1) /* Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13 * 32 + 2) /* Always save/restore FP error pointers */ #define X86_FEATURE_IBPB (13 * 32 + 12) /* Indirect Branch Prediction 
Barrier */ #define X86_FEATURE_IBRS (13 * 32 + 14) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_STIBP (13 * 32 + 15) /* Single Thread Indirect Branch Predictors */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ -#define X86_FEATURE_DTHERM (14 * 32 + 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14 * 32 + 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14 * 32 + 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14 * 32 + 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14 * 32 + 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14 * 32 + 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14 * 32 + 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14 * 32 + 9) /* HWP Activity Window */ +#define X86_FEATURE_DTHERM (14 * 32 + 0) /* Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14 * 32 + 1) /* Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14 * 32 + 2) /* Always Running APIC Timer */ +#define X86_FEATURE_PLN (14 * 32 + 4) /* Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14 * 32 + 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14 * 32 + 7) /* Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14 * 32 + 8) /* HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14 * 32 + 9) /* HWP Activity Window */ #define X86_FEATURE_HWP_EPP (14 * 32 + 10) /* HWP Energy Perf. 
Preference */ #define X86_FEATURE_HWP_PKG_REQ (14 * 32 + 11) /* HWP Package Level Request */ #define X86_FEATURE_HDC (14 * 32 + 13) /* HDC base registers present */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -#define X86_FEATURE_NPT (15 * 32 + 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15 * 32 + 1) /* LBR Virtualization support */ -#define X86_FEATURE_SVML (15 * 32 + 2) /* "svm_lock" SVM locking MSR */ -#define X86_FEATURE_NRIPS (15 * 32 + 3) /* "nrip_save" SVM next_rip save */ -#define X86_FEATURE_TSCRATEMSR (15 * 32 + 4) /* "tsc_scale" TSC scaling support */ -#define X86_FEATURE_VMCBCLEAN (15 * 32 + 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15 * 32 + 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15 * 32 + 7) /* Decode Assists support */ +#define X86_FEATURE_NPT (15 * 32 + 0) /* Nested Page Table support */ +#define X86_FEATURE_LBRV (15 * 32 + 1) /* LBR Virtualization support */ +#define X86_FEATURE_SVML (15 * 32 + 2) /* "svm_lock" SVM locking MSR */ +#define X86_FEATURE_NRIPS (15 * 32 + 3) /* "nrip_save" SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (15 * 32 + 4) /* "tsc_scale" TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (15 * 32 + 5) /* "vmcb_clean" VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (15 * 32 + 6) /* flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15 * 32 + 7) /* Decode Assists support */ #define X86_FEATURE_PAUSEFILTER (15 * 32 + 10) /* filtered pause intercept */ #define X86_FEATURE_PFTHRESHOLD (15 * 32 + 12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15 * 32 + 13) /* Virtual Interrupt Controller */ @@ -305,8 +305,8 @@ enum cpuid_leafs { #define X86_FEATURE_SMCA (17 * 32 + 3) /* Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -#define X86_FEATURE_AVX512_4VNNIW (18 * 32 + 2) /* AVX-512 Neural Network Instructions */ -#define 
X86_FEATURE_AVX512_4FMAPS (18 * 32 + 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_AVX512_4VNNIW (18 * 32 + 2) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18 * 32 + 3) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_PCONFIG (18 * 32 + 18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18 * 32 + 26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18 * 32 + 27) /* "" Single Thread Indirect Branch Predictors */ diff --git a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h index d740e3c04a..a16b658af1 100644 --- a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h @@ -105,7 +105,7 @@ struct i387_fxsave_struct { uint32_t fos; /* FPU Operand Selector */ }; }; - uint32_t mxcsr; /* MXCSR Register State */ + uint32_t mxcsr; /* MXCSR Register State */ uint32_t mxcsr_mask; /* MXCSR Mask */ /* 8*16 bytes for each FP-reg = 128 bytes */ @@ -277,13 +277,13 @@ typedef struct { } fpu_state_64_t; struct user_i387_ia32_struct { - uint32_t cwd; /* FPU Control Word */ - uint32_t swd; /* FPU Status Word */ - uint32_t twd; /* FPU Tag Word */ - uint32_t fip; /* FPU IP Offset */ - uint32_t fcs; /* FPU IP Selector */ - uint32_t foo; /* FPU Operand Pointer Offset */ - uint32_t fos; /* FPU Operand Pointer Selector */ + uint32_t cwd; /* FPU Control Word */ + uint32_t swd; /* FPU Status Word */ + uint32_t twd; /* FPU Tag Word */ + uint32_t fip; /* FPU IP Offset */ + uint32_t fcs; /* FPU IP Selector */ + uint32_t foo; /* FPU Operand Pointer Offset */ + uint32_t fos; /* FPU Operand Pointer Selector */ uint32_t st_space[20]; /* 8*10 bytes for each FP-reg = 80 bytes */ }; diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c index 1e344bf3af..2f6c557d0c 100644 --- a/compel/arch/x86/src/lib/infect.c +++ b/compel/arch/x86/src/lib/infect.c @@ -34,12 +34,12 @@ * 
Injected syscall instruction */ const char code_syscall[] = { - 0x0f, 0x05, /* syscall */ + 0x0f, 0x05, /* syscall */ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */ }; const char code_int_80[] = { - 0xcd, 0x80, /* int $0x80 */ + 0xcd, 0x80, /* int $0x80 */ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */ }; diff --git a/compel/include/infect-priv.h b/compel/include/infect-priv.h index 1c03f44861..9d34428393 100644 --- a/compel/include/infect-priv.h +++ b/compel/include/infect-priv.h @@ -38,7 +38,7 @@ struct parasite_ctl { unsigned long parasite_ip; /* service routine start ip */ unsigned int *cmd; /* address for command */ - void *args; /* address for arguments */ + void *args; /* address for arguments */ unsigned long args_size; int tsock; /* transport socket for transferring fds */ diff --git a/compel/include/rpc-pie-priv.h b/compel/include/rpc-pie-priv.h index 2a239c6134..5a6b337b22 100644 --- a/compel/include/rpc-pie-priv.h +++ b/compel/include/rpc-pie-priv.h @@ -3,7 +3,7 @@ struct ctl_msg { uint32_t cmd; /* command itself */ uint32_t ack; /* ack on command */ - int32_t err; /* error code on reply */ + int32_t err; /* error code on reply */ }; #define ctl_msg_cmd(_cmd) \ diff --git a/compel/include/uapi/infect.h b/compel/include/uapi/infect.h index c3d2ee6a69..7fa0bd8a0d 100644 --- a/compel/include/uapi/infect.h +++ b/compel/include/uapi/infect.h @@ -106,7 +106,7 @@ struct infect_ctx { unsigned long task_size; unsigned long syscall_ip; /* entry point of infection */ - unsigned long flags; /* fine-tune (e.g. faults) */ + unsigned long flags; /* fine-tune (e.g. 
faults) */ void (*child_handler)(int, siginfo_t *, void *); /* hander for SIGCHLD deaths */ struct sigaction orig_handler; diff --git a/compel/include/uapi/loglevels.h b/compel/include/uapi/loglevels.h index e76c156578..7a49825d24 100644 --- a/compel/include/uapi/loglevels.h +++ b/compel/include/uapi/loglevels.h @@ -7,10 +7,10 @@ */ enum __compel_log_levels { - COMPEL_LOG_MSG, /* Print message regardless of log level */ + COMPEL_LOG_MSG, /* Print message regardless of log level */ COMPEL_LOG_ERROR, /* Errors only, when we're in trouble */ - COMPEL_LOG_WARN, /* Warnings */ - COMPEL_LOG_INFO, /* Informative, everything is fine */ + COMPEL_LOG_WARN, /* Warnings */ + COMPEL_LOG_INFO, /* Informative, everything is fine */ COMPEL_LOG_DEBUG, /* Debug only */ COMPEL_DEFAULT_LOGLEVEL = COMPEL_LOG_WARN diff --git a/compel/include/uapi/ptrace.h b/compel/include/uapi/ptrace.h index c5291d20d3..533e0569ff 100644 --- a/compel/include/uapi/ptrace.h +++ b/compel/include/uapi/ptrace.h @@ -62,7 +62,7 @@ */ typedef struct { uint64_t filter_off; /* Input: which filter */ - uint64_t flags; /* Output: filter's flags */ + uint64_t flags; /* Output: filter's flags */ } seccomp_metadata_t; #ifdef PTRACE_EVENT_STOP diff --git a/compel/src/lib/handle-elf.c b/compel/src/lib/handle-elf.c index 9662751e0f..22c8f29786 100644 --- a/compel/src/lib/handle-elf.c +++ b/compel/src/lib/handle-elf.c @@ -554,7 +554,7 @@ int __handle_elf(void *mem, size_t size) #endif /* ELF_PPC64 */ #ifdef ELF_X86_64 - case R_X86_64_32: /* Symbol + Addend (4 bytes) */ + case R_X86_64_32: /* Symbol + Addend (4 bytes) */ case R_X86_64_32S: /* Symbol + Addend (4 bytes) */ pr_debug("\t\t\t\tR_X86_64_32 at 0x%-4lx val 0x%x\n", place, value32); pr_out(" { .offset = 0x%-8x, .type = COMPEL_TYPE_INT, " diff --git a/criu/arch/ppc64/restorer.c b/criu/arch/ppc64/restorer.c index c17ba16699..56c09391e7 100644 --- a/criu/arch/ppc64/restorer.c +++ b/criu/arch/ppc64/restorer.c @@ -45,10 +45,10 @@ unsigned long sys_shmat(int shmid, const 
void *shmaddr, int shmflg) unsigned long raddr; int ret; - ret = sys_ipc(21 /*SHMAT */, shmid, /* first */ - shmflg, /* second */ + ret = sys_ipc(21 /*SHMAT */, shmid, /* first */ + shmflg, /* second */ (unsigned long)&raddr, /* third */ - shmaddr, /* ptr */ + shmaddr, /* ptr */ 0 /* fifth not used */); if (ret) diff --git a/criu/arch/ppc64/vdso-pie.c b/criu/arch/ppc64/vdso-pie.c index f01123efee..a84ae776bb 100644 --- a/criu/arch/ppc64/vdso-pie.c +++ b/criu/arch/ppc64/vdso-pie.c @@ -110,9 +110,9 @@ static inline void put_trampoline_call(unsigned long at, unsigned long to, unsig { uint32_t *addr = (uint32_t *)at; - *addr++ = 0x7C0802a6; /* mflr r0 */ + *addr++ = 0x7C0802a6; /* mflr r0 */ *addr++ = 0x48000001 | ((long)(tr - at - 4) & 0x3fffffc); /* bl tr */ - *(uint64_t *)addr = to; /* the address to read by the trampoline */ + *(uint64_t *)addr = to; /* the address to read by the trampoline */ invalidate_caches(at); } diff --git a/criu/arch/s390/restorer.c b/criu/arch/s390/restorer.c index 6907ad75bf..8b3bc44baf 100644 --- a/criu/arch/s390/restorer.c +++ b/criu/arch/s390/restorer.c @@ -23,10 +23,10 @@ unsigned long sys_shmat(int shmid, const void *shmaddr, int shmflg) unsigned long raddr; int ret; - ret = sys_ipc(21 /*SHMAT */, shmid, /* first */ - shmflg, /* second */ + ret = sys_ipc(21 /*SHMAT */, shmid, /* first */ + shmflg, /* second */ (unsigned long)&raddr, /* third */ - shmaddr, /* ptr */ + shmaddr, /* ptr */ 0 /* fifth not used */); if (ret) diff --git a/criu/arch/s390/vdso-pie.c b/criu/arch/s390/vdso-pie.c index ad504beda0..bf0366b0e4 100644 --- a/criu/arch/s390/vdso-pie.c +++ b/criu/arch/s390/vdso-pie.c @@ -18,9 +18,9 @@ */ typedef struct { u8 larl[6]; /* Load relative address of imm64 */ - u8 lg[6]; /* Load %r1 with imm64 */ - u8 br[2]; /* Branch to %r1 */ - u64 addr; /* Jump address */ + u8 lg[6]; /* Load %r1 with imm64 */ + u8 br[2]; /* Branch to %r1 */ + u64 addr; /* Jump address */ u32 guards; /* Guard bytes */ } __packed jmp_t; diff --git 
a/criu/arch/x86/include/asm/restorer.h b/criu/arch/x86/include/asm/restorer.h index 23438314f8..f7a6d50589 100644 --- a/criu/arch/x86/include/asm/restorer.h +++ b/criu/arch/x86/include/asm/restorer.h @@ -13,7 +13,7 @@ extern void restore_tls(tls_t *ptls); extern int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act); extern int set_compat_robust_list(uint32_t head_ptr, uint32_t len); -#else /* CONFIG_COMPAT */ +#else /* CONFIG_COMPAT */ static inline void restore_tls(tls_t *ptls) { } diff --git a/criu/arch/x86/sigaction_compat.c b/criu/arch/x86/sigaction_compat.c index f02b2cc0e4..506a8d1bb1 100644 --- a/criu/arch/x86/sigaction_compat.c +++ b/criu/arch/x86/sigaction_compat.c @@ -44,8 +44,8 @@ int arch_compat_rt_sigaction(void *stack32, int sig, rt_sigaction_t_compat *act) memcpy(stack32, act, sizeof(rt_sigaction_t_compat)); arg.nr = __NR32_rt_sigaction; arg.arg0 = sig; - arg.arg1 = (uint32_t)act_stack; /* act */ - arg.arg2 = 0; /* oldact */ + arg.arg1 = (uint32_t)act_stack; /* act */ + arg.arg2 = 0; /* oldact */ arg.arg3 = (uint32_t)sizeof(act->rt_sa_mask); /* sigsetsize */ return do_full_int80(&arg); diff --git a/criu/cgroup.c b/criu/cgroup.c index ccac37fcc5..82d9b16a2e 100644 --- a/criu/cgroup.c +++ b/criu/cgroup.c @@ -81,7 +81,7 @@ static bool cg_set_compare(struct cg_set *set, struct list_head *ctls, int what) if (l2->next != ctls) c2 = list_first_entry(l2, struct cg_ctl, l); - if (!c1 || !c2) /* Nowhere to move next */ + if (!c1 || !c2) /* Nowhere to move next */ return !c1 && !c2; /* Both lists scanned -- match */ if (strcmp(c1->name, c2->name)) @@ -860,7 +860,7 @@ static int dump_cg_dirs(struct list_head *dirs, size_t n_dirs, CgroupDirEntry ** cde->dir_perms->gid = cur->gid; cde->dir_name = cur->path + poff; - if (poff != 1) /* parent isn't "/" */ + if (poff != 1) /* parent isn't "/" */ cde->dir_name++; /* leading / */ cde->n_children = cur->n_children; if (cur->n_children > 0) diff --git a/criu/cr-check.c b/criu/cr-check.c index 
3e268c4395..e46c938157 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -1631,7 +1631,7 @@ void pr_check_features(const char *offset, const char *sep, int width) } pr_msg("%s", fl->name); // no \n pos += len; - if ((fl + 1)->name) { // not the last item + if ((fl + 1)->name) { // not the last item pr_msg("%s", sep); // no \n pos += sep_len; } diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 940f622462..ecc99f116f 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -615,7 +615,7 @@ static int dump_task_kobj_ids(struct pstree_item *item) TaskKobjIdsEntry *ids = item->ids; elem.pid = pid; - elem.idx = 0; /* really 0 for all */ + elem.idx = 0; /* really 0 for all */ elem.genid = 0; /* FIXME optimize */ new = 0; diff --git a/criu/files-reg.c b/criu/files-reg.c index 2b0347575c..679477c1ce 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -2236,8 +2236,8 @@ static struct filemap_ctx ctx; void filemap_ctx_init(bool auto_close) { ctx.desc = NULL; /* to fail the first comparison in open_ */ - ctx.fd = -1; /* not to close random fd in _fini */ - ctx.vma = NULL; /* not to put spurious VMA_CLOSE in _fini */ + ctx.fd = -1; /* not to close random fd in _fini */ + ctx.vma = NULL; /* not to put spurious VMA_CLOSE in _fini */ /* flags may remain any */ ctx.close = auto_close; } diff --git a/criu/files.c b/criu/files.c index 93754fb440..69ebc2e824 100644 --- a/criu/files.c +++ b/criu/files.c @@ -506,7 +506,7 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, } p.fd_ctl = ctl; /* Some dump_opts require this to talk to parasite */ - p.dfds = dfds; /* epoll needs to verify if target fd exist */ + p.dfds = dfds; /* epoll needs to verify if target fd exist */ if (S_ISSOCK(p.stat.st_mode)) return dump_socket(&p, lfd, e); @@ -1486,7 +1486,7 @@ int shared_fdt_prepare(struct pstree_item *item) struct inherit_fd { struct list_head inh_list; char *inh_id; /* file identifier */ - int inh_fd; /* criu's descriptor to inherit */ + int inh_fd; /* criu's 
descriptor to inherit */ int inh_fd_id; }; diff --git a/criu/include/aio.h b/criu/include/aio.h index f8a59dfdff..d1655739d9 100644 --- a/criu/include/aio.h +++ b/criu/include/aio.h @@ -13,8 +13,8 @@ struct task_restore_args; int prepare_aios(struct pstree_item *t, struct task_restore_args *ta); struct aio_ring { - unsigned id; /* kernel internal index number */ - unsigned nr; /* number of io_events */ + unsigned id; /* kernel internal index number */ + unsigned nr; /* number of io_events */ unsigned head; /* Written to by userland or under ring_lock * mutex by aio_read_events_ring(). */ unsigned tail; diff --git a/criu/include/autofs.h b/criu/include/autofs.h index c4e0f23ed7..b158025c7f 100644 --- a/criu/include/autofs.h +++ b/criu/include/autofs.h @@ -96,7 +96,7 @@ struct args_ismountpoint { struct autofs_dev_ioctl { __u32 ver_major; __u32 ver_minor; - __u32 size; /* total size of data passed in + __u32 size; /* total size of data passed in * including this struct */ __s32 ioctlfd; /* automount command fd */ diff --git a/criu/include/bfd.h b/criu/include/bfd.h index 4268f74d4a..2846ec6286 100644 --- a/criu/include/bfd.h +++ b/criu/include/bfd.h @@ -5,8 +5,8 @@ struct bfd_buf; struct xbuf { - char *mem; /* buffer */ - char *data; /* position we see bytes at */ + char *mem; /* buffer */ + char *data; /* position we see bytes at */ unsigned int sz; /* bytes sitting after b->pos */ struct bfd_buf *buf; }; diff --git a/criu/include/file-lock.h b/criu/include/file-lock.h index 0ce2fa3409..9ab79b66b1 100644 --- a/criu/include/file-lock.h +++ b/criu/include/file-lock.h @@ -30,12 +30,12 @@ #define LOCK_SH 1 /* shared lock */ #define LOCK_EX 2 /* exclusive lock */ #define LOCK_NB \ - 4 /* or'd with one of the above to prevent + 4 /* or'd with one of the above to prevent blocking */ #define LOCK_UN 8 /* remove lock */ -#define LOCK_MAND 32 /* This is a mandatory flock ... 
*/ -#define LOCK_READ 64 /* which allows concurrent read operations */ +#define LOCK_MAND 32 /* This is a mandatory flock ... */ +#define LOCK_READ 64 /* which allows concurrent read operations */ #define LOCK_WRITE 128 /* which allows concurrent write operations */ #define LOCK_RW 192 /* which allows concurrent read & write ops */ @@ -47,7 +47,7 @@ struct file_lock { int fl_kind; int fl_ltype; - pid_t fl_owner; /* process, which created the lock */ + pid_t fl_owner; /* process, which created the lock */ pid_t fl_holder; /* pid of fd on whose the lock is found */ int maj, min; unsigned long i_no; diff --git a/criu/include/files.h b/criu/include/files.h index 26ce1f42a5..aadc09f736 100644 --- a/criu/include/files.h +++ b/criu/include/files.h @@ -82,8 +82,8 @@ enum { struct fdinfo_list_entry { struct list_head desc_list; /* To chain on @fd_info_head */ - struct file_desc *desc; /* Associated file descriptor */ - struct list_head ps_list; /* To chain per-task files */ + struct file_desc *desc; /* Associated file descriptor */ + struct list_head ps_list; /* To chain per-task files */ struct pstree_item *task; FdinfoEntry *fe; int pid; diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 5045baee80..9f369be645 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -122,8 +122,8 @@ enum { /* file descriptors template */ struct cr_fd_desc_tmpl { const char *fmt; /* format for the name */ - u32 magic; /* magic in the header */ - int oflags; /* flags for image_open */ + u32 magic; /* magic in the header */ + int oflags; /* flags for image_open */ }; extern struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX]; diff --git a/criu/include/inet_diag.h b/criu/include/inet_diag.h index ea6f5e14e1..4996dd5563 100644 --- a/criu/include/inet_diag.h +++ b/criu/include/inet_diag.h @@ -31,7 +31,7 @@ struct inet_diag_req_compat { struct inet_diag_sockid id; __u32 idiag_states; /* States to dump */ - __u32 idiag_dbs; /* Tables to dump (NI) */ + __u32 
idiag_dbs; /* Tables to dump (NI) */ }; struct inet_diag_req_v2 { diff --git a/criu/include/kcmp.h b/criu/include/kcmp.h index a6774be471..575135f801 100644 --- a/criu/include/kcmp.h +++ b/criu/include/kcmp.h @@ -18,8 +18,8 @@ enum kcmp_type { /* Slot for KCMP_EPOLL_TFD */ typedef struct { - uint32_t efd; /* epoll file descriptor */ - uint32_t tfd; /* target file number */ + uint32_t efd; /* epoll file descriptor */ + uint32_t tfd; /* target file number */ uint32_t toff; /* target offset within same numbered sequence */ } kcmp_epoll_slot_t; diff --git a/criu/include/kerndat.h b/criu/include/kerndat.h index 2ded7d1da7..a28a95802e 100644 --- a/criu/include/kerndat.h +++ b/criu/include/kerndat.h @@ -18,7 +18,7 @@ extern int kerndat_init(void); enum pagemap_func { PM_UNKNOWN, - PM_DISABLED, /* /proc/pid/pagemap doesn't open (user mode) */ + PM_DISABLED, /* /proc/pid/pagemap doesn't open (user mode) */ PM_FLAGS_ONLY, /* pagemap zeroes pfn part (user mode) */ PM_FULL, }; diff --git a/criu/include/linux/mount.h b/criu/include/linux/mount.h index 840d6277e1..9a3a28b100 100644 --- a/criu/include/linux/mount.h +++ b/criu/include/linux/mount.h @@ -8,13 +8,13 @@ #include #else enum fsconfig_command { - FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ - FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ - FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ - FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ - FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ - FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ - FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by 
path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */ FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ }; #endif diff --git a/criu/include/mount.h b/criu/include/mount.h index 29d80c2a76..833a75ca0f 100644 --- a/criu/include/mount.h +++ b/criu/include/mount.h @@ -72,13 +72,13 @@ struct mount_info { struct list_head children; struct list_head siblings; - struct list_head mnt_bind; /* circular list of derivatives of one real mount */ - struct list_head mnt_share; /* circular list of shared mounts */ + struct list_head mnt_bind; /* circular list of derivatives of one real mount */ + struct list_head mnt_share; /* circular list of shared mounts */ struct list_head mnt_slave_list; /* list of slave mounts */ - struct list_head mnt_slave; /* slave list entry */ - struct mount_info *mnt_master; /* slave is on master->mnt_slave_list */ - struct list_head mnt_propagate; /* circular list of mounts which propagate from each other */ - struct list_head mnt_notprop; /* temporary list used in can_mount_now */ + struct list_head mnt_slave; /* slave list entry */ + struct mount_info *mnt_master; /* slave is on master->mnt_slave_list */ + struct list_head mnt_propagate; /* circular list of mounts which propagate from each other */ + struct list_head mnt_notprop; /* temporary list used in can_mount_now */ struct list_head mnt_unbindable; /* list of mounts with delayed unbindable */ struct list_head postpone; diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h index 034605917c..e2ea6e17f6 100644 --- a/criu/include/namespaces.h +++ b/criu/include/namespaces.h @@ -128,9 +128,9 @@ struct ns_id { */ union { int nsfd_id; /* a namespace descriptor id in fdstore */ - int ns_fd; /* a namespace file descriptor */ + int ns_fd; /* a namespace file descriptor */ }; - int nlsk; /* for sockets 
collection */ + int nlsk; /* for sockets collection */ int seqsk; /* to talk to parasite daemons */ struct list_head ids; struct list_head links; diff --git a/criu/include/page-pipe.h b/criu/include/page-pipe.h index 0917869d97..15178c0150 100644 --- a/criu/include/page-pipe.h +++ b/criu/include/page-pipe.h @@ -90,14 +90,14 @@ struct kernel_pipe_buffer { */ struct page_pipe_buf { - int p[2]; /* pipe with pages */ + int p[2]; /* pipe with pages */ unsigned int pipe_size; /* how many pages can be fit into pipe */ - unsigned int pipe_off; /* where this buf is started in a pipe */ - unsigned int pages_in; /* how many pages are there */ - unsigned int nr_segs; /* how many iov-s are busy */ + unsigned int pipe_off; /* where this buf is started in a pipe */ + unsigned int pages_in; /* how many pages are there */ + unsigned int nr_segs; /* how many iov-s are busy */ #define PPB_LAZY (1 << 0) unsigned int flags; - struct iovec *iov; /* vaddr:len map */ + struct iovec *iov; /* vaddr:len map */ struct list_head l; /* links into page_pipe->bufs */ }; @@ -113,19 +113,19 @@ struct page_pipe_buf { #define PP_HOLE_PARENT (1 << 0) struct page_pipe { - unsigned int nr_pipes; /* how many page_pipe_bufs in there */ - struct list_head bufs; /* list of bufs */ - struct list_head free_bufs; /* list of bufs */ + unsigned int nr_pipes; /* how many page_pipe_bufs in there */ + struct list_head bufs; /* list of bufs */ + struct list_head free_bufs; /* list of bufs */ struct page_pipe_buf *prev[PP_PIPE_TYPES]; /* last ppb of each type for pipe sharing */ - unsigned int nr_iovs; /* number of iovs */ - unsigned int free_iov; /* first free iov */ + unsigned int nr_iovs; /* number of iovs */ + unsigned int free_iov; /* first free iov */ struct iovec *iovs; /* iovs. 
They are provided into create_page_pipe and all bufs have their iov-s in there */ - unsigned int nr_holes; /* number of holes allocated */ + unsigned int nr_holes; /* number of holes allocated */ unsigned int free_hole; /* number of holes in use */ - struct iovec *holes; /* holes */ + struct iovec *holes; /* holes */ unsigned int *hole_flags; unsigned int flags; /* PP_FOO flags below */ }; diff --git a/criu/include/page-xfer.h b/criu/include/page-xfer.h index e0303dfe0a..1bcd4ff205 100644 --- a/criu/include/page-xfer.h +++ b/criu/include/page-xfer.h @@ -36,7 +36,7 @@ struct page_xfer { union { struct /* local */ { struct cr_img *pmi; /* pagemaps */ - struct cr_img *pi; /* pages */ + struct cr_img *pi; /* pages */ }; struct /* page-server */ { diff --git a/criu/include/pagemap-cache.h b/criu/include/pagemap-cache.h index 7612ee0f41..1d8bbffaf6 100644 --- a/criu/include/pagemap-cache.h +++ b/criu/include/pagemap-cache.h @@ -11,13 +11,13 @@ struct vma_area; #define PAGEMAP_PFN_OFF(addr) (PAGE_PFN(addr) * sizeof(u64)) typedef struct { - pid_t pid; /* which process it belongs */ - unsigned long start; /* start of area */ - unsigned long end; /* end of area */ + pid_t pid; /* which process it belongs */ + unsigned long start; /* start of area */ + unsigned long end; /* end of area */ const struct list_head *vma_head; /* list head of VMAs we're serving */ - u64 *map; /* local buffer */ - size_t map_len; /* length of a buffer */ - int fd; /* file to read PMs from */ + u64 *map; /* local buffer */ + size_t map_len; /* length of a buffer */ + int fd; /* file to read PMs from */ } pmc_t; #define PMC_INIT \ diff --git a/criu/include/pstree.h b/criu/include/pstree.h index c5b0fa7ea9..c1c79867b2 100644 --- a/criu/include/pstree.h +++ b/criu/include/pstree.h @@ -15,14 +15,14 @@ struct pstree_item { struct pstree_item *parent; struct list_head children; /* list of my children */ - struct list_head sibling; /* linkage in my parent's children list */ + struct list_head sibling; /* 
linkage in my parent's children list */ struct pid *pid; pid_t pgid; pid_t sid; pid_t born_sid; - int nr_threads; /* number of threads */ + int nr_threads; /* number of threads */ struct pid *threads; /* array of threads */ CoreEntry **core; TaskKobjIdsEntry *ids; diff --git a/criu/include/restorer.h b/criu/include/restorer.h index 934d60cf9a..308a0b79b3 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -138,7 +138,7 @@ struct task_restore_args { bool has_thp_enabled; /* threads restoration */ - int nr_threads; /* number of threads */ + int nr_threads; /* number of threads */ thread_restore_fcall_t clone_restore_fn; /* helper address for clone() call */ struct thread_restore_args *thread_args; /* array of thread arguments */ struct task_entries *task_entries; @@ -211,7 +211,7 @@ struct task_restore_args { bool can_map_vdso; bool auto_dedup; unsigned long vdso_rt_size; - struct vdso_maps vdso_maps_rt; /* runtime vdso symbols */ + struct vdso_maps vdso_maps_rt; /* runtime vdso symbols */ unsigned long vdso_rt_parked_at; /* safe place to keep vdso */ void **breakpoint; diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h index 2e2107b0eb..9664e0a1ca 100644 --- a/criu/include/rst_info.h +++ b/criu/include/rst_info.h @@ -17,7 +17,7 @@ struct task_entries { }; struct fdt { - int nr; /* How many tasks share this fd table */ + int nr; /* How many tasks share this fd table */ pid_t pid; /* Who should restore this fd table */ /* * The fd table is ready for restoing, if fdt_lock is equal to nr diff --git a/criu/include/servicefd.h b/criu/include/servicefd.h index e75e8444c8..c6979de7f4 100644 --- a/criu/include/servicefd.h +++ b/criu/include/servicefd.h @@ -22,10 +22,10 @@ enum sfd_type { * - For dump -- target ns' proc * - For restore -- CRIU ns' proc */ - ROOT_FD_OFF, /* Root of the namespace we dump/restore */ + ROOT_FD_OFF, /* Root of the namespace we dump/restore */ CGROUP_YARD, - USERNSD_SK, /* Socket for usernsd */ - NS_FD_OFF, /* Node's 
net namespace fd */ + USERNSD_SK, /* Socket for usernsd */ + NS_FD_OFF, /* Node's net namespace fd */ TRANSPORT_FD_OFF, /* to transfer file descriptors */ RPC_SK_OFF, FDSTORE_SK_OFF, diff --git a/criu/include/sk-inet.h b/criu/include/sk-inet.h index c832d63877..5dd2a65518 100644 --- a/criu/include/sk-inet.h +++ b/criu/include/sk-inet.h @@ -35,7 +35,7 @@ struct inet_sk_desc { unsigned int dst_port; unsigned int state; unsigned int rqlen; - unsigned int wqlen; /* sent + unsent data */ + unsigned int wqlen; /* sent + unsent data */ unsigned int uwqlen; /* unsent data */ unsigned int src_addr[4]; unsigned int dst_addr[4]; diff --git a/criu/include/sysfs_parse.h b/criu/include/sysfs_parse.h index ff0e611486..f987d622f6 100644 --- a/criu/include/sysfs_parse.h +++ b/criu/include/sysfs_parse.h @@ -2,9 +2,9 @@ #define __CR_SYSFS_PARSE_H__ #define SYSFS_AUFS "/sys/fs/aufs/" -#define SBINFO_LEN (3 + 16 + 1) /* si_%lx */ +#define SBINFO_LEN (3 + 16 + 1) /* si_%lx */ #define SBINFO_PATH_LEN (sizeof SYSFS_AUFS + SBINFO_LEN) /* /sys/fs/aufs/ */ -#define AUFSBR_PATH_LEN (SBINFO_PATH_LEN + 6 + 1) /* /sys/fs/aufs//br%3d */ +#define AUFSBR_PATH_LEN (SBINFO_PATH_LEN + 6 + 1) /* /sys/fs/aufs//br%3d */ struct mount_info; struct vma_area; diff --git a/criu/include/vma.h b/criu/include/vma.h index ed9f31ef67..541d6d6fd4 100644 --- a/criu/include/vma.h +++ b/criu/include/vma.h @@ -10,14 +10,14 @@ #include struct vm_area_list { - struct list_head h; /* list of VMAs */ - unsigned nr; /* nr of all VMAs in the list */ + struct list_head h; /* list of VMAs */ + unsigned nr; /* nr of all VMAs in the list */ unsigned int nr_aios; /* nr of AIOs VMAs in the list */ union { unsigned long nr_priv_pages; /* dmp: nr of pages in private VMAs */ unsigned long rst_priv_size; /* rst: size of private VMAs */ }; - unsigned long nr_priv_pages_longest; /* nr of pages in longest private VMA */ + unsigned long nr_priv_pages_longest; /* nr of pages in longest private VMA */ unsigned long nr_shared_pages_longest; 
/* nr of pages in longest shared VMA */ }; @@ -53,8 +53,8 @@ struct vma_area { struct /* for restore */ { int (*vm_open)(int pid, struct vma_area *vma); struct file_desc *vmfd; - struct vma_area *pvma; /* parent for inherited VMAs */ - unsigned long *page_bitmap; /* existent pages */ + struct vma_area *pvma; /* parent for inherited VMAs */ + unsigned long *page_bitmap; /* existent pages */ unsigned long premmaped_addr; /* restore only */ /* diff --git a/criu/irmap.c b/criu/irmap.c index 09570c5931..7b9d77bc1f 100644 --- a/criu/irmap.c +++ b/criu/irmap.c @@ -160,8 +160,8 @@ static int irmap_update_dir(struct irmap *t) k = &t->kids[nr - 1]; - k->kids = NULL; /* for xrealloc above */ - k->ino = 0; /* for irmap_update_stat */ + k->kids = NULL; /* for xrealloc above */ + k->ino = 0; /* for irmap_update_stat */ k->nr_kids = -1; /* for irmap_update_dir */ k->path = xsprintf("%s/%s", t->path, de->d_name); if (!k->path) diff --git a/criu/mount.c b/criu/mount.c index ec31f02c23..93725e5269 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -2561,7 +2561,7 @@ static LIST_HEAD(mnt_remap_list); static int remap_id; struct mnt_remap_entry { - struct mount_info *mi; /* child is remaped into the root yards */ + struct mount_info *mi; /* child is remaped into the root yards */ struct mount_info *parent; /* the origin parent for the child*/ struct list_head node; }; diff --git a/criu/net.c b/criu/net.c index 02115c4de1..50655559d7 100644 --- a/criu/net.c +++ b/criu/net.c @@ -1172,7 +1172,7 @@ struct newlink_req { * request. 
*/ struct newlink_extras { - int link; /* IFLA_LINK */ + int link; /* IFLA_LINK */ int target_netns; /* IFLA_NET_NS_FD */ }; @@ -1744,7 +1744,7 @@ static int __restore_link(struct ns_id *ns, struct net_link *link, int nlsk) switch (nde->type) { case ND_TYPE__LOOPBACK: /* fallthrough */ - case ND_TYPE__EXTLINK: /* see comment in images/netdev.proto */ + case ND_TYPE__EXTLINK: /* see comment in images/netdev.proto */ return restore_link_parms(link, nlsk); case ND_TYPE__VENET: return restore_one_link(ns, link, nlsk, venet_link_info, NULL); diff --git a/criu/pagemap.c b/criu/pagemap.c index 77e519dd1f..d996db7fc6 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -30,10 +30,10 @@ * One "job" for the preadv() syscall in pagemap.c */ struct page_read_iov { - off_t from; /* offset in pi file where to start reading from */ - off_t end; /* the end of the read == sum to.iov_len -s */ + off_t from; /* offset in pi file where to start reading from */ + off_t end; /* the end of the read == sum to.iov_len -s */ struct iovec *to; /* destination iovs */ - unsigned int nr; /* their number */ + unsigned int nr; /* their number */ struct list_head l; }; diff --git a/criu/shmem.c b/criu/shmem.c index bb48e436b2..a9ee8d7eb7 100644 --- a/criu/shmem.c +++ b/criu/shmem.c @@ -81,7 +81,7 @@ struct shmem_info { * an region. Each time when we found a process with a smaller pid, * we reset self_count, so we can't have only one counter. */ - int count; /* the number of regions */ + int count; /* the number of regions */ int self_count; /* the number of regions, which belongs to "pid" */ }; diff --git a/criu/sk-unix.c b/criu/sk-unix.c index f3fe60c6eb..a819473b40 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -958,9 +958,9 @@ struct unix_sk_info { struct unix_sk_info *peer; struct pprep_head peer_resolve; /* XXX : union with the above? 
*/ struct file_desc d; - struct hlist_node hash; /* To lookup socket by ino */ + struct hlist_node hash; /* To lookup socket by ino */ struct list_head connected; /* List of sockets, connected to me */ - struct list_head node; /* To link in peer's connected list */ + struct list_head node; /* To link in peer's connected list */ struct list_head scm_fles; struct list_head ghost_node; size_t ghost_dir_pos; diff --git a/criu/uffd.c b/criu/uffd.c index 18bdc040f0..f01e6999b4 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -71,8 +71,8 @@ static mutex_t *lazy_sock_mutex; struct lazy_iov { struct list_head l; - unsigned long start; /* run-time start address, tracks remaps */ - unsigned long end; /* run-time end address, tracks remaps */ + unsigned long start; /* run-time start address, tracks remaps */ + unsigned long end; /* run-time end address, tracks remaps */ unsigned long img_start; /* start address at the dump time */ }; diff --git a/criu/util.c b/criu/util.c index 414879971f..2917102fd4 100644 --- a/criu/util.c +++ b/criu/util.c @@ -1669,8 +1669,8 @@ __attribute__((returns_twice)) static pid_t raw_legacy_clone(unsigned long flags */ "addx %%g0, 0, %%g1" : "=r"(g1), "=r"(o0), "=r"(o1), "=r"(o2) /* outputs */ - : "r"(g1), "r"(o0), "r"(o1), "r"(o2) /* inputs */ - : "%cc"); /* clobbers */ + : "r"(g1), "r"(o0), "r"(o1), "r"(o2) /* inputs */ + : "%cc"); /* clobbers */ is_error = g1; retval = o0; diff --git a/include/common/arch/ppc64/asm/bitops.h b/include/common/arch/ppc64/asm/bitops.h index 704668263d..dbfa6be7f1 100644 --- a/include/common/arch/ppc64/asm/bitops.h +++ b/include/common/arch/ppc64/asm/bitops.h @@ -196,7 +196,7 @@ static inline unsigned long find_next_bit(const unsigned long *addr, unsigned lo found_first: tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ + if (tmp == 0UL) /* Are any bits set? */ return result + size; /* Nope. 
*/ found_middle: return result + __ffs(tmp); diff --git a/include/common/arch/x86/asm/bitops.h b/include/common/arch/x86/asm/bitops.h index d7a60589b1..c13c1eb451 100644 --- a/include/common/arch/x86/asm/bitops.h +++ b/include/common/arch/x86/asm/bitops.h @@ -113,7 +113,7 @@ static inline unsigned long find_next_bit(const unsigned long *addr, unsigned lo found_first: tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ + if (tmp == 0UL) /* Are any bits set? */ return result + size; /* Nope. */ found_middle: return result + __ffs(tmp); diff --git a/include/common/asm-generic/bitops.h b/include/common/asm-generic/bitops.h index 064ba4cc47..004da4c4ed 100644 --- a/include/common/asm-generic/bitops.h +++ b/include/common/asm-generic/bitops.h @@ -97,7 +97,7 @@ static inline unsigned long find_next_bit(const unsigned long *addr, unsigned lo found_first: tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ + if (tmp == 0UL) /* Are any bits set? */ return result + size; /* Nope. 
*/ found_middle: return result + __ffs(tmp); diff --git a/soccr/soccr.c b/soccr/soccr.c index f6fb1946b7..8be2d28e15 100644 --- a/soccr/soccr.c +++ b/soccr/soccr.c @@ -609,8 +609,8 @@ static int send_fin(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsig libnet_type = LIBNET_RAW4; l = libnet_init(libnet_type, /* injection type */ - NULL, /* network interface */ - errbuf); /* errbuf */ + NULL, /* network interface */ + errbuf); /* errbuf */ if (l == NULL) { loge("libnet_init failed (%s)\n", errbuf); return -1; @@ -623,17 +623,17 @@ static int send_fin(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsig ret = libnet_build_tcp(ntohs(sk->dst_addr->v4.sin_port), /* source port */ ntohs(sk->src_addr->v4.sin_port), /* destination port */ - data->inq_seq, /* sequence number */ - data->outq_seq - data->outq_len, /* acknowledgement num */ - flags, /* control flags */ - data->rcv_wnd, /* window size */ - 0, /* checksum */ - 10, /* urgent pointer */ - LIBNET_TCP_H + 20, /* TCP packet size */ - NULL, /* payload */ - 0, /* payload size */ - l, /* libnet handle */ - 0); /* libnet id */ + data->inq_seq, /* sequence number */ + data->outq_seq - data->outq_len, /* acknowledgement num */ + flags, /* control flags */ + data->rcv_wnd, /* window size */ + 0, /* checksum */ + 10, /* urgent pointer */ + LIBNET_TCP_H + 20, /* TCP packet size */ + NULL, /* payload */ + 0, /* payload size */ + l, /* libnet handle */ + 0); /* libnet id */ if (ret == -1) { loge("Can't build TCP header: %s\n", libnet_geterror(l)); goto err; @@ -646,28 +646,28 @@ static int send_fin(struct libsoccr_sk *sk, struct libsoccr_sk_data *data, unsig memcpy(&src, &sk->src_addr->v6.sin6_addr, sizeof(src)); ret = libnet_build_ipv6(0, 0, LIBNET_TCP_H, /* length */ - IPPROTO_TCP, /* protocol */ - 64, /* hop limit */ - dst, /* source IP */ - src, /* destination IP */ - NULL, /* payload */ - 0, /* payload size */ - l, /* libnet handle */ - 0); /* libnet id */ + IPPROTO_TCP, /* protocol */ + 64, /* hop limit 
*/ + dst, /* source IP */ + src, /* destination IP */ + NULL, /* payload */ + 0, /* payload size */ + l, /* libnet handle */ + 0); /* libnet id */ } else if (family == AF_INET) ret = libnet_build_ipv4(LIBNET_IPV4_H + LIBNET_TCP_H + 20, /* length */ - 0, /* TOS */ - 242, /* IP ID */ - 0, /* IP Frag */ - 64, /* TTL */ - IPPROTO_TCP, /* protocol */ - 0, /* checksum */ - dst_v4, /* source IP */ - src_v4, /* destination IP */ - NULL, /* payload */ - 0, /* payload size */ - l, /* libnet handle */ - 0); /* libnet id */ + 0, /* TOS */ + 242, /* IP ID */ + 0, /* IP Frag */ + 64, /* TTL */ + IPPROTO_TCP, /* protocol */ + 0, /* checksum */ + dst_v4, /* source IP */ + src_v4, /* destination IP */ + NULL, /* payload */ + 0, /* payload size */ + l, /* libnet handle */ + 0); /* libnet id */ else { loge("Unknown socket family\n"); goto err; diff --git a/soccr/soccr.h b/soccr/soccr.h index 934d438277..e7091e5918 100644 --- a/soccr/soccr.h +++ b/soccr/soccr.h @@ -1,9 +1,9 @@ #ifndef __LIBSOCCR_H__ #define __LIBSOCCR_H__ -#include /* sockaddr_in, sockaddr_in6 */ +#include /* sockaddr_in, sockaddr_in6 */ #include /* TCP_REPAIR_WINDOW, TCP_TIMESTAMP */ -#include /* uint32_t */ -#include /* sockaddr */ +#include /* uint32_t */ +#include /* sockaddr */ #include "common/config.h" diff --git a/test/zdtm/static/aio01.c b/test/zdtm/static/aio01.c index ed45192b97..100069b03d 100644 --- a/test/zdtm/static/aio01.c +++ b/test/zdtm/static/aio01.c @@ -14,8 +14,8 @@ const char *test_doc = "Check head and tail restore correct"; const char *test_author = "Kirill Tkhai "; struct aio_ring { - unsigned id; /* kernel internal index number */ - unsigned nr; /* number of io_events */ + unsigned id; /* kernel internal index number */ + unsigned nr; /* number of io_events */ unsigned head; /* Written to by userland or under ring_lock * mutex by aio_read_events_ring(). 
*/ unsigned tail; diff --git a/test/zdtm/static/auto_dev-ioctl.h b/test/zdtm/static/auto_dev-ioctl.h index e65259b307..1b35fe2f7d 100644 --- a/test/zdtm/static/auto_dev-ioctl.h +++ b/test/zdtm/static/auto_dev-ioctl.h @@ -95,7 +95,7 @@ struct args_ismountpoint { struct autofs_dev_ioctl { __u32 ver_major; __u32 ver_minor; - __u32 size; /* total size of data passed in + __u32 size; /* total size of data passed in * including this struct */ __s32 ioctlfd; /* automount command fd */ diff --git a/test/zdtm/static/file_locks00.c b/test/zdtm/static/file_locks00.c index 0b5d1313b2..01782fa7a4 100644 --- a/test/zdtm/static/file_locks00.c +++ b/test/zdtm/static/file_locks00.c @@ -23,10 +23,10 @@ static int lock_reg(int fd, int cmd, int type, int whence, off_t offset, off_t l { struct flock lock; - lock.l_type = type; /* F_RDLCK, F_WRLCK, F_UNLCK */ + lock.l_type = type; /* F_RDLCK, F_WRLCK, F_UNLCK */ lock.l_whence = whence; /* SEEK_SET, SEEK_CUR, SEEK_END */ - lock.l_start = offset; /* byte offset, relative to l_whence */ - lock.l_len = len; /* #bytes (0 means to EOF) */ + lock.l_start = offset; /* byte offset, relative to l_whence */ + lock.l_len = len; /* #bytes (0 means to EOF) */ errno = 0; return fcntl(fd, cmd, &lock); @@ -40,10 +40,10 @@ static int check_read_lock(int fd, int whence, off_t offset, off_t len) struct flock lock; int ret; - lock.l_type = F_RDLCK; /* F_RDLCK, F_WRLCK, F_UNLCK */ + lock.l_type = F_RDLCK; /* F_RDLCK, F_WRLCK, F_UNLCK */ lock.l_whence = whence; /* SEEK_SET, SEEK_CUR, SEEK_END */ - lock.l_start = offset; /* byte offset, relative to l_whence */ - lock.l_len = len; /* #bytes (0 means to EOF) */ + lock.l_start = offset; /* byte offset, relative to l_whence */ + lock.l_len = len; /* #bytes (0 means to EOF) */ lock.l_pid = -1; errno = 0; @@ -69,10 +69,10 @@ static int check_write_lock(int fd, int whence, off_t offset, off_t len) int ret; pid_t ppid = getppid(); - lock.l_type = F_WRLCK; /* F_RDLCK, F_WRLCK, F_UNLCK */ + lock.l_type = F_WRLCK; /* 
F_RDLCK, F_WRLCK, F_UNLCK */ lock.l_whence = whence; /* SEEK_SET, SEEK_CUR, SEEK_END */ - lock.l_start = offset; /* byte offset, relative to l_whence */ - lock.l_len = len; /* #bytes (0 means to EOF) */ + lock.l_start = offset; /* byte offset, relative to l_whence */ + lock.l_len = len; /* #bytes (0 means to EOF) */ lock.l_pid = -1; errno = 0; diff --git a/test/zdtm/static/ipc_namespace.c b/test/zdtm/static/ipc_namespace.c index 4273951aed..b13b357bac 100644 --- a/test/zdtm/static/ipc_namespace.c +++ b/test/zdtm/static/ipc_namespace.c @@ -52,10 +52,10 @@ struct ipc_ns { // unsigned int mq_queues_count; - unsigned int mq_queues_max; /* initialized to DFLT_QUEUESMAX */ - unsigned int mq_msg_max; /* initialized to DFLT_MSGMAX */ - unsigned int mq_msgsize_max; /* initialized to DFLT_MSGSIZEMAX */ - unsigned int mq_msg_default; /* initialized to DFLT_MSG */ + unsigned int mq_queues_max; /* initialized to DFLT_QUEUESMAX */ + unsigned int mq_msg_max; /* initialized to DFLT_MSGMAX */ + unsigned int mq_msgsize_max; /* initialized to DFLT_MSGSIZEMAX */ + unsigned int mq_msg_default; /* initialized to DFLT_MSG */ unsigned int mq_msgsize_default; /* initialized to DFLT_MSGSIZE */ struct user_ns *user_ns; diff --git a/test/zdtm/static/netns-dev.c b/test/zdtm/static/netns-dev.c index e220daa7f4..1e6ee1dea5 100644 --- a/test/zdtm/static/netns-dev.c +++ b/test/zdtm/static/netns-dev.c @@ -55,36 +55,36 @@ struct range { }; struct range rand_range4[] = { - { 0, 1 }, /* accept_local */ - { -1, 0 }, /* accept_source_route */ - { 0, 1 }, /* arp_accept */ - { 0, 2 }, /* arp_announce */ - { 0, 1 }, /* arp_filter */ - { 0, 8 }, /* arp_ignore */ - { 0, 1 }, /* arp_notify */ - { 0, 1 }, /* bootp_relay */ - { 0, 1 }, /* disable_policy */ - { 0, 1 }, /* disable_xfrm */ - { 0, 1 }, /* drop_gratuitous_arp */ - { 0, 1 }, /* drop_unicast_in_l2_multicast */ - { 0, INT_MAX }, /* force_igmp_version */ - { 0, 1 }, /* forwarding */ - { 0, 1 }, /* accept_redirects */ - { 0, INT_MAX }, /* 
igmpv2_unsolicited_report_interval */ - { 0, INT_MAX }, /* igmpv3_unsolicited_report_interval */ - { 0, 1 }, /* ignore_routes_with_linkdown */ - { 0, 1 }, /* log_martians */ - { 0, 1 }, /* mc_forwarding */ - { -1, INT_MAX }, /* medium_id */ - { 0, 1 }, /* promote_secondaries */ - { 0, 1 }, /* proxy_arp */ - { 0, 1 }, /* proxy_arp_pvlan */ - { 0, 1 }, /* route_localnet */ - { 0, 2 }, /* rp_filter */ - { 0, 1 }, /* secure_redirects */ - { 0, 1 }, /* send_redirects */ - { 0, 1 }, /* shared_media */ - { 0, 1 }, /* src_valid_mark */ + { 0, 1 }, /* accept_local */ + { -1, 0 }, /* accept_source_route */ + { 0, 1 }, /* arp_accept */ + { 0, 2 }, /* arp_announce */ + { 0, 1 }, /* arp_filter */ + { 0, 8 }, /* arp_ignore */ + { 0, 1 }, /* arp_notify */ + { 0, 1 }, /* bootp_relay */ + { 0, 1 }, /* disable_policy */ + { 0, 1 }, /* disable_xfrm */ + { 0, 1 }, /* drop_gratuitous_arp */ + { 0, 1 }, /* drop_unicast_in_l2_multicast */ + { 0, INT_MAX }, /* force_igmp_version */ + { 0, 1 }, /* forwarding */ + { 0, 1 }, /* accept_redirects */ + { 0, INT_MAX }, /* igmpv2_unsolicited_report_interval */ + { 0, INT_MAX }, /* igmpv3_unsolicited_report_interval */ + { 0, 1 }, /* ignore_routes_with_linkdown */ + { 0, 1 }, /* log_martians */ + { 0, 1 }, /* mc_forwarding */ + { -1, INT_MAX }, /* medium_id */ + { 0, 1 }, /* promote_secondaries */ + { 0, 1 }, /* proxy_arp */ + { 0, 1 }, /* proxy_arp_pvlan */ + { 0, 1 }, /* route_localnet */ + { 0, 2 }, /* rp_filter */ + { 0, 1 }, /* secure_redirects */ + { 0, 1 }, /* send_redirects */ + { 0, 1 }, /* shared_media */ + { 0, 1 }, /* src_valid_mark */ { INT_MIN, INT_MAX }, /* tag */ }; @@ -139,47 +139,47 @@ char *devconfs6[] = { #define MAX_ADDRESSES 128 struct range rand_range6[] = { - { 0, 2 }, /* accept_dad */ - { 0, 2 }, /* accept_ra */ - { 0, 1 }, /* accept_ra_defrtr */ - { 0, 1 }, /* accept_ra_from_local */ - { 0, INT_MAX }, /* accept_ra_min_hop_limit */ - { 0, 1 }, /* accept_ra_mtu */ - { 0, 1 }, /* accept_ra_pinfo */ - { 0, INT_MAX }, /* 
accept_ra_rt_info_max_plen */ - { 0, 1 }, /* accept_ra_rtr_pref */ - { -1, 0 }, /* accept_source_route */ - { 0, 1 }, /* autoconf */ - { 0, INT_MAX }, /* dad_transmits */ - { 0, 1 }, /* disable_ipv6 */ - { 0, 1 }, /* drop_unicast_in_l2_multicast */ - { 0, 1 }, /* drop_unsolicited_na */ - { 0, 2 }, /* force_mld_version */ - { 0, 1 }, /* force_tllao */ - { 0, 1 }, /* forwarding */ - { 0, 1 }, /* accept_redirects */ - { 1, 255 }, /* hop_limit */ - { 0, 1 }, /* ignore_routes_with_linkdown */ - { -1, 1 }, /* keep_addr_on_down */ - { 0, MAX_ADDRESSES }, /* max_addresses */ - { 0, INT_MAX }, /* max_desync_factor */ - { 0, INT_MAX }, /* mldv1_unsolicited_report_interval */ - { 0, INT_MAX }, /* mldv2_unsolicited_report_interval */ + { 0, 2 }, /* accept_dad */ + { 0, 2 }, /* accept_ra */ + { 0, 1 }, /* accept_ra_defrtr */ + { 0, 1 }, /* accept_ra_from_local */ + { 0, INT_MAX }, /* accept_ra_min_hop_limit */ + { 0, 1 }, /* accept_ra_mtu */ + { 0, 1 }, /* accept_ra_pinfo */ + { 0, INT_MAX }, /* accept_ra_rt_info_max_plen */ + { 0, 1 }, /* accept_ra_rtr_pref */ + { -1, 0 }, /* accept_source_route */ + { 0, 1 }, /* autoconf */ + { 0, INT_MAX }, /* dad_transmits */ + { 0, 1 }, /* disable_ipv6 */ + { 0, 1 }, /* drop_unicast_in_l2_multicast */ + { 0, 1 }, /* drop_unsolicited_na */ + { 0, 2 }, /* force_mld_version */ + { 0, 1 }, /* force_tllao */ + { 0, 1 }, /* forwarding */ + { 0, 1 }, /* accept_redirects */ + { 1, 255 }, /* hop_limit */ + { 0, 1 }, /* ignore_routes_with_linkdown */ + { -1, 1 }, /* keep_addr_on_down */ + { 0, MAX_ADDRESSES }, /* max_addresses */ + { 0, INT_MAX }, /* max_desync_factor */ + { 0, INT_MAX }, /* mldv1_unsolicited_report_interval */ + { 0, INT_MAX }, /* mldv2_unsolicited_report_interval */ { IPV6_MIN_MTU, IPV6_MIN_MTU }, /* mtu */ - { 0, 1 }, /* ndisc_notify */ - { 0, 1 }, /* optimistic_dad */ - { 0, 1 }, /* proxy_ndp */ - { 0, INT_MAX }, /* regen_max_retry */ - { 0, ROUTER_MAX }, /* router_probe_interval */ - { 0, ROUTER_MAX }, /* 
router_solicitation_delay */ - { 0, ROUTER_MAX }, /* router_solicitation_interval */ - { 0, ROUTER_MAX }, /* router_solicitations */ - { 0, 1 }, /* suppress_frag_ndisc */ - { 0, INT_MAX }, /* temp_prefered_lft */ - { 0, INT_MAX }, /* temp_valid_lft */ - { 0, 1 }, /* use_oif_addrs_only */ - { 0, 1 }, /* use_optimistic */ - { 0, 2 }, /* use_tempaddr */ + { 0, 1 }, /* ndisc_notify */ + { 0, 1 }, /* optimistic_dad */ + { 0, 1 }, /* proxy_ndp */ + { 0, INT_MAX }, /* regen_max_retry */ + { 0, ROUTER_MAX }, /* router_probe_interval */ + { 0, ROUTER_MAX }, /* router_solicitation_delay */ + { 0, ROUTER_MAX }, /* router_solicitation_interval */ + { 0, ROUTER_MAX }, /* router_solicitations */ + { 0, 1 }, /* suppress_frag_ndisc */ + { 0, INT_MAX }, /* temp_prefered_lft */ + { 0, INT_MAX }, /* temp_valid_lft */ + { 0, 1 }, /* use_oif_addrs_only */ + { 0, 1 }, /* use_optimistic */ + { 0, 2 }, /* use_tempaddr */ }; struct test_conf { diff --git a/test/zdtm/static/s390x_regs_check.c b/test/zdtm/static/s390x_regs_check.c index 8d6b479974..40c480b3f2 100644 --- a/test/zdtm/static/s390x_regs_check.c +++ b/test/zdtm/static/s390x_regs_check.c @@ -59,11 +59,11 @@ static int pipefd[2]; */ struct reg_set { const char *name; /* Name of regset */ - int nr; /* Number of regset */ - void *data; /* Test data */ - int len; /* Number of bytes of test data */ - bool optional; /* Not all kernels/machines have this reg set */ - bool available; /* Current kernel/machine has this reg set */ + int nr; /* Number of regset */ + void *data; /* Test data */ + int len; /* Number of bytes of test data */ + bool optional; /* Not all kernels/machines have this reg set */ + bool available; /* Current kernel/machine has this reg set */ }; /* @@ -397,8 +397,8 @@ static inline void send_tid_and_loop(int fd) asm volatile("lgr 2,%0\n" /* Arg 1: fd */ "la 3,%1\n" /* Arg 2: &tid */ - "lghi 4,4\n" /* Arg 3: sizeof(int) */ - "svc 4\n" /* __NR_write SVC: */ + "lghi 4,4\n" /* Arg 3: sizeof(int) */ + "svc 4\n" /* 
__NR_write SVC: */ /* After SVC no more registers are changed */ "0: j 0b\n" /* Loop here */ : diff --git a/test/zdtm/static/session01.c b/test/zdtm/static/session01.c index 0f727a9a67..31a617de93 100644 --- a/test/zdtm/static/session01.c +++ b/test/zdtm/static/session01.c @@ -40,22 +40,22 @@ enum { static struct testcase *testcases; static futex_t *fstate; static struct testcase __testcases[] = { - { 2, 1, 2, 1, 2, 1 }, /* session00 */ - { 4, 2, 4, 2, 4, 1 }, /* |\_session00 */ - { 15, 4, 4, 4, 15, 1 }, /* | |\_session00 */ - { 16, 4, 4, 4, 15, 1 }, /* | \_session00 */ - { 17, 4, 4, 4, 17, 0 }, /* | |\_session00 */ - { 18, 4, 4, 4, 17, 1 }, /* | \_session00 */ - { 5, 2, 2, 2, 2, 1 }, /* |\_session00 */ - { 8, 2, 8, 2, 8, 1 }, /* |\_session00 */ - { 9, 8, 2, 2, 2, 1 }, /* | \_session00 */ - { 10, 2, 10, 2, 10, 1 }, /* |\_session00 */ + { 2, 1, 2, 1, 2, 1 }, /* session00 */ + { 4, 2, 4, 2, 4, 1 }, /* |\_session00 */ + { 15, 4, 4, 4, 15, 1 }, /* | |\_session00 */ + { 16, 4, 4, 4, 15, 1 }, /* | \_session00 */ + { 17, 4, 4, 4, 17, 0 }, /* | |\_session00 */ + { 18, 4, 4, 4, 17, 1 }, /* | \_session00 */ + { 5, 2, 2, 2, 2, 1 }, /* |\_session00 */ + { 8, 2, 8, 2, 8, 1 }, /* |\_session00 */ + { 9, 8, 2, 2, 2, 1 }, /* | \_session00 */ + { 10, 2, 10, 2, 10, 1 }, /* |\_session00 */ { 11, 10, 11, 2, 11, 1 }, /* | \_session00 */ - { 12, 11, 2, 2, 2, 1 }, /* | \_session00 */ - { 13, 2, 2, 2, 2, 0 }, /* \_session00 */ - { 3, 13, 2, 2, 2, 1 }, /* session00 */ - { 6, 2, 6, 2, 6, 0 }, /* \_session00 */ - { 14, 6, 6, 6, 6, 1 }, /* session00 */ + { 12, 11, 2, 2, 2, 1 }, /* | \_session00 */ + { 13, 2, 2, 2, 2, 0 }, /* \_session00 */ + { 3, 13, 2, 2, 2, 1 }, /* session00 */ + { 6, 2, 6, 2, 6, 0 }, /* \_session00 */ + { 14, 6, 6, 6, 6, 1 }, /* session00 */ }; #define TESTS (sizeof(__testcases) / sizeof(struct testcase)) diff --git a/test/zdtm/static/sigpending.c b/test/zdtm/static/sigpending.c index 1641fdd86e..ce03ff55c9 100644 --- a/test/zdtm/static/sigpending.c +++ 
b/test/zdtm/static/sigpending.c @@ -18,7 +18,7 @@ static int numsig; #define TESTSIG (SIGRTMAX) #define THREADSIG (SIGRTMIN) static siginfo_t share_infos[2]; -static siginfo_t self_infos[64]; /* self */ +static siginfo_t self_infos[64]; /* self */ static siginfo_t thread_infos[3]; /* thread */ static int share_nr; static int self_nr; diff --git a/test/zdtm/transition/ptrace.c b/test/zdtm/transition/ptrace.c index bf6344f1cb..ee10c80043 100644 --- a/test/zdtm/transition/ptrace.c +++ b/test/zdtm/transition/ptrace.c @@ -31,7 +31,7 @@ int main(int argc, char **argv) { int pid, status, i, stopped; #define PT_REGS_SIZE 4096 /* big enough for any arch */ -#define PT_REGS_ALIGN 16 /* big enough for any arch */ +#define PT_REGS_ALIGN 16 /* big enough for any arch */ char regs[PT_REGS_SIZE] __attribute__((aligned(PT_REGS_ALIGN))); int *pids; From 688149930abd96fd9e1db4ee3b67c727bab4f3d8 Mon Sep 17 00:00:00 2001 From: Liu Hua Date: Mon, 1 Nov 2021 20:50:58 +0800 Subject: [PATCH 044/121] cr-dump: fail dumping when zombie process with sid 0 A zombie process with 0 sid has a session leader in outer pidns and has ignored SIGHUP. Criu has no way to restore this type of process, so fail the dump. 
Signed-off-by: Liu Hua --- criu/cr-dump.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/criu/cr-dump.c b/criu/cr-dump.c index ecc99f116f..c972e343aa 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -1129,6 +1129,13 @@ static int dump_zombies(void) item->pgid = pps_buf.pgid; BUG_ON(!list_empty(&item->children)); + + if (!item->sid) { + pr_err("A session leader of zombie process %d(%d) is outside of its pid namespace\n", + item->pid->real, vpid(item)); + goto err; + } + if (dump_one_zombie(item, &pps_buf) < 0) goto err; } From fef55d6b7cafddc13ecc29920b9726406f820ee6 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 29 Oct 2021 10:35:28 +0300 Subject: [PATCH 045/121] clang-format: make x86_ins_capability_mask human-readable There is no option in clang not to merge as much binary operands as it fits in column limit, but here we need each bit on new line to make it readable, so let's disable clang-format for x86_ins_capability_masks. Signed-off-by: Pavel Tikhomirov --- criu/arch/x86/cpu.c | 139 ++++++++++++++++++++++++++++---------------- 1 file changed, 89 insertions(+), 50 deletions(-) diff --git a/criu/arch/x86/cpu.c b/criu/arch/x86/cpu.c index d02f4abd5b..b3a7ca6365 100644 --- a/criu/arch/x86/cpu.c +++ b/criu/arch/x86/cpu.c @@ -107,64 +107,103 @@ int cpu_dump_cpuinfo(void) #define __ins_bit(__l, __v) (1u << ((__v)-32u * (__l))) +// clang-format off static uint32_t x86_ins_capability_mask[NCAPINTS] = { - [CPUID_1_EDX] = __ins_bit(CPUID_1_EDX, X86_FEATURE_FPU) | __ins_bit(CPUID_1_EDX, X86_FEATURE_TSC) | - __ins_bit(CPUID_1_EDX, X86_FEATURE_CX8) | __ins_bit(CPUID_1_EDX, X86_FEATURE_SEP) | - __ins_bit(CPUID_1_EDX, X86_FEATURE_CMOV) | __ins_bit(CPUID_1_EDX, X86_FEATURE_CLFLUSH) | - __ins_bit(CPUID_1_EDX, X86_FEATURE_MMX) | __ins_bit(CPUID_1_EDX, X86_FEATURE_FXSR) | - __ins_bit(CPUID_1_EDX, X86_FEATURE_XMM) | __ins_bit(CPUID_1_EDX, X86_FEATURE_XMM2), - - [CPUID_8000_0001_EDX] = __ins_bit(CPUID_8000_0001_EDX, X86_FEATURE_SYSCALL) | - 
__ins_bit(CPUID_8000_0001_EDX, X86_FEATURE_MMXEXT) | - __ins_bit(CPUID_8000_0001_EDX, X86_FEATURE_RDTSCP) | - __ins_bit(CPUID_8000_0001_EDX, X86_FEATURE_3DNOWEXT) | - __ins_bit(CPUID_8000_0001_EDX, X86_FEATURE_3DNOW), - - [CPUID_LNX_1] = __ins_bit(CPUID_LNX_1, X86_FEATURE_REP_GOOD) | __ins_bit(CPUID_LNX_1, X86_FEATURE_NOPL), - - [CPUID_1_ECX] = __ins_bit(CPUID_1_ECX, X86_FEATURE_XMM3) | __ins_bit(CPUID_1_ECX, X86_FEATURE_PCLMULQDQ) | - __ins_bit(CPUID_1_ECX, X86_FEATURE_MWAIT) | __ins_bit(CPUID_1_ECX, X86_FEATURE_SSSE3) | - __ins_bit(CPUID_1_ECX, X86_FEATURE_CX16) | __ins_bit(CPUID_1_ECX, X86_FEATURE_XMM4_1) | - __ins_bit(CPUID_1_ECX, X86_FEATURE_XMM4_2) | __ins_bit(CPUID_1_ECX, X86_FEATURE_MOVBE) | - __ins_bit(CPUID_1_ECX, X86_FEATURE_POPCNT) | __ins_bit(CPUID_1_ECX, X86_FEATURE_AES) | - __ins_bit(CPUID_1_ECX, X86_FEATURE_XSAVE) | __ins_bit(CPUID_1_ECX, X86_FEATURE_OSXSAVE) | - __ins_bit(CPUID_1_ECX, X86_FEATURE_AVX) | __ins_bit(CPUID_1_ECX, X86_FEATURE_F16C) | - __ins_bit(CPUID_1_ECX, X86_FEATURE_RDRAND), + [CPUID_1_EDX] = + __ins_bit(CPUID_1_EDX, X86_FEATURE_FPU) | + __ins_bit(CPUID_1_EDX, X86_FEATURE_TSC) | + __ins_bit(CPUID_1_EDX, X86_FEATURE_CX8) | + __ins_bit(CPUID_1_EDX, X86_FEATURE_SEP) | + __ins_bit(CPUID_1_EDX, X86_FEATURE_CMOV) | + __ins_bit(CPUID_1_EDX, X86_FEATURE_CLFLUSH) | + __ins_bit(CPUID_1_EDX, X86_FEATURE_MMX) | + __ins_bit(CPUID_1_EDX, X86_FEATURE_FXSR) | + __ins_bit(CPUID_1_EDX, X86_FEATURE_XMM) | + __ins_bit(CPUID_1_EDX, X86_FEATURE_XMM2), + + [CPUID_8000_0001_EDX] = + __ins_bit(CPUID_8000_0001_EDX, X86_FEATURE_SYSCALL) | + __ins_bit(CPUID_8000_0001_EDX, X86_FEATURE_MMXEXT) | + __ins_bit(CPUID_8000_0001_EDX, X86_FEATURE_RDTSCP) | + __ins_bit(CPUID_8000_0001_EDX, X86_FEATURE_3DNOWEXT) | + __ins_bit(CPUID_8000_0001_EDX, X86_FEATURE_3DNOW), + + [CPUID_LNX_1] = + __ins_bit(CPUID_LNX_1, X86_FEATURE_REP_GOOD) | + __ins_bit(CPUID_LNX_1, X86_FEATURE_NOPL), + + [CPUID_1_ECX] = + __ins_bit(CPUID_1_ECX, X86_FEATURE_XMM3) | + __ins_bit(CPUID_1_ECX, 
X86_FEATURE_PCLMULQDQ) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_MWAIT) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_SSSE3) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_CX16) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_XMM4_1) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_XMM4_2) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_MOVBE) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_POPCNT) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_AES) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_XSAVE) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_OSXSAVE) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_AVX) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_F16C) | + __ins_bit(CPUID_1_ECX, X86_FEATURE_RDRAND), [CPUID_8000_0001_ECX] = - __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_ABM) | __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_SSE4A) | - __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_MISALIGNSSE) | - __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_3DNOWPREFETCH) | - __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_XOP) | __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_FMA4) | - __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_TBM), + __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_ABM) | + __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_SSE4A) | + __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_MISALIGNSSE) | + __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_3DNOWPREFETCH) | + __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_XOP) | + __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_FMA4) | + __ins_bit(CPUID_8000_0001_ECX, X86_FEATURE_TBM), [CPUID_7_0_EBX] = - __ins_bit(CPUID_7_0_EBX, X86_FEATURE_FSGSBASE) | __ins_bit(CPUID_7_0_EBX, X86_FEATURE_BMI1) | - __ins_bit(CPUID_7_0_EBX, X86_FEATURE_HLE) | __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX2) | - __ins_bit(CPUID_7_0_EBX, X86_FEATURE_BMI2) | __ins_bit(CPUID_7_0_EBX, X86_FEATURE_ERMS) | - __ins_bit(CPUID_7_0_EBX, X86_FEATURE_RTM) | __ins_bit(CPUID_7_0_EBX, X86_FEATURE_MPX) | - __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512F) | __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512DQ) | - __ins_bit(CPUID_7_0_EBX, X86_FEATURE_RDSEED) | __ins_bit(CPUID_7_0_EBX, X86_FEATURE_ADX) | - 
__ins_bit(CPUID_7_0_EBX, X86_FEATURE_CLFLUSHOPT) | __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512PF) | - __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512ER) | __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512CD) | - __ins_bit(CPUID_7_0_EBX, X86_FEATURE_SHA_NI) | __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512BW) | - __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512VL), - - [CPUID_D_1_EAX] = __ins_bit(CPUID_D_1_EAX, X86_FEATURE_XSAVEOPT) | - __ins_bit(CPUID_D_1_EAX, X86_FEATURE_XSAVEC) | __ins_bit(CPUID_D_1_EAX, X86_FEATURE_XGETBV1), + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_FSGSBASE) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_BMI1) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_HLE) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX2) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_BMI2) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_ERMS) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_RTM) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_MPX) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512F) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512DQ) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_RDSEED) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_ADX) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_CLFLUSHOPT) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512PF) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512ER) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512CD) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_SHA_NI) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512BW) | + __ins_bit(CPUID_7_0_EBX, X86_FEATURE_AVX512VL), + + [CPUID_D_1_EAX] = + __ins_bit(CPUID_D_1_EAX, X86_FEATURE_XSAVEOPT) | + __ins_bit(CPUID_D_1_EAX, X86_FEATURE_XSAVEC) | + __ins_bit(CPUID_D_1_EAX, X86_FEATURE_XGETBV1), [CPUID_7_0_ECX] = - __ins_bit(CPUID_7_0_ECX, X86_FEATURE_AVX512VBMI) | __ins_bit(CPUID_7_0_ECX, X86_FEATURE_AVX512_VBMI2) | - __ins_bit(CPUID_7_0_ECX, X86_FEATURE_GFNI) | __ins_bit(CPUID_7_0_ECX, X86_FEATURE_VAES) | - __ins_bit(CPUID_7_0_ECX, X86_FEATURE_VPCLMULQDQ) | __ins_bit(CPUID_7_0_ECX, X86_FEATURE_AVX512_VNNI) | - __ins_bit(CPUID_7_0_ECX, X86_FEATURE_AVX512_BITALG) | 
__ins_bit(CPUID_7_0_ECX, X86_FEATURE_TME) | - __ins_bit(CPUID_7_0_ECX, X86_FEATURE_AVX512_VPOPCNTDQ) | __ins_bit(CPUID_7_0_ECX, X86_FEATURE_RDPID), - - [CPUID_8000_0008_EBX] = __ins_bit(CPUID_8000_0008_EBX, X86_FEATURE_CLZERO), - - [CPUID_7_0_EDX] = __ins_bit(CPUID_7_0_EDX, X86_FEATURE_AVX512_4VNNIW) | - __ins_bit(CPUID_7_0_EDX, X86_FEATURE_AVX512_4FMAPS), + __ins_bit(CPUID_7_0_ECX, X86_FEATURE_AVX512VBMI) | + __ins_bit(CPUID_7_0_ECX, X86_FEATURE_AVX512_VBMI2) | + __ins_bit(CPUID_7_0_ECX, X86_FEATURE_GFNI) | + __ins_bit(CPUID_7_0_ECX, X86_FEATURE_VAES) | + __ins_bit(CPUID_7_0_ECX, X86_FEATURE_VPCLMULQDQ) | + __ins_bit(CPUID_7_0_ECX, X86_FEATURE_AVX512_VNNI) | + __ins_bit(CPUID_7_0_ECX, X86_FEATURE_AVX512_BITALG) | + __ins_bit(CPUID_7_0_ECX, X86_FEATURE_TME) | + __ins_bit(CPUID_7_0_ECX, X86_FEATURE_AVX512_VPOPCNTDQ) | + __ins_bit(CPUID_7_0_ECX, X86_FEATURE_RDPID), + + [CPUID_8000_0008_EBX] = + __ins_bit(CPUID_8000_0008_EBX, X86_FEATURE_CLZERO), + + [CPUID_7_0_EDX] = + __ins_bit(CPUID_7_0_EDX, X86_FEATURE_AVX512_4VNNIW) | + __ins_bit(CPUID_7_0_EDX, X86_FEATURE_AVX512_4FMAPS), }; +// clang-format on #undef __ins_bit From 268bd349a2eea76ab18763bc56c12d5a69a2a51d Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 19 Nov 2021 10:08:37 +0300 Subject: [PATCH 046/121] ci: disable socket-raw test on centos8 We see error in centos8 ci on restore of socket-raw test: inet: \tRestore: family AF_INET type SOCK_RAW proto 66 port 66 state TCP_CLOSE src_addr 0.0.0.0 Error (criu/sk-inet.c:834): inet: Can't create inet socket: Protocol not supported Centos 8 kernel replaces IPPROTO_MPTCP(262) with "in-kernel" value IPPROTO_MPTCP_KERN(66) on inet_create(), but later shows this inkernel value to criu when listing sockets info. Same code in inet_create() returns EPROTONOSUPPORT on the attempt to create socket with IPPROTO_MPTCP_KERN. So this ci error is completely rh8 kernel related. Kernel should not show "in-kernel" value to userspace. 
But anyway this is already changed in Centos 9 kernel, so we can just skip socket-raw test on Centos 8. v2: use cirrus.yml Signed-off-by: Pavel Tikhomirov --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 671178d8b0..235b9821e4 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -47,7 +47,7 @@ task: pip3 install junit_xml build_script: | - make -C scripts/ci local SKIP_CI_PREP=1 CC=gcc CD_TO_TOP=1 + make -C scripts/ci local SKIP_CI_PREP=1 CC=gcc CD_TO_TOP=1 ZDTM_OPTS="-x zdtm/static/socket-raw" task: name: CentOS 7 based test From 014e4f3002a5b5f01f619252cd0b1b1f4632aa9b Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 3 Nov 2021 18:34:11 +0300 Subject: [PATCH 047/121] zdtm.py: make tests with --link_remap exclusive We see that tests mntns_ghost01 and unlink_fstat03 can run simultaneously and thus the former sees leftover link_remap.* files in the test directory created by the latter, and the latter is still running so it's ok to have link_remap.* at this point. Let's implicitly make all --link-remap tests exclusive (not running in parallel). Fixes: #1633 Signed-off-by: Pavel Tikhomirov --- test/zdtm.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index fc7b8a1830..b62136e962 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1997,7 +1997,22 @@ def run_test(self, name, desc, flavor): raise Exception("The kernel is tainted: %r (%r)" % (taint, self.__taint)) - if test_flag(desc, 'excl'): + ''' + The option --link-remap allows criu to hardlink open files back to the + file-system on dump (should be removed on restore) and we have a sanity + check in check_visible_state that they were actually removed at least + from the root test directory after restore. 
+ + As zdtm runs all tests from the same cwd (e.g.: test/zdtm/static) in + parallel, hardlinks from one test can mess up with sanity checks of + another test or even one test can by mistake use hardlinks created by + another test which is even worse. + + So let's make all tests using --link-remap option non parallel. + ''' + link_remap_excl = '--link-remap' in desc.get('opts', '').split() + desc.get('dopts', '').split() + desc.get('ropts', '').split() + + if test_flag(desc, 'excl') or link_remap_excl: self.wait_all() self.__nr += 1 @@ -2030,7 +2045,7 @@ def run_test(self, name, desc, flavor): "start": time.time() } - if test_flag(desc, 'excl'): + if test_flag(desc, 'excl') or link_remap_excl: self.wait() def __wait_one(self, flags): From fd48f1ae384a4be34f45cbd10c7fd5525fffe2e7 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 29 Oct 2021 03:01:14 +0000 Subject: [PATCH 048/121] tests: improve the deterministic behavior of the test suite Various I/O objects are unclosed when the object falls out of scope. This can lead to non-deterministic behavior. Also fixed a few missing list(). It doesn't play well with python3. e.g., `random.shuffle(filter(...))` doesn't work. 
Signed-off-by: Nicolas Viennot --- test/zdtm.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index b62136e962..b987700794 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -202,6 +202,8 @@ def __copy_one(self, fname): def __copy_libs(self, binary): ldd = subprocess.Popen(["ldd", binary], stdout=subprocess.PIPE) + stdout, _ = ldd.communicate() + xl = re.compile( r'^(linux-gate.so|linux-vdso(64)?.so|not a dynamic|.*\s*ldd\s)') @@ -216,11 +218,9 @@ def __copy_libs(self, binary): map( lambda x: str(x).strip(), filter(lambda x: str(x).startswith('\t'), - ldd.stdout.read().decode( + stdout.decode( 'ascii').splitlines()))))) - ldd.wait() - for lib in libs: if not os.access(lib, os.F_OK): raise test_fail_exc("Can't find lib %s required by %s" % @@ -331,8 +331,7 @@ def decode_flav(i): def tail(path): p = subprocess.Popen(['tail', '-n1', path], stdout=subprocess.PIPE) - out = p.stdout.readline() - p.wait() + out, _ = p.communicate() return out.decode() @@ -801,7 +800,7 @@ def __init__(self, name, desc, flavor, freezer): if flavor.ns: self.__real_name = name with open(name) as fd: - self.__subs = map(lambda x: x.strip(), fd.readlines()) + self.__subs = list(map(lambda x: x.strip(), fd.readlines())) print("Subs:\n%s" % '\n'.join(self.__subs)) else: self.__real_name = '' @@ -819,8 +818,8 @@ def __get_start_cmd(self, name): subprocess.check_call(s_args + [tname + '.cleanout']) s = subprocess.Popen(s_args + ['--dry-run', tname + '.pid'], stdout=subprocess.PIPE) - cmd = s.stdout.readlines().pop().strip() - s.wait() + out, _ = s.communicate() + cmd = out.decode().splitlines()[-1].strip() return 'cd /' + tdir + ' && ' + cmd @@ -2045,6 +2044,9 @@ def run_test(self, name, desc, flavor): "start": time.time() } + if log: + log.close() + if test_flag(desc, 'excl') or link_remap_excl: self.wait() @@ -2068,6 +2070,9 @@ def __wait_one(self, flags): self.__runtest += 1 if pid != 0: sub = self.__subs.pop(pid) + # The 
following wait() is not useful for our domain logic. + # It's useful for taming warnings in subprocess.Popen.__del__() + sub['sub'].wait() tc = None if self.__junit_test_cases is not None: tc = TestCase(sub['name'], @@ -2168,9 +2173,9 @@ def all_tests(opts): continue files.append(fp) excl = list(map(lambda x: os.path.join(desc['dir'], x), desc['exclude'])) - tlist = filter( + tlist = list(filter( lambda x: not x.endswith('.checkskip') and not x.endswith('.hook') and - x not in excl, map(lambda x: x.strip(), files)) + x not in excl, map(lambda x: x.strip(), files))) return tlist From d4ae0b77f9ff0380e9766d1a4e0301b1d1521da3 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 23 Nov 2021 15:06:03 +0300 Subject: [PATCH 049/121] clang-format/zdtm: fix clang complains about strange elseifs Clang-format v13 on my Fedora 35 complains about these hunks, moreover reading the formatting we had before is a pain: } else /* comment */ if (smth) { fail("") return -1; } Let's make explicit {} braces for else, this way it looks much better. 
Fixes: 93dd984ca ("Run 'make indent' on all C files") Signed-off-by: Pavel Tikhomirov --- test/zdtm/static/mprotect00.c | 16 ++++++++++------ test/zdtm/static/shm-mp.c | 16 ++++++++++------ 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/test/zdtm/static/mprotect00.c b/test/zdtm/static/mprotect00.c index 006b647729..717b7ddcf2 100644 --- a/test/zdtm/static/mprotect00.c +++ b/test/zdtm/static/mprotect00.c @@ -44,10 +44,12 @@ static int check_prot(char *ptr, int prot) fail("PROT_READ bypassed"); return -1; } - } else /* we come here on return from SIGSEGV handler */ + } else { + /* we come here on return from SIGSEGV handler */ if (prot & PROT_READ) { - fail("PROT_READ rejected"); - return -1; + fail("PROT_READ rejected"); + return -1; + } } if (!sigsetjmp(segv_ret, 1)) { @@ -56,10 +58,12 @@ static int check_prot(char *ptr, int prot) fail("PROT_WRITE bypassed"); return -1; } - } else /* we come here on return from SIGSEGV handler */ + } else { + /* we come here on return from SIGSEGV handler */ if (prot & PROT_WRITE) { - fail("PROT_WRITE rejected"); - return -1; + fail("PROT_WRITE rejected"); + return -1; + } } if (signal(SIGSEGV, SIG_DFL) == SIG_ERR) { diff --git a/test/zdtm/static/shm-mp.c b/test/zdtm/static/shm-mp.c index 1929dac191..c95f3d84cc 100644 --- a/test/zdtm/static/shm-mp.c +++ b/test/zdtm/static/shm-mp.c @@ -33,10 +33,12 @@ static int check_prot(char *ptr, char val, int prot) fail("PROT_READ bypassed"); return -1; } - } else /* we come here on return from SIGSEGV handler */ + } else { + /* we come here on return from SIGSEGV handler */ if (prot & PROT_READ) { - fail("PROT_READ rejected"); - return -1; + fail("PROT_READ rejected"); + return -1; + } } if (!sigsetjmp(segv_ret, 1)) { @@ -45,10 +47,12 @@ static int check_prot(char *ptr, char val, int prot) fail("PROT_WRITE bypassed"); return -1; } - } else /* we come here on return from SIGSEGV handler */ + } else { + /* we come here on return from SIGSEGV handler */ if (prot & PROT_WRITE) { - 
fail("PROT_WRITE rejected"); - return -1; + fail("PROT_WRITE rejected"); + return -1; + } } if (signal(SIGSEGV, SIG_DFL) == SIG_ERR) { From 5643067ce662e5ca1730f36e209f17eaf6818f90 Mon Sep 17 00:00:00 2001 From: Liu Hua Date: Fri, 5 Nov 2021 17:08:51 +0800 Subject: [PATCH 050/121] seize: restore cgroup freezer to right state The new freezer_state is a complete equivalent of old freezer_thawed except for the initial value. If old freezer_thawed was not initialized it was 0 and in freezer_restore_state was treated as if we need to freeze cgroup "back", thus before this patch if criu dump failed before freezing dumpee, criu always froze dumpee in cr_dump_finish which is wrong. Switching to freezer_state initialized with FREEZER_ERROR fixes the problem. v2: improve description, rename to origin_freezer_state Signed-off-by: Liu Hua --- criu/seize.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/criu/seize.c b/criu/seize.c index 95bf9ef0c1..58564ca746 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -131,11 +131,11 @@ static enum freezer_state get_freezer_state(int fd) return get_freezer_v1_state(fd); } -static bool freezer_thawed; +static enum freezer_state origin_freezer_state = FREEZER_ERROR; const char *get_real_freezer_state(void) { - return freezer_thawed ? thawed : frozen; + return origin_freezer_state == THAWED ? thawed : frozen; } static int freezer_write_state(int fd, enum freezer_state new_state) @@ -192,7 +192,7 @@ static int freezer_restore_state(void) int fd; int ret; - if (!opts.freeze_cgroup || freezer_thawed) + if (!opts.freeze_cgroup || origin_freezer_state != FROZEN) return 0; fd = freezer_open(); @@ -481,9 +481,10 @@ static int freeze_processes(void) close(fd); return -1; } - if (state == THAWED) { - freezer_thawed = true; + origin_freezer_state = state == FREEZING ? 
FROZEN : state; + + if (state == THAWED) { if (freezer_write_state(fd, FROZEN)) { close(fd); return -1; @@ -534,7 +535,7 @@ static int freeze_processes(void) } err: - if (exit_code == 0 || freezer_thawed) + if (exit_code == 0 || origin_freezer_state == THAWED) exit_code = freezer_write_state(fd, THAWED); if (close(fd)) { From def608a7fefd89ccac455da7a966ad7cd766398c Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 24 Nov 2021 11:37:58 +0300 Subject: [PATCH 051/121] ci: Use latest Fedora for lint ci runs again Now when we fixed clang-format complaints in zdtm, let's switch to latest clang-format available. This is effectively a revert of commit 07a2f0265 ("ci: use Fedora 34 for lint CI runs"). Signed-off-by: Pavel Tikhomirov --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 49eb6aaac3..50b241e9f7 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -6,7 +6,7 @@ jobs: build: runs-on: ubuntu-latest container: - image: registry.fedoraproject.org/fedora:34 + image: registry.fedoraproject.org/fedora:latest steps: - name: Install tools run: sudo dnf -y install git make python3-flake8 ShellCheck clang-tools-extra which findutils From 983464ffad11a0f8c5656492f3c342139acd2527 Mon Sep 17 00:00:00 2001 From: Liu Hua Date: Thu, 4 Nov 2021 10:04:22 +0800 Subject: [PATCH 052/121] crtools: ignore SIGPIPE in swrk mode Criu ignores SIGPIPE in most cases except swrk mode. And in the following situation criu gets killed by SIGPIPE and has no chance to do cleanup: Connection to page server is lost when we do disk-less migration, criu sends PS_IOV_FLUSH via a broken connection in disconnect_from_page_server. This patch lets criu ignore SIGPIPE in all paths. 
Signed-off-by: Liu Hua --- criu/crtools.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index 6a75cd1ea2..81c0aa963c 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -106,6 +106,24 @@ int main(int argc, char *argv[], char *envp[]) log_set_loglevel(opts.log_level); + /* + * There kernel might send us lethal signals in the following cases: + * 1) Writing a pipe which reader has disappeared. + * 2) Writing to a socket of type SOCK_STREAM which is no longer connected. + * We deal with write()/Send() failures on our own, and prefer not to get killed. + * So we ignore SIGPIPEs. + * + * Pipes are used in various places: + * 1) Receiving application page data + * 2) Transmitting data to the image streamer + * 3) Emitting logs (potentially to a pipe). + * Sockets are mainly used in transmitting memory data. + */ + if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) { + pr_perror("Failed to set a SIGPIPE signal ignore."); + return 1; + } + if (optind < argc && !strcmp(argv[optind], "swrk")) { if (argc != optind + 2) { fprintf(stderr, "Usage: criu swrk \n"); @@ -175,21 +193,6 @@ int main(int argc, char *argv[], char *envp[]) } } - /* - * The kernel might send us lethal signals when writing to a pipe - * which reader has disappeared. We deal with write() failures on our - * own, and prefer not to get killed. So we ignore SIGPIPEs. - * - * Pipes are used in various places: - * 1) Receiving application page data - * 2) Transmitting data to the image streamer - * 3) Emitting logs (potentially to a pipe). 
- */ - if (signal(SIGPIPE, SIG_IGN) == SIG_ERR) { - pr_perror("Failed to set a SIGPIPE signal ignore."); - return 1; - } - /* * When a process group becomes an orphan, * its processes are sent a SIGHUP signal From 21873e0efd64f023a0d7b472ceca1f394f83e58e Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 24 Nov 2021 16:13:01 +0000 Subject: [PATCH 053/121] ci: switch to centos-stream-8 CentOS 8 goes EOL at the end of 2021. This switches our CentOS 8 based tests to CentOS Stream 8 which should be supported until 2024. Signed-off-by: Adrian Reber --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 235b9821e4..ef0de54e99 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -27,7 +27,7 @@ task: compute_engine_instance: image_project: centos-cloud - image: family/centos-8 + image: family/centos-stream-8 platform: linux cpu: 4 memory: 8G From 6754b16c2e95766493ac27e4122a3b63867133f5 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Fri, 19 Nov 2021 20:58:13 +0000 Subject: [PATCH 054/121] check: cleanup child processes Always wait() for forked child processes. This avoids zombie processes in containers that don't have an init process reaping orphans. 
Signed-off-by: Nicolas Viennot --- criu/cr-check.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/criu/cr-check.c b/criu/cr-check.c index e46c938157..0320b445aa 100644 --- a/criu/cr-check.c +++ b/criu/cr-check.c @@ -575,6 +575,7 @@ static pid_t fork_and_ptrace_attach(int (*child_setup)(void)) if (read(sk, &c, 1) != 1) { close(sk); kill(pid, SIGKILL); + waitpid(pid, NULL, 0); pr_perror("read"); return -1; } @@ -584,6 +585,7 @@ static pid_t fork_and_ptrace_attach(int (*child_setup)(void)) if (ptrace(PTRACE_ATTACH, pid, NULL, NULL) == -1) { pr_perror("Unable to ptrace the child"); kill(pid, SIGKILL); + waitpid(pid, NULL, 0); return -1; } @@ -618,6 +620,7 @@ static int check_ptrace_peeksiginfo(void) } kill(pid, SIGKILL); + waitpid(pid, NULL, 0); return ret; } @@ -768,6 +771,7 @@ static int check_special_mapping_mremap(void) /* Probably, we're interrupted with a signal - cleanup */ pr_err("Failed to wait for a child %d\n", errno); kill(child, SIGKILL); + waitpid(child, NULL, 0); return -1; } @@ -806,6 +810,7 @@ static int check_ptrace_suspend_seccomp(void) } kill(pid, SIGKILL); + waitpid(pid, NULL, 0); return ret; } @@ -846,6 +851,7 @@ static int check_ptrace_dump_seccomp_filters(void) } kill(pid, SIGKILL); + waitpid(pid, NULL, 0); return ret; } From ae3bf06242d6a8bb2a9946cba4ace96e202ee3f4 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 13 Feb 2020 10:43:14 +0300 Subject: [PATCH 055/121] files-reg: fix error handling in open_path 1) On error paths need to close fd and unlock mutex. 2) Make rfi_remap return special return code to identify EEXIST from linkat_hard, all other errors should be reported up. 3) Report unlinkat error as criu should not corrupt fs. Cherry-picked from Virtuozzo criu: https://src.openvz.org/projects/OVZ/repos/criu/commits/fe1d0be14 Changes: use close_safe(), fix order in "Fake %s -> %s link" error message. 
Signed-off-by: Pavel Tikhomirov --- criu/files-reg.c | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index 679477c1ce..4560f253eb 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -1867,6 +1867,9 @@ static int make_parent_dirs_if_need(int mntns_root, char *path) * This routine properly resolves d's path handling ghost/link-remaps. * The open_cb is a routine that does actual open, it differs for * files, directories, fifos, etc. + * + * Return 0 on success, -1 on error and 1 to indicate soft error, which can be + * retried. */ static int rfi_remap(struct reg_file_info *rfi, int *level) @@ -1932,6 +1935,8 @@ static int rfi_remap(struct reg_file_info *rfi, int *level) int errno_saved = errno; rm_parent_dirs(mntns_root, path, *level); errno = errno_saved; + if (errno == EEXIST) + return 1; return -1; } @@ -2008,11 +2013,12 @@ static bool validate_file(const int fd, const struct stat *fd_status, const stru int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_file_info *, void *), void *arg) { - int tmp, mntns_root, level = 0; + int tmp = -1, mntns_root, level = 0; struct reg_file_info *rfi; char *orig_path = NULL; char path[PATH_MAX]; int inh_fd = -1; + int ret; if (inherited_fd(d, &tmp)) return tmp; @@ -2049,14 +2055,9 @@ int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_fil */ orig_path = rfi->path; rfi->path = rfi->remap->rpath; - } else if (rfi_remap(rfi, &level) < 0) { + } else if ((ret = rfi_remap(rfi, &level)) == 1) { static char tmp_path[PATH_MAX]; - if (errno != EEXIST) { - pr_perror("Can't link %s -> %s", rfi->remap->rpath, rfi->path); - return -1; - } - /* * The file whose name we're trying to create * exists. 
Need to pick some other one, we're @@ -2070,12 +2071,15 @@ int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_fil orig_path = rfi->path; rfi->path = tmp_path; snprintf(tmp_path, sizeof(tmp_path), "%s.cr_link", orig_path); - pr_debug("Fake %s -> %s link\n", rfi->path, rfi->remap->rpath); + pr_debug("Fake %s -> %s link\n", rfi->remap->rpath, rfi->path); - if (rfi_remap(rfi, &level) < 0) { + if (rfi_remap(rfi, &level)) { pr_perror("Can't create even fake link!"); - return -1; + goto err; } + } else if (ret < 0) { + pr_perror("Can't link %s -> %s", rfi->remap->rpath, rfi->path); + goto err; } } @@ -2085,7 +2089,7 @@ int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_fil if (tmp < 0) { pr_perror("Can't open file %s", rfi->path); close_safe(&inh_fd); - return -1; + goto err; } close_safe(&inh_fd); @@ -2094,15 +2098,15 @@ int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_fil if (fstat(tmp, &st) < 0) { pr_perror("Can't fstat opened file"); - return -1; + goto err; } if (!validate_file(tmp, &st, rfi)) - return -1; + goto err; if (rfi->rfe->has_mode && (st.st_mode != rfi->rfe->mode)) { pr_err("File %s has bad mode 0%o (expect 0%o)\n", rfi->path, (int)st.st_mode, rfi->rfe->mode); - return -1; + goto err; } /* @@ -2115,7 +2119,11 @@ int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_fil if (rfi->remap) { if (!rfi->remap->is_dir) { - unlinkat(mntns_root, rfi->path, 0); + pr_debug("Unlink: %d:%s\n", rfi->rfe->mnt_id, rfi->path); + if (unlinkat(mntns_root, rfi->path, 0)) { + pr_perror("Failed to unlink the remap file"); + goto err; + } rm_parent_dirs(mntns_root, rfi->path, level); } @@ -2124,10 +2132,17 @@ int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_fil if (orig_path) rfi->path = orig_path; - if (restore_fown(tmp, rfi->rfe->fown)) + if (restore_fown(tmp, rfi->rfe->fown)) { + close(tmp); return -1; + } return tmp; +err: + if (rfi->remap) + 
mutex_unlock(remap_open_lock); + close_safe(&tmp); + return -1; } int do_open_reg_noseek_flags(int ns_root_fd, struct reg_file_info *rfi, void *arg) From 2894e242562f3c639b6cb63f502d7445076d3db1 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 13 Feb 2020 13:03:12 +0300 Subject: [PATCH 056/121] files-reg: fix error handling of rm_parent_dirs If unlinkat fails it means that fs is in "corrupted" state - spoiled with non-unlinked auxiliary directories. While on it add fixme note as this function can be racy and BUG_ON if path contains double slashes. Cherry-picked from Virtuozzo criu: https://src.openvz.org/projects/OVZ/repos/criu/commits/b7b4e69fd Changes: simplify while loop condition, remove confusing FIXME, remove excess !count check in favour of while loop condition check Signed-off-by: Pavel Tikhomirov --- criu/files-reg.c | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index 4560f253eb..0e126a32e4 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -1792,30 +1792,42 @@ static int linkat_hard(int odir, char *opath, int ndir, char *npath, uid_t uid, return ret; } -static void rm_parent_dirs(int mntns_root, char *path, int count) +static int rm_parent_dirs(int mntns_root, char *path, int count) { char *p, *prev = NULL; + int ret = -1; - if (!count) - return; - - while (count > 0) { - count -= 1; + while (count-- > 0) { p = strrchr(path, '/'); - if (p) + if (p) { + /* We don't handle "//" in path */ + BUG_ON(prev && (prev - p == 1)); *p = '\0'; + } else { + /* Inconsistent path and count */ + pr_perror("Can't strrchr \"/\" in \"%s\"/\"%s\"]" + " left count=%d\n", + path, prev ? 
prev + 1 : "", count + 1); + goto err; + } + if (prev) *prev = '/'; + prev = p; - if (unlinkat(mntns_root, path, AT_REMOVEDIR)) + if (unlinkat(mntns_root, path, AT_REMOVEDIR)) { pr_perror("Can't remove %s AT %d", path, mntns_root); - else - pr_debug("Unlinked parent dir: %s AT %d\n", path, mntns_root); - prev = p; + goto err; + } + pr_debug("Unlinked parent dir: %s AT %d\n", path, mntns_root); } + ret = 0; +err: if (prev) *prev = '/'; + + return ret; } /* Construct parent dir name and mkdir parent/grandparents if they're not exist */ @@ -1847,6 +1859,7 @@ static int make_parent_dirs_if_need(int mntns_root, char *path) err = mkdirat(mntns_root, path, 0777); if (err && errno != EEXIST) { pr_perror("Can't create dir: %s AT %d", path, mntns_root); + /* Failing anyway -> no retcode check */ rm_parent_dirs(mntns_root, path, count); count = -1; goto out; @@ -1933,10 +1946,11 @@ static int rfi_remap(struct reg_file_info *rfi, int *level) if (linkat_hard(mntns_root, rpath, mntns_root, path, rfi->remap->uid, rfi->remap->gid, 0) < 0) { int errno_saved = errno; - rm_parent_dirs(mntns_root, path, *level); - errno = errno_saved; - if (errno == EEXIST) + + if (!rm_parent_dirs(mntns_root, path, *level) && errno_saved == EEXIST) { + errno = errno_saved; return 1; + } return -1; } @@ -2124,7 +2138,8 @@ int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_fil pr_perror("Failed to unlink the remap file"); goto err; } - rm_parent_dirs(mntns_root, rfi->path, level); + if (rm_parent_dirs(mntns_root, rfi->path, level)) + goto err; } mutex_unlock(remap_open_lock); From 0a2d380e6b9110ec809101757c243ce316f4d1bb Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 13 Feb 2020 18:28:15 +0300 Subject: [PATCH 057/121] ghost/mount: allocate remounted_rw in shmem to get info from other processes Previously remounted_rw was not shared between all processes on restore, thus cleanup didn't get this info from rfi_remap and these mounts were wrongly left writable after 
restore. Cherry-picked from Virtuozzo criu: https://src.openvz.org/projects/OVZ/repos/criu/commits/3a1a592e7 Fixes: fd0a3cd9efb9 ("mount: remount ro mounts writable before ghost-file restore") Signed-off-by: Pavel Tikhomirov --- criu/include/mount.h | 4 ++-- criu/mount.c | 32 ++++++++++++++++++++++---------- criu/proc_parse.c | 2 +- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/criu/include/mount.h b/criu/include/mount.h index 833a75ca0f..7705279e4f 100644 --- a/criu/include/mount.h +++ b/criu/include/mount.h @@ -84,7 +84,7 @@ struct mount_info { struct list_head postpone; int is_overmounted; - int remounted_rw; + int *remounted_rw; void *private; /* associated filesystem data */ }; @@ -100,7 +100,7 @@ static inline int collect_binfmt_misc(void) } #endif -extern struct mount_info *mnt_entry_alloc(void); +extern struct mount_info *mnt_entry_alloc(bool rst); extern void mnt_entry_free(struct mount_info *mi); extern int __mntns_get_root_fd(pid_t pid); diff --git a/criu/mount.c b/criu/mount.c index 93725e5269..d75ca5598e 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -27,6 +27,7 @@ #include "external.h" #include "clone-noasan.h" #include "fdstore.h" +#include "rst-malloc.h" #include "images/mnt.pb-c.h" @@ -1415,7 +1416,8 @@ int open_mountpoint(struct mount_info *pm) return -1; } -static __maybe_unused int add_cr_time_mount(struct mount_info *root, char *fsname, const char *path, unsigned int s_dev) +static __maybe_unused int add_cr_time_mount(struct mount_info *root, char *fsname, const char *path, unsigned int s_dev, + bool rst) { struct mount_info *mi, *t, *parent; bool add_slash = false; @@ -1434,7 +1436,7 @@ static __maybe_unused int add_cr_time_mount(struct mount_info *root, char *fsnam } } - mi = mnt_entry_alloc(); + mi = mnt_entry_alloc(rst); if (!mi) return -1; @@ -2723,7 +2725,7 @@ static int cr_pivot_root(char *root) return exit_code; } -struct mount_info *mnt_entry_alloc() +struct mount_info *mnt_entry_alloc(bool rst) { struct mount_info 
*new; @@ -2734,6 +2736,13 @@ struct mount_info *mnt_entry_alloc() new = xzalloc(sizeof(struct mount_info)); if (new) { + if (rst) { + new->remounted_rw = shmalloc(sizeof(int)); + if (!new->remounted_rw) { + xfree(new); + return NULL; + } + } new->fd = -1; new->is_overmounted = -1; INIT_LIST_HEAD(&new->children); @@ -2956,7 +2965,7 @@ static int collect_mnt_from_image(struct mount_info **head, struct mount_info ** if (ret <= 0) break; - pm = mnt_entry_alloc(); + pm = mnt_entry_alloc(true); if (!pm) goto err; @@ -3234,7 +3243,7 @@ static int populate_mnt_ns(void) { int ret; - root_yard_mp = mnt_entry_alloc(); + root_yard_mp = mnt_entry_alloc(true); if (!root_yard_mp) return -1; @@ -3247,7 +3256,7 @@ static int populate_mnt_ns(void) #ifdef CONFIG_BINFMT_MISC_VIRTUALIZED if (!opts.has_binfmt_misc && !list_empty(&binfmt_misc_list)) { /* Add to mount tree. Generic code will mount it later */ - ret = add_cr_time_mount(root_yard_mp, "binfmt_misc", BINFMT_MISC_HOME, 0); + ret = add_cr_time_mount(root_yard_mp, "binfmt_misc", BINFMT_MISC_HOME, 0, true); if (ret) return -1; } @@ -3697,7 +3706,7 @@ int collect_mnt_namespaces(bool for_dump) ret = -1; goto err; } else if (ret > 0 && add_cr_time_mount(ns->mnt.mntinfo_tree, "binfmt_misc", BINFMT_MISC_HOME, - s_dev) < 0) { + s_dev, false) < 0) { ret = -1; goto err; } @@ -3838,7 +3847,10 @@ int try_remount_writable(struct mount_info *mi, bool ns) if (!ns) remounted = REMOUNTED_RW_SERVICE; - if (mi->flags & MS_RDONLY && !(mi->remounted_rw & remounted)) { + /* All mounts in mntinfo list should have it on restore */ + BUG_ON(mi->remounted_rw == NULL); + + if (mi->flags & MS_RDONLY && !(*mi->remounted_rw & remounted)) { if (mnt_is_overmounted(mi)) { pr_err("The mount %d is overmounted so paths are invisible\n", mi->mnt_id); return -1; @@ -3861,7 +3873,7 @@ int try_remount_writable(struct mount_info *mi, bool ns) if (call_helper_process(ns_remount_writable, mi)) return -1; } - mi->remounted_rw |= remounted; + *mi->remounted_rw |= 
remounted; } return 0; @@ -3876,7 +3888,7 @@ static int __remount_readonly_mounts(struct ns_id *ns) if (ns && mi->nsid != ns) continue; - if (!(mi->remounted_rw && REMOUNTED_RW)) + if (!(*mi->remounted_rw && REMOUNTED_RW)) continue; /* diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 8a9ce3a37f..094f9b84e9 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1541,7 +1541,7 @@ struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump) int ret = -1; char *fsname = NULL; - new = mnt_entry_alloc(); + new = mnt_entry_alloc(false); if (!new) goto end; From 3aca580025b38aa043f6ef82f24ecab2d2d258e9 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 13 Feb 2020 18:26:52 +0300 Subject: [PATCH 058/121] files-reg: temporary remount writable the mount we do unlink on Previously I didn't mention this case because we had bad error handling in the ghost cleanup path. Without this patch but with proper error handling for unlink we have an error in mntns_ghost01 test: Error (criu/files-reg.c:2269): Failed to unlink the remap file: Read-only file system Cherry-picked from Virtuozzo criu: https://src.openvz.org/projects/OVZ/repos/criu/commits/151c859e1 Changes: check lookup_mnt_id return for NULL Fixes: fd0a3cd9efb9 ("mount: remount ro mounts writable before ghost-file restore") Signed-off-by: Pavel Tikhomirov --- criu/files-reg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/criu/files-reg.c b/criu/files-reg.c index 0e126a32e4..57403d2c62 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -2133,6 +2133,11 @@ int open_path(struct file_desc *d, int (*open_cb)(int mntns_root, struct reg_fil if (rfi->remap) { if (!rfi->remap->is_dir) { + struct mount_info *mi = lookup_mnt_id(rfi->rfe->mnt_id); + + if (mi && try_remount_writable(mi, true)) + goto err; + pr_debug("Unlink: %d:%s\n", rfi->rfe->mnt_id, rfi->path); if (unlinkat(mntns_root, rfi->path, 0)) { pr_perror("Failed to unlink the remap file"); From 
d137057e215de10ee772852126075ce9575e908e Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Tue, 30 Nov 2021 19:03:29 +0300 Subject: [PATCH 059/121] zdtm: add ro-mount check after c/r to mntns_ghost01 This is a test for the "ghost/mount: allocate remounted_rw in shmem to get info from other processes" patch; without that patch the test fails with: ############# Test zdtm/static/mntns_ghost01 FAIL at result check ############## Test output: ================================ 16:15:19.607: 5: ERR: mntns_ghost01.c:95: open for write on rofs -> 7 (errno = 11 (Resource temporarily unavailable)) 16:15:19.607: 4: FAIL: mntns_ghost01.c:121: Test died (errno = 11 (Resource temporarily unavailable)) Signed-off-by: Pavel Tikhomirov --- test/zdtm/static/mntns_ghost01.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/zdtm/static/mntns_ghost01.c b/test/zdtm/static/mntns_ghost01.c index 20397d543a..2cc2270dd8 100644 --- a/test/zdtm/static/mntns_ghost01.c +++ b/test/zdtm/static/mntns_ghost01.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "zdtmtst.h" @@ -89,6 +90,13 @@ int main(int argc, char **argv) return 1; } + fd = open(ghost_path, O_CREAT | O_WRONLY, 0600); + if (fd >= 0 || errno != EROFS) { + pr_perror("open for write on rofs -> %d", fd); + close(fd); + return 1; + } + return 0; } From bb9c27c217a9b995c101cbb2023c4084ce76a07b Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 25 Nov 2021 16:51:55 +0300 Subject: [PATCH 060/121] clang-format: disable wrong struct pointer declaration format When we declare a struct and at the same time declare a pointer variable of this struct type, it looks like clang-format treats "*" as a multiplication operator instead of an indirection (pointer declaration) operator and puts spaces on both sides, which looks wrong. 
Signed-off-by: Pavel Tikhomirov --- criu/fdstore.c | 4 +++- test/zdtm/lib/test.c | 4 +++- test/zdtm/static/child_subreaper_and_reparent.c | 4 +++- test/zdtm/static/child_subreaper_existing_child.c | 4 +++- test/zdtm/static/file_fown.c | 4 +++- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/criu/fdstore.c b/criu/fdstore.c index 77935484fd..65264a5116 100644 --- a/criu/fdstore.c +++ b/criu/fdstore.c @@ -13,10 +13,12 @@ #include "rst-malloc.h" #include "log.h" +/* clang-format off */ static struct fdstore_desc { int next_id; mutex_t lock; /* to protect a peek offset */ -} * desc; +} *desc; +/* clang-format on */ int fdstore_init(void) { diff --git a/test/zdtm/lib/test.c b/test/zdtm/lib/test.c index 81da81ebab..57eb42046a 100644 --- a/test/zdtm/lib/test.c +++ b/test/zdtm/lib/test.c @@ -20,9 +20,11 @@ #include "ns.h" futex_t sig_received; +/* clang-format off */ static struct { futex_t stage; -} * test_shared_state; +} *test_shared_state; +/* clang-format on */ enum { TEST_INIT_STAGE = 0, diff --git a/test/zdtm/static/child_subreaper_and_reparent.c b/test/zdtm/static/child_subreaper_and_reparent.c index ba03517bae..c71778ae9f 100644 --- a/test/zdtm/static/child_subreaper_and_reparent.c +++ b/test/zdtm/static/child_subreaper_and_reparent.c @@ -19,11 +19,13 @@ enum { TEST_EXIT, }; +/* clang-format off */ struct shared { futex_t fstate; int parent_before_cr; int parent_after_cr; -} * sh; +} *sh; +/* clang-format on */ int orphan(void) { diff --git a/test/zdtm/static/child_subreaper_existing_child.c b/test/zdtm/static/child_subreaper_existing_child.c index 4805aa41d6..92d22bc4a5 100644 --- a/test/zdtm/static/child_subreaper_existing_child.c +++ b/test/zdtm/static/child_subreaper_existing_child.c @@ -18,10 +18,12 @@ enum { TEST_EXIT, }; +/* clang-format off */ struct shared { futex_t fstate; int ppid_after_reparent; -} * sh; +} *sh; +/* clang-format on */ int orphan(void) { diff --git a/test/zdtm/static/file_fown.c b/test/zdtm/static/file_fown.c index 
eb42a826eb..2c5ba82c2b 100644 --- a/test/zdtm/static/file_fown.c +++ b/test/zdtm/static/file_fown.c @@ -22,12 +22,14 @@ const char *test_doc = "Check for signal delivery on file owners"; const char *test_author = "Cyrill Gorcunov "; +/* clang-format off */ struct params { int sigio; int pipe_flags[2]; int pipe_pid[2]; int pipe_sig[2]; -} * shared; +} *shared; +/* clang-format on */ static void signal_handler_io(int status) { From 9134e859293a3980b25cff043273be1d41ab7d53 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 4 Dec 2021 16:18:26 +0000 Subject: [PATCH 061/121] ci: Run cross compile on debian stable The current debian stable release is Bullseye, not Buster. However, we can use the 'stable' release instead. This would allow the CI to automatically pick up updates in the future. Signed-off-by: Radostin Stoyanov --- .github/workflows/cross-compile-daily.yml | 2 +- .github/workflows/cross-compile.yml | 2 +- scripts/build/Dockerfile.aarch64-cross.tmpl | 1 - ...aarch64-cross.hdr => Dockerfile.aarch64-stable-cross.hdr} | 0 scripts/build/Dockerfile.aarch64-stable-cross.tmpl | 1 + scripts/build/Dockerfile.armv7-cross.tmpl | 1 - ...ile.armv7-cross.hdr => Dockerfile.armv7-stable-cross.hdr} | 0 scripts/build/Dockerfile.armv7-stable-cross.tmpl | 1 + scripts/build/Dockerfile.mips64el-cross.tmpl | 1 - ...ps64el-cross.hdr => Dockerfile.mips64el-stable-cross.hdr} | 0 scripts/build/Dockerfile.mips64el-stable-cross.tmpl | 1 + scripts/build/Dockerfile.ppc64-cross.tmpl | 1 - ...ile.ppc64-cross.hdr => Dockerfile.ppc64-stable-cross.hdr} | 0 scripts/build/Dockerfile.ppc64-stable-cross.tmpl | 1 + .../{Dockerfile.cross.tmpl => Dockerfile.stable-cross.tmpl} | 5 ++--- scripts/build/Makefile | 2 +- 16 files changed, 9 insertions(+), 10 deletions(-) delete mode 120000 scripts/build/Dockerfile.aarch64-cross.tmpl rename scripts/build/{Dockerfile.aarch64-cross.hdr => Dockerfile.aarch64-stable-cross.hdr} (100%) create mode 120000 scripts/build/Dockerfile.aarch64-stable-cross.tmpl 
delete mode 120000 scripts/build/Dockerfile.armv7-cross.tmpl rename scripts/build/{Dockerfile.armv7-cross.hdr => Dockerfile.armv7-stable-cross.hdr} (100%) create mode 120000 scripts/build/Dockerfile.armv7-stable-cross.tmpl delete mode 120000 scripts/build/Dockerfile.mips64el-cross.tmpl rename scripts/build/{Dockerfile.mips64el-cross.hdr => Dockerfile.mips64el-stable-cross.hdr} (100%) create mode 120000 scripts/build/Dockerfile.mips64el-stable-cross.tmpl delete mode 120000 scripts/build/Dockerfile.ppc64-cross.tmpl rename scripts/build/{Dockerfile.ppc64-cross.hdr => Dockerfile.ppc64-stable-cross.hdr} (100%) create mode 120000 scripts/build/Dockerfile.ppc64-stable-cross.tmpl rename scripts/build/{Dockerfile.cross.tmpl => Dockerfile.stable-cross.tmpl} (88%) diff --git a/.github/workflows/cross-compile-daily.yml b/.github/workflows/cross-compile-daily.yml index 7012132766..927ddced26 100644 --- a/.github/workflows/cross-compile-daily.yml +++ b/.github/workflows/cross-compile-daily.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - target: [armv7-cross, aarch64-cross, ppc64-cross, mips64el-cross] + target: [armv7-stable-cross, aarch64-stable-cross, ppc64-stable-cross, mips64el-stable-cross] branches: [criu-dev, master] steps: diff --git a/.github/workflows/cross-compile.yml b/.github/workflows/cross-compile.yml index 90862e7abd..c6745d43eb 100644 --- a/.github/workflows/cross-compile.yml +++ b/.github/workflows/cross-compile.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - target: [armv7-cross, aarch64-cross, ppc64-cross, mips64el-cross] + target: [armv7-stable-cross, aarch64-stable-cross, ppc64-stable-cross, mips64el-stable-cross] steps: - uses: actions/checkout@v2 diff --git a/scripts/build/Dockerfile.aarch64-cross.tmpl b/scripts/build/Dockerfile.aarch64-cross.tmpl deleted file mode 120000 index 50eff9213e..0000000000 --- a/scripts/build/Dockerfile.aarch64-cross.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.cross.tmpl \ No newline 
at end of file diff --git a/scripts/build/Dockerfile.aarch64-cross.hdr b/scripts/build/Dockerfile.aarch64-stable-cross.hdr similarity index 100% rename from scripts/build/Dockerfile.aarch64-cross.hdr rename to scripts/build/Dockerfile.aarch64-stable-cross.hdr diff --git a/scripts/build/Dockerfile.aarch64-stable-cross.tmpl b/scripts/build/Dockerfile.aarch64-stable-cross.tmpl new file mode 120000 index 0000000000..81ef22980f --- /dev/null +++ b/scripts/build/Dockerfile.aarch64-stable-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.stable-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.armv7-cross.tmpl b/scripts/build/Dockerfile.armv7-cross.tmpl deleted file mode 120000 index 50eff9213e..0000000000 --- a/scripts/build/Dockerfile.armv7-cross.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.armv7-cross.hdr b/scripts/build/Dockerfile.armv7-stable-cross.hdr similarity index 100% rename from scripts/build/Dockerfile.armv7-cross.hdr rename to scripts/build/Dockerfile.armv7-stable-cross.hdr diff --git a/scripts/build/Dockerfile.armv7-stable-cross.tmpl b/scripts/build/Dockerfile.armv7-stable-cross.tmpl new file mode 120000 index 0000000000..81ef22980f --- /dev/null +++ b/scripts/build/Dockerfile.armv7-stable-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.stable-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.mips64el-cross.tmpl b/scripts/build/Dockerfile.mips64el-cross.tmpl deleted file mode 120000 index 50eff9213e..0000000000 --- a/scripts/build/Dockerfile.mips64el-cross.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.mips64el-cross.hdr b/scripts/build/Dockerfile.mips64el-stable-cross.hdr similarity index 100% rename from scripts/build/Dockerfile.mips64el-cross.hdr rename to scripts/build/Dockerfile.mips64el-stable-cross.hdr diff --git a/scripts/build/Dockerfile.mips64el-stable-cross.tmpl 
b/scripts/build/Dockerfile.mips64el-stable-cross.tmpl new file mode 120000 index 0000000000..81ef22980f --- /dev/null +++ b/scripts/build/Dockerfile.mips64el-stable-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.stable-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.ppc64-cross.tmpl b/scripts/build/Dockerfile.ppc64-cross.tmpl deleted file mode 120000 index 50eff9213e..0000000000 --- a/scripts/build/Dockerfile.ppc64-cross.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.ppc64-cross.hdr b/scripts/build/Dockerfile.ppc64-stable-cross.hdr similarity index 100% rename from scripts/build/Dockerfile.ppc64-cross.hdr rename to scripts/build/Dockerfile.ppc64-stable-cross.hdr diff --git a/scripts/build/Dockerfile.ppc64-stable-cross.tmpl b/scripts/build/Dockerfile.ppc64-stable-cross.tmpl new file mode 120000 index 0000000000..81ef22980f --- /dev/null +++ b/scripts/build/Dockerfile.ppc64-stable-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.stable-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.cross.tmpl b/scripts/build/Dockerfile.stable-cross.tmpl similarity index 88% rename from scripts/build/Dockerfile.cross.tmpl rename to scripts/build/Dockerfile.stable-cross.tmpl index 8b95fbb1c0..6a68cd1ca6 100644 --- a/scripts/build/Dockerfile.cross.tmpl +++ b/scripts/build/Dockerfile.stable-cross.tmpl @@ -1,9 +1,8 @@ COPY scripts/ci/apt-install /bin/apt-install # Add the cross compiler sources -RUN echo "deb http://deb.debian.org/debian/ buster main" >> /etc/apt/sources.list && \ - dpkg --add-architecture ${DEBIAN_ARCH} && \ - apt-install emdebian-archive-keyring +RUN echo "deb http://deb.debian.org/debian/ stable main" >> /etc/apt/sources.list && \ + dpkg --add-architecture ${DEBIAN_ARCH} RUN apt-install \ crossbuild-essential-${DEBIAN_ARCH} \ diff --git a/scripts/build/Makefile b/scripts/build/Makefile index 62e3a99204..a436c28397 100644 --- a/scripts/build/Makefile +++ 
b/scripts/build/Makefile @@ -1,5 +1,5 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos7 armv7hf centos8 -NON_CLANG := armv7-cross aarch64-cross ppc64-cross mips64el-cross +NON_CLANG := armv7-stable-cross aarch64-stable-cross ppc64-stable-cross mips64el-stable-cross CREATE_DOCKERFILES := $(ARCHES) $(NON_CLANG) TARGETS := $(ARCHES) alpine archlinux TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) From e16ea6a3c23fb8a5f0ed8c19052ee9b4140108cf Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 4 Dec 2021 16:31:34 +0000 Subject: [PATCH 062/121] ci: Run cross compile with debian testing Debian testing has newer compiler version and running cross compilation tests would allow us to catch any compilation errors early. Signed-off-by: Radostin Stoyanov --- .github/workflows/cross-compile.yml | 11 ++++- .../Dockerfile.aarch64-testing-cross.hdr | 5 +++ .../Dockerfile.aarch64-testing-cross.tmpl | 1 + .../build/Dockerfile.armv7-testing-cross.hdr | 6 +++ .../build/Dockerfile.armv7-testing-cross.tmpl | 1 + .../Dockerfile.mips64el-testing-cross.hdr | 6 +++ .../Dockerfile.mips64el-testing-cross.tmpl | 1 + .../build/Dockerfile.ppc64-testing-cross.hdr | 5 +++ .../build/Dockerfile.ppc64-testing-cross.tmpl | 1 + scripts/build/Dockerfile.testing-cross.tmpl | 42 +++++++++++++++++++ scripts/build/Makefile | 4 +- 11 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 scripts/build/Dockerfile.aarch64-testing-cross.hdr create mode 120000 scripts/build/Dockerfile.aarch64-testing-cross.tmpl create mode 100644 scripts/build/Dockerfile.armv7-testing-cross.hdr create mode 120000 scripts/build/Dockerfile.armv7-testing-cross.tmpl create mode 100644 scripts/build/Dockerfile.mips64el-testing-cross.hdr create mode 120000 scripts/build/Dockerfile.mips64el-testing-cross.tmpl create mode 100644 scripts/build/Dockerfile.ppc64-testing-cross.hdr create mode 120000 scripts/build/Dockerfile.ppc64-testing-cross.tmpl create mode 100644 scripts/build/Dockerfile.testing-cross.tmpl diff 
--git a/.github/workflows/cross-compile.yml b/.github/workflows/cross-compile.yml index c6745d43eb..eec6eb5778 100644 --- a/.github/workflows/cross-compile.yml +++ b/.github/workflows/cross-compile.yml @@ -8,7 +8,16 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - target: [armv7-stable-cross, aarch64-stable-cross, ppc64-stable-cross, mips64el-stable-cross] + target: [ + armv7-stable-cross, + armv7-testing-cross, + aarch64-stable-cross, + aarch64-testing-cross, + ppc64-stable-cross, + ppc64-testing-cross, + mips64el-stable-cross, + mips64el-testing-cross + ] steps: - uses: actions/checkout@v2 diff --git a/scripts/build/Dockerfile.aarch64-testing-cross.hdr b/scripts/build/Dockerfile.aarch64-testing-cross.hdr new file mode 100644 index 0000000000..c61d2af276 --- /dev/null +++ b/scripts/build/Dockerfile.aarch64-testing-cross.hdr @@ -0,0 +1,5 @@ +FROM docker.io/dockcross/base:latest + +ENV ARCH=aarch64 +ENV DEBIAN_ARCH=arm64 +ENV CROSS_TRIPLET=aarch64-linux-gnu diff --git a/scripts/build/Dockerfile.aarch64-testing-cross.tmpl b/scripts/build/Dockerfile.aarch64-testing-cross.tmpl new file mode 120000 index 0000000000..c1dd38b21f --- /dev/null +++ b/scripts/build/Dockerfile.aarch64-testing-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.testing-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.armv7-testing-cross.hdr b/scripts/build/Dockerfile.armv7-testing-cross.hdr new file mode 100644 index 0000000000..f96dc51f70 --- /dev/null +++ b/scripts/build/Dockerfile.armv7-testing-cross.hdr @@ -0,0 +1,6 @@ +FROM docker.io/dockcross/base:latest + +ENV ARCH=arm +ENV SUBARCH=armv7 +ENV DEBIAN_ARCH=armhf +ENV CROSS_TRIPLET=arm-linux-gnueabihf diff --git a/scripts/build/Dockerfile.armv7-testing-cross.tmpl b/scripts/build/Dockerfile.armv7-testing-cross.tmpl new file mode 120000 index 0000000000..c1dd38b21f --- /dev/null +++ b/scripts/build/Dockerfile.armv7-testing-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.testing-cross.tmpl \ No newline at end of file diff --git 
a/scripts/build/Dockerfile.mips64el-testing-cross.hdr b/scripts/build/Dockerfile.mips64el-testing-cross.hdr new file mode 100644 index 0000000000..e78c94aa56 --- /dev/null +++ b/scripts/build/Dockerfile.mips64el-testing-cross.hdr @@ -0,0 +1,6 @@ +FROM dockcross/base:latest + +ENV ARCH=mips +ENV SUBARCH=mips +ENV DEBIAN_ARCH=mips64el +ENV CROSS_TRIPLET=mips64el-linux-gnuabi64 diff --git a/scripts/build/Dockerfile.mips64el-testing-cross.tmpl b/scripts/build/Dockerfile.mips64el-testing-cross.tmpl new file mode 120000 index 0000000000..c1dd38b21f --- /dev/null +++ b/scripts/build/Dockerfile.mips64el-testing-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.testing-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.ppc64-testing-cross.hdr b/scripts/build/Dockerfile.ppc64-testing-cross.hdr new file mode 100644 index 0000000000..38547ac557 --- /dev/null +++ b/scripts/build/Dockerfile.ppc64-testing-cross.hdr @@ -0,0 +1,5 @@ +FROM dockcross/base:latest + +ENV ARCH=ppc64 +ENV DEBIAN_ARCH=ppc64el +ENV CROSS_TRIPLET=powerpc64le-linux-gnu diff --git a/scripts/build/Dockerfile.ppc64-testing-cross.tmpl b/scripts/build/Dockerfile.ppc64-testing-cross.tmpl new file mode 120000 index 0000000000..c1dd38b21f --- /dev/null +++ b/scripts/build/Dockerfile.ppc64-testing-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.testing-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.testing-cross.tmpl b/scripts/build/Dockerfile.testing-cross.tmpl new file mode 100644 index 0000000000..1d5565310b --- /dev/null +++ b/scripts/build/Dockerfile.testing-cross.tmpl @@ -0,0 +1,42 @@ +COPY scripts/ci/apt-install /bin/apt-install + +# Add the cross compiler sources +RUN echo "deb http://deb.debian.org/debian/ testing main" >> /etc/apt/sources.list && \ + dpkg --add-architecture ${DEBIAN_ARCH} + +RUN apt-install \ + crossbuild-essential-${DEBIAN_ARCH} \ + libc6-dev-${DEBIAN_ARCH}-cross \ + libc6-${DEBIAN_ARCH}-cross \ + libbz2-dev:${DEBIAN_ARCH} \ + libexpat1-dev:${DEBIAN_ARCH} \ 
+ ncurses-dev:${DEBIAN_ARCH} \ + libssl-dev:${DEBIAN_ARCH} \ + protobuf-c-compiler \ + protobuf-compiler \ + python3-protobuf \ + libnl-3-dev:${DEBIAN_ARCH} \ + libprotobuf-dev:${DEBIAN_ARCH} \ + libnet-dev:${DEBIAN_ARCH} \ + libprotobuf-c-dev:${DEBIAN_ARCH} \ + libcap-dev:${DEBIAN_ARCH} \ + libaio-dev:${DEBIAN_ARCH} \ + libnl-route-3-dev:${DEBIAN_ARCH} + +ENV CROSS_COMPILE=${CROSS_TRIPLET}- \ + CROSS_ROOT=/usr/${CROSS_TRIPLET} \ + AS=/usr/bin/${CROSS_TRIPLET}-as \ + AR=/usr/bin/${CROSS_TRIPLET}-ar \ + CC=/usr/bin/${CROSS_TRIPLET}-gcc \ + CPP=/usr/bin/${CROSS_TRIPLET}-cpp \ + CXX=/usr/bin/${CROSS_TRIPLET}-g++ \ + LD=/usr/bin/${CROSS_TRIPLET}-ld \ + FC=/usr/bin/${CROSS_TRIPLET}-gfortran + +ENV PATH="${PATH}:${CROSS_ROOT}/bin" \ + PKG_CONFIG_PATH=/usr/lib/${CROSS_TRIPLET}/pkgconfig + +COPY . /criu +WORKDIR /criu + +RUN make mrproper && date && make -j $(nproc) zdtm && date diff --git a/scripts/build/Makefile b/scripts/build/Makefile index a436c28397..b24fc80ea2 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,5 +1,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos7 armv7hf centos8 -NON_CLANG := armv7-stable-cross aarch64-stable-cross ppc64-stable-cross mips64el-stable-cross +STABLE_CROSS_ARCHES := armv7-stable-cross aarch64-stable-cross ppc64-stable-cross mips64el-stable-cross +TESTING_CROSS_ARCHES := armv7-testing-cross aarch64-testing-cross ppc64-testing-cross mips64el-testing-cross +NON_CLANG := $(TESTING_CROSS_ARCHES) $(STABLE_CROSS_ARCHES) CREATE_DOCKERFILES := $(ARCHES) $(NON_CLANG) TARGETS := $(ARCHES) alpine archlinux TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) From 8887dfb9174a42b699b1fde2dbf9033f66c8363b Mon Sep 17 00:00:00 2001 From: Salvatore Bonaccorso Date: Sat, 4 Dec 2021 16:59:51 +0000 Subject: [PATCH 063/121] make: Explicitly enable FPU on ARMv7 builds Starting with gcc-11, Debian's armhf compiler no longer builds with a default -mfpu= option. 
Instead it enables the FPU via an extension to the -march flag (--with-arch=armv7-a+fp). criu's Makefile explicitly passes its own -march=armv7-a setting, which overrides the +fp default, so we end up with no FPU: cc1: error: '-mfloat-abi=hard': selected architecture lacks an FPU Signed-off-by: Radostin Stoyanov --- Makefile | 2 +- test/zdtm/Makefile.inc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index d350126159..c0d0083718 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,7 @@ ifeq ($(ARCH),arm) endif ifeq ($(ARMV),7) - USERCFLAGS += -march=armv7-a + USERCFLAGS += -march=armv7-a+fp endif ifeq ($(ARMV),8) diff --git a/test/zdtm/Makefile.inc b/test/zdtm/Makefile.inc index 69154fdc96..d345233154 100644 --- a/test/zdtm/Makefile.inc +++ b/test/zdtm/Makefile.inc @@ -25,7 +25,7 @@ ifeq ($(ARCH),arm) ifeq ($(ARMV),6) USERCFLAGS += -march=armv6 else ifeq ($(ARMV),7) - USERCFLAGS += -march=armv7-a + USERCFLAGS += -march=armv7-a+fp else ifeq ($(ARMV),8) # To build aarch32 on armv8 Travis-CI (see criu Makefile) USERCFLAGS += -march=armv7-a From e7efdffa340d0169b4fb18d9ebba214f546ade5d Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 3 Dec 2021 16:48:36 +0000 Subject: [PATCH 064/121] ci: disable broken tests until fixed Broken tests are being tracked at * https://github.com/checkpoint-restore/criu/issues/1669 * https://github.com/checkpoint-restore/criu/issues/1635 This also enables previously disabled BPF related tests: * https://github.com/checkpoint-restore/criu/issues/1354 Signed-off-by: Adrian Reber --- scripts/ci/vagrant.sh | 6 +++--- test/jenkins/criu-fault.sh | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/scripts/ci/vagrant.sh b/scripts/ci/vagrant.sh index 839b100c80..25343f96c3 100755 --- a/scripts/ci/vagrant.sh +++ b/scripts/ci/vagrant.sh @@ -50,9 +50,9 @@ fedora-no-vdso() { vagrant reload ssh default cat /proc/cmdline ssh default 'cd /vagrant; tar xf criu.tar; cd criu; make -j 4' - # 
BPF tests are failing see: https://github.com/checkpoint-restore/criu/issues/1354 - # Needs to be fixed, skip for now - ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a --keep-going -x zdtm/static/bpf_hash -x zdtm/static/bpf_array' + # Disabling tests which are broken on 5.15 + # https://github.com/checkpoint-restore/criu/issues/1669 + ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a --keep-going -x zdtm/static/socket_close_data -x zdtm/static/socket_close_data01 -x zdtm/static/fifo_upon_unix_socket01 -x zdtm/static/sk-unix-mntns -x zdtm/static/fifo_upon_unix_socket00 -x zdtm/static/socket-ext -x zdtm/static/sk-unix01 -x zdtm/static/socket_dgram_data -x zdtm/static/sockets_dgram -x zdtm/static/sk-unix-dgram-ghost' # This test (pidfd_store_sk) requires pidfd_getfd syscall which is guaranteed in Fedora 33. # It is also skipped from -a because it runs in RPC mode only ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -t zdtm/transition/pidfd_store_sk --rpc --pre 2' diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index a8c3a5cf7f..bff40aed5e 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -28,6 +28,14 @@ fi ./test/zdtm.py run -t zdtm/static/maps04 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/transition/maps008 --fault 131 --keep-going --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/static/maps01 --fault 132 -f h || fail + +# Error injection with --fault 134 fails on newer CPUs used in Circle CI on EC2 +# Skip the --fault 134 tests +# https://github.com/checkpoint-restore/criu/issues/1635 +if [ -n "$CIRCLECI" ]; then + exit 0 +fi + # 134 is corrupting extended registers set, should run in a sub-thread (fpu03) # without restore (that will check if parasite corrupts extended registers) ./test/zdtm.py run -t zdtm/static/fpu03 --fault 134 -f h --norst || fail From 3b81160c57cde29450d27edb383deb6e19188dbf Mon Sep 17 00:00:00 2001 From: Adrian Reber 
Date: Sun, 5 Dec 2021 16:45:20 +0000 Subject: [PATCH 065/121] test: do not use --keep-going for single zdtm tests Looking at CI logs there are often messages like: "[WARNING] Option --keep-going is more useful when running multiple tests" This commit removes '--keep-going' from single zdtm test runs. Signed-off-by: Adrian Reber --- test/jenkins/criu-dedup.sh | 10 +++++----- test/jenkins/criu-fault.sh | 18 +++++++++--------- test/jenkins/criu-fcg.sh | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/test/jenkins/criu-dedup.sh b/test/jenkins/criu-dedup.sh index 0041496d80..edb1b653d1 100755 --- a/test/jenkins/criu-dedup.sh +++ b/test/jenkins/criu-dedup.sh @@ -9,8 +9,8 @@ prep # Additionally run these tests as they touch a lot of # memory and it makes sense to additionally check it # with delays between iterations -./test/zdtm.py run -t zdtm/transition/maps007 --keep-going --report report -f h --pre 8:.1 --dedup || fail -./test/zdtm.py run -t zdtm/static/mem-touch --keep-going --report report -f h --pre 8:.1 --dedup || fail -./test/zdtm.py run -t zdtm/transition/maps008 --keep-going --report report -f h --pre 8:.1 --dedup || fail -./test/zdtm.py run -t zdtm/transition/maps007 --keep-going --report report -f h --pre 8:.1 --noauto-dedup || fail -./test/zdtm.py run -t zdtm/static/mem-touch --keep-going --report report -f h --pre 8:.1 --noauto-dedup || fail +./test/zdtm.py run -t zdtm/transition/maps007 --report report -f h --pre 8:.1 --dedup || fail +./test/zdtm.py run -t zdtm/static/mem-touch --report report -f h --pre 8:.1 --dedup || fail +./test/zdtm.py run -t zdtm/transition/maps008 --report report -f h --pre 8:.1 --dedup || fail +./test/zdtm.py run -t zdtm/transition/maps007 --report report -f h --pre 8:.1 --noauto-dedup || fail +./test/zdtm.py run -t zdtm/static/mem-touch --report report -f h --pre 8:.1 --noauto-dedup || fail diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index bff40aed5e..0c5c180e90 100755 --- 
a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -4,10 +4,10 @@ set -e source `dirname $0`/criu-lib.sh prep -./test/zdtm.py run -t zdtm/static/env00 --fault 1 --keep-going --report report -f h || fail -./test/zdtm.py run -t zdtm/static/unlink_fstat00 --fault 2 --keep-going --report report -f h || fail -./test/zdtm.py run -t zdtm/static/maps00 --fault 3 --keep-going --report report -f h || fail -./test/zdtm.py run -t zdtm/static/inotify_irmap --fault 128 --keep-going --pre 2 -f uns || fail +./test/zdtm.py run -t zdtm/static/env00 --fault 1 --report report -f h || fail +./test/zdtm.py run -t zdtm/static/unlink_fstat00 --fault 2 --report report -f h || fail +./test/zdtm.py run -t zdtm/static/maps00 --fault 3 --report report -f h || fail +./test/zdtm.py run -t zdtm/static/inotify_irmap --fault 128 --pre 2 -f uns || fail ./test/zdtm.py run -t zdtm/static/env00 --fault 129 -f uns || fail ./test/zdtm.py run -t zdtm/transition/fork --fault 130 -f h || fail ./test/zdtm.py run -t zdtm/static/vdso01 --fault 127 || fail @@ -17,16 +17,16 @@ if [ "${COMPAT_TEST}" != "y" ] ; then ./test/zdtm.py run -t zdtm/static/vdso01 --fault 133 -f h || fail fi -./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 2 --keep-going --report report || fail -./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 4 --keep-going --report report || fail +./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 2 --report report || fail +./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 4 --report report || fail ./test/zdtm.py run -t zdtm/static/mntns_ghost --fault 6 --report report || fail ./test/zdtm.py run -t zdtm/static/mntns_link_remap --fault 6 --report report || fail ./test/zdtm.py run -t zdtm/static/unlink_fstat03 --fault 6 --report report || fail -./test/zdtm.py run -t zdtm/static/env00 --fault 5 --keep-going --report report || fail -./test/zdtm.py run -t zdtm/static/maps04 --fault 131 --keep-going --report report --pre 2:1 || fail -./test/zdtm.py run -t 
zdtm/transition/maps008 --fault 131 --keep-going --report report --pre 2:1 || fail +./test/zdtm.py run -t zdtm/static/env00 --fault 5 --report report || fail +./test/zdtm.py run -t zdtm/static/maps04 --fault 131 --report report --pre 2:1 || fail +./test/zdtm.py run -t zdtm/transition/maps008 --fault 131 --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/static/maps01 --fault 132 -f h || fail # Error injection with --fault 134 fails on newer CPUs used in Circle CI on EC2 diff --git a/test/jenkins/criu-fcg.sh b/test/jenkins/criu-fcg.sh index ca5054f5e5..81395b7ba6 100755 --- a/test/jenkins/criu-fcg.sh +++ b/test/jenkins/criu-fcg.sh @@ -6,10 +6,10 @@ source `dirname $0`/criu-lib.sh prep mount_tmpfs_to_dump -./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --keep-going --report report --freezecg zdtm:f || fail -./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --keep-going --report report --freezecg zdtm:f --pre 3 || fail -./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --keep-going --report report --freezecg zdtm:f --norst || fail +./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --report report --freezecg zdtm:f || fail +./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --report report --freezecg zdtm:f --pre 3 || fail +./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --report report --freezecg zdtm:f --norst || fail -./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --keep-going --report report --freezecg zdtm:t || fail -./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --keep-going --report report --freezecg zdtm:t --pre 3 || fail -./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --keep-going --report report --freezecg zdtm:t --norst || fail +./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --report report --freezecg zdtm:t || fail +./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --report report --freezecg zdtm:t --pre 3 || fail +./test/zdtm.py run -t zdtm/transition/thread-bomb -f h --report 
report --freezecg zdtm:t --norst || fail From 94d3c9694f16342e52afe1f3de7077bac9eaf198 Mon Sep 17 00:00:00 2001 From: ianlang Date: Fri, 3 Dec 2021 15:15:30 +0800 Subject: [PATCH 066/121] files-reg: try dump_ghost_remap if link-remap failed with error ENOENT An issue with dumping deleted reg files in overlayfs: After deleting a file originated from lower layer in merged dir, fstat() on the /proc/$pid/map_files symlink returns st_nlink=1, while linkat() fails with errno ENOENT. Signed-off-by: langyenan --- criu/files-reg.c | 42 +++++++++++++++++++++++++++++++++++------ criu/include/fs-magic.h | 4 ++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index 57403d2c62..6759e00e59 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -959,7 +959,25 @@ void free_link_remaps(void) } static int linkat_hard(int odir, char *opath, int ndir, char *npath, uid_t uid, gid_t gid, int flags); -static int create_link_remap(char *path, int len, int lfd, u32 *idp, struct ns_id *nsid, const struct stat *st) +static void check_overlayfs_fallback(char *path, const struct fd_parms *parms, bool *fallback) +{ + if (!fallback || parms->fs_type != OVERLAYFS_SUPER_MAGIC) + return; + + /* + * In overlayFS, linkat() fails with ENOENT if the removed file is + * originated from lower layer. The cause of failure is that linkat() + * sees the file has st_nlink=0, which is different than st_nlink=1 we + * got from earlier fstat() on lfd. By setting *fb=true, we will fall + * back to dump_ghost_remap() as it is what should have been done to + * removed files with st_nlink=0. 
+ */ + pr_info("Unable to link-remap %s on overlayFS, fall back to dump_ghost_remap\n", path); + *fallback = true; +} + +static int create_link_remap(char *path, int len, int lfd, u32 *idp, struct ns_id *nsid, const struct fd_parms *parms, + bool *fallback) { char link_name[PATH_MAX], *tmp; FileEntry fe = FILE_ENTRY__INIT; @@ -967,6 +985,7 @@ static int create_link_remap(char *path, int len, int lfd, u32 *idp, struct ns_i FownEntry fwn = FOWN_ENTRY__INIT; int mntns_root; int ret; + const struct stat *ost = &parms->stat; if (!opts.link_remap_ok) { pr_err("Can't create link remap for %s. " @@ -1005,11 +1024,12 @@ static int create_link_remap(char *path, int len, int lfd, u32 *idp, struct ns_i mntns_root = mntns_get_root_fd(nsid); again: - ret = linkat_hard(lfd, "", mntns_root, link_name, st->st_uid, st->st_gid, AT_EMPTY_PATH); + ret = linkat_hard(lfd, "", mntns_root, link_name, ost->st_uid, ost->st_gid, AT_EMPTY_PATH); if (ret < 0 && errno == ENOENT) { /* Use grand parent, if parent directory does not exist. 
*/ if (trim_last_parent(link_name) < 0) { pr_err("trim failed: @%s@\n", link_name); + check_overlayfs_fallback(path, parms, fallback); return -1; } goto again; @@ -1028,12 +1048,13 @@ static int create_link_remap(char *path, int len, int lfd, u32 *idp, struct ns_i return pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE); } -static int dump_linked_remap(char *path, int len, const struct stat *ost, int lfd, u32 id, struct ns_id *nsid) +static int dump_linked_remap(char *path, int len, const struct fd_parms *parms, int lfd, u32 id, struct ns_id *nsid, + bool *fallback) { u32 lid; RemapFilePathEntry rpe = REMAP_FILE_PATH_ENTRY__INIT; - if (create_link_remap(path, len, lfd, &lid, nsid, ost)) + if (create_link_remap(path, len, lfd, &lid, nsid, parms, fallback)) return -1; rpe.orig_id = id; @@ -1150,6 +1171,7 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms, struct stat pst; const struct stat *ost = &parms->stat; int flags = 0; + bool fallback = false; if (parms->fs_type == PROC_SUPER_MAGIC) { /* The file points to /proc/pid/ where pid is a dead @@ -1239,7 +1261,7 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms, * links on it) to have some persistent name at hands. */ pr_debug("Dump silly-rename linked remap for %x\n", id); - return dump_linked_remap(rpath + 1, plen - 1, ost, lfd, id, nsid); + return dump_linked_remap(rpath + 1, plen - 1, parms, lfd, id, nsid, NULL); } mntns_root = mntns_get_root_fd(nsid); @@ -1260,7 +1282,15 @@ static int check_path_remap(struct fd_link *link, const struct fd_parms *parms, if (errno == ENOENT) { link_strip_deleted(link); - return dump_linked_remap(rpath + 1, plen - 1, ost, lfd, id, nsid); + ret = dump_linked_remap(rpath + 1, plen - 1, parms, lfd, id, nsid, &fallback); + if (ret < 0 && fallback) { + /* fallback is true only if following conditions are true: + * 1. linkat() inside dump_linked_remap() failed with ENOENT + * 2. 
parms->fs_type == overlayFS + */ + return dump_ghost_remap(rpath + 1, ost, lfd, id, nsid); + } + return ret; } pr_perror("Can't stat path"); diff --git a/criu/include/fs-magic.h b/criu/include/fs-magic.h index 46ac8aa27d..ad34f48915 100644 --- a/criu/include/fs-magic.h +++ b/criu/include/fs-magic.h @@ -53,4 +53,8 @@ #define AUTOFS_SUPER_MAGIC 0x0187 #endif +#ifndef OVERLAYFS_SUPER_MAGIC +#define OVERLAYFS_SUPER_MAGIC 0x794c7630 +#endif + #endif /* __CR_FS_MAGIC_H__ */ From b9ae48172fff77d41b5cf19d334ccbe002ac0686 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 7 Dec 2021 09:10:14 +0000 Subject: [PATCH 067/121] util: make page-server IPv6 safe The function run_tcp_server() was the last place CRIU was still using the IPv4 only function inet_ntoa(). It was only used during a print, so that it did not really break anything, but with this commit the output is now no longer: Accepted connection from 0.0.0.0:58396 but correctly displaying the IPv6 address Accepted connection from ::1:58398 if connecting via IPv6. 
Signed-off-by: Adrian Reber --- criu/util.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/criu/util.c b/criu/util.c index 2917102fd4..8228221861 100644 --- a/criu/util.c +++ b/criu/util.c @@ -1107,7 +1107,7 @@ int setup_tcp_server(char *type, char *addr, unsigned short *port) int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk) { int ret; - struct sockaddr_in caddr; + struct sockaddr_storage caddr; socklen_t clen = sizeof(caddr); if (daemon_mode) { @@ -1135,13 +1135,20 @@ int run_tcp_server(bool daemon_mode, int *ask, int cfd, int sk) return -1; if (sk >= 0) { + char port[6]; + char address[INET6_ADDRSTRLEN]; *ask = accept(sk, (struct sockaddr *)&caddr, &clen); if (*ask < 0) { pr_perror("Can't accept connection to server"); goto err; - } else - pr_info("Accepted connection from %s:%u\n", inet_ntoa(caddr.sin_addr), - (int)ntohs(caddr.sin_port)); + } + ret = getnameinfo((struct sockaddr *)&caddr, clen, address, sizeof(address), port, sizeof(port), + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret) { + pr_err("Failed converting address: %s\n", gai_strerror(ret)); + goto err; + } + pr_info("Accepted connection from %s:%s\n", address, port); close(sk); } From 4cf1336062e06a4d042771d59eac851ea7273a15 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 2 Dec 2021 23:01:33 +0700 Subject: [PATCH 068/121] sk-unix: Fix TCP_ESTABLISHED checks in unix sockets Since commit 83301b5367a98 ("af_unix: Set TCP_ESTABLISHED for datagram sockets too") in Linux kernel, SOCK_DGRAM unix sockets can have TCP_ESTABLISHED state when connected. So we need to fix checks that assume SOCK_DGRAM sockets cannot have TCP_ESTABLISHED state.
Signed-off-by: Bui Quang Minh --- criu/sk-unix.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index a819473b40..d3402c3acc 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -460,7 +460,7 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) pr_warn("Shutdown mismatch %u:%d -> %u:%d\n", ue->ino, ue->shutdown, peer->sd.ino, peer->shutdown); } - } else if (ue->state == TCP_ESTABLISHED) { + } else if (ue->state == TCP_ESTABLISHED && ue->type != SOCK_DGRAM) { const struct unix_sk_listen_icon *e; e = lookup_unix_listen_icons(ue->ino); @@ -1851,14 +1851,10 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) close(sks[1]); sk = sks[0]; - } else if (ui->ue->state == TCP_ESTABLISHED && queuer && queuer->ue->ino == FAKE_INO) { + } else if ((ui->ue->state == TCP_ESTABLISHED && ui->ue->type == SOCK_STREAM) && queuer && + queuer->ue->ino == FAKE_INO) { int ret, sks[2]; - if (ui->ue->type != SOCK_STREAM) { - pr_err("Non-stream socket %u in established state\n", ui->ue->ino); - return -1; - } - if (ui->ue->shutdown != SK_SHUTDOWN__BOTH) { pr_err("Wrong shutdown/peer state for %u\n", ui->ue->ino); return -1; From 1cfca8ec896ed84a6ab8878489854fe0529e14a5 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Tue, 7 Dec 2021 23:40:12 +0700 Subject: [PATCH 069/121] ci: Enable disabled unix socket related tests As the unix socket broken tests have been fixed in the pull request https://github.com/checkpoint-restore/criu/pull/1680 We re-enable these tests. 
Signed-off-by: Bui Quang Minh --- scripts/ci/vagrant.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/ci/vagrant.sh b/scripts/ci/vagrant.sh index 25343f96c3..40c8416e18 100755 --- a/scripts/ci/vagrant.sh +++ b/scripts/ci/vagrant.sh @@ -50,9 +50,7 @@ fedora-no-vdso() { vagrant reload ssh default cat /proc/cmdline ssh default 'cd /vagrant; tar xf criu.tar; cd criu; make -j 4' - # Disabling tests which are broken on 5.15 - # https://github.com/checkpoint-restore/criu/issues/1669 - ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a --keep-going -x zdtm/static/socket_close_data -x zdtm/static/socket_close_data01 -x zdtm/static/fifo_upon_unix_socket01 -x zdtm/static/sk-unix-mntns -x zdtm/static/fifo_upon_unix_socket00 -x zdtm/static/socket-ext -x zdtm/static/sk-unix01 -x zdtm/static/socket_dgram_data -x zdtm/static/sockets_dgram -x zdtm/static/sk-unix-dgram-ghost' + ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -a --keep-going' # This test (pidfd_store_sk) requires pidfd_getfd syscall which is guaranteed in Fedora 33. # It is also skipped from -a because it runs in RPC mode only ssh default 'cd /vagrant/criu/test; sudo ./zdtm.py run -t zdtm/transition/pidfd_store_sk --rpc --pre 2' From 010016dc43cd6fec27ba27c636cfd7aa02549433 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 10 Dec 2021 15:35:29 +0000 Subject: [PATCH 070/121] ci: install procps in Alpine The version of ps in Alpine image by default is very limited. It is based on the one from busybox and doesn't support options such as '-p'. 
Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.alpine | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index a6579c0bbf..cab72e8a18 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -6,6 +6,7 @@ RUN apk update && apk add \ bash \ build-base \ coreutils \ + procps \ git \ gnutls-dev \ libaio-dev \ From 1ddc8a3dca2881bef56855bb7741e8814c3753f6 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 3 Dec 2021 16:59:56 +0000 Subject: [PATCH 071/121] test: another try to correctly fix the kernel version We try to disable time namespace based testing for kernels older than 5.11. But we fail to come up with the correct if condition. This changes (major <= 5) to (major < 5). There are no kernels with major > 5 so currently the time namespace based tests are never run. This should finally change it to run time namespace based tests on kernel versions newer than 5.10. Signed-off-by: Adrian Reber --- test/zdtm_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm_ct.c b/test/zdtm_ct.c index e8d45a9e71..e4b17a5125 100644 --- a/test/zdtm_ct.c +++ b/test/zdtm_ct.c @@ -61,7 +61,7 @@ static int create_timens() if (sscanf(buf.release, "%u.%u", &major, &minor) != 2) return -1; - if ((major <= 5) || (major == 5 && minor < 11)) { + if ((major < 5) || (major == 5 && minor < 11)) { fprintf(stderr, "timens isn't supported on %s\n", buf.release); return 0; } From b270ce214216d6b74d8e3a6e9c6cbfc0c6ffbddd Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 13 Dec 2021 15:19:50 +0300 Subject: [PATCH 072/121] x86/compel/fault-inject: bound xsave features set Since e2e8be37 ("x86/compel/fault-inject: Add a fault-injection for corrupting extended regset") we have been doing a fault-injection test for C/R of the threads' register set by filling the tasks' xsave structures with garbage. But there are some features for which that's not safe.
It leads to failures like those described in #1635. In this particular case we hit a problem with the PKRU feature: after corrupting the pkru registers we may restrict access to some vma areas, so the process with the parasite injected gets a segfault and crashes. Let's manually specify which features are safe to fill with garbage by keeping a proper XFEATURE_MASK_FAULTINJ mask value. Fixes: e2e8be37 ("x86/compel/fault-inject: Add a fault-injection for corrupting extended regset") https://github.com/checkpoint-restore/criu/issues/1635 Signed-off-by: Alexander Mikhalitsyn --- compel/arch/x86/src/lib/include/uapi/asm/fpu.h | 5 +++++ compel/arch/x86/src/lib/infect.c | 1 + 2 files changed, 6 insertions(+) diff --git a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h index a16b658af1..c8ebda0970 100644 --- a/compel/arch/x86/src/lib/include/uapi/asm/fpu.h +++ b/compel/arch/x86/src/lib/include/uapi/asm/fpu.h @@ -80,6 +80,11 @@ enum xfeature { (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | \ XFEATURE_MASK_Hi16_ZMM | XFEATURE_MASK_PKRU | XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR) +/* xsave structure features which is safe to fill with garbage (see validate_random_xstate()) */ +#define XFEATURE_MASK_FAULTINJ \ + (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM) + struct fpx_sw_bytes { uint32_t magic1; uint32_t extended_size; diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c index 2f6c557d0c..37b0ee327c 100644 --- a/compel/arch/x86/src/lib/infect.c +++ b/compel/arch/x86/src/lib/infect.c @@ -254,6 +254,7 @@ static void validate_random_xstate(struct xsave_struct *xsave) /* No unknown or supervisor features may be set */ hdr->xstate_bv &= XFEATURE_MASK_USER; hdr->xstate_bv &= ~XFEATURE_MASK_SUPERVISOR; + hdr->xstate_bv &=
XFEATURE_MASK_FAULTINJ; for (i = 0; i < XFEATURE_MAX; i++) { if (!compel_fpu_has_feature(i)) From ef96f75f2c84575bd5bf702f933807be4275ee40 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 13 Dec 2021 15:32:00 +0300 Subject: [PATCH 073/121] x86/compel/fault-inject: print the initial seed Fixes: e2e8be37 ("x86/compel/fault-inject: Add a fault-injection for corrupting extended regset") Signed-off-by: Alexander Mikhalitsyn --- compel/arch/x86/src/lib/infect.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compel/arch/x86/src/lib/infect.c b/compel/arch/x86/src/lib/infect.c index 37b0ee327c..de9013c275 100644 --- a/compel/arch/x86/src/lib/infect.c +++ b/compel/arch/x86/src/lib/infect.c @@ -283,10 +283,10 @@ static int corrupt_extregs(pid_t pid) bool use_xsave = compel_cpu_has_feature(X86_FEATURE_OSXSAVE); user_fpregs_struct_t ext_regs; int *rand_to = (int *)&ext_regs; - unsigned int seed; + unsigned int seed, init_seed; size_t i; - seed = time(NULL); + init_seed = seed = time(NULL); for (i = 0; i < sizeof(ext_regs) / sizeof(int); i++) *rand_to++ = rand_r(&seed); @@ -296,7 +296,7 @@ static int corrupt_extregs(pid_t pid) * - zdtm.py will grep it auto-magically from logs * (and the seed will be known from an automatical testing) */ - pr_err("Corrupting %s for %d, seed %u\n", use_xsave ? "xsave" : "fpuregs", pid, seed); + pr_err("Corrupting %s for %d, seed %u\n", use_xsave ? 
"xsave" : "fpuregs", pid, init_seed); if (!use_xsave) { if (ptrace(PTRACE_SETFPREGS, pid, NULL, &ext_regs)) { From df36ad5100646b6c3be4798da92b2c5f0b4aa37f Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 13 Dec 2021 15:46:47 +0300 Subject: [PATCH 074/121] ci: enable x86 xsave fault injection tests back Signed-off-by: Alexander Mikhalitsyn --- test/jenkins/criu-fault.sh | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index 0c5c180e90..9f20091ccc 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -28,14 +28,6 @@ fi ./test/zdtm.py run -t zdtm/static/maps04 --fault 131 --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/transition/maps008 --fault 131 --report report --pre 2:1 || fail ./test/zdtm.py run -t zdtm/static/maps01 --fault 132 -f h || fail - -# Error injection with --fault 134 fails on newer CPUs used in Circle CI on EC2 -# Skip the --fault 134 tests -# https://github.com/checkpoint-restore/criu/issues/1635 -if [ -n "$CIRCLECI" ]; then - exit 0 -fi - # 134 is corrupting extended registers set, should run in a sub-thread (fpu03) # without restore (that will check if parasite corrupts extended registers) ./test/zdtm.py run -t zdtm/static/fpu03 --fault 134 -f h --norst || fail From 70f20b4460654df0c013f7252365554dc857615b Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 14 Dec 2021 12:54:19 +0000 Subject: [PATCH 075/121] Add documentation for --timeout option The --timeout option was introduced in [1] to prevent criu dump from being able to hang indefinitely and allow users to adjust the time limit in seconds for collecting tasks during the dump operation. 
[1] https://github.com/checkpoint-restore/criu/commit/d0ff730 Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 4 ++++ criu/crtools.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 4c6885fc9e..f41b1898c2 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -367,6 +367,10 @@ mount -t cgroup -o devices,freezer none devices,freezer Allows to link unlinked files back, if possible (modifies filesystem during *restore*). +*--timeout* 'number':: + Set a time limit in seconds for collecting tasks during the + dump operation. The timeout is 10 seconds by default. + *--ghost-limit* 'size':: Set the maximum size of deleted file to be carried inside image. By default, up to 1M file is allowed. Using this diff --git a/criu/crtools.c b/criu/crtools.c index 81c0aa963c..da47bd684c 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -391,6 +391,8 @@ int main(int argc, char *argv[], char *envp[]) " -j|--" OPT_SHELL_JOB " allow one to dump and restore shell jobs\n" " -l|--" OPT_FILE_LOCKS " handle file locks, for safety, only used for container\n" " -L|--libdir path to a plugin directory (by default " CR_PLUGIN_DEFAULT ")\n" + " --timeout NUM a timeout (in seconds) on collecting tasks during dump\n" + " (default 10 seconds)\n" " --force-irmap force resolving names for inotify/fsnotify watches\n" " --irmap-scan-path FILE\n" " add a path the irmap hints to scan\n" From fc1e7788ce1cba57afc230c6219d570f5d3074ae Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 14 Dec 2021 08:19:16 -0800 Subject: [PATCH 076/121] usernsd: UNS_FDOUT should not require an input descriptor UNS_FDOUT means only that a userns call will return a file descriptor. 
Signed-off-by: Andrei Vagin --- criu/namespaces.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/criu/namespaces.c b/criu/namespaces.c index 7fa58682b8..c36e631cfd 100644 --- a/criu/namespaces.c +++ b/criu/namespaces.c @@ -1330,11 +1330,6 @@ static int usernsd(int sk) unsc_msg_pid_fd(&um, &pid, &fd); pr_debug("uns: daemon calls %p (%d, %d, %x)\n", call, pid, fd, flags); - if (fd < 0 && flags & UNS_FDOUT) { - pr_err("uns: bad flags/fd %p %d %x\n", call, fd, flags); - BUG(); - } - /* * Caller has sent us bare address of the routine it * wants to call. Since the caller is fork()-ed from the From d67af357dfa24f646f2e8abccede1e25d4376267 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Wed, 1 Dec 2021 11:08:53 +0000 Subject: [PATCH 077/121] libcriu: add setting lsm-mount-context to libcriu Signed-off-by: Adrian Reber --- lib/c/criu.c | 15 +++++++++++++++ lib/c/criu.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/lib/c/criu.c b/lib/c/criu.c index ddc6e0731f..4ee189acac 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -238,6 +238,7 @@ void criu_local_free_opts(criu_opts *opts) free(opts->rpc->freeze_cgroup); free(opts->rpc->log_file); free(opts->rpc->lsm_profile); + free(opts->rpc->lsm_mount_context); free(opts->rpc); criu_free_service(opts); free(opts); @@ -651,6 +652,20 @@ int criu_set_lsm_profile(const char *name) return criu_local_set_lsm_profile(global_opts, name); } +int criu_local_set_lsm_mount_context(criu_opts *opts, const char *name) +{ + opts->rpc->lsm_mount_context = strdup(name); + if (opts->rpc->lsm_mount_context == NULL) { + return -ENOMEM; + } + return 0; +} + +int criu_set_lsm_mount_context(const char *name) +{ + return criu_local_set_lsm_mount_context(global_opts, name); +} + void criu_local_set_timeout(criu_opts *opts, unsigned int timeout) { opts->rpc->timeout = timeout; diff --git a/lib/c/criu.h b/lib/c/criu.h index 949902f559..a374b37f83 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -92,6 +92,7 @@ void criu_set_manage_cgroups(bool 
manage); void criu_set_manage_cgroups_mode(enum criu_cg_mode mode); int criu_set_freeze_cgroup(const char *name); int criu_set_lsm_profile(const char *name); +int criu_set_lsm_mount_context(const char *name); void criu_set_timeout(unsigned int timeout); void criu_set_auto_ext_mnt(bool val); void criu_set_ext_sharing(bool val); @@ -249,6 +250,7 @@ void criu_local_set_manage_cgroups(criu_opts *opts, bool manage); void criu_local_set_manage_cgroups_mode(criu_opts *opts, enum criu_cg_mode mode); int criu_local_set_freeze_cgroup(criu_opts *opts, const char *name); int criu_local_set_lsm_profile(criu_opts *opts, const char *name); +int criu_local_set_lsm_mount_context(criu_opts *opts, const char *name); void criu_local_set_timeout(criu_opts *opts, unsigned int timeout); void criu_local_set_auto_ext_mnt(criu_opts *opts, bool val); void criu_local_set_ext_sharing(criu_opts *opts, bool val); From 4b75e29f72a84780099798b2e9f5c6651069c6da Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 18 Dec 2021 17:25:17 +0000 Subject: [PATCH 078/121] ci: use unstable release for cross-compile We added cross-compile tests with testing debian release to be able to replicate the error reported in #1653, however, installing build dependencies in this release currently fails with the following error: libc6-dev:armhf : Breaks: libc6-dev-armhf-cross (< 2.33~) but 2.32-1cross4 is to be installed This is not something we can fix, therefore using the debian unstable release (instead of testing) could be more reliable option for our CI. This would still replicate the problem reported in #1653. 
Signed-off-by: Radostin Stoyanov --- .github/workflows/cross-compile.yml | 8 ++++---- scripts/build/Dockerfile.aarch64-testing-cross.tmpl | 1 - ...ng-cross.hdr => Dockerfile.aarch64-unstable-cross.hdr} | 0 scripts/build/Dockerfile.aarch64-unstable-cross.tmpl | 1 + scripts/build/Dockerfile.armv7-testing-cross.tmpl | 1 - ...ting-cross.hdr => Dockerfile.armv7-unstable-cross.hdr} | 0 scripts/build/Dockerfile.armv7-unstable-cross.tmpl | 1 + scripts/build/Dockerfile.mips64el-testing-cross.tmpl | 1 - ...g-cross.hdr => Dockerfile.mips64el-unstable-cross.hdr} | 0 scripts/build/Dockerfile.mips64el-unstable-cross.tmpl | 1 + scripts/build/Dockerfile.ppc64-testing-cross.tmpl | 1 - ...ting-cross.hdr => Dockerfile.ppc64-unstable-cross.hdr} | 0 scripts/build/Dockerfile.ppc64-unstable-cross.tmpl | 1 + ....testing-cross.tmpl => Dockerfile.unstable-cross.tmpl} | 2 +- scripts/build/Makefile | 4 ++-- 15 files changed, 11 insertions(+), 11 deletions(-) delete mode 120000 scripts/build/Dockerfile.aarch64-testing-cross.tmpl rename scripts/build/{Dockerfile.aarch64-testing-cross.hdr => Dockerfile.aarch64-unstable-cross.hdr} (100%) create mode 120000 scripts/build/Dockerfile.aarch64-unstable-cross.tmpl delete mode 120000 scripts/build/Dockerfile.armv7-testing-cross.tmpl rename scripts/build/{Dockerfile.armv7-testing-cross.hdr => Dockerfile.armv7-unstable-cross.hdr} (100%) create mode 120000 scripts/build/Dockerfile.armv7-unstable-cross.tmpl delete mode 120000 scripts/build/Dockerfile.mips64el-testing-cross.tmpl rename scripts/build/{Dockerfile.mips64el-testing-cross.hdr => Dockerfile.mips64el-unstable-cross.hdr} (100%) create mode 120000 scripts/build/Dockerfile.mips64el-unstable-cross.tmpl delete mode 120000 scripts/build/Dockerfile.ppc64-testing-cross.tmpl rename scripts/build/{Dockerfile.ppc64-testing-cross.hdr => Dockerfile.ppc64-unstable-cross.hdr} (100%) create mode 120000 scripts/build/Dockerfile.ppc64-unstable-cross.tmpl rename scripts/build/{Dockerfile.testing-cross.tmpl => 
Dockerfile.unstable-cross.tmpl} (93%) diff --git a/.github/workflows/cross-compile.yml b/.github/workflows/cross-compile.yml index eec6eb5778..461a6e6188 100644 --- a/.github/workflows/cross-compile.yml +++ b/.github/workflows/cross-compile.yml @@ -10,13 +10,13 @@ jobs: matrix: target: [ armv7-stable-cross, - armv7-testing-cross, + armv7-unstable-cross, aarch64-stable-cross, - aarch64-testing-cross, + aarch64-unstable-cross, ppc64-stable-cross, - ppc64-testing-cross, + ppc64-unstable-cross, mips64el-stable-cross, - mips64el-testing-cross + mips64el-unstable-cross ] steps: diff --git a/scripts/build/Dockerfile.aarch64-testing-cross.tmpl b/scripts/build/Dockerfile.aarch64-testing-cross.tmpl deleted file mode 120000 index c1dd38b21f..0000000000 --- a/scripts/build/Dockerfile.aarch64-testing-cross.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.testing-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.aarch64-testing-cross.hdr b/scripts/build/Dockerfile.aarch64-unstable-cross.hdr similarity index 100% rename from scripts/build/Dockerfile.aarch64-testing-cross.hdr rename to scripts/build/Dockerfile.aarch64-unstable-cross.hdr diff --git a/scripts/build/Dockerfile.aarch64-unstable-cross.tmpl b/scripts/build/Dockerfile.aarch64-unstable-cross.tmpl new file mode 120000 index 0000000000..955ae1fd4b --- /dev/null +++ b/scripts/build/Dockerfile.aarch64-unstable-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.unstable-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.armv7-testing-cross.tmpl b/scripts/build/Dockerfile.armv7-testing-cross.tmpl deleted file mode 120000 index c1dd38b21f..0000000000 --- a/scripts/build/Dockerfile.armv7-testing-cross.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.testing-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.armv7-testing-cross.hdr b/scripts/build/Dockerfile.armv7-unstable-cross.hdr similarity index 100% rename from scripts/build/Dockerfile.armv7-testing-cross.hdr rename to 
scripts/build/Dockerfile.armv7-unstable-cross.hdr diff --git a/scripts/build/Dockerfile.armv7-unstable-cross.tmpl b/scripts/build/Dockerfile.armv7-unstable-cross.tmpl new file mode 120000 index 0000000000..955ae1fd4b --- /dev/null +++ b/scripts/build/Dockerfile.armv7-unstable-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.unstable-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.mips64el-testing-cross.tmpl b/scripts/build/Dockerfile.mips64el-testing-cross.tmpl deleted file mode 120000 index c1dd38b21f..0000000000 --- a/scripts/build/Dockerfile.mips64el-testing-cross.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.testing-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.mips64el-testing-cross.hdr b/scripts/build/Dockerfile.mips64el-unstable-cross.hdr similarity index 100% rename from scripts/build/Dockerfile.mips64el-testing-cross.hdr rename to scripts/build/Dockerfile.mips64el-unstable-cross.hdr diff --git a/scripts/build/Dockerfile.mips64el-unstable-cross.tmpl b/scripts/build/Dockerfile.mips64el-unstable-cross.tmpl new file mode 120000 index 0000000000..955ae1fd4b --- /dev/null +++ b/scripts/build/Dockerfile.mips64el-unstable-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.unstable-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.ppc64-testing-cross.tmpl b/scripts/build/Dockerfile.ppc64-testing-cross.tmpl deleted file mode 120000 index c1dd38b21f..0000000000 --- a/scripts/build/Dockerfile.ppc64-testing-cross.tmpl +++ /dev/null @@ -1 +0,0 @@ -Dockerfile.testing-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.ppc64-testing-cross.hdr b/scripts/build/Dockerfile.ppc64-unstable-cross.hdr similarity index 100% rename from scripts/build/Dockerfile.ppc64-testing-cross.hdr rename to scripts/build/Dockerfile.ppc64-unstable-cross.hdr diff --git a/scripts/build/Dockerfile.ppc64-unstable-cross.tmpl b/scripts/build/Dockerfile.ppc64-unstable-cross.tmpl new file mode 120000 index 
0000000000..955ae1fd4b --- /dev/null +++ b/scripts/build/Dockerfile.ppc64-unstable-cross.tmpl @@ -0,0 +1 @@ +Dockerfile.unstable-cross.tmpl \ No newline at end of file diff --git a/scripts/build/Dockerfile.testing-cross.tmpl b/scripts/build/Dockerfile.unstable-cross.tmpl similarity index 93% rename from scripts/build/Dockerfile.testing-cross.tmpl rename to scripts/build/Dockerfile.unstable-cross.tmpl index 1d5565310b..dacfd96ef0 100644 --- a/scripts/build/Dockerfile.testing-cross.tmpl +++ b/scripts/build/Dockerfile.unstable-cross.tmpl @@ -1,7 +1,7 @@ COPY scripts/ci/apt-install /bin/apt-install # Add the cross compiler sources -RUN echo "deb http://deb.debian.org/debian/ testing main" >> /etc/apt/sources.list && \ +RUN echo "deb http://deb.debian.org/debian/ unstable main" >> /etc/apt/sources.list && \ dpkg --add-architecture ${DEBIAN_ARCH} RUN apt-install \ diff --git a/scripts/build/Makefile b/scripts/build/Makefile index b24fc80ea2..2c006ad873 100644 --- a/scripts/build/Makefile +++ b/scripts/build/Makefile @@ -1,7 +1,7 @@ ARCHES := x86_64 fedora-asan fedora-rawhide centos7 armv7hf centos8 STABLE_CROSS_ARCHES := armv7-stable-cross aarch64-stable-cross ppc64-stable-cross mips64el-stable-cross -TESTING_CROSS_ARCHES := armv7-testing-cross aarch64-testing-cross ppc64-testing-cross mips64el-testing-cross -NON_CLANG := $(TESTING_CROSS_ARCHES) $(STABLE_CROSS_ARCHES) +UNSTABLE_CROSS_ARCHES := armv7-unstable-cross aarch64-unstable-cross ppc64-unstable-cross mips64el-unstable-cross +NON_CLANG := $(UNSTABLE_CROSS_ARCHES) $(STABLE_CROSS_ARCHES) CREATE_DOCKERFILES := $(ARCHES) $(NON_CLANG) TARGETS := $(ARCHES) alpine archlinux TARGETS_CLANG := $(addsuffix $(TARGETS),-clang) From d99def7dcfa938918368c91021f72a77f738bc61 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 17 Dec 2021 03:01:49 +0000 Subject: [PATCH 079/121] ci: disable glibc rseq support This patch sets the glibc.pthread.rseq tunable [1] to disable rseq support in glibc as a temporary solution for the 
problem described in [2]. This would allow us to run CI tests until CRIU has rseq support. This commit also disables the rpc tests as they fail even when GLIBC_TUNABLES is set. [1] https://sourceware.org/git/?p=glibc.git;a=commit;h=e3e589829d16af9f7e73c7b70f74f3c5d5003e45 [2] https://github.com/checkpoint-restore/criu/issues/1696 Signed-off-by: Radostin Stoyanov --- scripts/build/Dockerfile.fedora.tmpl | 3 +++ scripts/ci/run-ci-tests.sh | 7 ++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/build/Dockerfile.fedora.tmpl b/scripts/build/Dockerfile.fedora.tmpl index 9d3bb0f879..fd4ba4aefe 100644 --- a/scripts/build/Dockerfile.fedora.tmpl +++ b/scripts/build/Dockerfile.fedora.tmpl @@ -1,5 +1,8 @@ ARG CC=gcc +# FIXME: Temporary solution for https://github.com/checkpoint-restore/criu/issues/1696 +ENV GLIBC_TUNABLES=glibc.pthread.rseq=0 + COPY scripts/ci/prepare-for-fedora-rawhide.sh /bin/prepare-for-fedora-rawhide.sh RUN /bin/prepare-for-fedora-rawhide.sh diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh index 096f907fb0..bf7331142b 100755 --- a/scripts/ci/run-ci-tests.sh +++ b/scripts/ci/run-ci-tests.sh @@ -235,7 +235,12 @@ if [ -n "$TRAVIS" ] || [ -n "$CIRCLECI" ]; then # Error (criu/tty.c:1014): tty: Don't have tty to inherit session from, aborting make -C test/others/shell-job/ run fi -make -C test/others/rpc/ run + +# FIXME: rpc tests fail even with set glibc tunable +# https://github.com/checkpoint-restore/criu/issues/1696 +if [ "$GLIBC_TUNABLES" != "glibc.pthread.rseq=0" ]; then + make -C test/others/rpc/ run +fi ./test/zdtm.py run -t zdtm/static/env00 --sibling From 7b5909592881607f5d5ecc8de144e5bb83f02438 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 3 Dec 2021 14:32:33 +0000 Subject: [PATCH 080/121] libcriu: add single pre-dump support In contrast to the CLI it is not possible to do a single pre-dump via RPC and thus libcriu. In cr-service.c pre-dump always goes into a pre-dump loop followed by a final dump. 
runc already works around this to only do a single pre-dump by killing the CRIU process waiting for the message for the final dump. Trying to implement pre-dump in crun via libcriu it is not as easy to work around CRIU's pre-dump loop expectations as with runc that directly talks to CRIU via RPC. We know that LXC/LXD also does single pre-dumps using the CLI and runc also only does single pre-dumps by misusing the pre-dump loop interface. With this commit it is possible to trigger a single pre-dump via RPC and libcriu without misusing the interface provided via cr-service.c. So this commit basically updates CRIU to the existing use cases. The existing pre-dump loop still sounds like a very good idea, but so far most tools have decided to implement the pre-dump loop themselves. With this change we can implement pre-dump in crun to match what is currently implemented in runc. Signed-off-by: Adrian Reber --- criu/cr-service.c | 13 ++++++++----- images/rpc.proto | 2 ++ lib/c/criu.c | 21 ++++++++++++++++++--- lib/c/criu.h | 2 ++ 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 0f8bc4cc10..80d12c7b07 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -169,11 +169,11 @@ int send_criu_dump_resp(int socket_fd, bool success, bool restored) return send_criu_msg(socket_fd, &msg); } -static int send_criu_pre_dump_resp(int socket_fd, bool success) +static int send_criu_pre_dump_resp(int socket_fd, bool success, bool single) { CriuResp msg = CRIU_RESP__INIT; - msg.type = CRIU_REQ_TYPE__PRE_DUMP; + msg.type = single ? 
CRIU_REQ_TYPE__SINGLE_PRE_DUMP : CRIU_REQ_TYPE__PRE_DUMP; msg.success = success; set_resp_err(&msg); @@ -845,7 +845,7 @@ static int check(int sk, CriuOpts *req) return send_criu_msg(sk, &resp); } -static int pre_dump_using_req(int sk, CriuOpts *req) +static int pre_dump_using_req(int sk, CriuOpts *req, bool single) { int pid, status; bool success = false; @@ -886,7 +886,7 @@ static int pre_dump_using_req(int sk, CriuOpts *req) success = true; out: - if (send_criu_pre_dump_resp(sk, success) == -1) { + if (send_criu_pre_dump_resp(sk, success, single) == -1) { pr_perror("Can't send pre-dump resp"); success = false; } @@ -899,7 +899,7 @@ static int pre_dump_loop(int sk, CriuReq *msg) int ret; do { - ret = pre_dump_using_req(sk, msg->opts); + ret = pre_dump_using_req(sk, msg->opts, false); if (ret < 0) return ret; @@ -1271,6 +1271,9 @@ int cr_service_work(int sk) case CRIU_REQ_TYPE__VERSION: ret = handle_version(sk, msg); break; + case CRIU_REQ_TYPE__SINGLE_PRE_DUMP: + ret = pre_dump_using_req(sk, msg->opts, true); + break; default: send_criu_err(sk, "Invalid req"); diff --git a/images/rpc.proto b/images/rpc.proto index a9f51ac4bf..1d3befd23c 100644 --- a/images/rpc.proto +++ b/images/rpc.proto @@ -172,6 +172,8 @@ enum criu_req_type { WAIT_PID = 11; PAGE_SERVER_CHLD = 12; + + SINGLE_PRE_DUMP = 13; } /* diff --git a/lib/c/criu.c b/lib/c/criu.c index 4ee189acac..500574e33c 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -1527,7 +1527,7 @@ int criu_check(void) return criu_local_check(global_opts); } -int criu_local_dump(criu_opts *opts) +static int dump(bool pre_dump, criu_opts *opts) { int ret = -1; CriuReq req = CRIU_REQ__INIT; @@ -1535,7 +1535,7 @@ int criu_local_dump(criu_opts *opts) saved_errno = 0; - req.type = CRIU_REQ_TYPE__DUMP; + req.type = pre_dump ? 
CRIU_REQ_TYPE__SINGLE_PRE_DUMP : CRIU_REQ_TYPE__DUMP; req.opts = opts->rpc; ret = send_req_and_recv_resp(opts, &req, &resp); @@ -1543,7 +1543,7 @@ int criu_local_dump(criu_opts *opts) goto exit; if (resp->success) { - if (resp->dump->has_restored && resp->dump->restored) + if (!pre_dump && resp->dump->has_restored && resp->dump->restored) ret = 1; else ret = 0; @@ -1561,11 +1561,26 @@ int criu_local_dump(criu_opts *opts) return ret; } +int criu_local_dump(criu_opts *opts) +{ + return dump(false, opts); +} + int criu_dump(void) { return criu_local_dump(global_opts); } +int criu_local_pre_dump(criu_opts *opts) +{ + return dump(true, opts); +} + +int criu_pre_dump(void) +{ + return criu_local_pre_dump(global_opts); +} + int criu_local_dump_iters(criu_opts *opts, int (*more)(criu_predump_info pi)) { int ret = -1, fd = -1, uret; diff --git a/lib/c/criu.h b/lib/c/criu.h index a374b37f83..c6d4f50a8a 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -161,6 +161,7 @@ int criu_get_orphan_pts_master_fd(void); */ int criu_check(void); int criu_dump(void); +int criu_pre_dump(void); int criu_restore(void); int criu_restore_child(void); @@ -279,6 +280,7 @@ void criu_local_set_notify_cb(criu_opts *opts, int (*cb)(char *action, criu_noti int criu_local_check(criu_opts *opts); int criu_local_dump(criu_opts *opts); +int criu_local_pre_dump(criu_opts *opts); int criu_local_restore(criu_opts *opts); int criu_local_restore_child(criu_opts *opts); int criu_local_dump_iters(criu_opts *opts, int (*more)(criu_predump_info pi)); From e4ccdc694a8739125da2c7e8e57f8a30c7451eaf Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 14 Dec 2021 13:09:19 +0000 Subject: [PATCH 081/121] tests: added test for single pre-dump support Signed-off-by: Adrian Reber --- test/others/libcriu/.gitignore | 1 + test/others/libcriu/Makefile | 1 + test/others/libcriu/lib.h | 2 + test/others/libcriu/run.sh | 1 + test/others/libcriu/test_iters.c | 2 - test/others/libcriu/test_notify.c | 2 - 
test/others/libcriu/test_pre_dump.c | 151 ++++++++++++++++++++++++++++ test/others/libcriu/test_sub.c | 2 - 8 files changed, 156 insertions(+), 6 deletions(-) create mode 100644 test/others/libcriu/test_pre_dump.c diff --git a/test/others/libcriu/.gitignore b/test/others/libcriu/.gitignore index cf1342de2a..15abf07acc 100644 --- a/test/others/libcriu/.gitignore +++ b/test/others/libcriu/.gitignore @@ -4,5 +4,6 @@ test_notify test_self test_sub test_join_ns +test_pre_dump output/ libcriu.so.* diff --git a/test/others/libcriu/Makefile b/test/others/libcriu/Makefile index 734e66c1a9..581574da00 100644 --- a/test/others/libcriu/Makefile +++ b/test/others/libcriu/Makefile @@ -6,6 +6,7 @@ TESTS += test_notify TESTS += test_iters TESTS += test_errno TESTS += test_join_ns +TESTS += test_pre_dump all: $(TESTS) .PHONY: all diff --git a/test/others/libcriu/lib.h b/test/others/libcriu/lib.h index 6fdf8aef22..59372fca5f 100644 --- a/test/others/libcriu/lib.h +++ b/test/others/libcriu/lib.h @@ -1,3 +1,5 @@ void what_err_ret_mean(int ret); int chk_exit(int status, int want); int get_version(void); + +#define SUCC_ECODE 42 diff --git a/test/others/libcriu/run.sh b/test/others/libcriu/run.sh index 48f25a5f6d..1b6c73448f 100755 --- a/test/others/libcriu/run.sh +++ b/test/others/libcriu/run.sh @@ -58,6 +58,7 @@ run_test test_notify if [ "$(uname -m)" = "x86_64" ]; then # Skip this on aarch64 as aarch64 has no dirty page tracking run_test test_iters + run_test test_pre_dump fi run_test test_errno run_test test_join_ns diff --git a/test/others/libcriu/test_iters.c b/test/others/libcriu/test_iters.c index b7e325abb4..edbaf87f6f 100644 --- a/test/others/libcriu/test_iters.c +++ b/test/others/libcriu/test_iters.c @@ -46,8 +46,6 @@ static int next_iter(criu_predump_info pi) return cur_iter < MAX_ITERS; } -#define SUCC_ECODE 42 - int main(int argc, char **argv) { int pid, ret, p[2]; diff --git a/test/others/libcriu/test_notify.c b/test/others/libcriu/test_notify.c index 
9a54b812a0..80ad3ffdcb 100644 --- a/test/others/libcriu/test_notify.c +++ b/test/others/libcriu/test_notify.c @@ -10,8 +10,6 @@ #include "lib.h" -#define SUCC_ECODE 42 - static int actions_called = 0; static int notify(char *action, criu_notify_arg_t na) { diff --git a/test/others/libcriu/test_pre_dump.c b/test/others/libcriu/test_pre_dump.c new file mode 100644 index 0000000000..ed9cd2125b --- /dev/null +++ b/test/others/libcriu/test_pre_dump.c @@ -0,0 +1,151 @@ +#include "criu.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lib.h" + +static int wdir_fd, cur_imgdir = -1; + +static int stop = 0; +static void sh(int sig) +{ + stop = 1; +} + +static void open_imgdir(void) +{ + char p[10]; + static int id = 0; + + if (id > 0) { + sprintf(p, "../dir-%d", id); + criu_set_parent_images(p); + } + if (cur_imgdir != -1) + close(cur_imgdir); + sprintf(p, "dir-%d", ++id); + mkdirat(wdir_fd, p, 0700); + cur_imgdir = openat(wdir_fd, p, O_DIRECTORY); + criu_set_images_dir_fd(cur_imgdir); +} + +int main(int argc, char **argv) +{ + int pid, ret, p[2]; + + wdir_fd = open(argv[2], O_DIRECTORY); + if (wdir_fd < 0) { + perror("Can't open wdir"); + return 1; + } + + printf("--- Start loop ---\n"); + pipe(p); + pid = fork(); + if (pid < 0) { + perror("Can't"); + return -1; + } + + if (!pid) { + printf(" `- loop: initializing\n"); + if (setsid() < 0) + exit(1); + if (signal(SIGUSR1, sh) == SIG_ERR) + exit(1); + + close(0); + close(1); + close(2); + close(p[0]); + + ret = SUCC_ECODE; + write(p[1], &ret, sizeof(ret)); + close(p[1]); + + while (!stop) + sleep(1); + exit(SUCC_ECODE); + } + + close(p[1]); + + /* Wait for kid to start */ + ret = -1; + read(p[0], &ret, sizeof(ret)); + if (ret != SUCC_ECODE) { + printf("Error starting loop\n"); + goto err; + } + + /* Wait for pipe to get closed, then dump */ + read(p[0], &ret, 1); + close(p[0]); + + printf("--- Dump loop ---\n"); + criu_init_opts(); + criu_set_service_binary(argv[1]); + 
criu_set_pid(pid); + criu_set_log_file("dump.log"); + criu_set_log_level(CRIU_LOG_DEBUG); + criu_set_track_mem(true); + + open_imgdir(); + ret = criu_pre_dump(); + if (ret < 0) { + what_err_ret_mean(ret); + kill(pid, SIGKILL); + goto err; + } + + printf(" `- Pre Dump 1 succeeded\n"); + + open_imgdir(); + ret = criu_pre_dump(); + if (ret < 0) { + what_err_ret_mean(ret); + kill(pid, SIGKILL); + goto err; + } + + printf(" `- Pre Dump 2 succeeded\n"); + + open_imgdir(); + ret = criu_dump(); + if (ret < 0) { + what_err_ret_mean(ret); + kill(pid, SIGKILL); + goto err; + } + + printf(" `- Final Dump succeeded\n"); + waitpid(pid, NULL, 0); + + printf("--- Restore ---\n"); + criu_init_opts(); + criu_set_log_level(CRIU_LOG_DEBUG); + criu_set_log_file("restore.log"); + criu_set_images_dir_fd(cur_imgdir); + + pid = criu_restore_child(); + if (pid <= 0) { + what_err_ret_mean(pid); + return -1; + } + + printf(" `- Restore returned pid %d\n", pid); + kill(pid, SIGUSR1); +err: + if (waitpid(pid, &ret, 0) < 0) { + perror(" Can't wait kid"); + return -1; + } + + return chk_exit(ret, SUCC_ECODE); +} diff --git a/test/others/libcriu/test_sub.c b/test/others/libcriu/test_sub.c index 697abf5d55..af1e09408c 100644 --- a/test/others/libcriu/test_sub.c +++ b/test/others/libcriu/test_sub.c @@ -15,8 +15,6 @@ static void sh(int sig) stop = 1; } -#define SUCC_ECODE 42 - int main(int argc, char **argv) { int pid, ret, fd, p[2]; From 98675175666d97a9eeadfcc396df8b894e318e78 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Sat, 18 Dec 2021 10:40:11 +0300 Subject: [PATCH 082/121] zdtm.py: clean up MAKEFLAGS env variable before running make instance In most cases we run tests as: ./test/zdtm.py run -a But it's also possible to run tests from root makefile: make test In this case, if criu tree have no ./test/umount2 binary built we get the error like: make[3]: *** No rule to make target 'umount2'. Stop. It's worth to mention this "3". 
That's because we have build process tree like this: make -> make -> make -> zdtm.py -> make umount2 and also we have MAKEFLAGS variable set to: build=-r -R -f ... And that's bad because "-r" option means no builtin rules and -R means no builtin variables. That makes `make umount2` not working. Let's just cleanup this variable to make things work properly. Fixes: #1699 https://github.com/checkpoint-restore/criu/issues/1699 Signed-off-by: Alexander Mikhalitsyn --- test/zdtm.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index b987700794..086ae02853 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -607,9 +607,11 @@ def blocking(self): @staticmethod def available(): if not os.access("umount2", os.X_OK): - subprocess.check_call(["make", "umount2"]) + subprocess.check_call( + ["make", "umount2"], env=dict(os.environ, MAKEFLAGS="")) if not os.access("zdtm_ct", os.X_OK): - subprocess.check_call(["make", "zdtm_ct"]) + subprocess.check_call( + ["make", "zdtm_ct"], env=dict(os.environ, MAKEFLAGS="")) if not os.access("zdtm/lib/libzdtmtst.a", os.F_OK): subprocess.check_call(["make", "-C", "zdtm/"]) subprocess.check_call( From e0bf87fecbcc353d697dccd3fe0801347caf1423 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Sat, 18 Dec 2021 11:03:53 +0300 Subject: [PATCH 083/121] zdtm: zdtm_ct fix compilation error with strict-prototypes on MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit zdtm_ct.c:44:12: error: function declaration isn’t a prototype [-Werror=strict-prototypes] 44 | static int create_timens() Signed-off-by: Alexander Mikhalitsyn --- test/zdtm_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/zdtm_ct.c b/test/zdtm_ct.c index e4b17a5125..0e8eeff8a3 100644 --- a/test/zdtm_ct.c +++ b/test/zdtm_ct.c @@ -41,7 +41,7 @@ static inline int _settime(clockid_t clk_id, time_t offset) return 0; } -static int create_timens() +static int 
create_timens(void) { struct utsname buf; unsigned major, minor; From 5083232273c93f9115bf91bed373d8ab749106d9 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 17 Dec 2021 15:33:55 +0300 Subject: [PATCH 084/121] zdtm: remove mntns-deleted-dst test leftover from git Looks like in commit [1] we've unintentionally added this tmp file to git, let's remove it. Fixes: 01ee29702 ("s390:zdtm: Enable zdtm for s390") [1] Signed-off-by: Pavel Tikhomirov --- test/zdtm/static/mntns-deleted-dst | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test/zdtm/static/mntns-deleted-dst diff --git a/test/zdtm/static/mntns-deleted-dst b/test/zdtm/static/mntns-deleted-dst deleted file mode 100644 index e69de29bb2..0000000000 From b8825cd2d4242b13d7ca598211c513c7b1308d84 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 10 Dec 2021 15:34:19 +0300 Subject: [PATCH 085/121] crtools: remove excess always true condition Several lines above if (optind >= argc) we go to usage label and fail, thus we don't need to check (optind < argc) here as it is always true.
Signed-off-by: Pavel Tikhomirov --- criu/crtools.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/crtools.c b/criu/crtools.c index da47bd684c..3adfb190f7 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -124,7 +124,7 @@ int main(int argc, char *argv[], char *envp[]) return 1; } - if (optind < argc && !strcmp(argv[optind], "swrk")) { + if (!strcmp(argv[optind], "swrk")) { if (argc != optind + 2) { fprintf(stderr, "Usage: criu swrk \n"); return 1; From 6006fc1de4d0ee8181682df7fd7a143f4bd89273 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 10 Dec 2021 12:13:06 +0300 Subject: [PATCH 086/121] crtools/rpc: export current criu mode to opts.mode We have multiple options which are valid only on restore or only on dump or in any other specific criu mode, so it would be useful to have info about current mode in opts so that we can validate other options against current mode. Plan is to use it for mount-v2 option as it is only valid on restore, and this would make handling of different types mountpoints much easier. Realization is a bit different for general code and rpc: - When criu mode is set from main() we just parse mode from argv[optind] just after parse_options() found optind of the command. Note that opts.mode is available before check_options(). - For rpc service we reset opts.mode to CR_SWRK each time we restart cr_service_work(), in the original service process we still have CR_SERVICE to differentiate between them, and each request handling function which does setup_opts_from_req sets opts.mode in accordance with the processed request type. And it is also available before check_options(). Now in check_options we can add filters on one mode only options. 
Signed-off-by: Pavel Tikhomirov --- criu/cr-service.c | 8 +++++ criu/crtools.c | 73 +++++++++++++++++++++++++++++---------- criu/include/cr_options.h | 19 ++++++++++ 3 files changed, 82 insertions(+), 18 deletions(-) diff --git a/criu/cr-service.c b/criu/cr-service.c index 80d12c7b07..59f46b3201 100644 --- a/criu/cr-service.c +++ b/criu/cr-service.c @@ -735,6 +735,7 @@ static int dump_using_req(int sk, CriuOpts *req) bool success = false; bool self_dump = !req->pid; + opts.mode = CR_DUMP; if (setup_opts_from_req(sk, req)) goto exit; @@ -777,6 +778,7 @@ static int restore_using_req(int sk, CriuOpts *req) opts.restore_detach = true; + opts.mode = CR_RESTORE; if (setup_opts_from_req(sk, req)) goto exit; @@ -828,6 +830,7 @@ static int check(int sk, CriuOpts *req) if (pid == 0) { setproctitle("check --rpc"); + opts.mode = CR_CHECK; if (setup_opts_from_req(sk, req)) exit(1); @@ -859,6 +862,7 @@ static int pre_dump_using_req(int sk, CriuOpts *req, bool single) if (pid == 0) { int ret = 1; + opts.mode = CR_PRE_DUMP; if (setup_opts_from_req(sk, req)) goto cout; @@ -936,6 +940,7 @@ static int start_page_server_req(int sk, CriuOpts *req, bool daemon_mode) if (pid == 0) { close(start_pipe[0]); + opts.mode = CR_PAGE_SERVER; if (setup_opts_from_req(sk, req)) goto out_ch; @@ -1182,6 +1187,7 @@ static int handle_cpuinfo(int sk, CriuReq *msg) if (pid == 0) { int ret = 1; + opts.mode = CR_CPUINFO; if (setup_opts_from_req(sk, msg->opts)) goto cout; @@ -1231,6 +1237,8 @@ int cr_service_work(int sk) CriuReq *msg = 0; more: + opts.mode = CR_SWRK; + if (recv_criu_msg(sk, &msg) != 0) { pr_perror("Can't recv request"); goto err; diff --git a/criu/crtools.c b/criu/crtools.c index 3adfb190f7..d10d7f7d0d 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -67,6 +67,38 @@ static int image_dir_mode(char *argv[], int optind) return -1; } +static int parse_criu_mode(char *mode) +{ + if (!strcmp(mode, "dump")) + opts.mode = CR_DUMP; + else if (!strcmp(mode, "pre-dump")) + opts.mode = CR_PRE_DUMP; 
+ else if (!strcmp(mode, "restore")) + opts.mode = CR_RESTORE; + else if (!strcmp(mode, "lazy-pages")) + opts.mode = CR_LAZY_PAGES; + else if (!strcmp(mode, "check")) + opts.mode = CR_CHECK; + else if (!strcmp(mode, "page-server")) + opts.mode = CR_PAGE_SERVER; + else if (!strcmp(mode, "service")) + opts.mode = CR_SERVICE; + else if (!strcmp(mode, "swrk")) + opts.mode = CR_SWRK; + else if (!strcmp(mode, "dedup")) + opts.mode = CR_DEDUP; + else if (!strcmp(mode, "cpuinfo")) + opts.mode = CR_CPUINFO; + else if (!strcmp(mode, "exec")) + opts.mode = CR_EXEC_DEPRECATED; + else if (!strcmp(mode, "show")) + opts.mode = CR_SHOW_DEPRECATED; + else + return -1; + + return 0; +} + int main(int argc, char *argv[], char *envp[]) { int ret = -1; @@ -124,7 +156,12 @@ int main(int argc, char *argv[], char *envp[]) return 1; } - if (!strcmp(argv[optind], "swrk")) { + if (parse_criu_mode(argv[optind])) { + pr_err("unknown command: %s\n", argv[optind]); + goto usage; + } + + if (opts.mode == CR_SWRK) { if (argc != optind + 2) { fprintf(stderr, "Usage: criu swrk \n"); return 1; @@ -156,7 +193,7 @@ int main(int argc, char *argv[], char *envp[]) goto usage; } - if (strcmp(argv[optind], "restore")) { + if (opts.mode != CR_RESTORE) { pr_err("--exec-cmd is available for the restore command only\n"); goto usage; } @@ -173,7 +210,7 @@ int main(int argc, char *argv[], char *envp[]) opts.exec_cmd[argc - optind - 1] = NULL; } else { /* No subcommands except for cpuinfo and restore --exec-cmd */ - if (strcmp(argv[optind], "cpuinfo") && has_sub_command) { + if (opts.mode != CR_CPUINFO && has_sub_command) { pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? 
"s" : "", argv[optind]); goto usage; } @@ -185,7 +222,7 @@ int main(int argc, char *argv[], char *envp[]) } /* We must not open imgs dir, if service is called */ - if (strcmp(argv[optind], "service")) { + if (opts.mode != CR_SERVICE) { ret = open_image_dir(opts.imgs_dir, image_dir_mode(argv, optind)); if (ret < 0) { pr_err("Couldn't open image dir %s\n", opts.imgs_dir); @@ -197,8 +234,7 @@ int main(int argc, char *argv[], char *envp[]) * When a process group becomes an orphan, * its processes are sent a SIGHUP signal */ - if (!strcmp(argv[optind], "restore") && opts.restore_detach && opts.final_state == TASK_STOPPED && - opts.shell_job) + if (opts.mode == CR_RESTORE && opts.restore_detach && opts.final_state == TASK_STOPPED && opts.shell_job) pr_warn("Stopped and detached shell job will get SIGHUP from OS.\n"); if (chdir(opts.work_dir)) { @@ -218,7 +254,7 @@ int main(int argc, char *argv[], char *envp[]) kdat.can_map_vdso = 0; if (!list_empty(&opts.inherit_fds)) { - if (strcmp(argv[optind], "restore")) { + if (opts.mode != CR_RESTORE) { pr_err("--inherit-fd is restore-only option\n"); return 1; } @@ -229,13 +265,14 @@ int main(int argc, char *argv[], char *envp[]) if (opts.img_parent) pr_info("Will do snapshot from %s\n", opts.img_parent); - if (!strcmp(argv[optind], "dump")) { + if (opts.mode == CR_DUMP) { if (!opts.tree_id) goto opt_pid_missing; + return cr_dump_tasks(opts.tree_id); } - if (!strcmp(argv[optind], "pre-dump")) { + if (opts.mode == CR_PRE_DUMP) { if (!opts.tree_id) goto opt_pid_missing; @@ -247,7 +284,7 @@ int main(int argc, char *argv[], char *envp[]) return cr_pre_dump_tasks(opts.tree_id) != 0; } - if (!strcmp(argv[optind], "restore")) { + if (opts.mode == CR_RESTORE) { if (opts.tree_id) pr_warn("Using -t with criu restore is obsoleted\n"); @@ -262,22 +299,22 @@ int main(int argc, char *argv[], char *envp[]) return ret != 0; } - if (!strcmp(argv[optind], "lazy-pages")) + if (opts.mode == CR_LAZY_PAGES) return cr_lazy_pages(opts.daemon_mode) != 0; 
- if (!strcmp(argv[optind], "check")) + if (opts.mode == CR_CHECK) return cr_check() != 0; - if (!strcmp(argv[optind], "page-server")) + if (opts.mode == CR_PAGE_SERVER) return cr_page_server(opts.daemon_mode, false, -1) != 0; - if (!strcmp(argv[optind], "service")) + if (opts.mode == CR_SERVICE) return cr_service(opts.daemon_mode); - if (!strcmp(argv[optind], "dedup")) + if (opts.mode == CR_DEDUP) return cr_dedup() != 0; - if (!strcmp(argv[optind], "cpuinfo")) { + if (opts.mode == CR_CPUINFO) { if (!argv[optind + 1]) { pr_err("cpuinfo requires an action: dump or check\n"); goto usage; @@ -288,12 +325,12 @@ int main(int argc, char *argv[], char *envp[]) return cpuinfo_check(); } - if (!strcmp(argv[optind], "exec")) { + if (opts.mode == CR_EXEC_DEPRECATED) { pr_err("The \"exec\" action is deprecated by the Compel library.\n"); return -1; } - if (!strcmp(argv[optind], "show")) { + if (opts.mode == CR_SHOW_DEPRECATED) { pr_err("The \"show\" action is deprecated by the CRIT utility.\n"); pr_err("To view an image use the \"crit decode -i $name --pretty\" command.\n"); return -1; diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index a34f8dbbf4..85648bf1c8 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -100,6 +100,22 @@ struct irmap_path_opt { struct irmap *ir; }; +enum criu_mode { + CR_UNSET = 0, + CR_DUMP, + CR_PRE_DUMP, + CR_RESTORE, + CR_LAZY_PAGES, + CR_CHECK, + CR_PAGE_SERVER, + CR_SERVICE, + CR_SWRK, + CR_DEDUP, + CR_CPUINFO, + CR_EXEC_DEPRECATED, + CR_SHOW_DEPRECATED, +}; + struct cr_options { int final_state; int check_extra_features; @@ -188,6 +204,9 @@ struct cr_options { /* This stores which method to use for file validation. */ int file_validation_method; + + /* Shows the mode criu is running at the moment: dump/pre-dump/restore/... 
*/ + enum criu_mode mode; }; extern struct cr_options opts; From 95c3d5131afd2b40271233ea7a024dc19260efd1 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Wed, 15 Dec 2021 10:49:42 +0300 Subject: [PATCH 087/121] crtools: use new opts.mode in image_dir_mode Also while on it there is no "cpuinfo restore", let's remove it. Signed-off-by: Pavel Tikhomirov --- criu/crtools.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/criu/crtools.c b/criu/crtools.c index d10d7f7d0d..7bf92ffd08 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -56,14 +56,23 @@ void flush_early_log_to_stderr(void) static int image_dir_mode(char *argv[], int optind) { - if (!strcmp(argv[optind], "dump") || !strcmp(argv[optind], "pre-dump") || - (!strcmp(argv[optind], "cpuinfo") && !strcmp(argv[optind + 1], "dump"))) + switch (opts.mode) { + case CR_DUMP: + /* fallthrough */ + case CR_PRE_DUMP: return O_DUMP; - - if (!strcmp(argv[optind], "restore") || - (!strcmp(argv[optind], "cpuinfo") && !strcmp(argv[optind + 1], "restore"))) + case CR_RESTORE: return O_RSTR; + case CR_CPUINFO: + if (!strcmp(argv[optind + 1], "dump")) + return O_DUMP; + /* fallthrough */ + default: + return -1; + } + /* never reached */ + BUG(); return -1; } From d2632238947d8aa9096deb27ae8ba05971f43de9 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 16 Dec 2021 18:05:01 +0300 Subject: [PATCH 088/121] crtools: check that cpuinfo command has sub-command This fixes segfault on empty sub-command for cpuinfo. Signed-off-by: Pavel Tikhomirov --- criu/crtools.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/criu/crtools.c b/criu/crtools.c index 7bf92ffd08..0752800f6f 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -222,6 +222,9 @@ int main(int argc, char *argv[], char *envp[]) if (opts.mode != CR_CPUINFO && has_sub_command) { pr_err("excessive parameter%s for command %s\n", (argc - optind) > 2 ? 
"s" : "", argv[optind]); goto usage; + } else if (opts.mode == CR_CPUINFO && !has_sub_command) { + pr_err("cpuinfo requires an action: dump or check\n"); + goto usage; } } From 2407ff4a1f96fe206bcc246d58f90d24aa8f4e1e Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Tue, 14 Dec 2021 22:28:27 +0700 Subject: [PATCH 089/121] sk-unix: Add support for SOCK_SEQPACKET unix sockets Adjust some SOCK_STREAM cases to handle SOCK_SEQPACKET too. Signed-off-by: Bui Quang Minh --- criu/sk-unix.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/criu/sk-unix.c b/criu/sk-unix.c index d3402c3acc..194193dff1 100644 --- a/criu/sk-unix.c +++ b/criu/sk-unix.c @@ -402,12 +402,12 @@ static int dump_one_unix_fd(int lfd, uint32_t id, const struct fd_parms *p) sk_encode_shutdown(ue, sk->shutdown); /* - * If a stream listening socket has non-zero rqueue, this - * means there are in-flight connections waiting to get + * If a stream/seqpacket listening socket has non-zero rqueue, + * this means there are in-flight connections waiting to get * accept()-ed. We handle them separately with the "icons" * (i stands for in-flight, cons -- for connections) things. 
*/ - if (sk->rqlen != 0 && !(sk->type == SOCK_STREAM && sk->state == TCP_LISTEN)) { + if (sk->rqlen != 0 && sk->state != TCP_LISTEN) { if (dump_sk_queue(lfd, id)) goto err; } @@ -1610,7 +1610,7 @@ static int bind_unix_sk(int sk, struct unix_sk_info *ui) if (ui->ue->name.len == 0) return 0; - if ((ui->ue->type == SOCK_STREAM) && (ui->ue->state == TCP_ESTABLISHED)) { + if ((ui->ue->type != SOCK_DGRAM) && (ui->ue->state == TCP_ESTABLISHED)) { /* * FIXME this can be done, but for doing this properly we * need to bind socket to its name, then rename one to @@ -1851,7 +1851,7 @@ static int open_unixsk_standalone(struct unix_sk_info *ui, int *new_fd) close(sks[1]); sk = sks[0]; - } else if ((ui->ue->state == TCP_ESTABLISHED && ui->ue->type == SOCK_STREAM) && queuer && + } else if ((ui->ue->state == TCP_ESTABLISHED && ui->ue->type != SOCK_DGRAM) && queuer && queuer->ue->ino == FAKE_INO) { int ret, sks[2]; From 41a696dcc5ebef28ce8e9ed28fe9c3e8518868ec Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 16 Dec 2021 09:35:25 +0700 Subject: [PATCH 090/121] zdtm: Add SOCK_SEQPACKET variants to unix socket tests This commit simply makes copies of SOCK_STREAM unix socket tests and uses SOCK_SEQPACKET instead. 
Signed-off-by: Bui Quang Minh --- test/zdtm/static/Makefile | 23 +++++++++++++++++++ test/zdtm/static/del_standalone_un.c | 8 ++++++- .../zdtm/static/del_standalone_un_seqpacket.c | 1 + test/zdtm/static/scm03-seqpacket.c | 1 + test/zdtm/static/scm03.c | 8 ++++++- test/zdtm/static/sk-unix-rel-seqpacket.c | 1 + test/zdtm/static/sk-unix-rel.c | 10 ++++++-- test/zdtm/static/sk-unix-unconn-seqpacket.c | 1 + test/zdtm/static/sk-unix-unconn.c | 10 ++++++-- test/zdtm/static/sk-unix01-seqpacket.c | 1 + test/zdtm/static/sk-unix01-seqpacket.desc | 1 + test/zdtm/static/sk-unix01.c | 12 +++++++--- test/zdtm/static/socket_queues.c | 8 ++++++- test/zdtm/static/socket_queues_seqpacket.c | 1 + test/zdtm/static/sockets00-seqpacket.c | 1 + test/zdtm/static/sockets00-seqpacket.desc | 1 + test/zdtm/static/sockets00.c | 12 +++++++--- test/zdtm/static/sockets01-seqpacket.c | 1 + test/zdtm/static/sockets01.c | 10 ++++++-- test/zdtm/static/sockets02-seqpacket.c | 1 + test/zdtm/static/sockets02.c | 8 ++++++- test/zdtm/static/sockets03-seqpacket.c | 1 + test/zdtm/static/sockets03-seqpacket.desc | 1 + test/zdtm/static/sockets03.c | 10 ++++++-- test/zdtm/static/sockets_spair.c | 8 ++++++- test/zdtm/static/sockets_spair_seqpacket.c | 1 + 26 files changed, 122 insertions(+), 19 deletions(-) create mode 120000 test/zdtm/static/del_standalone_un_seqpacket.c create mode 120000 test/zdtm/static/scm03-seqpacket.c create mode 120000 test/zdtm/static/sk-unix-rel-seqpacket.c create mode 120000 test/zdtm/static/sk-unix-unconn-seqpacket.c create mode 120000 test/zdtm/static/sk-unix01-seqpacket.c create mode 120000 test/zdtm/static/sk-unix01-seqpacket.desc create mode 120000 test/zdtm/static/socket_queues_seqpacket.c create mode 120000 test/zdtm/static/sockets00-seqpacket.c create mode 120000 test/zdtm/static/sockets00-seqpacket.desc create mode 120000 test/zdtm/static/sockets01-seqpacket.c create mode 120000 test/zdtm/static/sockets02-seqpacket.c create mode 120000 test/zdtm/static/sockets03-seqpacket.c 
create mode 120000 test/zdtm/static/sockets03-seqpacket.desc create mode 120000 test/zdtm/static/sockets_spair_seqpacket.c diff --git a/test/zdtm/static/Makefile b/test/zdtm/static/Makefile index 3e1e0a498a..4a21978b59 100644 --- a/test/zdtm/static/Makefile +++ b/test/zdtm/static/Makefile @@ -68,9 +68,13 @@ TST_NOFILE := \ utsname \ pstree \ sockets01 \ + sockets01-seqpacket \ sockets02 \ + sockets02-seqpacket \ sockets_spair \ + sockets_spair_seqpacket \ socket_queues \ + socket_queues_seqpacket \ socket-raw \ socket-tcp \ socket-tcp-listen \ @@ -119,6 +123,7 @@ TST_NOFILE := \ sock_opts01 \ sock_opts02 \ sk-unix-unconn \ + sk-unix-unconn-seqpacket \ ipc_namespace \ selfexe00 \ sem \ @@ -187,6 +192,7 @@ TST_NOFILE := \ scm01 \ scm02 \ scm03 \ + scm03-seqpacket \ scm04 \ scm05 \ scm06 \ @@ -284,6 +290,7 @@ TST_FILE = \ file_attr \ deleted_unix_sock \ sk-unix-rel \ + sk-unix-rel-seqpacket \ deleted_dev \ unlink_fstat00 \ unlink_fstat01 \ @@ -311,7 +318,9 @@ TST_FILE = \ cow01 \ fdt_shared \ sockets00 \ + sockets00-seqpacket \ sockets03 \ + sockets03-seqpacket \ sockets_dgram \ file_lease00 \ file_lease01 \ @@ -394,8 +403,10 @@ TST_DIR = \ mnt_enablefs \ autofs \ del_standalone_un \ + del_standalone_un_seqpacket \ sk-unix-mntns \ sk-unix01 \ + sk-unix01-seqpacket \ sk-unix-dgram-ghost \ unsupported_children_collision \ shared_slave_mount_children \ @@ -607,6 +618,18 @@ socket-tcp6-unconn: CFLAGS += -D ZDTM_IPV6 socket-tcp4v6-last-ack: CFLAGS += -D ZDTM_TCP_LAST_ACK -D ZDTM_IPV4V6 socket-tcp4v6-closing: CFLAGS += -D ZDTM_IPV4V6 +sockets00-seqpacket: CFLAGS += -D ZDTM_UNIX_SEQPACKET +sockets01-seqpacket: CFLAGS += -D ZDTM_UNIX_SEQPACKET +sockets02-seqpacket: CFLAGS += -D ZDTM_UNIX_SEQPACKET +sockets03-seqpacket: CFLAGS += -D ZDTM_UNIX_SEQPACKET +sk-unix01-seqpacket: CFLAGS += -D ZDTM_UNIX_SEQPACKET +sk-unix-rel-seqpacket: CFLAGS += -D ZDTM_UNIX_SEQPACKET +sockets_spair_seqpacket: CFLAGS += -D ZDTM_UNIX_SEQPACKET +socket_queues_seqpacket: CFLAGS += -D 
ZDTM_UNIX_SEQPACKET +del_standalone_un_seqpacket: CFLAGS += -D ZDTM_UNIX_SEQPACKET +sk-unix-unconn-seqpacket: CFLAGS += -D ZDTM_UNIX_SEQPACKET +scm03-seqpacket: CFLAGS += -D ZDTM_UNIX_SEQPACKET + pty-console: CFLAGS += -D ZDTM_DEV_CONSOLE shm-unaligned: CFLAGS += -DZDTM_SHM_UNALIGNED diff --git a/test/zdtm/static/del_standalone_un.c b/test/zdtm/static/del_standalone_un.c index c9fa84870b..b4f99e2606 100644 --- a/test/zdtm/static/del_standalone_un.c +++ b/test/zdtm/static/del_standalone_un.c @@ -16,11 +16,17 @@ const char *test_author = "Tycho Andersen "; char *dirname; TEST_OPTION(dirname, string, "directory name", 1); +#ifdef ZDTM_UNIX_SEQPACKET +#define SOCK_TYPE SOCK_SEQPACKET +#else +#define SOCK_TYPE SOCK_STREAM +#endif + static int bind_and_listen(struct sockaddr_un *addr) { int sk; - sk = socket(PF_UNIX, SOCK_STREAM, 0); + sk = socket(PF_UNIX, SOCK_TYPE, 0); if (sk < 0) { fail("socket"); return -1; diff --git a/test/zdtm/static/del_standalone_un_seqpacket.c b/test/zdtm/static/del_standalone_un_seqpacket.c new file mode 120000 index 0000000000..d88fcbad8d --- /dev/null +++ b/test/zdtm/static/del_standalone_un_seqpacket.c @@ -0,0 +1 @@ +del_standalone_un.c \ No newline at end of file diff --git a/test/zdtm/static/scm03-seqpacket.c b/test/zdtm/static/scm03-seqpacket.c new file mode 120000 index 0000000000..f1f86dd8be --- /dev/null +++ b/test/zdtm/static/scm03-seqpacket.c @@ -0,0 +1 @@ +scm03.c \ No newline at end of file diff --git a/test/zdtm/static/scm03.c b/test/zdtm/static/scm03.c index a40fc01015..4453f7e936 100644 --- a/test/zdtm/static/scm03.c +++ b/test/zdtm/static/scm03.c @@ -9,6 +9,12 @@ const char *test_doc = "Check that SCM_RIGHTS are preserved"; const char *test_author = "Pavel Emelyanov "; +#ifdef ZDTM_UNIX_SEQPACKET +#define SOCK_TYPE SOCK_SEQPACKET +#else +#define SOCK_TYPE SOCK_DGRAM +#endif + static int send_fd(int via, int fd1, int fd2) { struct msghdr h = {}; @@ -105,7 +111,7 @@ int main(int argc, char **argv) test_init(argc, argv); - if 
(socketpair(PF_UNIX, SOCK_DGRAM, 0, sk) < 0) { + if (socketpair(PF_UNIX, SOCK_TYPE, 0, sk) < 0) { pr_perror("Can't make unix pair"); exit(1); } diff --git a/test/zdtm/static/sk-unix-rel-seqpacket.c b/test/zdtm/static/sk-unix-rel-seqpacket.c new file mode 120000 index 0000000000..1f98e3845d --- /dev/null +++ b/test/zdtm/static/sk-unix-rel-seqpacket.c @@ -0,0 +1 @@ +sk-unix-rel.c \ No newline at end of file diff --git a/test/zdtm/static/sk-unix-rel.c b/test/zdtm/static/sk-unix-rel.c index 10c19080ae..7e4aeafe6c 100644 --- a/test/zdtm/static/sk-unix-rel.c +++ b/test/zdtm/static/sk-unix-rel.c @@ -25,6 +25,12 @@ TEST_OPTION(filename, string, "socket file name", 1); #define TEST_MODE 0640 +#ifdef ZDTM_UNIX_SEQPACKET +#define SOCK_TYPE SOCK_SEQPACKET +#else +#define SOCK_TYPE SOCK_STREAM +#endif + int main(int argc, char *argv[]) { struct sockaddr_un addr; @@ -54,8 +60,8 @@ int main(int argc, char *argv[]) memcpy(addr.sun_path, filename, addrlen); addrlen += sizeof(addr.sun_family); - sock[0] = socket(AF_UNIX, SOCK_STREAM, 0); - sock[1] = socket(AF_UNIX, SOCK_STREAM, 0); + sock[0] = socket(AF_UNIX, SOCK_TYPE, 0); + sock[1] = socket(AF_UNIX, SOCK_TYPE, 0); if (sock[0] < 0 || sock[1] < 0) { fail("socket"); exit(1); diff --git a/test/zdtm/static/sk-unix-unconn-seqpacket.c b/test/zdtm/static/sk-unix-unconn-seqpacket.c new file mode 120000 index 0000000000..f5c276186a --- /dev/null +++ b/test/zdtm/static/sk-unix-unconn-seqpacket.c @@ -0,0 +1 @@ +sk-unix-unconn.c \ No newline at end of file diff --git a/test/zdtm/static/sk-unix-unconn.c b/test/zdtm/static/sk-unix-unconn.c index caad3d3157..62e48247f6 100644 --- a/test/zdtm/static/sk-unix-unconn.c +++ b/test/zdtm/static/sk-unix-unconn.c @@ -9,6 +9,12 @@ const char *test_doc = "Check unconnected unix sockets"; const char *test_author = "Vagin Andrew "; +#ifdef ZDTM_UNIX_SEQPACKET +#define SOCK_TYPE SOCK_SEQPACKET +#else +#define SOCK_TYPE SOCK_STREAM +#endif + int main(int argc, char **argv) { int sk, skc; @@ -19,13 +25,13 @@ int 
main(int argc, char **argv) test_init(argc, argv); - sk = socket(AF_UNIX, SOCK_STREAM, 0); + sk = socket(AF_UNIX, SOCK_TYPE, 0); if (sk == -1) { pr_perror("socket"); return 1; } - skc = socket(AF_UNIX, SOCK_STREAM, 0); + skc = socket(AF_UNIX, SOCK_TYPE, 0); if (skc == -1) { pr_perror("socket"); return 1; diff --git a/test/zdtm/static/sk-unix01-seqpacket.c b/test/zdtm/static/sk-unix01-seqpacket.c new file mode 120000 index 0000000000..bef734ed69 --- /dev/null +++ b/test/zdtm/static/sk-unix01-seqpacket.c @@ -0,0 +1 @@ +sk-unix01.c \ No newline at end of file diff --git a/test/zdtm/static/sk-unix01-seqpacket.desc b/test/zdtm/static/sk-unix01-seqpacket.desc new file mode 120000 index 0000000000..7a30da25c8 --- /dev/null +++ b/test/zdtm/static/sk-unix01-seqpacket.desc @@ -0,0 +1 @@ +sk-unix01.desc \ No newline at end of file diff --git a/test/zdtm/static/sk-unix01.c b/test/zdtm/static/sk-unix01.c index c2bb8b9edb..5146c027f4 100644 --- a/test/zdtm/static/sk-unix01.c +++ b/test/zdtm/static/sk-unix01.c @@ -24,6 +24,12 @@ const char *test_author = "Cyrill Gorcunov "; char *dirname; TEST_OPTION(dirname, string, "directory name", 1); +#ifdef ZDTM_UNIX_SEQPACKET +#define SOCK_TYPE SOCK_SEQPACKET +#else +#define SOCK_TYPE SOCK_STREAM +#endif + static int sk_alloc_bind(int type, struct sockaddr_un *addr) { int sk; @@ -256,7 +262,7 @@ int main(int argc, char **argv) unlink(addr.sun_path); - sk_st[0] = sk_alloc_bind(SOCK_STREAM, &addr); + sk_st[0] = sk_alloc_bind(SOCK_TYPE, &addr); if (sk_st[0] < 0) return 1; test_msg("sk-st: alloc/bind/listen %d\n", sk_st[0]); @@ -266,7 +272,7 @@ int main(int argc, char **argv) return 1; } - sk_st[1] = sk_alloc_connect(SOCK_STREAM, &addr); + sk_st[1] = sk_alloc_connect(SOCK_TYPE, &addr); if (sk_st[1] < 0) return 1; test_msg("sk-st: alloc/connect %d\n", sk_st[1]); @@ -279,7 +285,7 @@ int main(int argc, char **argv) } test_msg("sk-st: accept %d\n", sk_st[2]); - sk_st[3] = sk_alloc_connect(SOCK_STREAM, &addr); + sk_st[3] = 
sk_alloc_connect(SOCK_TYPE, &addr); if (sk_st[3] < 0) return 1; test_msg("sk-st: alloc/connect %d\n", sk_st[3]); diff --git a/test/zdtm/static/socket_queues.c b/test/zdtm/static/socket_queues.c index e30bca0e19..44495f06b9 100644 --- a/test/zdtm/static/socket_queues.c +++ b/test/zdtm/static/socket_queues.c @@ -24,6 +24,12 @@ const char *test_author = "Stanislav Kinsbursky \n"; #define SK_DATA_D1 "packet dgram left" #define SK_DATA_D2 "packet dgram right" +#ifdef ZDTM_UNIX_SEQPACKET +#define SOCK_TYPE SOCK_SEQPACKET +#else +#define SOCK_TYPE SOCK_STREAM +#endif + int main(int argc, char *argv[]) { int ssk_pair_d[2]; @@ -32,7 +38,7 @@ int main(int argc, char *argv[]) test_init(argc, argv); - if (socketpair(AF_UNIX, SOCK_STREAM, 0, ssk_pair_s) == -1) { + if (socketpair(AF_UNIX, SOCK_TYPE, 0, ssk_pair_s) == -1) { fail("socketpair"); exit(1); } diff --git a/test/zdtm/static/socket_queues_seqpacket.c b/test/zdtm/static/socket_queues_seqpacket.c new file mode 120000 index 0000000000..0f3f93ea65 --- /dev/null +++ b/test/zdtm/static/socket_queues_seqpacket.c @@ -0,0 +1 @@ +socket_queues.c \ No newline at end of file diff --git a/test/zdtm/static/sockets00-seqpacket.c b/test/zdtm/static/sockets00-seqpacket.c new file mode 120000 index 0000000000..4bce9fc31d --- /dev/null +++ b/test/zdtm/static/sockets00-seqpacket.c @@ -0,0 +1 @@ +sockets00.c \ No newline at end of file diff --git a/test/zdtm/static/sockets00-seqpacket.desc b/test/zdtm/static/sockets00-seqpacket.desc new file mode 120000 index 0000000000..4beea26423 --- /dev/null +++ b/test/zdtm/static/sockets00-seqpacket.desc @@ -0,0 +1 @@ +sockets00.desc \ No newline at end of file diff --git a/test/zdtm/static/sockets00.c b/test/zdtm/static/sockets00.c index 53890077b9..ac5d7d6fe6 100644 --- a/test/zdtm/static/sockets00.c +++ b/test/zdtm/static/sockets00.c @@ -25,6 +25,12 @@ TEST_OPTION(filename, string, "socket file name", 1); #define TEST_MODE 0640 +#ifdef ZDTM_UNIX_SEQPACKET +#define SOCK_TYPE SOCK_SEQPACKET +#else 
+#define SOCK_TYPE SOCK_STREAM +#endif + int main(int argc, char *argv[]) { int ssk_icon[4]; @@ -58,9 +64,9 @@ int main(int argc, char *argv[]) memcpy(addr.sun_path, path, addrlen); addrlen += sizeof(addr.sun_family); - ssk_icon[0] = socket(AF_UNIX, SOCK_STREAM, 0); - ssk_icon[1] = socket(AF_UNIX, SOCK_STREAM, 0); - ssk_icon[2] = socket(AF_UNIX, SOCK_STREAM, 0); + ssk_icon[0] = socket(AF_UNIX, SOCK_TYPE, 0); + ssk_icon[1] = socket(AF_UNIX, SOCK_TYPE, 0); + ssk_icon[2] = socket(AF_UNIX, SOCK_TYPE, 0); if (ssk_icon[0] < 0 || ssk_icon[1] < 0 || ssk_icon[2] < 0) { fail("socket"); exit(1); diff --git a/test/zdtm/static/sockets01-seqpacket.c b/test/zdtm/static/sockets01-seqpacket.c new file mode 120000 index 0000000000..8d51121e10 --- /dev/null +++ b/test/zdtm/static/sockets01-seqpacket.c @@ -0,0 +1 @@ +sockets01.c \ No newline at end of file diff --git a/test/zdtm/static/sockets01.c b/test/zdtm/static/sockets01.c index e35a31fece..f56cd219e3 100644 --- a/test/zdtm/static/sockets01.c +++ b/test/zdtm/static/sockets01.c @@ -30,6 +30,12 @@ const char *test_author = "Pavel Emelyanov "; #define TEST_MSG "test-message" static char buf[sizeof(TEST_MSG)]; +#ifdef ZDTM_UNIX_SEQPACKET +#define SOCK_TYPE SOCK_SEQPACKET +#else +#define SOCK_TYPE SOCK_STREAM +#endif + int main(int argc, char *argv[]) { int spu[2], spb[2], dpu[2], dpb[2], dpd[2]; @@ -40,14 +46,14 @@ int main(int argc, char *argv[]) signal(SIGPIPE, SIG_IGN); /* spu -- stream pair, unidirectional shutdown */ - if (socketpair(PF_UNIX, SOCK_STREAM, 0, spu) < 0) + if (socketpair(PF_UNIX, SOCK_TYPE, 0, spu) < 0) fin("no stream pair 1"); if (shutdown(spu[0], SHUT_RD) < 0) fin("no stream shutdown 1"); /* spb -- stream pair, bidirectional shutdown */ - if (socketpair(PF_UNIX, SOCK_STREAM, 0, spb) < 0) + if (socketpair(PF_UNIX, SOCK_TYPE, 0, spb) < 0) fin("no stream pair 2"); if (shutdown(spb[0], SHUT_RDWR) < 0) diff --git a/test/zdtm/static/sockets02-seqpacket.c b/test/zdtm/static/sockets02-seqpacket.c new file mode 120000 
index 0000000000..b958315999 --- /dev/null +++ b/test/zdtm/static/sockets02-seqpacket.c @@ -0,0 +1 @@ +sockets02.c \ No newline at end of file diff --git a/test/zdtm/static/sockets02.c b/test/zdtm/static/sockets02.c index 2729ade2c3..d7d84d8152 100644 --- a/test/zdtm/static/sockets02.c +++ b/test/zdtm/static/sockets02.c @@ -16,6 +16,12 @@ const char *test_doc = "Test semi-closed unix stream connection\n"; const char *test_author = "Pavel Emelyanov \n"; +#ifdef ZDTM_UNIX_SEQPACKET +#define SOCK_TYPE SOCK_SEQPACKET +#else +#define SOCK_TYPE SOCK_STREAM +#endif + int main(int argc, char *argv[]) { int ssk_pair[2], ret; @@ -25,7 +31,7 @@ int main(int argc, char *argv[]) data = (char)lrand48(); - if (socketpair(AF_UNIX, SOCK_STREAM, 0, ssk_pair) == -1) { + if (socketpair(AF_UNIX, SOCK_TYPE, 0, ssk_pair) == -1) { fail("socketpair"); exit(1); } diff --git a/test/zdtm/static/sockets03-seqpacket.c b/test/zdtm/static/sockets03-seqpacket.c new file mode 120000 index 0000000000..997cce6735 --- /dev/null +++ b/test/zdtm/static/sockets03-seqpacket.c @@ -0,0 +1 @@ +sockets03.c \ No newline at end of file diff --git a/test/zdtm/static/sockets03-seqpacket.desc b/test/zdtm/static/sockets03-seqpacket.desc new file mode 120000 index 0000000000..3798a8242f --- /dev/null +++ b/test/zdtm/static/sockets03-seqpacket.desc @@ -0,0 +1 @@ +sockets03.desc \ No newline at end of file diff --git a/test/zdtm/static/sockets03.c b/test/zdtm/static/sockets03.c index cd6f608311..6b0915aaa6 100644 --- a/test/zdtm/static/sockets03.c +++ b/test/zdtm/static/sockets03.c @@ -22,6 +22,12 @@ const char *test_author = "Andrey Ryabinin "; char *filename; TEST_OPTION(filename, string, "socket file name", 1); +#ifdef ZDTM_UNIX_SEQPACKET +#define SOCK_TYPE SOCK_SEQPACKET +#else +#define SOCK_TYPE SOCK_STREAM +#endif + int main(int argc, char *argv[]) { int sk[3]; @@ -52,8 +58,8 @@ int main(int argc, char *argv[]) memcpy(addr.sun_path, path, addrlen); addrlen += sizeof(addr.sun_family); - sk[0] = socket(AF_UNIX, 
SOCK_STREAM, 0); - sk[1] = socket(AF_UNIX, SOCK_STREAM, 0); + sk[0] = socket(AF_UNIX, SOCK_TYPE, 0); + sk[1] = socket(AF_UNIX, SOCK_TYPE, 0); if (sk[0] < 0 || sk[1] < 0) { fail("socket"); exit(1); diff --git a/test/zdtm/static/sockets_spair.c b/test/zdtm/static/sockets_spair.c index 2dbb132aac..202c2e7901 100644 --- a/test/zdtm/static/sockets_spair.c +++ b/test/zdtm/static/sockets_spair.c @@ -18,6 +18,12 @@ const char *test_author = "Cyrill Gorcunov Date: Tue, 7 Dec 2021 21:29:44 +0000 Subject: [PATCH 091/121] tls: fix typo Signed-off-by: Radostin Stoyanov --- criu/tls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/tls.c b/criu/tls.c index 60bd105bc9..6a7b523ac0 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -8,7 +8,7 @@ #include "cr_options.h" #include "xmalloc.h" -/* Compatability with GnuTLS verson <3.5 */ +/* Compatability with GnuTLS version < 3.5 */ #ifndef GNUTLS_E_CERTIFICATE_VERIFICATION_ERROR #define GNUTLS_E_CERTIFICATE_VERIFICATION_ERROR GNUTLS_E_CERTIFICATE_ERROR #endif From 6ca82330a3bda6b70ea52fcae163a904d0c0b7df Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 7 Dec 2021 22:32:54 +0000 Subject: [PATCH 092/121] tls: use ssize_t for return value Signed-off-by: Radostin Stoyanov --- criu/tls.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/criu/tls.c b/criu/tls.c index 6a7b523ac0..626e529f81 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -53,7 +53,7 @@ void tls_terminate_session(void) ssize_t tls_send(const void *buf, size_t len, int flags) { - int ret; + ssize_t ret; tls_sk_flags = flags; ret = gnutls_record_send(session, buf, len); @@ -95,7 +95,7 @@ int tls_send_data_from_fd(int fd, unsigned long len) return -1; while (len > 0) { - int ret, sent; + ssize_t ret, sent; copied = read(fd, buf, min(len, buf_size)); if (copied <= 0) { @@ -119,7 +119,7 @@ int tls_send_data_from_fd(int fd, unsigned long len) ssize_t tls_recv(void *buf, size_t len, int flags) { - int ret; + ssize_t ret; 
tls_sk_flags = flags; ret = gnutls_record_recv(session, buf, len); @@ -163,7 +163,7 @@ int tls_recv_data_to_fd(int fd, unsigned long len) gnutls_packet_t packet; while (len > 0) { - int ret, w; + ssize_t ret, w; gnutls_datum_t pdata; ret = gnutls_record_recv_packet(session, &packet); @@ -301,7 +301,7 @@ static int tls_x509_setup_creds(void) static ssize_t _tls_push_cb(void *p, const void *data, size_t sz) { int fd = *(int *)(p); - int ret = send(fd, data, sz, tls_sk_flags); + ssize_t ret = send(fd, data, sz, tls_sk_flags); if (ret < 0 && errno != EAGAIN) { int _errno = errno; pr_perror("Push callback send failed"); @@ -313,7 +313,7 @@ static ssize_t _tls_push_cb(void *p, const void *data, size_t sz) static ssize_t _tls_pull_cb(void *p, void *data, size_t sz) { int fd = *(int *)(p); - int ret = recv(fd, data, sz, tls_sk_flags); + ssize_t ret = recv(fd, data, sz, tls_sk_flags); if (ret < 0 && errno != EAGAIN) { int _errno = errno; pr_perror("Pull callback recv failed"); From 8bddd88b3e15d167bad53b14b161f5cb628f2923 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 16 Dec 2021 14:26:53 +0000 Subject: [PATCH 093/121] tls: add more comments Signed-off-by: Radostin Stoyanov --- criu/tls.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/criu/tls.c b/criu/tls.c index 626e529f81..9985b037db 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -40,13 +40,19 @@ void tls_terminate_session(void) if (session) { do { - /* don't wait for peer to close connection */ + /* + * Initiate a connection shutdown but don't + * wait for peer to close connection. 
+ */ ret = gnutls_bye(session, GNUTLS_SHUT_WR); } while (ret == GNUTLS_E_AGAIN || ret == GNUTLS_E_INTERRUPTED); + /* Free the session object */ gnutls_deinit(session); } tls_sk = -1; + + /* Free the credentials object */ if (x509_cred) gnutls_certificate_free_credentials(x509_cred); } @@ -229,6 +235,7 @@ static int tls_handshake(void) { int ret = -1; while (ret != GNUTLS_E_SUCCESS) { + /* Establish TLS session */ ret = gnutls_handshake(session); if (gnutls_error_is_fatal(ret)) { tls_perror("TLS handshake failed", ret); @@ -257,6 +264,7 @@ static int tls_x509_setup_creds(void) if (opts.tls_key) key = opts.tls_key; + /* Load the trusted CA certificates */ ret = gnutls_certificate_allocate_credentials(&x509_cred); if (ret != GNUTLS_E_SUCCESS) { tls_perror("Failed to allocate x509 credentials", ret); @@ -298,6 +306,10 @@ static int tls_x509_setup_creds(void) return 0; } +/** + * A function used by gnutls to send data. It returns a positive + * number indicating the bytes sent, and -1 on error. + */ static ssize_t _tls_push_cb(void *p, const void *data, size_t sz) { int fd = *(int *)(p); @@ -310,6 +322,11 @@ static ssize_t _tls_push_cb(void *p, const void *data, size_t sz) return ret; } +/** + * A callback function used by gnutls to receive data. + * It returns 0 on connection termination, a positive number + * indicating the number of bytes received, and -1 on error. 
+ */ static ssize_t _tls_pull_cb(void *p, void *data, size_t sz) { int fd = *(int *)(p); @@ -326,26 +343,33 @@ static int tls_x509_setup_session(unsigned int flags) { int ret; + /* Create the session object */ ret = gnutls_init(&session, flags); if (ret != GNUTLS_E_SUCCESS) { tls_perror("Failed to initialize session", ret); return -1; } + /* Install the trusted certificates */ ret = gnutls_credentials_set(session, GNUTLS_CRD_CERTIFICATE, x509_cred); if (ret != GNUTLS_E_SUCCESS) { tls_perror("Failed to set session credentials", ret); return -1; } + /* Configure the cipher preferences */ ret = gnutls_set_default_priority(session); if (ret != GNUTLS_E_SUCCESS) { tls_perror("Failed to set priority", ret); return -1; } + /* Associate the socket with the session object */ gnutls_transport_set_ptr(session, &tls_sk); + + /* Set a push function for gnutls to use to send data */ gnutls_transport_set_push_function(session, _tls_push_cb); + /* set a pull function for gnutls to use to receive data */ gnutls_transport_set_pull_function(session, _tls_pull_cb); if (flags == GNUTLS_SERVER) { From c37140662d22e86e1096c953b888b6496b7e1f64 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 21 Dec 2021 21:59:13 -0800 Subject: [PATCH 094/121] uffd: call disconnect_from_page_server to shutdown a page-server connection We need to be sure that page-server doesn't wait for a new command when we call gnutls_bye() that sends an alert containing a close request. 
Signed-off-by: Andrei Vagin --- criu/uffd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/uffd.c b/criu/uffd.c index f01e6999b4..45ac8ba774 100644 --- a/criu/uffd.c +++ b/criu/uffd.c @@ -1468,7 +1468,7 @@ int cr_lazy_pages(bool daemon) ret = handle_requests(epollfd, &events, nr_fds); - tls_terminate_session(); + disconnect_from_page_server(); xfree(events); return ret; From fa03dcfff6a5215ef06c1e579eddb0f5c09f8798 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 22 Dec 2021 09:36:09 -0800 Subject: [PATCH 095/121] tls: allow to terminate connections synchronously GNUTLS_SHUT_RDWR sends an alert containing a close request and waits for the peer to reply with the same message. Signed-off-by: Andrei Vagin --- criu/include/tls.h | 4 ++-- criu/page-xfer.c | 5 +++-- criu/tls.c | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/criu/include/tls.h b/criu/include/tls.h index 26f9976fd7..f563c092c6 100644 --- a/criu/include/tls.h +++ b/criu/include/tls.h @@ -4,7 +4,7 @@ #ifdef CONFIG_GNUTLS int tls_x509_init(int sockfd, bool is_server); -void tls_terminate_session(void); +void tls_terminate_session(bool async); ssize_t tls_send(const void *buf, size_t len, int flags); ssize_t tls_recv(void *buf, size_t len, int flags); @@ -19,7 +19,7 @@ int tls_recv_data_to_fd(int fd, unsigned long len); #define tls_recv(buf, len, flags) (-1) #define tls_send_data_from_fd(fd, len) (-1) #define tls_recv_data_to_fd(fd, len) (-1) -#define tls_terminate_session() +#define tls_terminate_session(async) #endif /* CONFIG_HAS_GNUTLS */ diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 9adf2c8b22..7ff07680ff 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -1259,6 +1259,8 @@ static int page_server_serve(int sk) ret = -1; } + tls_terminate_session(ret != 0); + if (ret == 0 && opts.ps_socket == -1) { char c; @@ -1272,7 +1274,6 @@ static int page_server_serve(int sk) } } - tls_terminate_session(); page_server_close(); pr_info("Session 
over\n"); @@ -1504,7 +1505,7 @@ int disconnect_from_page_server(void) ret = 0; out: - tls_terminate_session(); + tls_terminate_session(ret != 0); close_safe(&page_server_sk); return ret ?: status; diff --git a/criu/tls.c b/criu/tls.c index 9985b037db..4feaf613b5 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -31,7 +31,7 @@ static gnutls_certificate_credentials_t x509_cred; static int tls_sk = -1; static int tls_sk_flags = 0; -void tls_terminate_session(void) +void tls_terminate_session(bool async) { int ret; @@ -44,7 +44,7 @@ void tls_terminate_session(void) * Initiate a connection shutdown but don't * wait for peer to close connection. */ - ret = gnutls_bye(session, GNUTLS_SHUT_WR); + ret = gnutls_bye(session, async ? GNUTLS_SHUT_WR : GNUTLS_SHUT_RDWR); } while (ret == GNUTLS_E_AGAIN || ret == GNUTLS_E_INTERRUPTED); /* Free the session object */ gnutls_deinit(session); @@ -399,6 +399,6 @@ int tls_x509_init(int sockfd, bool is_server) return 0; err: - tls_terminate_session(); + tls_terminate_session(true); return -1; } From cc1120addd38d51f38c28fefae669172ba82120d Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 22 Dec 2021 09:59:11 -0800 Subject: [PATCH 096/121] page-xfer: stop waiting for a new command after a close command There is no reason to do that and in case of tls, __recv returns EAGAIN instead of 0. 
Signed-off-by: Andrei Vagin --- criu/page-xfer.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 7ff07680ff..60c793009f 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -50,8 +50,8 @@ static void psi2iovec(struct page_server_iov *ps, struct iovec *iov) #define PS_IOV_ADD_F 6 #define PS_IOV_GET 7 -#define PS_IOV_FLUSH 0x1023 -#define PS_IOV_FLUSH_N_CLOSE 0x1024 +#define PS_IOV_CLOSE 0x1023 +#define PS_IOV_FORCE_CLOSE 0x1024 #define PS_CMD_BITS 16 #define PS_CMD_MASK ((1 << PS_CMD_BITS) - 1) @@ -1223,8 +1223,8 @@ static int page_server_serve(int sk) ret = page_server_add(sk, &pi, flags); break; } - case PS_IOV_FLUSH: - case PS_IOV_FLUSH_N_CLOSE: { + case PS_IOV_CLOSE: + case PS_IOV_FORCE_CLOSE: { int32_t status = 0; ret = 0; @@ -1250,7 +1250,9 @@ static int page_server_serve(int sk) break; } - if (ret || (pi.cmd == PS_IOV_FLUSH_N_CLOSE)) + if (ret) + break; + if (pi.cmd == PS_IOV_CLOSE || pi.cmd == PS_IOV_FORCE_CLOSE) break; } @@ -1491,9 +1493,9 @@ int disconnect_from_page_server(void) * the parent process) so we must order the * page-server to terminate itself. 
*/ - pi.cmd = PS_IOV_FLUSH_N_CLOSE; + pi.cmd = PS_IOV_FORCE_CLOSE; else - pi.cmd = PS_IOV_FLUSH; + pi.cmd = PS_IOV_CLOSE; if (send_psi(page_server_sk, &pi)) goto out; From 5e975ab787ed677b03aa86f104b90984189c906b Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 22 Dec 2021 17:03:04 -0800 Subject: [PATCH 097/121] ci: reenable the lazy-thp test in the lazy-remote mode Signed-off-by: Andrei Vagin --- scripts/ci/run-ci-tests.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh index bf7331142b..7eab9f2dc7 100755 --- a/scripts/ci/run-ci-tests.sh +++ b/scripts/ci/run-ci-tests.sh @@ -212,10 +212,8 @@ LAZY_OPTS="-p 2 -T $LAZY_TESTS $LAZY_EXCLUDE $ZDTM_OPTS" ./test/zdtm.py run $LAZY_OPTS --lazy-pages # shellcheck disable=SC2086 ./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages -# FIXME: post-copy migration of THP over TLS (sometimes) fails with: -# Error (criu/tls.c:321): tls: Pull callback recv failed: Connection reset by peer # shellcheck disable=SC2086 -./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages --tls -x lazy-thp +./test/zdtm.py run $LAZY_OPTS --remote-lazy-pages --tls bash -x ./test/jenkins/criu-fault.sh if [ "$UNAME_M" == "x86_64" ]; then From a5576e1e6b30411aa936ef05c4344121be58f183 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 22 Dec 2021 17:06:21 -0800 Subject: [PATCH 098/121] test: log testname.out.inprogress if a test has failed This is required if the test failed by timeout. 
Signed-off-by: Andrei Vagin --- test/zdtm.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 086ae02853..c0e24a41a9 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -589,11 +589,12 @@ def gone(self, force=True): os.unlink(self.__pidfile()) def print_output(self): - if os.access(self.__name + '.out', os.R_OK): - print("Test output: " + "=" * 32) - with open(self.__name + '.out') as output: - print(output.read()) - print(" <<< " + "=" * 32) + for postfix in ['.out', '.out.inprogress']: + if os.access(self.__name + postfix, os.R_OK): + print("Test output: " + "=" * 32) + with open(self.__name + postfix) as output: + print(output.read()) + print(" <<< " + "=" * 32) def static(self): return self.__name.split('/')[1] == 'static' From bed670f622f97e2d72ab4f76cbc0c6303bd258b1 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 22 Dec 2021 17:08:16 -0800 Subject: [PATCH 099/121] zdtm: print tails of all logs if a test has failed Signed-off-by: Andrei Vagin --- test/zdtm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index c0e24a41a9..14e6aa1b07 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -1475,17 +1475,17 @@ def kill(self): self.__lazy_pages_p.terminate() print("criu lazy-pages exited with %s" % self.__lazy_pages_p.wait()) - grep_errors(os.path.join(self.__ddir(), "lazy-pages.log")) + grep_errors(os.path.join(self.__ddir(), "lazy-pages.log"), err=True) self.__lazy_pages_p = None if self.__page_server_p: self.__page_server_p.terminate() print("criu page-server exited with %s" % self.__page_server_p.wait()) - grep_errors(os.path.join(self.__ddir(), "page-server.log")) + grep_errors(os.path.join(self.__ddir(), "page-server.log"), err=True) self.__page_server_p = None if self.__dump_process: self.__dump_process.terminate() - print("criu dump exited with %s" % self.__dump_process.wait()) + print("criu dump exited with %s" % self.__dump_process.wait(), 
err=True) grep_errors(os.path.join(self.__ddir(), "dump.log")) self.__dump_process = None if self.__img_streamer_process: From 4152a88cf588e94c9bf6edb3706cdf81fd25bce2 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 22 Dec 2021 17:08:42 -0800 Subject: [PATCH 100/121] zdtm/static/uffd-events: add more log messages Signed-off-by: Andrei Vagin --- test/zdtm/static/uffd-events.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/zdtm/static/uffd-events.c b/test/zdtm/static/uffd-events.c index c811bcf4c2..edd6c09ca1 100644 --- a/test/zdtm/static/uffd-events.c +++ b/test/zdtm/static/uffd-events.c @@ -153,28 +153,30 @@ int main(int argc, char **argv) return 1; } + test_msg("For a child process\n"); pid = fork(); if (pid < 0) { fail("Can't fork"); return 1; } - /* check madvise(MADV_DONTNEED) */ + test_msg("Check madvise(MADV_DONTNEED)\n"); if (check_madv_dn(1)) return 1; - /* check growing mremap */ + test_msg("Check growing mremap\n"); if (check_mremap_grow(2)) return 1; - /* check swapped mappings */ + test_msg("Check swapped mappings\n"); if (check_swapped_mappings(3)) return 1; if (pid) { - int status; + int status = -1; - waitpid(-1, &status, 0); + test_msg("Wait for the child %d\n", pid); + waitpid(pid, &status, 0); if (status) { fail("child failed"); return status; From 1d10fc1e0f2589d34b893c6432fcaab2f99018f7 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 17 Dec 2021 14:58:47 +0300 Subject: [PATCH 101/121] mount: split check_mountpoint_fd from __open_mountpoint Now we can reuse "check" part separately in other places. 
Signed-off-by: Pavel Tikhomirov --- criu/include/mount.h | 1 + criu/mount.c | 60 ++++++++++++++++++++++++-------------------- 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/criu/include/mount.h b/criu/include/mount.h index 7705279e4f..23448d5fc0 100644 --- a/criu/include/mount.h +++ b/criu/include/mount.h @@ -109,6 +109,7 @@ extern int mntns_get_root_by_mnt_id(int mnt_id); extern struct ns_id *lookup_nsid_by_mnt_id(int mnt_id); extern int open_mount(unsigned int s_dev); +extern int check_mountpoint_fd(struct mount_info *pm, int mnt_fd); extern int __open_mountpoint(struct mount_info *pm, int mnt_fd); extern int mnt_is_dir(struct mount_info *pm); extern int open_mountpoint(struct mount_info *pm); diff --git a/criu/mount.c b/criu/mount.c index d75ca5598e..f6347fd9d5 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -1018,39 +1018,20 @@ int mnt_is_dir(struct mount_info *pm) return 0; } -/* - * mnt_fd is a file descriptor on the mountpoint, which is closed in an error case. - * If mnt_fd is -1, the mountpoint will be opened by this function. 
- */ -int __open_mountpoint(struct mount_info *pm, int mnt_fd) +int check_mountpoint_fd(struct mount_info *pm, int mnt_fd) { struct stat st; - int dev; - int ret; - - if (mnt_fd == -1) { - int mntns_root; - - mntns_root = mntns_get_root_fd(pm->nsid); - if (mntns_root < 0) - return -1; - - mnt_fd = openat(mntns_root, pm->ns_mountpoint, O_RDONLY); - if (mnt_fd < 0) { - pr_perror("Can't open %s", pm->ns_mountpoint); - return -1; - } - } + int ret, dev; ret = fstat(mnt_fd, &st); if (ret < 0) { pr_perror("fstat(%s) failed", pm->ns_mountpoint); - goto err; + return -1; } if (pm->s_dev_rt == MOUNT_INVALID_DEV) { pr_err("Resolving over invalid device for %#x %s %s\n", pm->s_dev, pm->fstype->name, pm->ns_mountpoint); - goto err; + return -1; } dev = MKKDEV(major(st.st_dev), minor(st.st_dev)); @@ -1063,13 +1044,38 @@ int __open_mountpoint(struct mount_info *pm, int mnt_fd) if (dev != pm->s_dev_rt) { pr_err("The file system %#x %#x (%#x) %s %s is inaccessible\n", pm->s_dev, pm->s_dev_rt, dev, pm->fstype->name, pm->ns_mountpoint); - goto err; + return -1; + } + + return 0; +} + +/* + * mnt_fd is a file descriptor on the mountpoint, which is closed in an error case. + * If mnt_fd is -1, the mountpoint will be opened by this function. 
+ */ +int __open_mountpoint(struct mount_info *pm, int mnt_fd) +{ + if (mnt_fd == -1) { + int mntns_root; + + mntns_root = mntns_get_root_fd(pm->nsid); + if (mntns_root < 0) + return -1; + + mnt_fd = openat(mntns_root, pm->ns_mountpoint, O_RDONLY); + if (mnt_fd < 0) { + pr_perror("Can't open %s", pm->ns_mountpoint); + return -1; + } + } + + if (check_mountpoint_fd(pm, mnt_fd)) { + close(mnt_fd); + return -1; } return mnt_fd; -err: - close(mnt_fd); - return -1; } int open_mount(unsigned int s_dev) From 0aed7a86a766fa175bac9d573695edc493e26717 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Fri, 17 Dec 2021 15:13:35 +0300 Subject: [PATCH 102/121] mount: remove mnt_fd argument of __open_mountpoint Only place where we used __open_mountpoint with non -1 mnt_fd is open_mountpoint. Let's use check_mountpoint_fd for this case, so that we now can remove mnt_fd argument. Also now __open_mountpoint actually always does open. Signed-off-by: Pavel Tikhomirov --- criu/fsnotify.c | 4 ++-- criu/include/mount.h | 2 +- criu/mount.c | 26 ++++++++++++-------------- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/criu/fsnotify.c b/criu/fsnotify.c index b5dd15dd89..22fb749731 100644 --- a/criu/fsnotify.c +++ b/criu/fsnotify.c @@ -132,7 +132,7 @@ static char *alloc_openable(unsigned int s_dev, unsigned long i_ino, FhEntry *f_ if (!mnt_is_dir(m)) continue; - mntfd = __open_mountpoint(m, -1); + mntfd = __open_mountpoint(m); pr_debug("\t\tTrying via mntid %d root %s ns_mountpoint @%s (%d)\n", m->mnt_id, m->root, m->ns_mountpoint, mntfd); if (mntfd < 0) @@ -206,7 +206,7 @@ static int open_handle(unsigned int s_dev, unsigned long i_ino, FhEntry *f_handl if (m->s_dev != s_dev || !mnt_is_dir(m)) continue; - mntfd = __open_mountpoint(m, -1); + mntfd = __open_mountpoint(m); if (mntfd < 0) { pr_warn("Can't open mount for s_dev %x, continue\n", s_dev); continue; diff --git a/criu/include/mount.h b/criu/include/mount.h index 23448d5fc0..3f3a67afaa 100644 --- 
a/criu/include/mount.h +++ b/criu/include/mount.h @@ -110,7 +110,7 @@ extern struct ns_id *lookup_nsid_by_mnt_id(int mnt_id); extern int open_mount(unsigned int s_dev); extern int check_mountpoint_fd(struct mount_info *pm, int mnt_fd); -extern int __open_mountpoint(struct mount_info *pm, int mnt_fd); +extern int __open_mountpoint(struct mount_info *pm); extern int mnt_is_dir(struct mount_info *pm); extern int open_mountpoint(struct mount_info *pm); diff --git a/criu/mount.c b/criu/mount.c index f6347fd9d5..ab6d3ed109 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -1054,20 +1054,18 @@ int check_mountpoint_fd(struct mount_info *pm, int mnt_fd) * mnt_fd is a file descriptor on the mountpoint, which is closed in an error case. * If mnt_fd is -1, the mountpoint will be opened by this function. */ -int __open_mountpoint(struct mount_info *pm, int mnt_fd) +int __open_mountpoint(struct mount_info *pm) { - if (mnt_fd == -1) { - int mntns_root; + int mntns_root, mnt_fd; - mntns_root = mntns_get_root_fd(pm->nsid); - if (mntns_root < 0) - return -1; + mntns_root = mntns_get_root_fd(pm->nsid); + if (mntns_root < 0) + return -1; - mnt_fd = openat(mntns_root, pm->ns_mountpoint, O_RDONLY); - if (mnt_fd < 0) { - pr_perror("Can't open %s", pm->ns_mountpoint); - return -1; - } + mnt_fd = openat(mntns_root, pm->ns_mountpoint, O_RDONLY); + if (mnt_fd < 0) { + pr_perror("Can't open %s", pm->ns_mountpoint); + return -1; } if (check_mountpoint_fd(pm, mnt_fd)) { @@ -1086,7 +1084,7 @@ int open_mount(unsigned int s_dev) if (!m) return -ENOENT; - return __open_mountpoint(m, -1); + return __open_mountpoint(m); } /* Bind-mount a mount point in a temporary place without children */ @@ -1350,7 +1348,7 @@ int open_mountpoint(struct mount_info *pm) /* No overmounts and children - the entire mount is visible */ if (list_empty(&pm->children) && !mnt_is_overmounted(pm)) - return __open_mountpoint(pm, -1); + return __open_mountpoint(pm); pr_info("Mount is not fully visible %s\n", pm->mountpoint); @@ 
-1413,7 +1411,7 @@ int open_mountpoint(struct mount_info *pm) goto err; } - return __open_mountpoint(pm, fd); + return fd < 0 ? __open_mountpoint(pm) : check_mountpoint_fd(pm, fd); err: if (ns_old >= 0) /* coverity[check_return] */ From 758f3be2d4766f478b74d4026e3c16c93d3b07f9 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 16 Dec 2021 16:14:41 +0300 Subject: [PATCH 103/121] proc_parse: add helper to resolve sdev from fd New get_sdev_from_fd helper first gets mnt_id from fd using fdinfo and then converts mnt_id to sdev using mountinfo. By default mnt_id to sdev conversion only works for mounts in mntinfo. If parse_mountinfo argument is true, will also parse current process mountinfo when looking for mount sdev, this should be used only with temporary mounts just created by criu in current mntns. v3: add argument to parse self mountinfo for auxiliary mounts Signed-off-by: Pavel Tikhomirov --- criu/include/mount.h | 1 + criu/proc_parse.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/criu/include/mount.h b/criu/include/mount.h index 3f3a67afaa..9c0d7c0105 100644 --- a/criu/include/mount.h +++ b/criu/include/mount.h @@ -140,6 +140,7 @@ extern void clean_cr_time_mounts(void); extern bool add_skip_mount(const char *mountpoint); struct ns_id; +extern int get_sdev_from_fd(int fd, unsigned int *sdev, bool parse_mountinfo); extern struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump); extern int check_mnt_id(void); diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 094f9b84e9..3017a64e11 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -1527,6 +1527,59 @@ int parse_timens_offsets(struct timespec *boff, struct timespec *moff) return exit_code; } +static int get_mountinfo_sdev_from_mntid(int mnt_id, unsigned int *sdev) +{ + int exit_code = -1; + FILE *f; + + f = fopen_proc(PROC_SELF, "mountinfo"); + if (!f) + return -1; + + while (fgets(buf, BUF_SIZE, f)) { + unsigned int 
kmaj, kmin; + int id; + + if (sscanf(buf, "%i %*i %u:%u", &id, &kmaj, &kmin) != 3) { + pr_err("Failed to parse mountinfo line %s\n", buf); + goto err; + } + + if (id == mnt_id) { + *sdev = MKKDEV(kmaj, kmin); + exit_code = 0; + break; + } + } +err: + fclose(f); + return exit_code; +} + +/* This works even on btrfs where stat does not show right sdev */ +int get_sdev_from_fd(int fd, unsigned int *sdev, bool parse_mountinfo) +{ + struct mount_info *mi; + int ret, mnt_id; + + ret = get_fd_mntid(fd, &mnt_id); + if (ret < 0) + return -1; + + /* Simple case mnt_id is in dumped mntns */ + mi = lookup_mnt_id(mnt_id); + if (mi) { + *sdev = mi->s_dev_rt; + return 0; + } + + if (!parse_mountinfo) + return -1; + + /* Complex case mnt_id is in mntns created by criu */ + return get_mountinfo_sdev_from_mntid(mnt_id, sdev); +} + struct mount_info *parse_mountinfo(pid_t pid, struct ns_id *nsid, bool for_dump) { struct mount_info *list = NULL; From ca615ea1fbc2a20b4d92ced9a904325a49ab4518 Mon Sep 17 00:00:00 2001 From: Pavel Tikhomirov Date: Thu, 16 Dec 2021 12:57:23 +0300 Subject: [PATCH 104/121] mount/btrfs: make check_mountpoint_fd fallback to get_sdev_from_fd We face that btrfs returns anonymous device in stat instead of real superblock dev for volumes, thus all btrfs volume mounts do not pass check_mountpoint_fd due to dev mismatch between stat and mountinfo. We can use special helper get_sdev_from_fd instead of stat to try to get real dev of fd for btrfs. We move check_mountpoint_fd from open_mountpoint into get_clean_fd and ns_open_mountpoint to the point where temporary mount we open fd to is still in mountinfo, thus get_sdev_from_fd would be able to find tmp mount in mountinfo. 
Signed-off-by: Pavel Tikhomirov --- criu/include/mount.h | 1 + criu/mount.c | 49 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/criu/include/mount.h b/criu/include/mount.h index 9c0d7c0105..b959d131c4 100644 --- a/criu/include/mount.h +++ b/criu/include/mount.h @@ -109,6 +109,7 @@ extern int mntns_get_root_by_mnt_id(int mnt_id); extern struct ns_id *lookup_nsid_by_mnt_id(int mnt_id); extern int open_mount(unsigned int s_dev); +extern int __check_mountpoint_fd(struct mount_info *pm, int mnt_fd, bool parse_mountinfo); extern int check_mountpoint_fd(struct mount_info *pm, int mnt_fd); extern int __open_mountpoint(struct mount_info *pm); extern int mnt_is_dir(struct mount_info *pm); diff --git a/criu/mount.c b/criu/mount.c index ab6d3ed109..4b57ac7034 100644 --- a/criu/mount.c +++ b/criu/mount.c @@ -1018,10 +1018,11 @@ int mnt_is_dir(struct mount_info *pm) return 0; } -int check_mountpoint_fd(struct mount_info *pm, int mnt_fd) +int __check_mountpoint_fd(struct mount_info *pm, int mnt_fd, bool parse_mountinfo) { struct stat st; - int ret, dev; + unsigned int dev; + int ret; ret = fstat(mnt_fd, &st); if (ret < 0) { @@ -1042,6 +1043,14 @@ int check_mountpoint_fd(struct mount_info *pm, int mnt_fd) * allocates new device ID). */ if (dev != pm->s_dev_rt) { + /* + * For btrfs device numbers in stat and mountinfo can be + * different, fallback to get_sdev_from_fd to get right dev. 
+ */ + if (!strcmp(pm->fstype->name, "btrfs") && !get_sdev_from_fd(mnt_fd, &dev, parse_mountinfo) && + dev == pm->s_dev_rt) + return 0; + pr_err("The file system %#x %#x (%#x) %s %s is inaccessible\n", pm->s_dev, pm->s_dev_rt, dev, pm->fstype->name, pm->ns_mountpoint); return -1; @@ -1050,6 +1059,11 @@ int check_mountpoint_fd(struct mount_info *pm, int mnt_fd) return 0; } +int check_mountpoint_fd(struct mount_info *pm, int mnt_fd) +{ + return __check_mountpoint_fd(pm, mnt_fd, false); +} + /* * mnt_fd is a file descriptor on the mountpoint, which is closed in an error case. * If mnt_fd is -1, the mountpoint will be opened by this function. @@ -1114,12 +1128,34 @@ static int get_clean_fd(struct mount_info *mi) char *mnt_path = NULL; char mnt_path_tmp[] = "/tmp/cr-tmpfs.XXXXXX"; char mnt_path_root[] = "/cr-tmpfs.XXXXXX"; + int fd; mnt_path = get_clean_mnt(mi, mnt_path_tmp, mnt_path_root); if (!mnt_path) return -1; - return open_detach_mount(mnt_path); + fd = open(mnt_path, O_RDONLY | O_DIRECTORY, 0); + if (fd < 0) { + pr_perror("Can't open directory %s", mnt_path); + } else { + if (__check_mountpoint_fd(mi, fd, true)) + goto err_close; + } + + if (umount2(mnt_path, MNT_DETACH)) { + pr_perror("Can't detach mount %s", mnt_path); + goto err_close; + } + + if (rmdir(mnt_path)) { + pr_perror("Can't remove tmp dir %s", mnt_path); + goto err_close; + } + + return fd; +err_close: + close_safe(&fd); + return -1; } /* @@ -1337,6 +1373,11 @@ int ns_open_mountpoint(void *arg) goto err; } + if (__check_mountpoint_fd(mi, *fd, true)) { + close(*fd); + goto err; + } + return 0; err: return 1; @@ -1411,7 +1452,7 @@ int open_mountpoint(struct mount_info *pm) goto err; } - return fd < 0 ? __open_mountpoint(pm) : check_mountpoint_fd(pm, fd); + return fd < 0 ? 
__open_mountpoint(pm) : fd; err: if (ns_old >= 0) /* coverity[check_return] */ From 125b32d35e227b47040cc2e105771bb184c65f87 Mon Sep 17 00:00:00 2001 From: Nicolas Viennot Date: Tue, 28 Dec 2021 19:30:09 +0000 Subject: [PATCH 105/121] ci: test criu-image-streamer with all tests All the bugs that were in the way got fixed. We can enable all tests. Signed-off-by: Nicolas Viennot --- scripts/ci/run-ci-tests.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh index 7eab9f2dc7..d0cd55f7c6 100755 --- a/scripts/ci/run-ci-tests.sh +++ b/scripts/ci/run-ci-tests.sh @@ -62,11 +62,9 @@ ci_prep () { } test_stream() { - # We must test CRIU features that dump content into an image file to ensure - # streaming compatibility. - STREAM_TEST_PATTERN='.*(ghost|fifo|unlink|memfd|shmem|socket_queue).*' + # Testing CRIU streaming to criu-image-streamer # shellcheck disable=SC2086 - ./test/zdtm.py run --stream -p 2 --keep-going -T "$STREAM_TEST_PATTERN" $ZDTM_OPTS + ./test/zdtm.py run --stream -p 2 --keep-going -a $ZDTM_OPTS } print_header() { From 3caa619b34653a0c32dd3116c9f6bf0249d9c30d Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 10 Jan 2022 10:37:19 +0000 Subject: [PATCH 106/121] readme: add docker test badge Signed-off-by: Radostin Stoyanov --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fd86b2c159..6b86cac9e1 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ ![X86_64 GCC Test](https://github.com/checkpoint-restore/criu/workflows/X86_64%20GCC%20Test/badge.svg) -![Podman Test](https://github.com/checkpoint-restore/criu/workflows/Podman%20Test/badge.svg) +![Docker Test](https://github.com/checkpoint-restore/criu/actions/workflows/docker-test.yml/badge.svg) +![Podman Test](https://github.com/checkpoint-restore/criu/actions/workflows/podman-test.yml/badge.svg) 
[![CircleCI](https://circleci.com/gh/checkpoint-restore/criu.svg?style=svg)](https://circleci.com/gh/checkpoint-restore/criu)

From bf9a5b8e5be78bfac6954850fee1c156eca0f1b7 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 10 Jan 2022 10:37:52 +0000 Subject: [PATCH 107/121] contributing: remove old badges and logo CI badges and logo are already present in the readme file. Signed-off-by: Radostin Stoyanov --- CONTRIBUTING.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 96972296e0..864caf93e2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,8 +1,3 @@ -[![master](https://travis-ci.org/checkpoint-restore/criu.svg?branch=master)](https://travis-ci.org/checkpoint-restore/criu) -[![development](https://travis-ci.org/checkpoint-restore/criu.svg?branch=criu-dev)](https://travis-ci.org/checkpoint-restore/criu) -[![Codacy Badge](https://api.codacy.com/project/badge/Grade/55251ec7db28421da4481fc7c1cb0cee)](https://www.codacy.com/app/xemul/criu?utm_source=github.com&utm_medium=referral&utm_content=xemul/criu&utm_campaign=Badge_Grade) -

- ## How to contribute to CRIU CRIU project is (almost) the never-ending story, because we have to always keep up with the From 3377245bb491f0d05c2f468505b6dd9bd64b06bf Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Fri, 3 Dec 2021 17:13:27 +0000 Subject: [PATCH 108/121] ci: update to latest Vagrant and Fedora images Signed-off-by: Adrian Reber --- scripts/ci/vagrant.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/ci/vagrant.sh b/scripts/ci/vagrant.sh index 40c8416e18..4a4a164456 100755 --- a/scripts/ci/vagrant.sh +++ b/scripts/ci/vagrant.sh @@ -1,14 +1,14 @@ #!/bin/bash -# This script is used to run vagrant based tests on Travis. -# This script is started via sudo from .travis.yml +# This script is used to run vagrant based tests on Cirrus CI. +# This script is started via .cirrus.yml set -e set -x -VAGRANT_VERSION=2.2.16 -FEDORA_VERSION=34 -FEDORA_BOX_VERSION=34.20210423.0 +VAGRANT_VERSION=2.2.19 +FEDORA_VERSION=35 +FEDORA_BOX_VERSION=35.20211026.0 setup() { if [ -n "$TRAVIS" ]; then From acc68cfe88a2a87260ee599a772391bc124a879d Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Sun, 16 Jan 2022 11:17:33 +0000 Subject: [PATCH 109/121] ci: added .lgtm.yml file A couple of months (or years) ago I looked into lgtm.com for CRIU. Today on a pull request I saw result from lgtm.com for the first time and it failed. Not sure what triggered the lgtm.com message into the CRIU repository, but with the .lgtm.yml file in this commit lgtm.com can actually build CRIU. 
Signed-off-by: Adrian Reber --- .lgtm.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .lgtm.yml diff --git a/.lgtm.yml b/.lgtm.yml new file mode 100644 index 0000000000..a28c35de0f --- /dev/null +++ b/.lgtm.yml @@ -0,0 +1,30 @@ +extraction: + cpp: + prepare: + packages: + - "protobuf-c-compiler" + - "libprotobuf-c-dev" + - "libprotobuf-dev" + - "build-essential" + - "libprotobuf-dev" + - "libprotobuf-c-dev" + - "protobuf-c-compiler" + - "protobuf-compiler" + - "python3-protobuf" + - "libnet-dev" + - "pkg-config" + - "libnl-3-dev" + - "libbsd0" + - "libbsd-dev" + - "iproute2" + - "libcap-dev" + - "libaio-dev" + - "python3-yaml" + - "libnl-route-3-dev" + - "python-future" + - "gnutls-dev" + configure: + command: + - "ls -laR images/google" + - "ln -s /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto" + - "ls -laR images/google" From 5b43c3ce81f590866b9330a84526362e3b713d01 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 6 Dec 2021 16:51:21 +0000 Subject: [PATCH 110/121] lib: introduce feature check in libcriu This commit adds feature check support to libcriu. It already exists in the CLI and RPC and this just extends it to libcriu. This commit provides one function to do all possible feature checks in one call. The parameter to the feature check function is a structure and the user can enable which features should be checked. Using a structure makes the function extensible without the need to break the API/ABI in the future. 
Signed-off-by: Adrian Reber --- lib/c/criu.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/c/criu.h | 29 +++++++++++++++++++++ 2 files changed, 101 insertions(+) diff --git a/lib/c/criu.c b/lib/c/criu.c index 500574e33c..dea5896f7b 100644 --- a/lib/c/criu.c +++ b/lib/c/criu.c @@ -1925,3 +1925,75 @@ int criu_join_ns_add(const char *ns, const char *ns_file, const char *extra_opt) { return criu_local_join_ns_add(global_opts, ns, ns_file, extra_opt); } + +int criu_local_feature_check(criu_opts *opts, struct criu_feature_check *features, size_t size) +{ + CriuFeatures criu_features = CRIU_FEATURES__INIT; + struct criu_feature_check features_copy = { 0 }; + CriuReq req = CRIU_REQ__INIT; + CriuResp *resp = NULL; + int ret = -1; + + saved_errno = 0; + + if (!features) + goto exit; + + if (size > sizeof(struct criu_feature_check)) + goto exit; + + memcpy(&features_copy, features, size); + + req.type = CRIU_REQ_TYPE__FEATURE_CHECK; + req.opts = opts->rpc; + + if (features_copy.mem_track) { + criu_features.has_mem_track = true; + criu_features.mem_track = true; + } + if (features_copy.lazy_pages) { + criu_features.has_lazy_pages = true; + criu_features.lazy_pages = true; + } + if (features_copy.pidfd_store) { + criu_features.has_pidfd_store = true; + criu_features.pidfd_store = true; + } + req.features = &criu_features; + + ret = send_req_and_recv_resp(opts, &req, &resp); + if (ret) + goto exit; + + memset(&features_copy, 0, sizeof(struct criu_feature_check)); + + if (resp->success) { + if (resp->features->has_mem_track) { + features_copy.mem_track = resp->features->mem_track; + } + if (resp->features->has_lazy_pages) { + features_copy.lazy_pages = resp->features->lazy_pages; + } + if (resp->features->has_pidfd_store) { + features_copy.pidfd_store = resp->features->pidfd_store; + } + memcpy(features, &features_copy, size); + } else { + ret = -EBADE; + } + +exit: + if (resp) + criu_resp__free_unpacked(resp, NULL); + + swrk_wait(opts); + + errno = saved_errno; + 
+ return ret; +} + +int criu_feature_check(struct criu_feature_check *features, size_t size) +{ + return criu_local_feature_check(global_opts, features, size); +} diff --git a/lib/c/criu.h b/lib/c/criu.h index c6d4f50a8a..aed2c34813 100644 --- a/lib/c/criu.h +++ b/lib/c/criu.h @@ -288,6 +288,35 @@ int criu_local_dump_iters(criu_opts *opts, int (*more)(criu_predump_info pi)); int criu_local_get_version(criu_opts *opts); int criu_local_check_version(criu_opts *opts, int minimum); +/* + * Feature checking allows the user to check if CRIU supports + * certain features. There are CRIU features which do not depend + * on the version of CRIU but on kernel features or architecture. + * + * One example is memory tracking. Memory tracking can be disabled + * in the kernel or there are architectures which do not support + * it (aarch64 for example). By using the feature check a libcriu + * user can easily query CRIU if a certain feature is available. + * + * The features which should be checked can be marked in the + * structure 'struct criu_feature_check'. Each structure member + * that is set to true will result in CRIU checking for the + * availability of that feature in the current combination of + * CRIU/kernel/architecture. + * + * Available features will be set to true when the function + * returns successfully. Missing features will be set to false. 
+ */ + +struct criu_feature_check { + bool mem_track; + bool lazy_pages; + bool pidfd_store; +}; + +int criu_feature_check(struct criu_feature_check *features, size_t size); +int criu_local_feature_check(criu_opts *opts, struct criu_feature_check *features, size_t size); + #ifdef __GNUG__ } #endif From c2a12bb925d343cc6333db7fe5f142d592d5edf6 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Mon, 6 Dec 2021 16:51:21 +0000 Subject: [PATCH 111/121] lib: added tests for feature check in libcriu Signed-off-by: Adrian Reber --- test/others/libcriu/.gitignore | 1 + test/others/libcriu/Makefile | 1 + test/others/libcriu/run.sh | 10 ++++ test/others/libcriu/test_feature_check.c | 65 ++++++++++++++++++++++++ 4 files changed, 77 insertions(+) create mode 100644 test/others/libcriu/test_feature_check.c diff --git a/test/others/libcriu/.gitignore b/test/others/libcriu/.gitignore index 15abf07acc..0f6e52bb4e 100644 --- a/test/others/libcriu/.gitignore +++ b/test/others/libcriu/.gitignore @@ -5,5 +5,6 @@ test_self test_sub test_join_ns test_pre_dump +test_feature_check output/ libcriu.so.* diff --git a/test/others/libcriu/Makefile b/test/others/libcriu/Makefile index 581574da00..ae73305331 100644 --- a/test/others/libcriu/Makefile +++ b/test/others/libcriu/Makefile @@ -7,6 +7,7 @@ TESTS += test_iters TESTS += test_errno TESTS += test_join_ns TESTS += test_pre_dump +TESTS += test_feature_check all: $(TESTS) .PHONY: all diff --git a/test/others/libcriu/run.sh b/test/others/libcriu/run.sh index 1b6c73448f..77bdfb87eb 100755 --- a/test/others/libcriu/run.sh +++ b/test/others/libcriu/run.sh @@ -62,6 +62,16 @@ if [ "$(uname -m)" = "x86_64" ]; then fi run_test test_errno run_test test_join_ns +if criu check --feature mem_dirty_track > /dev/null; then + export CRIU_FEATURE_MEM_TRACK=1 +fi +if criu check --feature uffd-noncoop > /dev/null; then + export CRIU_FEATURE_LAZY_PAGES=1 +fi +if criu check --feature pidfd_store > /dev/null; then + export CRIU_FEATURE_PIDFD_STORE=1 +fi +run_test 
test_feature_check echo "== Tests done" make libcriu_clean diff --git a/test/others/libcriu/test_feature_check.c b/test/others/libcriu/test_feature_check.c new file mode 100644 index 0000000000..d88e0de230 --- /dev/null +++ b/test/others/libcriu/test_feature_check.c @@ -0,0 +1,65 @@ +#include "criu.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lib.h" + +int main(int argc, char **argv) +{ + int ret; + char *env; + bool mem_track = 0; + bool lazy_pages = 0; + bool pidfd_store = 0; + struct criu_feature_check features = { + .mem_track = true, + .lazy_pages = true, + .pidfd_store = true, + }; + + printf("--- Start feature check ---\n"); + criu_init_opts(); + criu_set_service_binary(argv[1]); + + env = getenv("CRIU_FEATURE_MEM_TRACK"); + if (env) { + mem_track = true; + } + env = getenv("CRIU_FEATURE_LAZY_PAGES"); + if (env) { + lazy_pages = true; + } + env = getenv("CRIU_FEATURE_PIDFD_STORE"); + if (env) { + pidfd_store = true; + } + + ret = criu_feature_check(&features, sizeof(features) + 1); + printf(" `- passing too large structure to libcriu should return -1: %d\n", ret); + if (ret != -1) + return -1; + + ret = criu_feature_check(&features, sizeof(features)); + if (ret < 0) { + what_err_ret_mean(ret); + return ret; + } + + printf(" `- mem_track : %d - expected : %d\n", features.mem_track, mem_track); + if (features.mem_track != mem_track) + return -1; + printf(" `- lazy_pages : %d - expected : %d\n", features.lazy_pages, lazy_pages); + if (features.lazy_pages != lazy_pages) + return -1; + printf(" `- pidfd_store: %d - expected : %d\n", features.pidfd_store, pidfd_store); + if (features.pidfd_store != pidfd_store) + return -1; + + return 0; +} From 4223e680382586628d6650e289596c3a217326c0 Mon Sep 17 00:00:00 2001 From: Liu Hua Date: Thu, 6 Jan 2022 20:44:21 +0800 Subject: [PATCH 112/121] pagemap: tiny fix on truncating memory image When requested iovs are huge, criu needs to invoke preadv() more than once.
In this situation criu truncates memory image with offset of first preadv() and length of last one, which leads to leakage of memory image. This patch fixes truncating with the right offset and length. Signed-off-by: Liu Hua --- criu/pagemap.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/criu/pagemap.c b/criu/pagemap.c index d996db7fc6..83f69bba37 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -535,7 +535,6 @@ static int process_async_reads(struct page_read *pr) fd = img_raw_fd(pr->pi); list_for_each_entry_safe(piov, n, &pr->async, l) { ssize_t ret; - off_t start = piov->from; struct iovec *iovs = piov->to; pr_debug("Read piov iovs %d, from %ju, len %ju, first %p:%zu\n", piov->nr, piov->from, @@ -554,13 +553,16 @@ static int process_async_reads(struct page_read *pr) } } - if (ret != piov->end - piov->from) { - if (ret < 0) { - pr_err("Can't read async pr bytes (%zd / %ju read, %ju off, %d iovs)\n", ret, - piov->end - piov->from, piov->from, piov->nr); - return -1; - } + if (ret < 0) { + pr_err("Can't read async pr bytes (%zd / %ju read, %ju off, %d iovs)\n", ret, + piov->end - piov->from, piov->from, piov->nr); + return -1; + } + if (opts.auto_dedup && punch_hole(pr, piov->from, ret, false)) + return -1; + + if (ret != piov->end - piov->from) { /* * The preadv() can return less than requested. It's * valid and doesn't mean error or EOF. We should advance @@ -574,9 +576,6 @@ static int process_async_reads(struct page_read *pr) goto more; } - if (opts.auto_dedup && punch_hole(pr, start, ret, false)) - return -1; - BUG_ON(pr->io_complete); /* FIXME -- implement once needed */ list_del(&piov->l); From a162d2b3a0f7715eaef5090631e2ec8c86bdd138 Mon Sep 17 00:00:00 2001 From: "fu.lin" Date: Sun, 16 Jan 2022 18:37:09 +0800 Subject: [PATCH 113/121] zdtm: fix zdtm/static/maps00 case in arm64 This case sometimes will cause SIGILL signal in arm64 platform.
<> notes: The ARM architecture does not require the hardware to ensure coherency between instruction caches and memory, even for locations of shared memory. Therefore, we need flush dcache and icache for self-modifying code. - https://developer.arm.com/documentation/den0024/a/Caches/Point-of-coherency-and-unification Signed-off-by: fu.lin --- test/zdtm/static/maps00.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/zdtm/static/maps00.c b/test/zdtm/static/maps00.c index 10a4cac790..b1e55e8614 100644 --- a/test/zdtm/static/maps00.c +++ b/test/zdtm/static/maps00.c @@ -158,7 +158,13 @@ static int check_map(struct map *map) if (!sigsetjmp(segv_ret, 1)) { if (map->prot & PROT_WRITE) { - memcpy(map->ptr, test_func, getpagesize()); + memcpy(map->ptr, test_func, ONE_MAP_SIZE); + /* The ARM ARM architecture does not require the + * hardware to ensure coherency between instruction + * caches and memory, flushing dcache and icache is + * necessory to prevent SIGILL signal. + */ + __builtin___clear_cache(map->ptr, map->ptr + ONE_MAP_SIZE); } else { if (!(map->flag & MAP_ANONYMOUS)) { uint8_t funlen = (uint8_t *)check_map - (uint8_t *)test_func; From dfada9dfe4545ee19806d8812a85b194b90f08a0 Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 18 Jan 2022 16:49:40 +0000 Subject: [PATCH 114/121] compel: fix GCC 12 failure (out of bounds) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a confusing change as it seems the original code was just wrong. 
GCC 12 complains with: In function ‘__conv_val’, inlined from ‘std_strtoul’ at compel/plugins/std/string.c:202:7: compel/plugins/std/string.c:154:24: error: array subscript 97 is above array bounds of ‘const char[37]’ [-Werror=array-bounds] 154 | return &conv_tab[__tolower(c)] - conv_tab; | ^~~~~~~~~~~~~~~~~~~~~~~ compel/plugins/std/string.c: In function ‘std_strtoul’: compel/plugins/std/string.c:10:19: note: while referencing ‘conv_tab’ 10 | static const char conv_tab[] = "0123456789abcdefghijklmnopqrstuvwxyz"; | ^~~~~~~~ cc1: all warnings being treated as errors Which sounds correct. The array conv_tab has just 37 elements. If I understand the code correctly we are trying to convert anything that is character between a-z and A-Z to a number for cases where the base is larger than 10. For a base 11 conversion b|B should return 11. For a base 35 conversion z|Z should return 35. This is all for a strtoul() implementation. The original code was: static const char conv_tab[] = "0123456789abcdefghijklmnopqrstuvwxyz"; return &conv_tab[__tolower(c)] - conv_tab; and that seems wrong. If conv_tab would have been some kind of hash it could have worked, but '__tolower()' will always return something larger than 97 ('a') which will always overflow the array. But maybe I just don't get that part of the code. I replaced it with return __tolower(c) - 'a' + 10; which does the right thing: 'A' = 10, 'B' = 11 ... 'Z' = 35 Signed-off-by: Adrian Reber --- compel/plugins/std/string.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/compel/plugins/std/string.c b/compel/plugins/std/string.c index bde1bc68b9..d67e0d1a9d 100644 --- a/compel/plugins/std/string.c +++ b/compel/plugins/std/string.c @@ -151,7 +151,12 @@ static unsigned int __conv_val(unsigned char c) if (__isdigit(c)) return c - '0'; else if (__isalpha(c)) - return &conv_tab[__tolower(c)] - conv_tab; + /** + * If we want the value of something which __isalpha() == true + * it has to be base > 10. 
'A' = 10, 'B' = 11 ... 'Z' = 35 + */ + return __tolower(c) - 'a' + 10; + return -1u; } From 93b1526e997cff45b3829aeadb6fac634be7d46f Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 18 Jan 2022 17:20:35 +0000 Subject: [PATCH 115/121] criu: fix configuration file scanner with GCC 12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes: criu/config.c: In function ‘parse_statement’: criu/config.c:232:43: error: the comparison will always evaluate as ‘true’ for the pointer operand in ‘*(configuration + (sizetype)((long unsigned int)i * 8)) + ((sizetype)offset + 1)’ must not be NULL [-Werror=address] 232 | if (configuration[i] + offset + 1 != 0 && strchr(configuration[i] + offset, ' ')) { | ^~ Signed-off-by: Adrian Reber --- criu/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/criu/config.c b/criu/config.c index 91fb0b64d4..33f2820a18 100644 --- a/criu/config.c +++ b/criu/config.c @@ -229,7 +229,7 @@ int parse_statement(int i, char *line, char **configuration) tmp_string[0] = 0; /* Check for unsupported configuration file entries */ - if (configuration[i] + offset + 1 != 0 && strchr(configuration[i] + offset, ' ')) { + if (strchr(configuration[i] + offset, ' ')) { int j; len = strlen(configuration[i] + offset); for (j = 0; j < len - 1; j++) { From 04f8368eaee2b29bb92ff0ba4f5c43501408d15e Mon Sep 17 00:00:00 2001 From: Adrian Reber Date: Tue, 18 Jan 2022 17:22:46 +0000 Subject: [PATCH 116/121] compel: fix parasite with GCC 12 Parasite creation started to fail with GCC 12: On x86_64 with: ./compel/compel-host hgen -f criu/pie/restorer.built-in.o -o criu/pie/restorer-blob.h Error (compel/src/lib/handle-elf-host.c:337): Unexpected undefined symbol: `strlen'. External symbol in PIE? 
On aarch64 with: ld: criu/pie/restorer.o: in function `lsm_set_label': /drone/src/criu/pie/restorer.c:174: undefined reference to `strlen' Line 174 is: "for (len = 0; label[len]; len++)" Adding '-ffreestanding' to parasite compilation fixes these errors because, according to GCC developers: "strlen is a standard C function, so I don't see any bug in that being used unless you do a freestanding compilation (-nostdlib isn't that)." Signed-off-by: Adrian Reber --- compel/src/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/compel/src/main.c b/compel/src/main.c index a9a50959f9..f461ff04d1 100644 --- a/compel/src/main.c +++ b/compel/src/main.c @@ -19,6 +19,7 @@ #define CFLAGS_DEFAULT_SET \ "-Wstrict-prototypes " \ + "-ffreestanding " \ "-fno-stack-protector -nostdlib -fomit-frame-pointer " #define COMPEL_CFLAGS_PIE CFLAGS_DEFAULT_SET "-fpie" From 985b92008c070093f7dd593f7d849e340bdb0493 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 27 Jan 2022 22:13:33 +0000 Subject: [PATCH 117/121] ci: set continue-on-error for cross-compile Running cross compile tests with Debian unstable sometimes fails due to missing or outdated packages. 
Signed-off-by: Radostin Stoyanov --- .github/workflows/cross-compile.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cross-compile.yml b/.github/workflows/cross-compile.yml index 461a6e6188..be8e7f09c2 100644 --- a/.github/workflows/cross-compile.yml +++ b/.github/workflows/cross-compile.yml @@ -6,18 +6,26 @@ jobs: build: runs-on: ubuntu-latest + continue-on-error: ${{ matrix.experimental }} strategy: + fail-fast: false matrix: + experimental: [false] target: [ armv7-stable-cross, - armv7-unstable-cross, aarch64-stable-cross, - aarch64-unstable-cross, ppc64-stable-cross, - ppc64-unstable-cross, mips64el-stable-cross, - mips64el-unstable-cross ] + include: + - experimental: true + target: armv7-unstable-cross + - experimental: true + target: aarch64-unstable-cross + - experimental: true + target: ppc64-unstable-cross + - experimental: true + target: mips64el-unstable-cross steps: - uses: actions/checkout@v2 From d72daddbb48ad705184d6cddfa70df74d1dd3e20 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 27 Jan 2022 14:49:41 +0000 Subject: [PATCH 118/121] test/autofs: fix use-after-free autofs.c:66:17: error: pointer 'str' may be used after 'realloc' [-Werror=use-after-free] autofs.c: In function 'check_automount': ../lib/zdtmtst.h:131:9: error: pointer 'mountpoint' may be used after 'free' [-Werror=use-after-free] 131 | test_msg("ERR: %s:%d: " format " (errno = %d (%s))\n", __FILE__, __LINE__, ##arg, errno, strerror(errno)) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ autofs.c:277:17: note: in expansion of macro 'pr_perror' 277 | pr_perror("%s: failed to close fd %d", mountpoint, p->fd); | ^~~~~~~~~ autofs.c:268:9: note: call to 'free' here 268 | free(mountpoint); | ^~~~~~~~~~~~~~~~ Fixes: #1731 v2: (@Snorch) always update `str` after successful realloc() Signed-off-by: Radostin Stoyanov --- test/zdtm/static/autofs.c | 5 +++-- 1 file 
changed, 3 insertions(+), 2 deletions(-) diff --git a/test/zdtm/static/autofs.c b/test/zdtm/static/autofs.c index 2d6078627f..ad17958427 100644 --- a/test/zdtm/static/autofs.c +++ b/test/zdtm/static/autofs.c @@ -47,6 +47,7 @@ static char *xvstrcat(char *str, const char *fmt, va_list args) ret = -ENOMEM; new = realloc(str, offset + delta); if (new) { + str = new; va_copy(tmp, args); ret = vsnprintf(new + offset, delta, fmt, tmp); va_end(tmp); @@ -54,7 +55,6 @@ static char *xvstrcat(char *str, const char *fmt, va_list args) /* NOTE: vsnprintf returns the amount of bytes * * to allocate. */ delta = ret + 1; - str = new; ret = 0; } } @@ -266,6 +266,7 @@ static int check_automount(struct autofs_params *p) return err; free(mountpoint); + mountpoint = NULL; err = p->setup(p); if (err) { @@ -274,7 +275,7 @@ static int check_automount(struct autofs_params *p) } if (close(p->fd)) { - pr_perror("%s: failed to close fd %d", mountpoint, p->fd); + pr_perror("mountpoint failed to close fd %d", p->fd); return -errno; } From c2698b90d7f4f96dc7dfda2a0e61780cafd94b1b Mon Sep 17 00:00:00 2001 From: Ashutosh Mehra Date: Wed, 26 Jan 2022 14:14:38 -0500 Subject: [PATCH 119/121] Fix formatting in criu documentation Signed-off-by: Ashutosh Mehra --- Documentation/criu.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/criu.txt b/Documentation/criu.txt index f41b1898c2..57b791138b 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -419,7 +419,7 @@ By default the option is set to *fpu* and *ins*. Set the method to be used to validate open files. Validation is done to ensure that the version of the file being restored is the same version when it was dumped. - ++ The 'mode' may be one of the following: *filesize*::: @@ -532,7 +532,7 @@ usually need to be escaped from shell. Restore cgroups configuration associated with a task from the image. 
Controllers are always restored in an optimistic way -- if already present in system, *criu* reuses it, otherwise it will be created. - ++ The 'mode' may be one of the following: *none*::: Do not restore cgroup properties but require cgroup to @@ -656,7 +656,7 @@ are not adequate, but this can be suppressed by using *--cpu-cap=none*. Set the method to be used to validate open files. Validation is done to ensure that the version of the file being restored is the same version when it was dumped. - ++ The 'mode' may be one of the following: *filesize*::: From ab6213c3789cd62667575f2050d20be71f4d7653 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sat, 15 Jan 2022 01:41:23 +0530 Subject: [PATCH 120/121] bpf: Introduce iterator support This commit introduces basic support in CRIU to make use of eBPF kernel features to aid in the checkpoint/restore process. The immediate usecase is to provide an API to find fd to file pointer mapping, and vice-versa, for quick lookup from one file set (e.g. task, epoll, io_uring) to another. This is done by making use of eBPF iterator. This makes use of task, epoll, and io_uring file iterator features to be introduced in upcoming linux kernel versions. No dependency on clang's BPF toolchain or libbpf is taken, as we don't need those features just yet. It might be inevitable as we make more use of BPF, but for now we can tolerate just writing raw BPF assembly. To this end, also import bpf_insn.h from kernel's samples/bpf directory. 
Signed-off-by: Kumar Kartikeya Dwivedi --- criu/bpf-util.c | 259 ++++++++++++++++++++++++++++++++++++++++ criu/include/bpf-util.h | 16 +++ criu/include/bpf_insn.h | 233 ++++++++++++++++++++++++++++++++++++ 3 files changed, 508 insertions(+) create mode 100644 criu/bpf-util.c create mode 100644 criu/include/bpf-util.h create mode 100644 criu/include/bpf_insn.h diff --git a/criu/bpf-util.c b/criu/bpf-util.c new file mode 100644 index 0000000000..3652027fbf --- /dev/null +++ b/criu/bpf-util.c @@ -0,0 +1,259 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "log.h" +#include "xmalloc.h" +#include "bpf-util.h" +#include "bpf_insn.h" +#include "common/bug.h" + +/* XXX: Propagate the case of errors from bpf_map_update_elem */ + +static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size) +{ + return syscall(__NR_bpf, cmd, attr, size); +} + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +static int bpf_map_create(enum bpf_map_type map_type, int key_size, int value_size, int max_entries) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + + return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +static int bpf_prog_load_iter(struct bpf_insn *insns, int insn_cnt) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_TRACING; + attr.expected_attach_type = BPF_TRACE_ITER; + + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = insn_cnt; + + return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +} + +struct bpf_insn_buf { + int insn_cap; + int insn_cnt; + struct bpf_insn insns[]; +}; + +static struct bpf_insn_buf *bpf_insn_buf_alloc(void) +{ + struct bpf_insn_buf *ibuf; + + ibuf = xmalloc(offsetof(struct bpf_insn_buf, insns[64])); + if (!ibuf) + return NULL; + ibuf->insn_cap = 64; + ibuf->insn_cnt = 
0; + return ibuf; +} + +static void bpf_insn_buf_free(struct bpf_insn_buf *ibuf) +{ + xfree(ibuf); +} + +static int bpf_insn_buf_push(struct bpf_insn_buf *ibuf, struct bpf_insn *insns, int insn_cnt) +{ + BUG_ON(!ibuf); + if (ibuf->insn_cap >= ibuf->insn_cnt + insn_cnt) + goto push; + ibuf = xrealloc(ibuf, offsetof(struct bpf_insn_buf, insns[ibuf->insn_cap + insn_cnt])); + if (!ibuf) + return -ENOMEM; +push: + memcpy(ibuf->insns + ibuf->insn_cnt, insns, insn_cnt * sizeof(*insns)); + ibuf->insn_cnt += insn_cnt; + ibuf->insn_cap += insn_cnt; + return 0; +} + +#define bpf_push(insn) \ + ({ \ + if ((ret = bpf_insn_buf_push(ibuf, (struct bpf_insn[]){ insn }, \ + sizeof((struct bpf_insn[]){ insn }) / sizeof(struct bpf_insn)))) \ + goto exit; \ + }) + +typedef int bpf_insn_buf_fill_cb(struct bpf_fdtable *meta, struct bpf_insn_buf *ibuf, void *userdata); + +enum fill_type { + FILL_TASK_FILE, + FILL_IO_URING, + FILL_EPOLL, +}; + +static int bpf_fill_fdtable(enum fill_type type, int *fill_desc, struct bpf_fdtable *meta, + int index_size, int max_entries, bpf_insn_buf_fill_cb fill_insn, + void *userdata) +{ + int file2index_map_fd, index2file_map_fd, ret; + struct bpf_insn_buf *ibuf; + + BUG_ON(!meta); + BUG_ON(index_size != 4 || index_size != 8); + + file2index_map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(uint64_t), index_size, max_entries); + if (file2index_map_fd < 0) + return -errno; + + index2file_map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, index_size, sizeof(uint64_t), max_entries); + if (index2file_map_fd < 0) { + ret = -errno; + goto end_file2fd; + } + + meta->file2index_map_fd = file2index_map_fd; + meta->index2file_map_fd = index2file_map_fd; + + ibuf = bpf_insn_buf_alloc(); + if (!ibuf) { + ret = -ENOMEM; + goto end_fd2file; + } + + if ((ret = fill_insn(meta, ibuf, userdata))) + goto end_ibuf; + + ret = bpf_prog_load_iter(ibuf->insns, ibuf->insn_cnt); + if (ret < 0) + ret = -errno; + bpf_insn_buf_free(ibuf); + + return ret; +end_ibuf: + bpf_insn_buf_free(ibuf); 
+end_fd2file: + close(index2file_map_fd); +end_file2fd: + close(file2index_map_fd); + return ret; +} + +static int task_fill_cb(struct bpf_fdtable *meta, struct bpf_insn_buf *ibuf, void *userdata) +{ + int tgid = *(int *)userdata, ret; + + (void)tgid; + /* XXX: Fixup task_struct::tgid offset and compare (requires libbpf dep) */ + bpf_push(BPF_MOV64_REG(BPF_REG_6, BPF_REG_1)); + /* index -> file */ + bpf_push(BPF_LD_MAP_FD(BPF_REG_1, meta->index2file_map_fd)); + bpf_push(BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8 + 8 /* meta + */)); + bpf_push(BPF_MOV64_REG(BPF_REG_3, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8 + 8 + 8 /* meta + task + fd */)); + bpf_push(BPF_MOV64_IMM(BPF_REG_4, 0)); + bpf_push(BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem)); + /* file -> index */ + bpf_push(BPF_MOV64_REG(BPF_REG_1, meta->file2index_map_fd)); + bpf_push(BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8 + 8 /* meta + ctx */)); + bpf_push(BPF_MOV64_REG(BPF_REG_3, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8 + 8 + 8 /* meta + ctx + file */)); + bpf_push(BPF_MOV64_IMM(BPF_REG_4, 0)); + bpf_push(BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem)); + bpf_push(BPF_MOV64_IMM(BPF_REG_0, 0)); + bpf_push(BPF_EXIT_INSN()); + return 0; +exit: + return ret; +} + +int bpf_fill_task_fdtable(int tgid, struct bpf_fdtable *meta) +{ + return bpf_fill_fdtable(FILL_TASK_FILE, &tgid, meta, sizeof(int), 65535, task_fill_cb, + &tgid); +} + +static int io_uring_fill_cb(struct bpf_fdtable *meta, struct bpf_insn_buf *ibuf, void *userdata) +{ + int ret; + + /* XXX: Consider skipping in sparse set */ + bpf_push(BPF_MOV64_REG(BPF_REG_6, BPF_REG_1)); + /* index -> file */ + bpf_push(BPF_LD_MAP_FD(BPF_REG_1, meta->index2file_map_fd)); + bpf_push(BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8 + 8 + 8 /* meta + ctx + file */)); + 
bpf_push(BPF_MOV64_REG(BPF_REG_3, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8 + 8 /* meta + ctx */)); + bpf_push(BPF_MOV64_IMM(BPF_REG_4, 0)); + bpf_push(BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem)); + /* file -> index */ + bpf_push(BPF_MOV64_REG(BPF_REG_1, meta->file2index_map_fd)); + bpf_push(BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8 + 8 /* meta + ctx */)); + bpf_push(BPF_MOV64_REG(BPF_REG_3, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8 + 8 + 8 /* meta + ctx + file */)); + bpf_push(BPF_MOV64_IMM(BPF_REG_4, 0)); + bpf_push(BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem)); + bpf_push(BPF_MOV64_IMM(BPF_REG_0, 0)); + bpf_push(BPF_EXIT_INSN()); + return 0; +exit: + return ret; +} + +int bpf_fill_io_uring_fdtable(int io_uring_fd, struct bpf_fdtable *meta) +{ + return bpf_fill_fdtable(FILL_IO_URING, &io_uring_fd, meta, sizeof(unsigned long), + 4096, io_uring_fill_cb, NULL); +} + +int epoll_fill_cb(struct bpf_fdtable *meta, struct bpf_insn_buf *ibuf, void *userdata) +{ + int ret; + + /* XXX: Relocate epitem offsets */ + bpf_push(BPF_MOV64_REG(BPF_REG_6, BPF_REG_1)); + /* index -> file */ + bpf_push(BPF_LD_MAP_FD(BPF_REG_1, meta->index2file_map_fd)); + bpf_push(BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8 + 8 + 8 /* meta + ctx + file */)); + bpf_push(BPF_MOV64_REG(BPF_REG_3, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8 + 8 /* meta + ctx */)); + bpf_push(BPF_MOV64_IMM(BPF_REG_4, 0)); + bpf_push(BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem)); + /* file -> index */ + bpf_push(BPF_MOV64_REG(BPF_REG_1, meta->file2index_map_fd)); + bpf_push(BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8 + 8 /* meta + ctx */)); + bpf_push(BPF_MOV64_REG(BPF_REG_3, BPF_REG_6)); + bpf_push(BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8 + 8 + 8 /* meta + ctx + file */)); + 
bpf_push(BPF_MOV64_IMM(BPF_REG_4, 0)); + bpf_push(BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem)); + bpf_push(BPF_MOV64_IMM(BPF_REG_0, 0)); + bpf_push(BPF_EXIT_INSN()); + return 0; +exit: + return ret; +} + +int bpf_fill_epoll_fdtable(int epoll_fd, struct bpf_fdtable *meta) +{ + return bpf_fill_fdtable(FILL_EPOLL, &epoll_fd, meta, sizeof(unsigned long), + 4096, io_uring_fill_cb, NULL); +} diff --git a/criu/include/bpf-util.h b/criu/include/bpf-util.h new file mode 100644 index 0000000000..b5b9713198 --- /dev/null +++ b/criu/include/bpf-util.h @@ -0,0 +1,16 @@ +#ifndef __CR_BPF_UTIL_H__ +#define __CR_BPF_UTIL_H__ + +#include +#include + +struct bpf_fdtable { + int file2index_map_fd; + int index2file_map_fd; +}; + +int bpf_fill_task_fdtable(pid_t tgid, struct bpf_fdtable *meta); +int bpf_fill_io_uring_fdtable(int io_uring_fd, struct bpf_fdtable *meta); +int bpf_fill_epoll_fdtable(int epoll_fd, struct bpf_fdtable *meta); + +#endif diff --git a/criu/include/bpf_insn.h b/criu/include/bpf_insn.h new file mode 100644 index 0000000000..29c3bb6ad1 --- /dev/null +++ b/criu/include/bpf_insn.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* eBPF instruction mini library */ +#ifndef __BPF_INSN_H +#define __BPF_INSN_H + +struct bpf_insn; + +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ + +#define BPF_ALU64_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_ALU32_REG(OP, DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ + +#define BPF_ALU64_IMM(OP, DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_ALU32_IMM(OP, 
DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Short form of mov, dst_reg = src_reg */ + +#define BPF_MOV64_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +#define BPF_MOV32_REG(DST, SRC) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = 0 }) + +/* Short form of mov, dst_reg = imm32 */ + +#define BPF_MOV64_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU64 | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + +#ifndef BPF_PSEUDO_MAP_FD +# define BPF_PSEUDO_MAP_FD 1 +#endif + +/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */ +#define BPF_LD_MAP_FD(DST, MAP_FD) \ + BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD) + + +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ + +#define BPF_LD_ABS(SIZE, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + +/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ + +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { 
\ + .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* + * Atomic operations: + * + * BPF_ADD *(uint *) (dst_reg + off16) += src_reg + * BPF_AND *(uint *) (dst_reg + off16) &= src_reg + * BPF_OR *(uint *) (dst_reg + off16) |= src_reg + * BPF_XOR *(uint *) (dst_reg + off16) ^= src_reg + * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg); + * BPF_AND | BPF_FETCH src_reg = atomic_fetch_and(dst_reg + off16, src_reg); + * BPF_OR | BPF_FETCH src_reg = atomic_fetch_or(dst_reg + off16, src_reg); + * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg); + * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg) + * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg) + */ + +#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = OP }) + +/* Legacy alias */ +#define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF) + +/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ + +#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ + +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. 
*/ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ + +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Raw code statement block */ + +#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ + ((struct bpf_insn) { \ + .code = CODE, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = IMM }) + +/* Program exit */ + +#define BPF_EXIT_INSN() \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_EXIT, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + +#endif From 0df5e215cbb35497e9cf3ddb4d7a00df10d38242 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sun, 27 Jun 2021 19:43:21 +0530 Subject: [PATCH 121/121] Introduce io_uring dump/restore support Signed-off-by: Kumar Kartikeya Dwivedi --- criu/Makefile.crtools | 1 + criu/cr-dump.c | 63 ++- criu/cr-restore.c | 3 +- criu/files.c | 3 + criu/image-desc.c | 2 + criu/include/image-desc.h | 2 + criu/include/image.h | 11 +- criu/include/io_uring.h | 108 ++++ criu/include/magic.h | 2 + criu/include/protobuf-desc.h | 2 + criu/include/vma.h | 5 +- criu/io_uring.c | 1036 ++++++++++++++++++++++++++++++++++ criu/mem.c | 7 + criu/proc_parse.c | 306 +++++++++- criu/protobuf-desc.c | 1 + criu/util.c | 1 + images/Makefile | 1 + images/fdinfo.proto | 45 +- images/io_uring.proto | 72 +++ lib/py/images/images.py | 18 + 20 files changed, 1654 insertions(+), 35 deletions(-) create mode 100644 
criu/include/io_uring.h create mode 100644 criu/io_uring.c create mode 100644 images/io_uring.proto diff --git a/criu/Makefile.crtools b/criu/Makefile.crtools index 50a2fa9c55..03b64585c1 100644 --- a/criu/Makefile.crtools +++ b/criu/Makefile.crtools @@ -31,6 +31,7 @@ obj-y += fsnotify.o obj-y += image-desc.o obj-y += image.o obj-y += img-streamer.o +obj-y += io_uring.o obj-y += ipc_ns.o obj-y += irmap.o obj-y += kcmp-ids.o diff --git a/criu/cr-dump.c b/criu/cr-dump.c index c972e343aa..be4947afa6 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -85,6 +85,9 @@ #include "pidfd-store.h" #include "apparmor.h" #include "asm/dump.h" +#include "io_uring.h" + +#include "compel/plugins/std/syscall-codes.h" /* * Architectures can overwrite this function to restore register sets that @@ -191,10 +194,11 @@ struct cr_imgset *glob_imgset; static int collect_fds(pid_t pid, struct parasite_drain_fd **dfds) { + char buf[PATH_MAX] = {}; struct dirent *de; - DIR *fd_dir; + int n, pidfd = -1; int size = 0; - int n; + DIR *fd_dir; pr_info("\n"); pr_info("Collecting fds (pid: %d)\n", pid); @@ -204,6 +208,59 @@ static int collect_fds(pid_t pid, struct parasite_drain_fd **dfds) if (!fd_dir) return -1; + /* Before collecting fds, we need to bring io_uring to steady state, + * since it can install fds into task's fdtable, and if we do it later, + * during actual io_uring dump, we will miss dumping these files. 
+ */ + while ((de = readdir(fd_dir))) { + if (dir_dots(de)) + continue; + + n = dirfd(fd_dir); + if (n == -1) { + close(pidfd); + return -1; + } + + n = readlinkat(n, de->d_name, buf, sizeof(buf)); + if (n == -1) { + close(pidfd); + return -1; + } + + if (is_io_uring_link(buf)) { + if (!kdat.has_pidfd_open) { + pr_err("pidfd_open system call not supported\n"); + return -ENOTSUP; + } + + if (!kdat.has_pidfd_getfd) { + pr_err("pidfd_getfd system call not supported\n"); + return -ENOTSUP; + } + + if (pidfd == -1) { + pidfd = syscall(SYS_pidfd_open, pid, 0); + if (pidfd < 0) { + pr_err("Failed to open pidfd for pid %d\n", pid); + return pidfd; + } + } + + if (io_uring_synchronize_fd(syscall(SYS_pidfd_getfd, pidfd, atoi(de->d_name), 0))) { + pr_err("Failed to synchronize io_uring fd %d for pid %d\n", atoi(de->d_name), pid); + close(pidfd); + return -1; + } + } + } + + if (pidfd >= 0) + close(pidfd); + + /* Collect fds now */ + rewinddir(fd_dir); + n = 0; while ((de = readdir(fd_dir))) { if (dir_dots(de)) @@ -489,6 +546,8 @@ static int dump_task_mm(pid_t pid, const struct proc_pid_stat *stat, const struc ret = check_sysvipc_map_dump(pid, vma); else if (vma_entry_is(vma, VMA_AREA_SOCKET)) ret = dump_socket_map(vma_area); + else if (vma_entry_is(vma, VMA_AREA_IO_URING)) + ret = dump_io_uring_map(vma_area); else ret = 0; if (ret) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index ed62cc5a28..01e6749ed5 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -29,6 +29,7 @@ #include "servicefd.h" #include "image.h" #include "img-streamer.h" +#include "io_uring.h" #include "util.h" #include "util-pie.h" #include "criu-log.h" @@ -277,7 +278,7 @@ static struct collect_image_info *cinfos_files[] = { &unix_sk_cinfo, &fifo_cinfo, &pipe_cinfo, &nsfile_cinfo, &packet_sk_cinfo, &netlink_sk_cinfo, &eventfd_cinfo, &epoll_cinfo, &epoll_tfd_cinfo, &signalfd_cinfo, &tunfile_cinfo, &timerfd_cinfo, &inotify_cinfo, &inotify_mark_cinfo, &fanotify_cinfo, - &fanotify_mark_cinfo, 
&ext_file_cinfo, &memfd_cinfo, + &fanotify_mark_cinfo, &ext_file_cinfo, &memfd_cinfo, &io_uring_cinfo, }; /* These images are required to restore namespaces */ diff --git a/criu/files.c b/criu/files.c index 69ebc2e824..256ad5821f 100644 --- a/criu/files.c +++ b/criu/files.c @@ -49,6 +49,7 @@ #include "kerndat.h" #include "fdstore.h" #include "bpfmap.h" +#include "io_uring.h" #include "protobuf.h" #include "util.h" @@ -536,6 +537,8 @@ static int dump_one_file(struct pid *pid, int fd, int lfd, struct fd_opts *opts, else if (is_bpfmap_link(link)) ops = &bpfmap_dump_ops; #endif + else if (is_io_uring_link(link)) + ops = &io_uring_dump_ops; else return dump_unsupp_fd(&p, lfd, "anon", link, e); diff --git a/criu/image-desc.c b/criu/image-desc.c index d65d9c0986..b72df0d98e 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -107,6 +107,8 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY_F(BPFMAP_FILE, "bpfmap-file", O_NOBUF), FD_ENTRY_F(BPFMAP_DATA, "bpfmap-data", O_NOBUF), FD_ENTRY(APPARMOR, "apparmor"), + FD_ENTRY_F(IO_URING_FILE, "io_uring-file", O_NOBUF), + FD_ENTRY_F(IO_URING_DATA, "io_uring-data", O_NOBUF), [CR_FD_STATS] = { .fmt = "stats-%s", diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 9f369be645..5dd4ae822c 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -71,6 +71,8 @@ enum { CR_FD_MEMFD_INODE, CR_FD_BPFMAP_FILE, CR_FD_BPFMAP_DATA, + CR_FD_IO_URING_FILE, + CR_FD_IO_URING_DATA, _CR_FD_GLOB_TO, CR_FD_TMPFS_IMG, diff --git a/criu/include/image.h b/criu/include/image.h index 14659dbd24..13e0dbcc89 100644 --- a/criu/include/image.h +++ b/criu/include/image.h @@ -79,11 +79,12 @@ #define VMA_ANON_SHARED (1 << 8) #define VMA_ANON_PRIVATE (1 << 9) -#define VMA_AREA_SYSVIPC (1 << 10) -#define VMA_AREA_SOCKET (1 << 11) -#define VMA_AREA_VVAR (1 << 12) -#define VMA_AREA_AIORING (1 << 13) -#define VMA_AREA_MEMFD (1 << 14) +#define VMA_AREA_SYSVIPC (1 << 10) +#define VMA_AREA_SOCKET (1 << 11) +#define 
VMA_AREA_VVAR (1 << 12) +#define VMA_AREA_AIORING (1 << 13) +#define VMA_AREA_MEMFD (1 << 14) +#define VMA_AREA_IO_URING (1 << 15) #define VMA_CLOSE (1 << 28) #define VMA_NO_PROT_WRITE (1 << 29) diff --git a/criu/include/io_uring.h b/criu/include/io_uring.h new file mode 100644 index 0000000000..5c91a27245 --- /dev/null +++ b/criu/include/io_uring.h @@ -0,0 +1,108 @@ +#ifndef __CR_IO_URING_H__ +#define __CR_IO_URING_H__ + +#include + +#include "files.h" +#include "io_uring.pb-c.h" + +/* Definitions */ +struct __io_uring_restriction { + __u16 opcode; + union { + __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */ + __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */ + __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */ + }; + __u8 resv; + __u32 resv2[3]; +}; + +#ifndef IORING_SETUP_IOPOLL +#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */ +#endif +#ifndef IORING_SETUP_SQPOLL +#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */ +#endif +#ifndef IORING_SETUP_SQ_AFF +#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ +#endif +#ifndef IORING_SETUP_CQSIZE +#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ +#endif +#ifndef IORING_SETUP_ATTACH_WQ +#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ +#endif +#ifndef IORING_SETUP_R_DISABLED +#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ +#endif + +#ifndef IORING_OFF_SQ_RING +#define IORING_OFF_SQ_RING 0ULL +#endif +#ifndef IORING_OFF_CQ_RING +#define IORING_OFF_CQ_RING 0x8000000ULL +#endif +#ifndef IORING_OFF_SQES +#define IORING_OFF_SQES 0x10000000ULL +#endif + +#ifndef IOSQE_IO_DRAIN +#define IOSQE_IO_DRAIN (1U << 1) +#endif + +#define __IORING_RESTRICTION_REGISTER_OP 0 +#define __IORING_RESTRICTION_SQE_OP 1 +#define __IORING_RESTRICTION_SQE_FLAGS_ALLOWED 2 +#define __IORING_RESTRICTION_SQE_FLAGS_REQUIRED 3 +#define __IORING_REGISTER_PERSONALITY 9 +#define __IORING_REGISTER_RESTRICTIONS 11 +#define __IORING_REGISTER_ENABLE_RINGS 12 
+ +struct io_uring_file_info { + IoUringFileEntry *iofe; + struct file_desc d; +}; + +struct io_uring_data_info { + IoUringDataEntry *iode; +}; + +struct io_uring_group_desc { + struct list_head list; + gid_t group; + char group_name[32]; +}; + +struct io_uring_personality_desc { + int id; + uid_t uid; + uid_t euid; + uid_t suid; + uid_t fsuid; + gid_t gid; + gid_t egid; + gid_t sgid; + gid_t fsgid; + u32 cap_eff[CR_CAP_SIZE]; + size_t nr_groups; + struct list_head group_list; +}; + +struct io_uring_ctx; + +extern struct collect_image_info io_uring_cinfo; +extern struct collect_image_info io_uring_data_cinfo; +extern const struct fdtype_ops io_uring_dump_ops; + +int is_io_uring_link(char *link); +int io_uring_synchronize_fd(int fd); +int collect_io_uring_map(struct vma_area *vma); +int dump_io_uring_map(struct vma_area *vma); +int add_one_io_uring_mapping(uint64_t offset, ino_t inode); + +int io_uring_push_buf(struct io_uring_ctx *ctx, unsigned int idx, long long unsigned int address, unsigned int len); +int io_uring_push_personality(struct io_uring_ctx *ctx, struct io_uring_personality_desc *desc); +IoUringFileEntry *io_uring_get_iofe(struct io_uring_ctx *ctx); + +#endif /* __CR_IO_URING_H__ */ diff --git a/criu/include/magic.h b/criu/include/magic.h index 22d7218e45..b968828e72 100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -100,6 +100,8 @@ #define BPFMAP_FILE_MAGIC 0x57506142 /* Alapayevsk */ #define BPFMAP_DATA_MAGIC 0x64324033 /* Arkhangelsk */ #define APPARMOR_MAGIC 0x59423047 /* Nikolskoye */ +#define IO_URING_FILE_MAGIC 0x55403656 /* Butyn */ +#define IO_URING_DATA_MAGIC 0x54194822 /* Ulyanovsk */ #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index 3824de101f..dc4634978e 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -70,6 +70,8 @@ enum { PB_BPFMAP_FILE, PB_BPFMAP_DATA, PB_APPARMOR, + PB_IO_URING_FILE, + 
PB_IO_URING_DATA, /* PB_AUTOGEN_STOP */ diff --git a/criu/include/vma.h b/criu/include/vma.h index 541d6d6fd4..864509881d 100644 --- a/criu/include/vma.h +++ b/criu/include/vma.h @@ -35,7 +35,10 @@ struct vma_area { union { struct /* for dump */ { - int vm_socket_id; + union { + int vm_socket_id; + int io_uring_id; + }; char *aufs_rpath; /* path from aufs root */ char *aufs_fpath; /* full path from global root */ diff --git a/criu/io_uring.c b/criu/io_uring.c new file mode 100644 index 0000000000..906e8f4ac5 --- /dev/null +++ b/criu/io_uring.c @@ -0,0 +1,1036 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" +#include "bitmap.h" +#include "fdinfo.h" +#include "imgset.h" +#include "string.h" +#include "file-ids.h" +#include "io_uring.h" +#include "protobuf.h" +#include "common/list.h" + +#include + +#define CTX_F_SEEN_SQE (1UL << 0) /* SQE ring mapped */ +#define CTX_F_SEEN_SQE_ARR (1UL << 1) /* SQE array mapped */ +#define CTX_F_SEEN_CQE (1UL << 2) /* CQE ring mapped */ +#define CTX_F_SEEN_RINGS (CTX_F_SEEN_SQE | CTX_F_SEEN_SQE_ARR | CTX_F_SEEN_CQE) +#define CTX_F_SINGLE_MMAP (1UL << 3) /* SQE/CQE ring are in single mapping */ +#define CTX_F_DONE_FILE (1UL << 4) /* File dump done */ +#define CTX_F_DONE_DATA (1UL << 5) /* Data dump done */ +#define CTX_F_DONE_ALL (CTX_F_DONE_FILE | CTX_F_DONE_DATA) +#define CTX_F_INIT_IOFE (1UL << 6) /* Iofe set for ctx */ + +#define atomic_load_relaxed(x) __atomic_load_n((x), __ATOMIC_RELAXED) +#define atomic_load_acquire(x) __atomic_load_n((x), __ATOMIC_ACQUIRE) +#define atomic_store_release(x, val) __atomic_store_n((x), (val), __ATOMIC_RELEASE) + +#define IO_URING_HASH_TABLE_BITS 5 +#define IO_URING_HASH_TABLE_MAX (1UL << IO_URING_HASH_TABLE_BITS) +#define IO_URING_HASH_TABLE_MASK (IO_URING_HASH_TABLE_MAX - 1) + +#ifndef IORING_FEAT_SQPOLL_NONFIXED +#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7) +#endif + +struct io_uring_map { + struct 
io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + unsigned int *sq_array; + unsigned int *sq_ring_tail; + unsigned int *sq_ring_head; + unsigned int *cqe_ring_head; + unsigned int *cqe_ring_tail; + unsigned int *sq_ring_mask; + unsigned int *cqe_ring_mask; + size_t sq_len; + size_t sqe_len; + size_t cqe_len; +}; + +struct io_uring_buf { + struct list_head list; + unsigned int idx; + long long unsigned int address; + unsigned int len; +}; + +/* We store uid name in image to avoid mismatch on restore which could turn into + * a potential security risk, as user name may not match for the same UID and + * user may end up exposing resources to other users unintentionally. + */ +struct io_uring_personality { + struct list_head list; + struct io_uring_personality_desc desc; + char uid_name[32]; + char euid_name[32]; + char suid_name[32]; + char fsuid_name[32]; + char gid_name[32]; + char egid_name[32]; + char sgid_name[32]; + char fsgid_name[32]; +}; + +struct io_uring_ctx { + struct io_uring_ctx *next; + ino_t inode; + u32 id; + u32 state; + union { + struct { + IoUringFileEntry iofe; + struct io_uring_map map; + + struct list_head buf_list; + struct list_head pers_list; + size_t nr_pers; + } dump; + struct { + void *data; + size_t sqe_bytes; + size_t cqe_bytes; + size_t sq_arr_bytes; + } restore; + }; +}; + +static struct io_uring_ctx *ctx_hash_table[IO_URING_HASH_TABLE_MAX]; + +static struct io_uring_ctx *alloc_ctx(void) +{ + struct io_uring_ctx *ctx; + + ctx = xzalloc(sizeof(*ctx)); + if (!ctx) + return NULL; + + INIT_LIST_HEAD(&ctx->dump.buf_list); + INIT_LIST_HEAD(&ctx->dump.pers_list); + + return ctx; +} + +static struct io_uring_ctx *lookup_ctx(ino_t inode) +{ + struct io_uring_ctx *ctx; + + ctx = ctx_hash_table[inode & IO_URING_HASH_TABLE_MASK]; + for (; ctx; ctx = ctx->next) { + if (ctx->inode == inode) + break; + } + + return ctx; +} + +static void insert_ctx(ino_t inode, struct io_uring_ctx *ctx) +{ + struct io_uring_ctx **slot; + + slot = &ctx_hash_table[inode 
& IO_URING_HASH_TABLE_MASK]; + ctx->next = *slot; + *slot = ctx; +} + +static uint64_t offset_to_state(uint64_t offset) +{ + switch (offset) { + case IORING_OFF_SQ_RING: + return CTX_F_SEEN_SQE; + case IORING_OFF_CQ_RING: + return CTX_F_SEEN_CQE; + case IORING_OFF_SQES: + return CTX_F_SEEN_SQE_ARR; + default: + return 0; + } +} + +static const char *offset_to_str(uint64_t offset) +{ + switch (offset) { + case IORING_OFF_SQ_RING: + return "IORING_OFF_SQ_RING"; + case IORING_OFF_CQ_RING: + return "IORING_OFF_CQ_RING"; + case IORING_OFF_SQES: + return "IORING_OFF_SQES"; + default: + return "Unknown"; + } +} + +int io_uring_push_buf(struct io_uring_ctx *ctx, unsigned int idx, long long unsigned int address, unsigned int len) +{ + struct io_uring_buf *buf; + + buf = xzalloc(sizeof(*buf)); + if (!buf) + return -ENOMEM; + + buf->idx = idx; + buf->address = address; + buf->len = len; + list_add_tail(&buf->list, &ctx->dump.buf_list); + + return 0; +} + +int io_uring_push_personality(struct io_uring_ctx *ctx, struct io_uring_personality_desc *desc) +{ + struct io_uring_personality *p; + struct io_uring_group_desc *g; + struct passwd *pwd; + struct group *grp; + int grps = 0; + + p = xzalloc(sizeof(*p)); + if (!p) + return -ENOMEM; + INIT_LIST_HEAD(&p->list); + + p->desc = *desc; + INIT_LIST_HEAD(&p->desc.group_list); + +#define X(ptr, sub) \ + pwd = getpwuid(desc->sub); \ + if (pwd) \ + strlcpy(ptr->sub##_name, pwd->pw_name, sizeof(ptr->sub##_name)); + X(p, uid); + X(p, euid); + X(p, suid); + X(p, fsuid); +#undef X +#define X(ptr, sub) \ + grp = getgrgid(desc->sub); \ + if (grp) \ + strlcpy(ptr->sub##_name, grp->gr_name, sizeof(ptr->sub##_name)); + X(p, gid); + X(p, egid); + X(p, sgid); + X(p, fsgid); +#undef X + + list_for_each_entry(g, &desc->group_list, list) { + grp = getgrgid(g->group); + if (pwd) + strlcpy(g->group_name, grp->gr_name, sizeof(g->group_name)); + grps++; + } + BUG_ON(grps != desc->nr_groups); + + /* Migrate prepared group list from local desc to 
personality object */ + list_splice(&desc->group_list, &p->desc.group_list); + + /* ... and append personality object to ctx personality list */ + list_add_tail(&p->list, &ctx->dump.pers_list); + ctx->dump.nr_pers++; + return 0; +} + +IoUringFileEntry *io_uring_get_iofe(struct io_uring_ctx *ctx) +{ + return &ctx->dump.iofe; +} + +/* + * TODO: + * Handle IORING_REGISTER_BUFFERS + * Handle IORING_REGISTER_FILES + * Handle IORING_REGISTER_EVENTFD_{ASYNC} + * + * Handle wq_fd registration + * * Compare in-kernel ctx->sq_data to associate with open fd + * Audit memory cleanup after error at various places + */ + +static int sys_io_uring_setup(unsigned int entries, struct io_uring_params *p) +{ + return (int)syscall(__NR_io_uring_setup, entries, p); +} + +/* XXX: We can expose timeout here to not block indefinitely when trying to sync + * io_uring fd during dump stage, in case forward progress depends on one + * of the stopped threads. + */ +static int sys_io_uring_enter(int ring_fd, unsigned int to_submit, unsigned int min_complete, unsigned int flags) +{ + return (int)syscall(__NR_io_uring_enter, ring_fd, to_submit, min_complete, flags, NULL, 0); +} + +static int sys_io_uring_register(int ring_fd, unsigned int opcode, void *arg, unsigned int nr_args) +{ + return (int)syscall(__NR_io_uring_register, ring_fd, opcode, arg, nr_args); +} + +static int io_uring_restore_personality(int fd, IoUringPersonalityId *pers_id) +{ + struct cap_data data[_LINUX_CAPABILITY_U32S_3] = {}; + struct cap_header hdr; + pid_t pid; + int ret; + + /* fork into a new child to manipulate credentials and register personality */ + pid = fork(); + if (pid) { + pid = waitpid(pid, &ret, 0); + if (pid < 0) + return -errno; + return -ret; + } else if (!pid) { + u32 cap[2] = { + pers_id->cap_eff & 0xffffffff00000000, + pers_id->cap_eff & 0x00000000ffffffff, + }; + size_t n_grps = 0, sz = 32; + struct passwd *pwd; + bool group = false; + struct group *grp; + gid_t *groups; + +#define X(c, m, x) \ + if (c) 
{ \ + if (strcmp(c->m##_name, pers_id->x##_name)) \ + pr_warn("User name from image and system do not match for %s %d\n", group ? "GID" : "UID", \ + pers_id->x); \ + } else { \ + pr_warn("No user for %s %d on system\n", group ? "GID" : "UID", pers_id->x); \ + } + pwd = getpwuid(pers_id->uid); + X(pwd, pw, uid); + pwd = getpwuid(pers_id->euid); + X(pwd, pw, euid); + pwd = getpwuid(pers_id->suid); + X(pwd, pw, suid); + pwd = getpwuid(pers_id->fsuid); + X(pwd, pw, fsuid); + + group = true; + + grp = getgrgid(pers_id->gid); + X(grp, gr, gid); + grp = getgrgid(pers_id->egid); + X(grp, gr, egid); + grp = getgrgid(pers_id->sgid); + X(grp, gr, sgid); + grp = getgrgid(pers_id->fsgid); + X(grp, gr, fsgid); +#undef X + + ret = setresuid(pers_id->uid, pers_id->euid, pers_id->suid); + if (ret < 0) + goto end; + ret = setfsuid(pers_id->fsuid); + if (ret < 0) + goto end; + ret = setresgid(pers_id->gid, pers_id->euid, pers_id->suid); + if (ret < 0) + goto end; + ret = setfsgid(pers_id->fsgid); + if (ret < 0) + goto end; + + groups = xmalloc(sz * sizeof(*groups)); + if (!groups) { + errno = ENOMEM; + goto end; + } + + for (int i = 0; i < pers_id->n_group_id; i++) { + IoUringGroupId *gd = pers_id->group_id[i]; + struct group *grp; + gid_t *g; + + grp = getgrgid(gd->group); + if (!grp) + pr_warn("Group name not found for GID %d\n", gd->group); + if (strcmp(gd->group_name, grp->gr_name)) + pr_warn("Group name in image and on system do not match for GID %d\n", gd->group); + + if (sz <= n_grps) { + sz *= 2; + g = xrealloc(groups, sz * sizeof(*g)); + if (!g) { + xfree(groups); + errno = ENOMEM; + goto end; + } + groups = g; + } + groups[n_grps++] = gd->group; + } + + ret = setgroups(n_grps, groups); + xfree(groups); + if (ret < 0) { + errno = -ret; + goto end; + } + + hdr.version = _LINUX_CAPABILITY_VERSION_3; + hdr.pid = 0; + BUILD_BUG_ON(_LINUX_CAPABILITY_U32S_3 != CR_CAP_SIZE); + + for (int i = 0; i < CR_CAP_SIZE; i++) + data[i].eff = cap[i]; + + ret = syscall(__NR_capset, &hdr, 
data); + if (ret < 0) { + errno = -ret; + goto end; + } + + ret = sys_io_uring_register(fd, __IORING_REGISTER_PERSONALITY, NULL, 0); + if (ret < 0) { + errno = -ret; + goto end; + } + + exit(0); + end: + exit(errno); + } else { + return -errno; + } + + return 0; +} + +int is_io_uring_link(char *link) +{ + return is_anon_link_type(link, "[io_uring]"); +} + +static void io_uring_submit_nop(struct io_uring_map *map, bool barrier) +{ + unsigned int tail, index; + + BUG_ON(!map); + + tail = atomic_load_acquire(map->sq_ring_tail); + index = tail & *map->sq_ring_mask; + map->sqe[index].opcode = IORING_OP_NOP; + if (barrier) + map->sqe[index].flags = IOSQE_IO_DRAIN; + map->sq_array[index] = index; + atomic_store_release(map->sq_ring_tail, tail + 1); +} + +static int io_uring_consume_n(struct io_uring_map *map, int n) +{ + unsigned int head; + int ret; + + BUG_ON(!map); + + head = *map->cqe_ring_head; + ret = map->cqe[head & *map->cqe_ring_mask].res; + atomic_store_release(map->cqe_ring_head, head + n); + + return ret; +} + +static void io_uring_consume_all(struct io_uring_map *map) +{ + BUG_ON(!map); + + (void)io_uring_consume_n(map, atomic_load_acquire(map->cqe_ring_tail) - *map->cqe_ring_head); +} + +static int map_io_uring_fd(int fd, struct io_uring_params *p, struct io_uring_map *map) +{ + int ret = 0; + + BUG_ON(!p); + BUG_ON(!map); + + /* XXX: Optimize using FEAT_SINGLE_MMAP */ + map->sq_len = p->sq_off.array + p->sq_entries * sizeof(unsigned int); + map->cqe_len = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); + map->sqe_len = p->sq_entries * sizeof(struct io_uring_sqe); + + map->sq_array = + mmap(NULL, map->sq_len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); + if (map->sq_array == MAP_FAILED) { + ret = -errno; + pr_perror("Failed to mmap SQ array ring"); + goto end; + } + + map->cqe = mmap(NULL, map->cqe_len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); + if (map->cqe == MAP_FAILED) { + 
ret = -errno; + pr_perror("Failed to mmap CQE ring"); + goto end_sq_ptr; + } + + map->sq_ring_head = map->sq_array + p->sq_off.head; + map->sq_ring_tail = map->sq_array + p->sq_off.tail; + map->cqe_ring_head = (unsigned int *)map->cqe + p->cq_off.head; + map->cqe_ring_tail = (unsigned int *)map->cqe + p->cq_off.tail; + map->sq_ring_mask = map->sq_array + p->sq_off.ring_mask; + map->cqe_ring_mask = (unsigned int *)map->cqe + p->cq_off.ring_mask; + map->sq_array += p->sq_off.array; + + map->sqe = mmap(NULL, map->sqe_len, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES); + if (map->sqe == MAP_FAILED) { + ret = -errno; + pr_perror("Failed to mmap SQE ring"); + goto end_cqe_ptr; + } + + return ret; + + munmap(map->sqe, map->sqe_len); +end_cqe_ptr: + munmap(map->cqe, map->cqe_len); +end_sq_ptr: + munmap(map->sq_array, map->sq_len); +end: + return ret; +} + +static void unmap_io_uring_fd(struct io_uring_map *map) +{ + BUG_ON(!map); + BUG_ON(!map->sqe); + BUG_ON(!map->cqe); + BUG_ON(!map->sq_array); + + munmap(map->sqe, map->sqe_len); + munmap(map->cqe, map->cqe_len); + munmap(map->sq_array, map->sq_len); +} + +int io_uring_synchronize_fd(int fd) +{ + struct io_uring_map map = {}; + struct io_uring_params p; + struct io_uring_ctx *ctx; + unsigned int rem; + struct stat st; + bool sq_poll; + int ret; + + if (fd < 0) + return fd; + + if (fstat(fd, &st)) + return -errno; + + ctx = lookup_ctx(st.st_ino); + if (!ctx) + return -ENOENT; + + assert("File Entry must be unitialized" && !(ctx->state & CTX_F_INIT_IOFE)); + /* Obtains sq_off.array, while the rest are offsets we can get from a + * io_uring_setup call. Also caches this in ctx so that we don't have to + * parse once again. 
+ */ + if (parse_fdinfo(fd, FD_TYPES__IO_URING, ctx)) + return -EINVAL; + ctx->state |= CTX_F_INIT_IOFE; + return 0; + + sq_poll = ctx->dump.iofe.setup_flags & IORING_SETUP_SQPOLL; + + memset(&p, 0, sizeof(p)); + ret = sys_io_uring_setup(1, &p); + if (ret < 0) + return -errno; + close(ret); + + p.sq_off.array = ctx->dump.iofe.sq_off_array; + p.sq_entries = ctx->dump.iofe.sq_entries; + p.cq_entries = ctx->dump.iofe.cq_entries; + + ret = map_io_uring_fd(fd, &p, &map); + if (ret < 0) + return ret; + + /* Preserve head/tail and ring mask */ + ctx->dump.iofe.sq_head = atomic_load_acquire(map.sq_ring_head); + ctx->dump.iofe.sq_tail = *map.sq_ring_tail; + ctx->dump.iofe.cqe_head = *map.cqe_ring_head; + ctx->dump.iofe.sq_ring_mask = *map.sq_ring_mask; + + io_uring_consume_all(&map); + + rem = ctx->dump.iofe.sq_tail - ctx->dump.iofe.sq_head; + /* XXX: Add timeout to gracefully handle indefinite blocking */ + ret = sys_io_uring_enter(fd, rem, rem, IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP); + if (ret < 0) { + ret = -errno; + pr_perror("Failed to call io_uring_enter"); + } + + ctx->dump.iofe.cqe_tail = atomic_load_acquire(map.cqe_ring_tail); + if (sq_poll) + ctx->dump.iofe.sq_head = ctx->dump.iofe.sq_tail; + + ctx->dump.map = map; + return ret; +} + +static int replay_io_uring_data(int fd, struct io_uring_ctx *ctx, struct io_uring_params *p, IoUringFileEntry *iofe) +{ + unsigned int nop_count, cons_count; + struct io_uring_map map; + int ret = 0, flags = 0; + void *data; + + BUG_ON(!ctx); + BUG_ON(!p); + BUG_ON(!iofe); + BUG_ON(p->sq_entries != ctx->restore.sqe_bytes / sizeof(struct io_uring_sqe)); + BUG_ON(p->cq_entries != ctx->restore.cqe_bytes / sizeof(struct io_uring_cqe)); + BUG_ON(p->sq_entries != ctx->restore.sq_arr_bytes / sizeof(unsigned int)); + + /* To replay the data, we first need to advance head and tail to the + * values they were when the io_uring instance was dumped. 
At the ABI + * level the request and completion structure have same size for all + * operations, so filling IORING_OP_NOP operations and reaping them + * adjust the kernel's offsets, after which we overwrite the ring with + * data we dumped in the image. + */ + if (p->flags & IORING_SETUP_SQPOLL) + flags |= IORING_ENTER_SQ_WAKEUP; + + ret = map_io_uring_fd(fd, p, &map); + if (ret < 0) + return ret; + + nop_count = iofe->sq_head & iofe->sq_ring_mask; + cons_count = iofe->cqe_tail & iofe->cq_ring_mask; + + for (int i = 0; i < nop_count; i++) + io_uring_submit_nop(&map, false); + + ret = sys_io_uring_enter(fd, nop_count, nop_count, IORING_ENTER_GETEVENTS | flags); + if (ret < 0) { + pr_perror("Failed to call io_uring_enter"); + goto end; + } + + io_uring_consume_n(&map, cons_count); + + data = ctx->restore.data; + memcpy(map.sqe, data, ctx->restore.sqe_bytes); + data += ctx->restore.sqe_bytes; + memcpy(map.cqe, data, ctx->restore.cqe_bytes); + data += ctx->restore.cqe_bytes; + memcpy(map.sq_array, data, ctx->restore.sq_arr_bytes); + +end: + xfree(ctx->restore.data); + unmap_io_uring_fd(&map); + return ret; +} + +static int dump_one_io_uring_data(struct io_uring_ctx *ctx, IoUringFileEntry *iofe, int lfd, const struct fd_parms *p) +{ + IoUringDataEntry iode = IO_URING_DATA_ENTRY__INIT; + struct io_uring_map *map; + struct cr_img *img; + int ret; + + map = &ctx->dump.map; + + BUG_ON(!map->sqe); + BUG_ON(!map->cqe); + BUG_ON(!map->sq_array); + + img = img_from_set(glob_imgset, CR_FD_IO_URING_DATA); + BUG_ON(ctx->state & CTX_F_DONE_DATA); + + iode.id = ctx->inode; + iode.sqe_bytes = sizeof(struct io_uring_sqe) * ctx->dump.iofe.sq_entries; + iode.cqe_bytes = sizeof(struct io_uring_cqe) * ctx->dump.iofe.cq_entries; + iode.sq_arr_bytes = sizeof(unsigned int) * ctx->dump.iofe.sq_entries; + + ret = -1; + if (pb_write_one(img, &iode, PB_IO_URING_DATA)) + goto end; + + /* Layout |SQE|CQE|SQARR| */ + if (write(img_raw_fd(img), map->sqe, iode.sqe_bytes) != iode.sqe_bytes) + goto 
end; + if (write(img_raw_fd(img), map->cqe, iode.cqe_bytes) != iode.cqe_bytes) + goto end; + if (write(img_raw_fd(img), map->sq_array, iode.sq_arr_bytes) != iode.sq_arr_bytes) + goto end; + + ret = 0; + ctx->state |= CTX_F_DONE_DATA; +end: + unmap_io_uring_fd(map); + return ret; +} + +static int dump_one_io_uring(int lfd, u32 id, const struct fd_parms *p) +{ + IoUringFileEntry iofe = IO_URING_FILE_ENTRY__INIT; + struct io_uring_personality *per_i, *ptmp; + struct io_uring_buf *buf_i, *btmp; + FileEntry fe = FILE_ENTRY__INIT; + struct io_uring_ctx *ctx; + int i = 0, j = 0; + + ctx = lookup_ctx(p->stat.st_ino); + if (!ctx) + return -ENOENT; + + BUG_ON(!(ctx->state & CTX_F_INIT_IOFE)); + BUG_ON(ctx->state & CTX_F_DONE_FILE); + + iofe.id = ctx->id = id; + iofe.inode = ctx->inode; + iofe.flags = p->flags; + iofe.fown = (FownEntry *)&p->fown; + + fe.type = FD_TYPES__IO_URING; + fe.id = iofe.id; + fe.io_uring = &iofe; + + list_for_each_entry_safe(buf_i, btmp, &ctx->dump.buf_list, list) { + /* XXX: match struct page address for buf_i->idx from eBPF + * iterator output + */ + xfree(buf_i); + } + + BUG_ON(!list_empty(&ctx->dump.pers_list) && !ctx->dump.nr_pers); + ctx->dump.iofe.n_pers_id = ctx->dump.nr_pers; + ctx->dump.iofe.pers_id = xzalloc(pb_repeated_size(&ctx->dump.iofe, pers_id)); + if (!ctx->dump.iofe.pers_id) + return -ENOMEM; + + list_for_each_entry_safe(per_i, ptmp, &ctx->dump.pers_list, list) { + struct io_uring_group_desc *grp_i, *gtmp; + IoUringPersonalityId *pers_id; + + BUG_ON(i + 1 != per_i->desc.id); + ctx->dump.iofe.pers_id[i] = xzalloc(sizeof(*ctx->dump.iofe.pers_id[i])); + if (!ctx->dump.iofe.pers_id[i]) + return -ENOMEM; + + pers_id = ctx->dump.iofe.pers_id[i]; + +#define X(x) pers_id->x = per_i->desc.x; + X(uid); + X(euid); + X(suid); + X(fsuid); + X(gid); + X(egid); + X(sgid); + X(fsgid); +#undef X + +#define X(x) \ + pers_id->x##_name = xstrdup(per_i->x##_name); \ + if (!pers_id->x##_name) \ + return -ENOMEM; + X(uid); + X(euid); + X(suid); + 
X(fsuid); + X(gid); + X(egid); + X(sgid); + X(fsgid); +#undef X + memcpy(&pers_id->cap_eff, per_i->desc.cap_eff, sizeof(per_i->desc.cap_eff)); + BUG_ON(!list_empty(&per_i->desc.group_list) && !per_i->desc.nr_groups); + pers_id->n_group_id = per_i->desc.nr_groups; + pers_id->group_id = xzalloc(pb_repeated_size(pers_id, group_id)); + if (!pers_id->group_id) + return -ENOMEM; + /* Now, iterate over group list for personality, and dump each + * group ID and group name + */ + j = 0; + list_for_each_entry_safe(grp_i, gtmp, &per_i->desc.group_list, list) { + pers_id->group_id[j] = xzalloc(sizeof(*pers_id->group_id[j])); + if (!pers_id->group_id[j]) + return -ENOMEM; + pers_id->group_id[j]->group = grp_i->group; + pers_id->group_id[j]->group_name = xstrdup(grp_i->group_name); + if (!pers_id->group_id[j]->group_name) + return -ENOMEM; + j++; + xfree(grp_i); + } + BUG_ON(j != per_i->desc.nr_groups); + i++; + xfree(per_i); + } + BUG_ON(i != ctx->dump.nr_pers); + + if (pb_write_one(img_from_set(glob_imgset, CR_FD_FILES), &fe, PB_FILE)) + return -1; + ctx->state |= CTX_F_DONE_FILE; + + return dump_one_io_uring_data(ctx, &iofe, lfd, p); +} + +const struct fdtype_ops io_uring_dump_ops = { + .type = FD_TYPES__IO_URING, + .dump = dump_one_io_uring, +}; + +static int open_io_uring_desc(struct file_desc *d, int *new_fd) +{ + struct __io_uring_restriction res[4]; + struct io_uring_file_info *iofi; + struct io_uring_ctx *ctx; + struct io_uring_params p; + IoUringFileEntry *iofe; + int fd, ret = -1; + + iofi = container_of(d, struct io_uring_file_info, d); + iofe = iofi->iofe; + + /* XXX: when we handle IORING_REGISTER_FILES, and wq_fd registration, + * handle post_open processing here to re-register files... + * + * For wq_fd, there is a parent io_uring fd that will be restored first + * (without any other dependencies on io_uring instances). 
Cycles cannot + * be created as io_uring won't allow IORING_REGISTER_FILES for another + * io_uring, so we cannot deadlock, and wq_fd registration won't be + * circular either. wq_fd is determined using ctx->sq_data matching in + * eBPF iteration. + */ + ctx = lookup_ctx(iofe->id); + if (!ctx) + return -ENOENT; + + memset(&p, 0, sizeof(p)); + p.sq_thread_cpu = iofe->sq_thread_cpu; + p.sq_thread_idle = iofe->sq_thread_idle; + p.cq_entries = iofe->cq_entries; + p.flags = iofe->setup_flags | IORING_SETUP_CQSIZE; + + if (iofe->restrictions) + p.flags |= IORING_SETUP_R_DISABLED; + + fd = sys_io_uring_setup(iofe->sq_entries, &p); + if (fd < 0) + return -errno; + + for (int i = 0; i < iofe->n_pers_id; i++) { + IoUringPersonalityId *pers_id = iofe->pers_id[i]; + + ret = io_uring_restore_personality(fd, pers_id); + if (ret < 0) + goto end; + } + + if (iofe->restrictions) { + int nr = 0; + + if (iofe->reg_op) { + res[nr].opcode = __IORING_RESTRICTION_REGISTER_OP; + res[nr++].register_op = iofe->reg_op; + } + + if (iofe->sqe_op) { + res[nr].opcode = __IORING_RESTRICTION_SQE_OP; + res[nr++].sqe_op = iofe->sqe_op; + } + + if (iofe->sqe_flags_allowed) { + res[nr].opcode = __IORING_RESTRICTION_SQE_FLAGS_ALLOWED; + res[nr++].sqe_flags = iofe->sqe_flags_allowed; + } + + if (iofe->sqe_flags_required) { + res[nr].opcode = __IORING_RESTRICTION_SQE_FLAGS_REQUIRED; + res[nr++].sqe_flags = iofe->sqe_flags_required; + } + + BUG_ON(nr >= ARRAY_SIZE(res)); + if (nr) { + ret = sys_io_uring_register(fd, __IORING_REGISTER_RESTRICTIONS, res, nr); + if (ret < 0) + goto end; + } + + ret = sys_io_uring_register(fd, __IORING_REGISTER_ENABLE_RINGS, NULL, 0); + if (ret < 0) + goto end; + } + + if ((p.flags & IORING_SETUP_SQPOLL) && !iofe->nr_user_files && !(p.features & IORING_FEAT_SQPOLL_NONFIXED)) { + ret = -ENOTSUP; + pr_err("Dumped io_uring instance %#08x has IORING_SETUP_SQPOLL flag, but no registered files,\n" + "and system does not support SQPOLL in this mode, as IORING_FEAT_SQPOLL_NONFIXED 
\n" + "feature is missing\n", + iofe->id); + goto end; + } + + if (rst_file_params(fd, iofe->fown, iofi->iofe->flags)) { + pr_perror("Can't restore file params on io_uring %#08x", iofe->id); + goto end; + } + + ret = replay_io_uring_data(fd, ctx, &p, iofe); + if (ret < 0) + goto end; + + *new_fd = fd; + + return 0; +end: + close(fd); + return ret; +} + +static struct file_desc_ops io_uring_desc_ops = { + .type = FD_TYPES__IO_URING, + .open = open_io_uring_desc, +}; + +static int collect_one_io_uring(void *o, ProtobufCMessage *base, struct cr_img *i) +{ + struct io_uring_file_info *iofi = o; + struct io_uring_ctx *ctx; + + ctx = alloc_ctx(); + if (!ctx) + return -ENOMEM; + + iofi->iofe = pb_msg(base, IoUringFileEntry); + ctx->inode = iofi->iofe->id; + insert_ctx(iofi->iofe->id, ctx); + return file_desc_add(&iofi->d, iofi->iofe->id, &io_uring_desc_ops); +} + +struct collect_image_info io_uring_cinfo = { + .fd_type = CR_FD_IO_URING_FILE, + .pb_type = PB_IO_URING_FILE, + .priv_size = sizeof(struct io_uring_file_info), + .collect = collect_one_io_uring, +}; + +static int collect_one_io_uring_data(void *o, ProtobufCMessage *base, struct cr_img *i) +{ + struct io_uring_data_info *iodi = o; + struct io_uring_ctx *ctx; + size_t bytes; + + iodi->iode = pb_msg(base, IoUringDataEntry); + + ctx = lookup_ctx(iodi->iode->id); + if (!ctx) { + /* Should have been inserted by file collect stage */ + pr_err("Failed to failed io_uring restore ctx for id %#08lx\n", (unsigned long)iodi->iode->id); + return -ENOENT; + } + + bytes = iodi->iode->sqe_bytes + iodi->iode->cqe_bytes + iodi->iode->sq_arr_bytes; + ctx->restore.data = xmalloc(bytes); + if (!ctx->restore.data) + return -ENOMEM; + + return read_img_buf(i, ctx->restore.data, bytes); +} + +struct collect_image_info io_uring_data_cinfo = { + .fd_type = CR_FD_IO_URING_DATA, + .pb_type = PB_IO_URING_DATA, + .priv_size = sizeof(struct io_uring_data_info), + .collect = collect_one_io_uring_data, +}; + +static int open_io_uring_map(int 
pid, struct vma_area *vma) +{ + struct fdinfo_list_entry *fle; + VmaEntry *vme = vma->e; + struct file_desc *fd; + + fd = find_file_desc_raw(FD_TYPES__IO_URING, vme->shmid); + if (!fd) + return -1; + + list_for_each_entry(fle, &fd->fd_info_head, desc_list) { + if (fle->pid == pid) { + int fd; + + fd = dup(fle->fe->fd); + if (fd < 0) + return -errno; + + vme->fd = fd; + return 0; + } + } + + return -ENOENT; +} + +int collect_io_uring_map(struct vma_area *vma) +{ + vma->vm_open = open_io_uring_map; + return 0; +} + +int dump_io_uring_map(struct vma_area *vma) +{ + struct io_uring_ctx *ctx; + + ctx = lookup_ctx(vma->io_uring_id); + if (!ctx) + return -ENOENT; + + if (!(ctx->state & CTX_F_DONE_ALL)) { + pr_err("Mapping(s) found for io_uring but no fd open, cannot dump " + "io_uring instance without access to io_uring fd corresponding " + "to the mapping\n"); + return -ENOTSUP; + } + + vma->e->shmid = ctx->inode; + return 0; +} + +int add_one_io_uring_mapping(uint64_t offset, ino_t inode) +{ + struct io_uring_ctx *ctx; + uint64_t flag; + + pr_debug("Processing for io_uring mapping at offset=%s\n", offset_to_str(offset)); + flag = offset_to_state(offset); + if (!flag) { + pr_err("Invalid offset of mapping offset=%" PRIu64 "\n", offset); + return -EINVAL; + } + + ctx = lookup_ctx(inode); + if (!ctx) { + pr_debug("No io_uring ctx associated with inode=%lu, creating one...\n", (unsigned long)inode); + + ctx = alloc_ctx(); + if (!ctx) + return -ENOMEM; + + ctx->inode = inode; + insert_ctx(ctx->inode, ctx); + } + + ctx->state |= flag; + return 0; +} diff --git a/criu/mem.c b/criu/mem.c index ca74bfbb65..7a1f355521 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -31,6 +31,9 @@ #include "prctl.h" #include "compel/infect-util.h" #include "pidfd-store.h" +#include "compel/plugins/std/syscall-codes.h" +#include "common/scm.h" +#include "io_uring.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" @@ -125,6 +128,8 @@ bool should_dump_page(VmaEntry *vmae, u64 pme) return false; if 
(vma_entry_is(vmae, VMA_AREA_AIORING)) return true; + if (vma_entry_is(vmae, VMA_AREA_IO_URING)) + return false; if ((pme & (PME_PRESENT | PME_SWAP)) && !__page_is_zero(pme)) return true; @@ -704,6 +709,8 @@ int prepare_mm_pid(struct pstree_item *i) ret = collect_filemap(vma); else if (vma_area_is(vma, VMA_AREA_SOCKET)) ret = collect_socket_map(vma); + else if (vma_area_is(vma, VMA_AREA_IO_URING)) + ret = collect_io_uring_map(vma); else ret = 0; if (ret) diff --git a/criu/proc_parse.c b/criu/proc_parse.c index 3017a64e11..5c593f426e 100644 --- a/criu/proc_parse.c +++ b/criu/proc_parse.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include "types.h" @@ -41,6 +42,7 @@ #include "path.h" #include "fault-injection.h" #include "memfd.h" +#include "io_uring.h" #include "protobuf.h" #include "images/fdinfo.pb-c.h" @@ -76,7 +78,8 @@ static char *buf = __buf.buf; * This is how AIO ring buffers look like in proc */ -#define AIO_FNAME "/[aio]" +#define AIO_FNAME "/[aio]" +#define IO_URING_FNAME "anon_inode:[io_uring]" /* check the @line starts with "%lx-%lx" format */ static bool __is_vma_range_fmt(char *line) @@ -185,7 +188,8 @@ static void parse_vma_vmflags(char *buf, struct vma_area *vma_area) * only exception is VVAR area that mapped by the kernel as * VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP */ - if (io_pf && !vma_area_is(vma_area, VMA_AREA_VVAR) && !vma_entry_is(vma_area->e, VMA_FILE_SHARED)) + if (io_pf && !vma_area_is(vma_area, VMA_AREA_IO_URING) && !vma_area_is(vma_area, VMA_AREA_VVAR) && + !vma_entry_is(vma_area->e, VMA_FILE_SHARED)) vma_area->e->status |= VMA_UNSUPP; if (vma_area->e->madv) @@ -388,14 +392,20 @@ static int vma_get_mapfile(const char *fname, struct vma_area *vma, DIR *mfd, st /* * If vfi is equal (!) and negative @vm_file_fd -- - * we have nothing to borrow for sure. 
+ * we have nothing to borrow for sure, unless it's io_uring */ - if (*vm_file_fd < 0) + if (*vm_file_fd < 0 && !vma_area_is(prev, VMA_AREA_IO_URING)) return 0; pr_debug("vma %" PRIx64 " borrows vfi from previous %" PRIx64 "\n", vma->e->start, prev->e->start); - if (prev->e->status & VMA_AREA_SOCKET) + if (prev->e->status & VMA_AREA_SOCKET) { vma->e->status |= VMA_AREA_SOCKET | VMA_AREA_REGULAR; + } else if (prev->e->status & VMA_AREA_IO_URING) { + vma->e->status |= VMA_AREA_IO_URING | VMA_AREA_REGULAR; + vma->io_uring_id = prev->io_uring_id; + /* Add page to io_uring ctx */ + add_one_io_uring_mapping(vma->e->pgoff, vma->io_uring_id); + } /* * FIXME -- in theory there can be vmas that have @@ -452,6 +462,16 @@ static int vma_get_mapfile(const char *fname, struct vma_area *vma, DIR *mfd, st return 0; } + if (!strncmp(fname, IO_URING_FNAME, sizeof(IO_URING_FNAME) - 1)) { + pr_debug("Marking VMA as IO_URING | REGULAR for inode %lu\n", + (unsigned long)buf.st_ino); + vma->io_uring_id = buf.st_ino; + vma->e->status |= VMA_AREA_IO_URING | VMA_AREA_REGULAR; + /* Add page to io_uring ctx */ + add_one_io_uring_mapping(vma->e->pgoff, vma->io_uring_id); + return 0; + } + pr_err("Unknown shit %o (%s)\n", buf.st_mode, fname); return -1; } @@ -637,6 +657,11 @@ static int handle_vma(pid_t pid, struct vma_area *vma_area, const char *file_pat */ if (vma_area->mnt_id != -1 && get_fd_mntid(*vm_file_fd, &vma_area->mnt_id)) return -1; + } else if (vma_area->e->status & VMA_AREA_IO_URING) { + if (vma_area->e->flags & MAP_PRIVATE) + vma_area->e->status |= VMA_FILE_PRIVATE; + else + vma_area->e->status |= VMA_FILE_SHARED; } else { /* * No file but mapping -- anonymous one. 
@@ -1798,7 +1823,263 @@ static int parse_bpfmap(struct bfd *f, char *str, BpfmapFileEntry *bpf) #define fdinfo_field(str, field) !strncmp(str, field ":", sizeof(field)) +static int parse_io_uring(struct bfd *f, char *str, struct io_uring_ctx *ctx) +{ + IoUringFileEntry *iofe = io_uring_get_iofe(ctx); + unsigned int nr; + pid_t pid; + int r; + + /* + * Format is: + * + * SqThread: %d + * SqThreadCpu: %d + * UserFiles: %u (number of registered files) (OPTIONAL DATA) + * %5u: %s (idx: filename) + * UserBufs: %u (number of registered buffers) (OPTIONAL DATA) + * %5u: 0x%llx/%u (idx: 0xaddr/len) + * Personalities: (OPTIONAL HEADING and DATA) + * %5d (id) + * Uid: %llu %llu %llu %llu (uid euid suid fsuid) + * Gid: %llu %llu %llu %llu (gid egid sgid fsgid) + * Groups: %llu %llu ... %llu (groups) + * CapEff: %llx ... %llx + * PollList: (OPTIONAL DATA) + * op=%d, task_works=%d (op=opcode, task_works=0 or 1) + * --- (Added by patch) + * Locked: %d (0 or 1) + * SqThreadIdle: %u + * SetupFlags: 0x%x + * SqEntries: %u + * CqEntries: %u + * SqOffArray: %u + * ... 
(OPTIONAL FIELDS) + * RestrictRegisterOp: %s (bitmap) + * RestrictSqeOp: %s (bitmap) + * RestrictSqeFlagsAllowed: %c (u8) + * RestrictSqeFlagsRequired: %c (u8) + */ + + if (sscanf(str, "SqThread: %d", &pid) != 1) + goto end; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "SqThreadCpu: %d", &iofe->sq_thread_cpu) != 1) + goto end; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "UserFiles: %u", &nr) != 1) + goto end; + if (nr) { + /* Not supported, yet */ + pr_warn("Registered files dump unsupported\n"); + return -ENOTSUP; + do { + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (!strncmp(str, "UserBufs", sizeof("UserBufs") - 1)) + break; + /* skip line, we use eBPF iterator to collect the file + * set registered with io_uring */ + } while (true); + } + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "UserBufs: %u", &nr) != 1) + goto end; + for (int i = 0; i < nr; i++) { + long long unsigned int address; + unsigned int idx, len; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "%5u: 0x%llx/%u", &idx, &address, &len) != 3) + goto end; + + if (io_uring_push_buf(ctx, idx, address, len)) + goto end; + } + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (!strncmp(str, "Personalities", sizeof("Personalities") - 1)) { + for (;;) { + struct io_uring_personality_desc desc = {}; + struct io_uring_group_desc *g, *gtmp; + char *tok; + int id; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + str = str + strspn(str, " "); + if (!strncmp(str, "PollList", sizeof("PollList") - 1)) + break; + else if (sscanf(str, "%5d", &id) != 1) + goto end; + desc.id = id; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + str = str + strspn(str, " "); + if (sscanf(str, " Uid: %u %u %u %u", &desc.uid, &desc.euid, &desc.suid, &desc.fsuid) != 4) + goto end; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + 
goto end; + if (sscanf(str, " Gid: %u %u %u %u", &desc.gid, &desc.egid, &desc.sgid, &desc.fsgid) != 4) + goto end; + + INIT_LIST_HEAD(&desc.group_list); + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + str = strstr(str, ":"); + tok = str + 2; + while ((tok = strtok(tok, " "))) { + struct io_uring_group_desc *gdesc; + + gdesc = xzalloc(sizeof(*gdesc)); + if (!gdesc) + goto end_free; + INIT_LIST_HEAD(&gdesc->list); + + if (sscanf(tok, "%u", &gdesc->group) != 1) + goto end_free; + list_add_tail(&gdesc->list, &desc.group_list); + desc.nr_groups++; + tok = NULL; + } + + /* CapEff */ + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end_free; + str = strstr(str, ":"); + str += 2; + if (cap_parse(str, desc.cap_eff)) + goto end_free; + + if (io_uring_push_personality(ctx, &desc)) + goto end_free; + continue; + end_free: + list_for_each_entry_safe(g, gtmp, &desc.group_list, list) + xfree(g); + goto end; + } + } + + /* PollList: */ + for (; str; str = breadline(f)) { + if (IS_ERR(str)) + goto end; + /* Skip leading space */ + str = str + strspn(str, " "); + if (!strncmp(str, "op", sizeof("op") - 1)) + continue; + else + break; + } + if (IS_ERR_OR_NULL(str)) + goto end; + + /* str obtained from above */ + if (sscanf(str, "Locked: %d", &r) != 1) + goto end; + if (!r) { + pr_err("fdinfo read for io_uring could not take ctx->uring_lock inside kernel\n" + "This indicates that the ring is not idle, hence cannot proceed\n"); + goto end; + } + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "SqThreadIdle: %u", &iofe->sq_thread_idle) != 1) + goto end; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "SetupFlags: %u", &iofe->setup_flags) != 1) + goto end; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "SqEntries: %u", &iofe->sq_entries) != 1) + goto end; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "CqEntries: %u", &iofe->cq_entries) 
!= 1) + goto end; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "SqOffArray: %u", &iofe->sq_off_array) != 1) + goto end; + + /* Printing restrictions is optional */ + str = breadline(f); + if (IS_ERR(str)) + goto end; + if (!str) + return 0; + nr = 0; + /* Upper bits are unused in bitmap */ + if (sscanf(str, "RestrictRegisterOp: %x,%x", &nr, &iofe->reg_op) != 2) { + /* 32-bit long? */ + if (sscanf(str, "RestrictRegisterOp: %x", &iofe->reg_op) != 1) + goto end; + } + BUG_ON(nr); + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "RestrictSqeOp: %x,%x", &nr, &iofe->sqe_op) != 2) { + if (sscanf(str, "RestrictSqeOp: %x", &iofe->sqe_op) != 1) + goto end; + } + BUG_ON(nr); + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "RestrictSqeFlagsAllowed: 0x%x", &iofe->sqe_flags_allowed) != 1) + goto end; + + str = breadline(f); + if (IS_ERR_OR_NULL(str)) + goto end; + if (sscanf(str, "RestrictSqeFlagsRequired: 0x%x", &iofe->sqe_flags_required) != 1) + goto end; + iofe->restrictions = true; + + return 0; +end: + pr_err("Incomplete io_uring fdinfo support\n"); + return -1; +} + static int parse_file_lock_buf(char *buf, struct file_lock *fl, bool is_blocked); + static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg) { struct bfd f; @@ -2118,6 +2399,21 @@ static int parse_fdinfo_pid_s(int pid, int fd, int type, void *arg) entry_met = true; continue; } + if (fdinfo_field(str, "ino")) { + if (type != FD_TYPES__IO_URING) + goto parse_err; + + str = breadline(&f); + if (IS_ERR_OR_NULL(str)) + goto parse_err; + + ret = parse_io_uring(&f, str, arg); + if (ret) + goto parse_err; + + entry_met = true; + continue; + } } exit_code = 0; diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index ff16b9f5be..9c267de20b 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -68,6 +68,7 @@ #include "images/bpfmap-file.pb-c.h" #include "images/bpfmap-data.pb-c.h" #include 
"images/apparmor.pb-c.h" +#include "images/io_uring.pb-c.h" struct cr_pb_message_desc cr_pb_descs[PB_MAX]; diff --git a/criu/util.c b/criu/util.c index 8228221861..4b924ae0d9 100644 --- a/criu/util.c +++ b/criu/util.c @@ -188,6 +188,7 @@ static void vma_opt_str(const struct vma_area *v, char *opt) opt2s(VMA_ANON_PRIVATE, "ap"); opt2s(VMA_AREA_SYSVIPC, "sysv"); opt2s(VMA_AREA_SOCKET, "sk"); + opt2s(VMA_AREA_IO_URING, "io_uring"); #undef opt2s } diff --git a/images/Makefile b/images/Makefile index 2eaeb7cad2..58e585ad52 100644 --- a/images/Makefile +++ b/images/Makefile @@ -71,6 +71,7 @@ proto-obj-y += img-streamer.o proto-obj-y += bpfmap-file.o proto-obj-y += bpfmap-data.o proto-obj-y += apparmor.o +proto-obj-y += io_uring.o CFLAGS += -iquote $(obj)/ diff --git a/images/fdinfo.proto b/images/fdinfo.proto index 88f1c11860..7530315448 100644 --- a/images/fdinfo.proto +++ b/images/fdinfo.proto @@ -20,6 +20,7 @@ import "pipe.proto"; import "tty.proto"; import "memfd.proto"; import "bpfmap-file.proto"; +import "io_uring.proto"; enum fd_types { UND = 0; @@ -42,6 +43,7 @@ enum fd_types { TIMERFD = 17; MEMFD = 18; BPFMAP = 19; + IO_URING = 20; /* Any number above the real used. 
Not stored to image */ CTL_TTY = 65534; @@ -57,25 +59,26 @@ message fdinfo_entry { } message file_entry { - required fd_types type = 1; - required uint32 id = 2; - optional reg_file_entry reg = 3; - optional inet_sk_entry isk = 4; - optional ns_file_entry nsf = 5; - optional packet_sock_entry psk = 6; - optional netlink_sk_entry nlsk = 7; - optional eventfd_file_entry efd = 8; - optional eventpoll_file_entry epfd = 9; - optional signalfd_entry sgfd = 10; - optional tunfile_entry tunf = 11; - optional timerfd_entry tfd = 12; - optional inotify_file_entry ify = 13; - optional fanotify_file_entry ffy = 14; - optional ext_file_entry ext = 15; - optional unix_sk_entry usk = 16; - optional fifo_entry fifo = 17; - optional pipe_entry pipe = 18; - optional tty_file_entry tty = 19; - optional memfd_file_entry memfd = 20; - optional bpfmap_file_entry bpf = 21; + required fd_types type = 1; + required uint32 id = 2; + optional reg_file_entry reg = 3; + optional inet_sk_entry isk = 4; + optional ns_file_entry nsf = 5; + optional packet_sock_entry psk = 6; + optional netlink_sk_entry nlsk = 7; + optional eventfd_file_entry efd = 8; + optional eventpoll_file_entry epfd = 9; + optional signalfd_entry sgfd = 10; + optional tunfile_entry tunf = 11; + optional timerfd_entry tfd = 12; + optional inotify_file_entry ify = 13; + optional fanotify_file_entry ffy = 14; + optional ext_file_entry ext = 15; + optional unix_sk_entry usk = 16; + optional fifo_entry fifo = 17; + optional pipe_entry pipe = 18; + optional tty_file_entry tty = 19; + optional memfd_file_entry memfd = 20; + optional bpfmap_file_entry bpf = 21; + optional io_uring_file_entry io_uring = 22; } diff --git a/images/io_uring.proto b/images/io_uring.proto new file mode 100644 index 0000000000..cb933d0b56 --- /dev/null +++ b/images/io_uring.proto @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: MIT + +syntax = "proto2"; + +import "opts.proto"; +import "fown.proto"; + +message io_uring_group_id { + required uint32 group = 1; + 
required string group_name = 2; +} + +message io_uring_personality_id { + required uint32 uid = 1; + required uint32 euid = 2; + required uint32 suid = 3; + required uint32 fsuid = 4; + required uint32 gid = 5; + required uint32 egid = 6; + required uint32 sgid = 7; + required uint32 fsgid = 8; + required string uid_name = 9; + required string euid_name = 10; + required string suid_name = 11; + required string fsuid_name = 12; + required string gid_name = 13; + required string egid_name = 14; + required string sgid_name = 15; + required string fsgid_name = 16; + required uint64 cap_eff = 17; + repeated io_uring_group_id group_id = 18; +} + +message io_uring_file_entry { + required uint32 id = 1; + required uint32 flags = 2 [(criu).flags = "rfile.flags"]; + required uint64 pos = 3; + required fown_entry fown = 4; + /* Instance */ + required uint32 setup_flags = 5; + required uint32 sq_thread_cpu = 6; + required uint32 sq_thread_idle = 7; + required uint64 nr_user_bufs = 8; + required uint64 nr_user_files = 9; + required uint32 sq_entries = 10; + required uint32 cq_entries = 11; + required uint32 sq_off_array = 12; + required uint32 inode = 13; + /* Ring */ + required uint32 sq_head = 14; + required uint32 sq_tail = 15; + required uint32 cqe_head = 16; + required uint32 cqe_tail = 17; + required uint32 sq_ring_mask = 18; + required uint32 cq_ring_mask = 19; + /* Restrictions */ + required bool restrictions = 20; + required uint32 reg_op = 21; + required uint32 sqe_op = 22; + required uint32 sqe_flags_allowed = 23; + required uint32 sqe_flags_required = 24; + /* Personality */ + repeated io_uring_personality_id pers_id = 25; + optional sint32 mnt_id = 26 [default = -1]; +} + +message io_uring_data_entry { + required uint32 id = 1; + required uint32 sqe_bytes = 2; /* Bytes required for SQEs */ + required uint32 cqe_bytes = 3; /* Bytes required for CQEs */ + required uint32 sq_arr_bytes = 4; /* Bytes required for SQ array */ +} diff --git a/lib/py/images/images.py 
b/lib/py/images/images.py index 300b1cc69a..3b72edf42e 100644 --- a/lib/py/images/images.py +++ b/lib/py/images/images.py @@ -384,6 +384,21 @@ def skip(self, f, pload): f.seek(pload.bytes, os.SEEK_CUR) return pload.bytes +class io_uring_data_extra_handler: + def load(self, f, pload): + size = pload.sqe_bytes + pload.cqe_bytes + pload.sq_arr_bytes + data = f.read(size) + return base64.encodebytes(data).decode('utf-8') + + def dump(self, extra, f, pload): + data = base64.decodebytes(extra) + f.write(data) + + def skip(self, f, pload): + size = pload.sqe_bytes + pload.cqe_bytes + pload.sq_arr_bytes + f.seek(size, os.SEEK_CUR) + return size + class ipc_sem_set_handler: def load(self, f, pbuff): entry = pb2dict.pb2dict(pbuff) @@ -562,6 +577,9 @@ def skip(self, f, pbuff): 'BPFMAP_DATA': entry_handler(pb.bpfmap_data_entry, bpfmap_data_extra_handler()), 'APPARMOR': entry_handler(pb.apparmor_entry), + 'IO_URING_FILE': entry_handler(pb.io_uring_file_entry), + 'IO_URING_DATA': entry_handler(pb.io_uring_data_entry, + io_uring_data_extra_handler()), }