Skip to content
This repository has been archived by the owner on Jun 18, 2024. It is now read-only.

scx: Make exit debug dump buffer resizable #158

Merged
merged 5 commits into from
Mar 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions include/linux/sched/ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,12 @@ struct sched_ext_ops {
*/
u32 timeout_ms;

/**
* exit_dump_len - scx_exit_info.dump buffer length. If 0, the default
* value of 32768 is used.
*/
u32 exit_dump_len;

/**
* name - BPF scheduler's name
*
Expand Down
28 changes: 17 additions & 11 deletions kernel/sched/ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ enum scx_internal_consts {

SCX_EXIT_BT_LEN = 64,
SCX_EXIT_MSG_LEN = 1024,
SCX_EXIT_DUMP_LEN = 32768,
SCX_EXIT_DUMP_DFL_LEN = 32768,
};

enum scx_ops_enable_state {
Expand Down Expand Up @@ -3270,7 +3270,7 @@ static void free_exit_info(struct scx_exit_info *ei)
kfree(ei);
}

static struct scx_exit_info *alloc_exit_info(void)
static struct scx_exit_info *alloc_exit_info(size_t exit_dump_len)
{
struct scx_exit_info *ei;

Expand All @@ -3280,7 +3280,7 @@ static struct scx_exit_info *alloc_exit_info(void)

ei->bt = kcalloc(sizeof(ei->bt[0]), SCX_EXIT_BT_LEN, GFP_KERNEL);
ei->msg = kzalloc(SCX_EXIT_MSG_LEN, GFP_KERNEL);
ei->dump = kzalloc(SCX_EXIT_DUMP_LEN, GFP_KERNEL);
ei->dump = kzalloc(exit_dump_len, GFP_KERNEL);

if (!ei->bt || !ei->msg || !ei->dump) {
free_exit_info(ei);
Expand Down Expand Up @@ -3519,7 +3519,7 @@ static void scx_dump_task(struct seq_buf *s, struct task_struct *p, char marker,
seq_buf_commit(s, used < avail ? used : -1);
}

static void scx_dump_state(struct scx_exit_info *ei)
static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
{
const char trunc_marker[] = "\n\n~~~~ TRUNCATED ~~~~\n";
unsigned long now = jiffies;
Expand All @@ -3528,7 +3528,10 @@ static void scx_dump_state(struct scx_exit_info *ei)
char *buf;
int cpu;

seq_buf_init(&s, ei->dump, SCX_EXIT_DUMP_LEN - sizeof(trunc_marker));
if (dump_len <= sizeof(trunc_marker))
return;

seq_buf_init(&s, ei->dump, dump_len - sizeof(trunc_marker));

seq_buf_printf(&s, "%s[%d] triggered exit kind %d:\n %s (%s)\n\n",
current->comm, current->pid, ei->kind, ei->reason, ei->msg);
Expand Down Expand Up @@ -3580,15 +3583,14 @@ static void scx_dump_state(struct scx_exit_info *ei)
rq_unlock(rq, &rf);
}

if (seq_buf_has_overflowed(&s)) {
used = strlen(seq_buf_str(&s));
memcpy(ei->dump + used, trunc_marker, sizeof(trunc_marker));
}
if (seq_buf_has_overflowed(&s))
memcpy(ei->dump + seq_buf_used(&s) - 1, trunc_marker,
sizeof(trunc_marker));
}

static void scx_ops_error_irq_workfn(struct irq_work *irq_work)
{
scx_dump_state(scx_exit_info);
scx_dump_state(scx_exit_info, scx_ops.exit_dump_len);
schedule_scx_ops_disable_work();
}

Expand Down Expand Up @@ -3678,7 +3680,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
if (ret < 0)
goto err;

scx_exit_info = alloc_exit_info();
scx_exit_info = alloc_exit_info(ops->exit_dump_len);
if (!scx_exit_info) {
ret = -ENOMEM;
goto err_del;
Expand Down Expand Up @@ -4086,6 +4088,10 @@ static int bpf_scx_init_member(const struct btf_type *t,
return -E2BIG;
ops->timeout_ms = *(u32 *)(udata + moff);
return 1;
case offsetof(struct sched_ext_ops, exit_dump_len):
ops->exit_dump_len =
*(u32 *)(udata + moff) ?: SCX_EXIT_DUMP_DFL_LEN;
return 1;
}

return 0;
Expand Down
3 changes: 1 addition & 2 deletions tools/sched_ext/include/scx/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
#include <stdint.h>
#include <errno.h>

#include "user_exit_info.h"

typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
Expand Down Expand Up @@ -67,6 +65,7 @@ typedef int64_t s64;
bpf_map__initial_value(skel->maps.elfsec##_##arr, &__sz); \
} while (0)

#include "user_exit_info.h"
#include "compat.h"

#endif /* __SCHED_EXT_COMMON_H */
51 changes: 51 additions & 0 deletions tools/sched_ext/include/scx/compat.bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,54 @@ static inline void __COMPAT_scx_bpf_switch_all(void)
}

#endif

/*
 * sched_ext_ops.exit_dump_len is a recent addition. Use the following
 * definition to support older kernels. See scx_qmap for usage example.
 *
 * This variant declares the operation callbacks and scalar settings but has
 * no exit_dump_len member. SCX_OPS_DEFINE() instantiates it alongside the
 * regular struct sched_ext_ops from the same initializer list.
 *
 * NOTE(review): presumably the members must stay in step with the kernel's
 * struct sched_ext_ops (minus exit_dump_len) - confirm before editing.
 */
struct sched_ext_ops___no_exit_dump_len {
/* operation callbacks */
s32 (*select_cpu)(struct task_struct *, s32, u64);
void (*enqueue)(struct task_struct *, u64);
void (*dequeue)(struct task_struct *, u64);
void (*dispatch)(s32, struct task_struct *);
void (*runnable)(struct task_struct *, u64);
void (*running)(struct task_struct *);
void (*stopping)(struct task_struct *, bool);
void (*quiescent)(struct task_struct *, u64);
bool (*yield)(struct task_struct *, struct task_struct *);
bool (*core_sched_before)(struct task_struct *, struct task_struct *);
void (*set_weight)(struct task_struct *, u32);
void (*set_cpumask)(struct task_struct *, const struct cpumask *);
void (*update_idle)(s32, bool);
void (*cpu_acquire)(s32, struct scx_cpu_acquire_args *);
void (*cpu_release)(s32, struct scx_cpu_release_args *);
s32 (*init_task)(struct task_struct *, struct scx_init_task_args *);
void (*exit_task)(struct task_struct *, struct scx_exit_task_args *);
void (*enable)(struct task_struct *);
void (*disable)(struct task_struct *);
/* cgroup callbacks */
s32 (*cgroup_init)(struct cgroup *, struct scx_cgroup_init_args *);
void (*cgroup_exit)(struct cgroup *);
s32 (*cgroup_prep_move)(struct task_struct *, struct cgroup *, struct cgroup *);
void (*cgroup_move)(struct task_struct *, struct cgroup *, struct cgroup *);
void (*cgroup_cancel_move)(struct task_struct *, struct cgroup *, struct cgroup *);
void (*cgroup_set_weight)(struct cgroup *, u32);
/* CPU hotplug and scheduler init/exit callbacks */
void (*cpu_online)(s32);
void (*cpu_offline)(s32);
s32 (*init)();
void (*exit)(struct scx_exit_info *);
/* scalar settings; timeout_ms is followed directly by name (no exit_dump_len) */
u32 dispatch_max_batch;
u64 flags;
u32 timeout_ms;
char name[128];
};

/*
 * SCX_OPS_DEFINE - define sched_ext_ops, see compat.h::SCX_OPS_LOAD/ATTACH()
 * @__name: identifier of the struct sched_ext_ops instance
 * @...: initializer list applied verbatim to both instances
 *
 * Emits two objects in the ".struct_ops.link" section from the same
 * initializer list: @__name as struct sched_ext_ops, and
 * @__name___no_exit_dump_len as struct sched_ext_ops___no_exit_dump_len.
 * Because the list is shared, it may only name members that exist in both
 * structs.
 *
 * The final line intentionally has no trailing backslash so that the macro
 * definition ends here and cannot absorb the line that follows it.
 */
#define SCX_OPS_DEFINE(__name, ...)						\
	SEC(".struct_ops.link")							\
	struct sched_ext_ops __name = {						\
		__VA_ARGS__,							\
	};									\
	SEC(".struct_ops.link")							\
	struct sched_ext_ops___no_exit_dump_len __name##___no_exit_dump_len = {	\
		__VA_ARGS__							\
	};
64 changes: 63 additions & 1 deletion tools/sched_ext/include/scx/compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,34 @@ static inline bool __COMPAT_read_enum(const char *type, const char *name, u64 *v
__val; \
})

/*
 * Look up struct @type in the BTF made available by
 * __COMPAT_load_vmlinux_btf() and report whether one of its members is named
 * @field. Returns false when no struct of that name is found or when none of
 * its members match.
 */
static inline bool __COMPAT_struct_has_field(const char *type, const char *field)
{
	const struct btf_member *members;
	const struct btf_type *t;
	s32 type_id;
	int idx = 0;

	__COMPAT_load_vmlinux_btf();

	type_id = btf__find_by_name_kind(__COMPAT_vmlinux_btf, type, BTF_KIND_STRUCT);
	if (type_id < 0)
		return false;

	t = btf__type_by_id(__COMPAT_vmlinux_btf, type_id);
	SCX_BUG_ON(!t, "btf__type_by_id(%d)", type_id);

	members = btf_members(t);

	/* walk every member until a name matches */
	while (idx < BTF_INFO_VLEN(t->info)) {
		const char *n = btf__name_by_offset(__COMPAT_vmlinux_btf,
						    members[idx].name_off);

		SCX_BUG_ON(!n, "btf__name_by_offset()");
		if (strcmp(n, field) == 0)
			return true;
		idx++;
	}

	return false;
}

/*
* An ops flag, %SCX_OPS_SWITCH_PARTIAL, replaced scx_bpf_switch_all() which had
* to be called from ops.init(). To support both before and after, use both
Expand All @@ -78,4 +106,38 @@ static inline bool __COMPAT_read_enum(const char *type, const char *name, u64 *v
#define __COMPAT_SCX_OPS_SWITCH_PARTIAL \
__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")

#endif
/*
* struct sched_ext_ops can change over time. If compat.bpf.h::SCX_OPS_DEFINE()
* is used to define ops and compat.h::SCX_OPS_LOAD/ATTACH() are used to load
* and attach it, backward compatibility is automatically maintained where
* reasonable.
*
* - sched_ext_ops.exit_dump_len was added later. On kernels that don't support
* it, the value is ignored, and a warning is printed to stderr if a non-zero
* value was requested.
*/
#define SCX_OPS_LOAD(__skel, __ops_name, __scx_name, __uei_name) ({ \
UEI_SET_SIZE(__skel, __ops_name, __uei_name); \
if (__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len")) { \
bpf_map__set_autocreate((__skel)->maps.__ops_name, true); \
bpf_map__set_autocreate((__skel)->maps.__ops_name##___no_exit_dump_len, false); \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hopefully down the road we'll be able to figure out a way to do this that doesn't require us to hard-code the BC struct-ops types like this. LGTM for now though

} else { \
if ((__skel)->struct_ops.__ops_name->exit_dump_len) \
fprintf(stderr, "WARNING: kernel doesn't support setting exit dump len\n"); \
bpf_map__set_autocreate((__skel)->maps.__ops_name, false); \
bpf_map__set_autocreate((__skel)->maps.__ops_name##___no_exit_dump_len, true); \
} \
SCX_BUG_ON(__scx_name##__load((__skel)), "Failed to load skel"); \
})

/*
 * Attach the struct_ops map that matches the running kernel: @__ops_name when
 * sched_ext_ops has an exit_dump_len member, otherwise its
 * ___no_exit_dump_len counterpart. A NULL link is reported through
 * SCX_BUG_ON(); the expression evaluates to the link.
 */
#define SCX_OPS_ATTACH(__skel, __ops_name) ({					\
	struct bpf_link *__link =						\
		__COMPAT_struct_has_field("sched_ext_ops", "exit_dump_len") ?	\
		bpf_map__attach_struct_ops((__skel)->maps.__ops_name) :	\
		bpf_map__attach_struct_ops(					\
			(__skel)->maps.__ops_name##___no_exit_dump_len);	\
	SCX_BUG_ON(!__link, "Failed to attach struct_ops");			\
	__link;									\
})

#endif /* __SCX_COMPAT_H */
79 changes: 46 additions & 33 deletions tools/sched_ext/include/scx/user_exit_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,57 +11,70 @@
#define __USER_EXIT_INFO_H

enum uei_sizes {
UEI_REASON_SIZE = 128,
UEI_MSG_SIZE = 1024,
UEI_DUMP_SIZE = 32768,
UEI_REASON_LEN = 128,
UEI_MSG_LEN = 1024,
UEI_DUMP_DFL_LEN = 32768,
};

struct user_exit_info {
int kind;
char reason[UEI_REASON_SIZE];
char msg[UEI_MSG_SIZE];
char dump[UEI_DUMP_SIZE];
char reason[UEI_REASON_LEN];
char msg[UEI_MSG_LEN];
};

#ifdef __bpf__

#include "vmlinux.h"
#include <bpf/bpf_core_read.h>

/*
 * Copy the exit information in @ei into @uei.
 *
 * The reason, msg and dump strings are copied with
 * bpf_probe_read_kernel_str(), each bounded by the size of its destination
 * array. The kind field is stored last, after the strings.
 */
static inline void uei_record(struct user_exit_info *uei,
const struct scx_exit_info *ei)
{
bpf_probe_read_kernel_str(uei->reason, sizeof(uei->reason), ei->reason);
bpf_probe_read_kernel_str(uei->msg, sizeof(uei->msg), ei->msg);
bpf_probe_read_kernel_str(uei->dump, sizeof(uei->dump), ei->dump);
/* use __sync to force memory barrier; swaps the current kind for ei->kind */
__sync_val_compare_and_swap(&uei->kind, uei->kind, ei->kind);
}
/*
 * Declare the objects backing an exit-info instance called @__name:
 *  - a char array @__name_dump declared through RESIZABLE_ARRAY(data, ...),
 *  - a const volatile u32 @__name_dump_len,
 *  - the struct user_exit_info @__name itself, placed in ".data".
 *
 * The expansion has no trailing semicolon; the one written after the macro
 * invocation terminates the last declaration.
 */
#define UEI_DEFINE(__name) \
char RESIZABLE_ARRAY(data, __name##_dump); \
const volatile u32 __name##_dump_len; \
struct user_exit_info __name SEC(".data")

/*
 * Copy the exit information in @__ei into the instance named @__uei_name.
 *
 * reason and msg are copied into the struct's arrays, bounded by their sizes.
 * The dump string goes into the separate @__uei_name_dump array, bounded by
 * @__uei_name_dump_len. The kind field is stored last, after the strings.
 * @__ei is expanded several times.
 */
#define UEI_RECORD(__uei_name, __ei) ({ \
bpf_probe_read_kernel_str(__uei_name.reason, \
sizeof(__uei_name.reason), (__ei)->reason); \
bpf_probe_read_kernel_str(__uei_name.msg, \
sizeof(__uei_name.msg), (__ei)->msg); \
bpf_probe_read_kernel_str(__uei_name##_dump, \
__uei_name##_dump_len, (__ei)->dump); \
/* use __sync to force memory barrier */ \
__sync_val_compare_and_swap(&__uei_name.kind, __uei_name.kind, \
(__ei)->kind); \
})

#else /* !__bpf__ */

#include <stdio.h>
#include <stdbool.h>

/*
 * Return true when @uei->kind is non-zero. The value is read with a
 * compare-and-swap of -1 against -1, which never changes what is stored.
 */
static inline bool uei_exited(struct user_exit_info *uei)
{
	/* use __sync to force memory barrier */
	bool exited = __sync_val_compare_and_swap(&uei->kind, -1, -1);

	return exited;
}
/*
 * Size the dump buffer of @__uei_name from @__ops_name's exit_dump_len,
 * substituting UEI_DUMP_DFL_LEN when that field is 0. The length is stored in
 * rodata's @__uei_name_dump_len and passed to RESIZE_ARRAY().
 *
 * No need to call this explicitly if SCX_OPS_LOAD() is used.
 */
#define UEI_SET_SIZE(__skel, __ops_name, __uei_name) ({				\
	u32 __len = (__skel)->struct_ops.__ops_name->exit_dump_len;		\
	if (!__len)								\
		__len = UEI_DUMP_DFL_LEN;					\
	(__skel)->rodata->__uei_name##_dump_len = __len;			\
	RESIZE_ARRAY(data, __uei_name##_dump, __len);				\
})

/*
 * Evaluate to the kind currently stored in @__skel's @__uei_name. The value is
 * read with a compare-and-swap of -1 against -1, which never changes what is
 * stored.
 */
#define UEI_EXITED(__skel, __uei_name) ({					\
	__typeof__((__skel)->data->__uei_name.kind) *__kind_ptr =		\
		&(__skel)->data->__uei_name.kind;				\
	/* use __sync to force memory barrier */				\
	__sync_val_compare_and_swap(__kind_ptr, -1, -1);			\
})

/*
 * Write @uei to stderr: the debug dump framed by separator lines when the dump
 * is non-empty, then an "EXIT:" line with the reason and, when non-empty, the
 * message in parentheses.
 */
static inline void uei_print(const struct user_exit_info *uei)
{
	const char *dump_text = uei->dump;
	const char *msg_text = uei->msg;

	if (*dump_text) {
		fputs("\nDEBUG DUMP\n", stderr);
		fputs("================================================================================\n\n", stderr);
		fputs(dump_text, stderr);
		fputs("\n================================================================================\n\n", stderr);
	}

	fprintf(stderr, "EXIT: %s", uei->reason);
	if (*msg_text)
		fprintf(stderr, " (%s)", msg_text);
	fputs("\n", stderr);
}
/*
 * Write the exit info named @__uei_name in @__skel to stderr: the debug dump
 * (read from the separate @__uei_name_dump array) framed by separator lines
 * when it is non-empty, then an "EXIT:" line with the reason and, when
 * non-empty, the message in parentheses.
 */
#define UEI_REPORT(__skel, __uei_name) ({					\
	char *__uei_dump = (__skel)->data_##__uei_name##_dump->__uei_name##_dump; \
	struct user_exit_info *__uei = &(__skel)->data->__uei_name;		\
	if (*__uei_dump) {							\
		fputs("\nDEBUG DUMP\n", stderr);				\
		fputs("================================================================================\n\n", stderr); \
		fputs(__uei_dump, stderr);					\
		fputs("\n================================================================================\n\n", stderr); \
	}									\
	fprintf(stderr, "EXIT: %s", __uei->reason);				\
	if (*__uei->msg)							\
		fprintf(stderr, " (%s)", __uei->msg);				\
	fputs("\n", stderr);							\
})

#endif /* __bpf__ */
#endif /* __USER_EXIT_INFO_H */
Loading
Loading