Skip to content

Commit c5ebced

Browse files
Jiri Olsaacmel
authored andcommitted
perf: Add ability to attach user stack dump to sample
Introduce the PERF_SAMPLE_STACK_USER sample type bit to trigger a dump of the user level stack on sample. The size of the dump is specified by the sample_stack_user value. Being able to dump parts of the user stack, starting from the stack pointer, will be useful for post-mortem DWARF CFI based stack unwinding. Add the HAVE_PERF_USER_STACK_DUMP config option to determine if the architecture provides user stack dump on perf event samples. This needs access to the user stack pointer, which is not unified across architectures. Enable this for the x86 architecture. Signed-off-by: Jiri Olsa <jolsa@redhat.com> Original-patch-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: "Frank Ch. Eigler" <fche@redhat.com> Cc: Arun Sharma <asharma@fb.com> Cc: Benjamin Redelings <benjamin.redelings@nescent.org> Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com> Cc: Cyrill Gorcunov <gorcunov@openvz.org> Cc: Frank Ch. Eigler <fche@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Robert Richter <robert.richter@amd.com> Cc: Stephane Eranian <eranian@google.com> Cc: Tom Zanussi <tzanussi@gmail.com> Cc: Ulrich Drepper <drepper@gmail.com> Link: http://lkml.kernel.org/r/1344345647-11536-6-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent 5685e0f commit c5ebced

File tree

5 files changed

+190
-2
lines changed

5 files changed

+190
-2
lines changed

arch/Kconfig

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,13 @@ config HAVE_PERF_REGS
228228
Support selective register dumps for perf events. This includes
229229
bit-mapping of each registers and a unique architecture id.
230230

231+
config HAVE_PERF_USER_STACK_DUMP
232+
bool
233+
help
234+
Support user stack dumps for perf event samples. This needs
235+
access to the user stack pointer which is not unified across
236+
architectures.
237+
231238
config HAVE_ARCH_JUMP_LABEL
232239
bool
233240

arch/x86/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ config X86
6161
select PERF_EVENTS
6262
select HAVE_PERF_EVENTS_NMI
6363
select HAVE_PERF_REGS
64+
select HAVE_PERF_USER_STACK_DUMP
6465
select ANON_INODES
6566
select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
6667
select HAVE_CMPXCHG_LOCAL if !M386

include/linux/perf_event.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,9 @@ enum perf_event_sample_format {
131131
PERF_SAMPLE_RAW = 1U << 10,
132132
PERF_SAMPLE_BRANCH_STACK = 1U << 11,
133133
PERF_SAMPLE_REGS_USER = 1U << 12,
134+
PERF_SAMPLE_STACK_USER = 1U << 13,
134135

135-
PERF_SAMPLE_MAX = 1U << 13, /* non-ABI */
136+
PERF_SAMPLE_MAX = 1U << 14, /* non-ABI */
136137
};
137138

138139
/*
@@ -205,6 +206,7 @@ enum perf_event_read_format {
205206
#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
206207
#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
207208
#define PERF_ATTR_SIZE_VER3 88 /* add: sample_regs_user */
209+
#define PERF_ATTR_SIZE_VER4 96 /* add: sample_stack_user */
208210

209211
/*
210212
* Hardware event_id to monitor via a performance monitoring event:
@@ -289,6 +291,14 @@ struct perf_event_attr {
289291
* See asm/perf_regs.h for details.
290292
*/
291293
__u64 sample_regs_user;
294+
295+
/*
296+
* Defines size of the user stack to dump on samples.
297+
*/
298+
__u32 sample_stack_user;
299+
300+
/* Align to u64. */
301+
__u32 __reserved_2;
292302
};
293303

294304
/*
@@ -568,6 +578,10 @@ enum perf_event_type {
568578
*
569579
* { u64 abi; # enum perf_sample_regs_abi
570580
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
581+
*
582+
* { u64 size;
583+
* char data[size];
584+
* u64 dyn_size; } && PERF_SAMPLE_STACK_USER
571585
* };
572586
*/
573587
PERF_RECORD_SAMPLE = 9,
@@ -1160,6 +1174,7 @@ struct perf_sample_data {
11601174
struct perf_raw_record *raw;
11611175
struct perf_branch_stack *br_stack;
11621176
struct perf_regs_user regs_user;
1177+
u64 stack_user_size;
11631178
};
11641179

11651180
static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -1172,6 +1187,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
11721187
data->period = period;
11731188
data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
11741189
data->regs_user.regs = NULL;
1190+
data->stack_user_size = 0;
11751191
}
11761192

11771193
extern void perf_output_sample(struct perf_output_handle *handle,

kernel/events/core.c

Lines changed: 149 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include <linux/perf_event.h>
3737
#include <linux/ftrace_event.h>
3838
#include <linux/hw_breakpoint.h>
39+
#include <linux/mm_types.h>
3940

4041
#include "internal.h"
4142

@@ -3787,6 +3788,101 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user,
37873788
}
37883789
}
37893790

3791+
/*
3792+
* Get remaining task size from user stack pointer.
3793+
*
3794+
* It'd be better to take stack vma map and limit this more
3795+
* precisly, but there's no way to get it safely under interrupt,
3796+
* so using TASK_SIZE as limit.
3797+
*/
3798+
static u64 perf_ustack_task_size(struct pt_regs *regs)
3799+
{
3800+
unsigned long addr = perf_user_stack_pointer(regs);
3801+
3802+
if (!addr || addr >= TASK_SIZE)
3803+
return 0;
3804+
3805+
return TASK_SIZE - addr;
3806+
}
3807+
3808+
static u16
3809+
perf_sample_ustack_size(u16 stack_size, u16 header_size,
3810+
struct pt_regs *regs)
3811+
{
3812+
u64 task_size;
3813+
3814+
/* No regs, no stack pointer, no dump. */
3815+
if (!regs)
3816+
return 0;
3817+
3818+
/*
3819+
* Check if we fit in with the requested stack size into the:
3820+
* - TASK_SIZE
3821+
* If we don't, we limit the size to the TASK_SIZE.
3822+
*
3823+
* - remaining sample size
3824+
* If we don't, we customize the stack size to
3825+
* fit in to the remaining sample size.
3826+
*/
3827+
3828+
task_size = min((u64) USHRT_MAX, perf_ustack_task_size(regs));
3829+
stack_size = min(stack_size, (u16) task_size);
3830+
3831+
/* Current header size plus static size and dynamic size. */
3832+
header_size += 2 * sizeof(u64);
3833+
3834+
/* Do we fit in with the current stack dump size? */
3835+
if ((u16) (header_size + stack_size) < header_size) {
3836+
/*
3837+
* If we overflow the maximum size for the sample,
3838+
* we customize the stack dump size to fit in.
3839+
*/
3840+
stack_size = USHRT_MAX - header_size - sizeof(u64);
3841+
stack_size = round_up(stack_size, sizeof(u64));
3842+
}
3843+
3844+
return stack_size;
3845+
}
3846+
3847+
static void
3848+
perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
3849+
struct pt_regs *regs)
3850+
{
3851+
/* Case of a kernel thread, nothing to dump */
3852+
if (!regs) {
3853+
u64 size = 0;
3854+
perf_output_put(handle, size);
3855+
} else {
3856+
unsigned long sp;
3857+
unsigned int rem;
3858+
u64 dyn_size;
3859+
3860+
/*
3861+
* We dump:
3862+
* static size
3863+
* - the size requested by user or the best one we can fit
3864+
* in to the sample max size
3865+
* data
3866+
* - user stack dump data
3867+
* dynamic size
3868+
* - the actual dumped size
3869+
*/
3870+
3871+
/* Static size. */
3872+
perf_output_put(handle, dump_size);
3873+
3874+
/* Data. */
3875+
sp = perf_user_stack_pointer(regs);
3876+
rem = __output_copy_user(handle, (void *) sp, dump_size);
3877+
dyn_size = dump_size - rem;
3878+
3879+
perf_output_skip(handle, rem);
3880+
3881+
/* Dynamic size. */
3882+
perf_output_put(handle, dyn_size);
3883+
}
3884+
}
3885+
37903886
static void __perf_event_header__init_id(struct perf_event_header *header,
37913887
struct perf_sample_data *data,
37923888
struct perf_event *event)
@@ -4064,6 +4160,11 @@ void perf_output_sample(struct perf_output_handle *handle,
40644160
mask);
40654161
}
40664162
}
4163+
4164+
if (sample_type & PERF_SAMPLE_STACK_USER)
4165+
perf_output_sample_ustack(handle,
4166+
data->stack_user_size,
4167+
data->regs_user.regs);
40674168
}
40684169

40694170
void perf_prepare_sample(struct perf_event_header *header,
@@ -4129,6 +4230,35 @@ void perf_prepare_sample(struct perf_event_header *header,
41294230

41304231
header->size += size;
41314232
}
4233+
4234+
if (sample_type & PERF_SAMPLE_STACK_USER) {
4235+
/*
4236+
* Either we need PERF_SAMPLE_STACK_USER bit to be always
4237+
* processed as the last one or have additional check added
4238+
* in case new sample type is added, because we could eat
4239+
* up the rest of the sample size.
4240+
*/
4241+
struct perf_regs_user *uregs = &data->regs_user;
4242+
u16 stack_size = event->attr.sample_stack_user;
4243+
u16 size = sizeof(u64);
4244+
4245+
if (!uregs->abi)
4246+
perf_sample_regs_user(uregs, regs);
4247+
4248+
stack_size = perf_sample_ustack_size(stack_size, header->size,
4249+
uregs->regs);
4250+
4251+
/*
4252+
* If there is something to dump, add space for the dump
4253+
* itself and for the field that tells the dynamic size,
4254+
* which is how many have been actually dumped.
4255+
*/
4256+
if (stack_size)
4257+
size += sizeof(u64) + stack_size;
4258+
4259+
data->stack_user_size = stack_size;
4260+
header->size += size;
4261+
}
41324262
}
41334263

41344264
static void perf_event_output(struct perf_event *event,
@@ -6205,8 +6335,26 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
62056335
}
62066336
}
62076337

6208-
if (attr->sample_type & PERF_SAMPLE_REGS_USER)
6338+
if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
62096339
ret = perf_reg_validate(attr->sample_regs_user);
6340+
if (ret)
6341+
return ret;
6342+
}
6343+
6344+
if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
6345+
if (!arch_perf_have_user_stack_dump())
6346+
return -ENOSYS;
6347+
6348+
/*
6349+
* We have __u32 type for the size, but so far
6350+
* we can only use __u16 as maximum due to the
6351+
* __u16 sample size limit.
6352+
*/
6353+
if (attr->sample_stack_user >= USHRT_MAX)
6354+
ret = -EINVAL;
6355+
else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64)))
6356+
ret = -EINVAL;
6357+
}
62106358

62116359
out:
62126360
return ret;

kernel/events/internal.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,4 +158,20 @@ static inline void put_recursion_context(int *recursion, int rctx)
158158
recursion[rctx]--;
159159
}
160160

161+
/*
 * User stack dump support is available only when the architecture
 * selects HAVE_PERF_USER_STACK_DUMP.
 *
 * perf_user_stack_pointer() maps to user_stack_pointer() in that case
 * and evaluates to 0 otherwise.
 */
#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP
#define perf_user_stack_pointer(regs) user_stack_pointer(regs)
#else
#define perf_user_stack_pointer(regs) 0
#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */

/* Tell whether user stack dump support is built in. */
static inline bool arch_perf_have_user_stack_dump(void)
{
#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP
	return true;
#else
	return false;
#endif
}
176+
161177
#endif /* _KERNEL_EVENTS_INTERNAL_H */

0 commit comments

Comments
 (0)