Skip to content

Commit 4ea648a

Browse files
WangNan0acmel
authored and committed
perf record: Add --tail-synthesize option
When working with an overwritable ring buffer there is an inconvenient problem: if perf dumps data a long time after it starts, non-sample events may be lost, which makes a subsequent 'perf report' unable to identify the proc name and mmap layout. For example: # perf record -m 4 -e raw_syscalls:* -g --overwrite --switch-output \ dd if=/dev/zero of=/dev/null Then send SIGUSR2 after dd has run long enough. The resulting perf.data lacks the correct comm and mmap events: # perf script -i perf.data.2016061522374354 perf 24478 [004] 2581325.601789: raw_syscalls:sys_exit: NR 0 = 512 ^^^^ Should be 'dd' 27b2e8 syscall_slow_exit_work+0xfe2000e3 (/lib/modules/4.6.0-rc3+/build/vmlinux) 203cc7 do_syscall_64+0xfe200117 (/lib/modules/4.6.0-rc3+/build/vmlinux) b18d83 return_from_SYSCALL_64+0xfe200000 (/lib/modules/4.6.0-rc3+/build/vmlinux) 7f47c417edf0 [unknown] ([unknown]) ^^^^^^^^^^^^ Fail to unwind This patch provides a '--tail-synthesize' option, which allows perf to collect system status when finalizing the output file. In the resulting output file, the non-sample events reflect system status when dumping data. After this patch: # perf record -m 4 -e raw_syscalls:* -g --overwrite --switch-output --tail-synthesize \ dd if=/dev/zero of=/dev/null # perf script -i perf.data.2016061600544998 dd 27364 [004] 2583244.994464: raw_syscalls:sys_enter: NR 1 (1, ... ^^ Correct comm 203a18 syscall_trace_enter_phase2+0xfe2001a8 ([kernel.kallsyms]) 203aa5 syscall_trace_enter+0xfe200055 ([kernel.kallsyms]) 203caa do_syscall_64+0xfe2000fa ([kernel.kallsyms]) b18d83 return_from_SYSCALL_64+0xfe200000 ([kernel.kallsyms]) d8e50 __GI___libc_write+0xffff01d9639f4010 (/tmp/oxygen_root-w00229757/lib64/libc-2.18.so) ^^^^^ Correct unwind This option doesn't aim to solve this problem completely. If a process terminates before SIGUSR2, we still lose its COMM and MMAP events. 
For example, we can't unwind correctly from the final perf.data we get from the previous example, because when perf collects the final output file (when we press C-c), 'dd' has already terminated, so its '/proc/<pid>/maps' is empty. However, this is a cheaper choice. To completely solve this problem we need to continuously output non-sample events. To satisfy the requirement of daemonization, we need to merge them periodically. It is possible but requires much more code and cycles. Automatically select --tail-synthesize when --overwrite is provided. Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Nilay Vaish <nilayvaish@gmail.com> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1468485287-33422-16-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent f06149c commit 4ea648a

File tree

3 files changed

+34
-6
lines changed

3 files changed

+34
-6
lines changed

tools/perf/Documentation/perf-record.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,12 @@ options.
367367
'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
368368
in config file is set to true.
369369

370+
--tail-synthesize::
371+
Instead of collecting non-sample events (for example, fork, comm, mmap) at
372+
the beginning of record, collect them while finalizing an output file.
373+
The collected non-sample events reflect the status of the system when
374+
record is finished.
375+
370376
--overwrite::
371377
Makes all events use an overwritable ring buffer. An overwritable ring
372378
buffer works like a flight recorder: when it gets full, the kernel will
@@ -381,6 +387,8 @@ those fitting in the ring buffer at that moment.
381387
'overwrite' attribute can also be set or canceled for an event using
382388
config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'.
383389

390+
Implies --tail-synthesize.
391+
384392
SEE ALSO
385393
--------
386394
linkperf:perf-stat[1], linkperf:perf-list[1]

tools/perf/builtin-record.c

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -604,13 +604,16 @@ record__finish_output(struct record *rec)
604604
return;
605605
}
606606

607-
static int record__synthesize_workload(struct record *rec)
607+
static int record__synthesize_workload(struct record *rec, bool tail)
608608
{
609609
struct {
610610
struct thread_map map;
611611
struct thread_map_data map_data;
612612
} thread_map;
613613

614+
if (rec->opts.tail_synthesize != tail)
615+
return 0;
616+
614617
thread_map.map.nr = 1;
615618
thread_map.map.map[0].pid = rec->evlist->workload.pid;
616619
thread_map.map.map[0].comm = NULL;
@@ -621,7 +624,7 @@ static int record__synthesize_workload(struct record *rec)
621624
rec->opts.proc_map_timeout);
622625
}
623626

624-
static int record__synthesize(struct record *rec);
627+
static int record__synthesize(struct record *rec, bool tail);
625628

626629
static int
627630
record__switch_output(struct record *rec, bool at_exit)
@@ -632,6 +635,10 @@ record__switch_output(struct record *rec, bool at_exit)
632635
/* Same Size: "2015122520103046"*/
633636
char timestamp[] = "InvalidTimestamp";
634637

638+
record__synthesize(rec, true);
639+
if (target__none(&rec->opts.target))
640+
record__synthesize_workload(rec, true);
641+
635642
rec->samples = 0;
636643
record__finish_output(rec);
637644
err = fetch_current_timestamp(timestamp, sizeof(timestamp));
@@ -654,7 +661,7 @@ record__switch_output(struct record *rec, bool at_exit)
654661

655662
/* Output tracking events */
656663
if (!at_exit) {
657-
record__synthesize(rec);
664+
record__synthesize(rec, false);
658665

659666
/*
660667
* In 'perf record --switch-output' without -a,
@@ -666,7 +673,7 @@ record__switch_output(struct record *rec, bool at_exit)
666673
* perf_event__synthesize_thread_map() for those events.
667674
*/
668675
if (target__none(&rec->opts.target))
669-
record__synthesize_workload(rec);
676+
record__synthesize_workload(rec, false);
670677
}
671678
return fd;
672679
}
@@ -720,7 +727,7 @@ static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
720727
return NULL;
721728
}
722729

723-
static int record__synthesize(struct record *rec)
730+
static int record__synthesize(struct record *rec, bool tail)
724731
{
725732
struct perf_session *session = rec->session;
726733
struct machine *machine = &session->machines.host;
@@ -730,6 +737,9 @@ static int record__synthesize(struct record *rec)
730737
int fd = perf_data_file__fd(file);
731738
int err = 0;
732739

740+
if (rec->opts.tail_synthesize != tail)
741+
return 0;
742+
733743
if (file->is_pipe) {
734744
err = perf_event__synthesize_attrs(tool, session,
735745
process_synthesized_event);
@@ -893,7 +903,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
893903

894904
machine = &session->machines.host;
895905

896-
err = record__synthesize(rec);
906+
err = record__synthesize(rec, false);
897907
if (err < 0)
898908
goto out_child;
899909

@@ -1057,6 +1067,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
10571067
if (!quiet)
10581068
fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
10591069

1070+
if (target__none(&rec->opts.target))
1071+
record__synthesize_workload(rec, true);
1072+
10601073
out_child:
10611074
if (forks) {
10621075
int exit_status;
@@ -1075,6 +1088,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
10751088
} else
10761089
status = err;
10771090

1091+
record__synthesize(rec, true);
10781092
/* this will be recalculated during process_buildids() */
10791093
rec->samples = 0;
10801094

@@ -1399,6 +1413,8 @@ struct option __record_options[] = {
13991413
OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
14001414
&record.opts.no_inherit_set,
14011415
"child tasks do not inherit counters"),
1416+
OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1417+
"synthesize non-sample events at the end of output"),
14021418
OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
14031419
OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
14041420
OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
@@ -1610,6 +1626,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
16101626
}
16111627
}
16121628

1629+
if (record.opts.overwrite)
1630+
record.opts.tail_synthesize = true;
1631+
16131632
if (rec->evlist->nr_entries == 0 &&
16141633
perf_evlist__add_default(rec->evlist) < 0) {
16151634
pr_err("Not enough memory for event selector list\n");

tools/perf/perf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct record_opts {
5959
bool record_switch_events;
6060
bool all_kernel;
6161
bool all_user;
62+
bool tail_synthesize;
6263
bool overwrite;
6364
unsigned int freq;
6465
unsigned int mmap_pages;

0 commit comments

Comments
 (0)