
Commit dcad1a2

Namhyung Kim authored and rostedt committed
tracing/uprobes: Fetch args before reserving a ring buffer
Fetching from user space should be done in a non-atomic context. So use a per-cpu buffer and copy its content to the ring buffer atomically. Note that we can migrate while accessing user memory, thus use a per-cpu mutex to protect concurrent accesses.

This is needed since we'll be able to fetch args from user memory, which can be swapped out. Before this, uprobes could only fetch args from registers, which are saved in kernel space.

While at it, use __get_data_size() and store_trace_args() to reduce code duplication. Also add struct uprobe_cpu_buffer and its helpers as suggested by Oleg.

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: zhangwei(Jovi) <jovi.zhangwei@huawei.com>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
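In outline, both uprobe_trace_print() and uprobe_perf_print() now follow the same fetch-then-reserve sequence. The following is a condensed sketch assembled from the helpers added in the diff below, not a verbatim excerpt; declarations, size checks, and error paths are omitted:

	dsize = __get_data_size(&tu->tp, regs);		/* size of the dynamic argument data */
	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));	/* size of the fixed entry header */

	ucb = uprobe_buffer_get();			/* lock this CPU's scratch buffer */
	store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);	/* fetch args before any reservation */

	/* ... reserve a ring buffer (or perf buffer) entry of esize + tu->tp.size + dsize ... */

	memcpy(data, ucb->buf, tu->tp.size + dsize);	/* copy the pre-fetched args atomically */

	uprobe_buffer_put(ucb);				/* release the per-cpu mutex */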
1 parent a473414 commit dcad1a2

File tree

1 file changed (+132, -14 lines)

kernel/trace/trace_uprobe.c

Lines changed: 132 additions & 14 deletions
@@ -652,21 +652,117 @@ static const struct file_operations uprobe_profile_ops = {
 	.release	= seq_release,
 };
 
+struct uprobe_cpu_buffer {
+	struct mutex mutex;
+	void *buf;
+};
+static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer;
+static int uprobe_buffer_refcnt;
+
+static int uprobe_buffer_init(void)
+{
+	int cpu, err_cpu;
+
+	uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer);
+	if (uprobe_cpu_buffer == NULL)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		struct page *p = alloc_pages_node(cpu_to_node(cpu),
+						  GFP_KERNEL, 0);
+		if (p == NULL) {
+			err_cpu = cpu;
+			goto err;
+		}
+		per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p);
+		mutex_init(&per_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex);
+	}
+
+	return 0;
+
+err:
+	for_each_possible_cpu(cpu) {
+		if (cpu == err_cpu)
+			break;
+		free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf);
+	}
+
+	free_percpu(uprobe_cpu_buffer);
+	return -ENOMEM;
+}
+
+static int uprobe_buffer_enable(void)
+{
+	int ret = 0;
+
+	BUG_ON(!mutex_is_locked(&event_mutex));
+
+	if (uprobe_buffer_refcnt++ == 0) {
+		ret = uprobe_buffer_init();
+		if (ret < 0)
+			uprobe_buffer_refcnt--;
+	}
+
+	return ret;
+}
+
+static void uprobe_buffer_disable(void)
+{
+	BUG_ON(!mutex_is_locked(&event_mutex));
+
+	if (--uprobe_buffer_refcnt == 0) {
+		free_percpu(uprobe_cpu_buffer);
+		uprobe_cpu_buffer = NULL;
+	}
+}
+
+static struct uprobe_cpu_buffer *uprobe_buffer_get(void)
+{
+	struct uprobe_cpu_buffer *ucb;
+	int cpu;
+
+	cpu = raw_smp_processor_id();
+	ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu);
+
+	/*
+	 * Use per-cpu buffers for fastest access, but we might migrate
+	 * so the mutex makes sure we have sole access to it.
+	 */
+	mutex_lock(&ucb->mutex);
+
+	return ucb;
+}
+
+static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb)
+{
+	mutex_unlock(&ucb->mutex);
+}
+
 static void uprobe_trace_print(struct trace_uprobe *tu,
 				unsigned long func, struct pt_regs *regs)
 {
 	struct uprobe_trace_entry_head *entry;
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
+	struct uprobe_cpu_buffer *ucb;
 	void *data;
-	int size, i;
+	int size, dsize, esize;
 	struct ftrace_event_call *call = &tu->tp.call;
 
-	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+	dsize = __get_data_size(&tu->tp, regs);
+	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
+
+	if (WARN_ON_ONCE(!uprobe_cpu_buffer || tu->tp.size + dsize > PAGE_SIZE))
+		return;
+
+	ucb = uprobe_buffer_get();
+	store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
+
+	size = esize + tu->tp.size + dsize;
 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
-						  size + tu->tp.size, 0, 0);
+						  size, 0, 0);
 	if (!event)
-		return;
+		goto out;
 
 	entry = ring_buffer_event_data(event);
 	if (is_ret_probe(tu)) {
@@ -678,13 +774,13 @@ static void uprobe_trace_print(struct trace_uprobe *tu,
 		data = DATAOF_TRACE_ENTRY(entry, false);
 	}
 
-	for (i = 0; i < tu->tp.nr_args; i++) {
-		call_fetch(&tu->tp.args[i].fetch, regs,
-			   data + tu->tp.args[i].offset);
-	}
+	memcpy(data, ucb->buf, tu->tp.size + dsize);
 
 	if (!call_filter_check_discard(call, entry, buffer, event))
 		trace_buffer_unlock_commit(buffer, event, 0, 0);
+
+out:
+	uprobe_buffer_put(ucb);
 }
 
 /* uprobe handler */
@@ -752,6 +848,10 @@ probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
 	if (trace_probe_is_enabled(&tu->tp))
 		return -EINTR;
 
+	ret = uprobe_buffer_enable();
+	if (ret < 0)
+		return ret;
+
 	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
 
 	tu->tp.flags |= flag;
@@ -772,6 +872,8 @@ static void probe_event_disable(struct trace_uprobe *tu, int flag)
 
 	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
 	tu->tp.flags &= ~flag;
+
+	uprobe_buffer_disable();
 }
 
 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -898,11 +1000,24 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
 	struct ftrace_event_call *call = &tu->tp.call;
 	struct uprobe_trace_entry_head *entry;
 	struct hlist_head *head;
+	struct uprobe_cpu_buffer *ucb;
 	void *data;
-	int size, rctx, i;
+	int size, dsize, esize;
+	int rctx;
+
+	dsize = __get_data_size(&tu->tp, regs);
+	esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 
-	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
-	size = ALIGN(size + tu->tp.size + sizeof(u32), sizeof(u64)) - sizeof(u32);
+	if (WARN_ON_ONCE(!uprobe_cpu_buffer))
+		return;
+
+	size = esize + tu->tp.size + dsize;
+	size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32);
+	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
+		return;
+
+	ucb = uprobe_buffer_get();
+	store_trace_args(esize, &tu->tp, regs, ucb->buf, dsize);
 
 	preempt_disable();
 	head = this_cpu_ptr(call->perf_events);
@@ -922,15 +1037,18 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
 		data = DATAOF_TRACE_ENTRY(entry, false);
 	}
 
-	for (i = 0; i < tu->tp.nr_args; i++) {
-		struct probe_arg *parg = &tu->tp.args[i];
+	memcpy(data, ucb->buf, tu->tp.size + dsize);
+
+	if (size - esize > tu->tp.size + dsize) {
+		int len = tu->tp.size + dsize;
 
-		call_fetch(&parg->fetch, regs, data + parg->offset);
+		memset(data + len, 0, size - esize - len);
 	}
 
 	perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
 out:
 	preempt_enable();
+	uprobe_buffer_put(ucb);
 }
 
 /* uprobe profile handler */

0 commit comments
