
Commit 3b23e49

Torsten Duwe authored and Mark Rutland committed
arm64: implement ftrace with regs
This patch implements FTRACE_WITH_REGS for arm64, which allows a
traced function's arguments (and some other registers) to be captured
into a struct pt_regs, allowing these to be inspected and/or modified.
This is a building block for live-patching, where a function's
arguments may be forwarded to another function. This is also necessary
to enable ftrace and in-kernel pointer authentication at the same
time, as it allows the LR value to be captured and adjusted prior to
signing.

Using GCC's -fpatchable-function-entry=N option, we can have the
compiler insert a configurable number of NOPs between the function
entry point and the usual prologue. This also ensures functions are
AAPCS compliant (e.g. disabling inter-procedural register allocation).

For example, with -fpatchable-function-entry=2, GCC 8.1.0 compiles the
following:

| unsigned long bar(void);
|
| unsigned long foo(void)
| {
|         return bar() + 1;
| }

... to:

| <foo>:
|         nop
|         nop
|         stp     x29, x30, [sp, #-16]!
|         mov     x29, sp
|         bl      0 <bar>
|         add     x0, x0, #0x1
|         ldp     x29, x30, [sp], #16
|         ret

This patch builds the kernel with -fpatchable-function-entry=2,
prefixing each function with two NOPs. To trace a function, we replace
these NOPs with a sequence that saves the LR into a GPR, then calls an
ftrace entry assembly function which saves this and other relevant
registers:

| mov     x9, x30
| bl      <ftrace-entry>

Since patchable functions are AAPCS compliant (and the kernel does not
use x18 as a platform register), x9-x18 can be safely clobbered in the
patched sequence and the ftrace entry code.

There are now two ftrace entry functions, ftrace_regs_entry (which
saves all GPRs), and ftrace_entry (which saves the bare minimum). A
PLT is allocated for each within modules.

Signed-off-by: Torsten Duwe <duwe@suse.de>
[Mark: rework asm, comments, PLTs, initialization, commit message]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Torsten Duwe <duwe@suse.de>
Tested-by: Amit Daniel Kachhap <amit.kachhap@arm.com>
Tested-by: Torsten Duwe <duwe@suse.de>
Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Julien Thierry <jthierry@redhat.com>
Cc: Will Deacon <will@kernel.org>
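For illustration (not part of the patch text, and assuming the chosen ftrace entry point is reachable without a module PLT): once tracing is enabled for foo() above, the two NOPs are patched in place and the function entry becomes roughly:

| <foo>:
|         mov     x9, x30
|         bl      <ftrace-entry>
|         stp     x29, x30, [sp, #-16]!
|         mov     x29, sp
|         bl      0 <bar>
|         add     x0, x0, #0x1
|         ldp     x29, x30, [sp], #16
|         ret

Everything past the two patched instructions is unchanged; the ftrace entry code saves the pre-call LR (now in x9) along with the other relevant registers, runs the tracer callbacks, and then returns to foo()'s regular prologue.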
1 parent 1f377e0 commit 3b23e49

8 files changed: +252 -25 lines

arch/arm64/Kconfig (+2)

@@ -143,6 +143,8 @@ config ARM64
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS \
+		if $(cc-option,-fpatchable-function-entry=2)
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_FAST_GUP
 	select HAVE_FTRACE_MCOUNT_RECORD

arch/arm64/Makefile (+5)

@@ -95,6 +95,11 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDS_MODULE	+= $(srctree)/arch/arm64/kernel/module.lds
 endif
 
+ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)
+  KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+  CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+endif
+
 # Default value
 head-y		:= arch/arm64/kernel/head.o

arch/arm64/include/asm/ftrace.h (+23)

@@ -11,9 +11,20 @@
 #include <asm/insn.h>
 
 #define HAVE_FUNCTION_GRAPH_FP_TEST
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#else
 #define MCOUNT_ADDR		((unsigned long)_mcount)
+#endif
+
+/* The BL at the callsite's adjusted rec->ip */
 #define MCOUNT_INSN_SIZE	AARCH64_INSN_SIZE
 
+#define FTRACE_PLT_IDX		0
+#define FTRACE_REGS_PLT_IDX	1
+#define NR_FTRACE_PLTS		2
+
 /*
  * Currently, gcc tends to save the link register after the local variables
  * on the stack. This causes the max stack tracer to report the function
@@ -43,13 +54,25 @@ extern void return_to_handler(void);
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
+	/*
+	 * Adjust addr to point at the BL in the callsite.
+	 * See ftrace_init_nop() for the callsite sequence.
+	 */
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return addr + AARCH64_INSN_SIZE;
 	/*
 	 * addr is the address of the mcount call instruction.
 	 * recordmcount does the necessary offset calculation.
	 */
 	return addr;
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+struct dyn_ftrace;
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+#endif
+
 #define ftrace_return_address(n) return_address(n)
 
 /*

arch/arm64/include/asm/module.h (+1 -1)

@@ -21,7 +21,7 @@ struct mod_arch_specific {
 	struct mod_plt_sec	init;
 
 	/* for CONFIG_DYNAMIC_FTRACE */
-	struct plt_entry	*ftrace_trampoline;
+	struct plt_entry	*ftrace_trampolines;
 };
 #endif

arch/arm64/kernel/entry-ftrace.S (+135 -5)

@@ -7,10 +7,137 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/asm-offsets.h>
 #include <asm/assembler.h>
 #include <asm/ftrace.h>
 #include <asm/insn.h>
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+/*
+ * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before
+ * the regular function prologue. For an enabled callsite, ftrace_init_nop() and
+ * ftrace_make_call() have patched those NOPs to:
+ *
+ * 	MOV	X9, LR
+ * 	BL	<entry>
+ *
+ * ... where <entry> is either ftrace_caller or ftrace_regs_caller.
+ *
+ * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are
+ * live, and x9-x18 are safe to clobber.
+ *
+ * We save the callsite's context into a pt_regs before invoking any ftrace
+ * callbacks. So that we can get a sensible backtrace, we create a stack record
+ * for the callsite and the ftrace entry assembly. This is not sufficient for
+ * reliable stacktrace: until we create the callsite stack record, its caller
+ * is missing from the LR and existing chain of frame records.
+ */
+	.macro	ftrace_regs_entry, allregs=0
+	/* Make room for pt_regs, plus a callee frame */
+	sub	sp, sp, #(S_FRAME_SIZE + 16)
+
+	/* Save function arguments (and x9 for simplicity) */
+	stp	x0, x1, [sp, #S_X0]
+	stp	x2, x3, [sp, #S_X2]
+	stp	x4, x5, [sp, #S_X4]
+	stp	x6, x7, [sp, #S_X6]
+	stp	x8, x9, [sp, #S_X8]
+
+	/* Optionally save the callee-saved registers, always save the FP */
+	.if \allregs == 1
+	stp	x10, x11, [sp, #S_X10]
+	stp	x12, x13, [sp, #S_X12]
+	stp	x14, x15, [sp, #S_X14]
+	stp	x16, x17, [sp, #S_X16]
+	stp	x18, x19, [sp, #S_X18]
+	stp	x20, x21, [sp, #S_X20]
+	stp	x22, x23, [sp, #S_X22]
+	stp	x24, x25, [sp, #S_X24]
+	stp	x26, x27, [sp, #S_X26]
+	stp	x28, x29, [sp, #S_X28]
+	.else
+	str	x29, [sp, #S_FP]
+	.endif
+
+	/* Save the callsite's SP and LR */
+	add	x10, sp, #(S_FRAME_SIZE + 16)
+	stp	x9, x10, [sp, #S_LR]
+
+	/* Save the PC after the ftrace callsite */
+	str	x30, [sp, #S_PC]
+
+	/* Create a frame record for the callsite above pt_regs */
+	stp	x29, x9, [sp, #S_FRAME_SIZE]
+	add	x29, sp, #S_FRAME_SIZE
+
+	/* Create our frame record within pt_regs. */
+	stp	x29, x30, [sp, #S_STACKFRAME]
+	add	x29, sp, #S_STACKFRAME
+	.endm
+
+ENTRY(ftrace_regs_caller)
+	ftrace_regs_entry	1
+	b	ftrace_common
+ENDPROC(ftrace_regs_caller)
+
+ENTRY(ftrace_caller)
+	ftrace_regs_entry	0
+	b	ftrace_common
+ENDPROC(ftrace_caller)
+
+ENTRY(ftrace_common)
+	sub	x0, x30, #AARCH64_INSN_SIZE	// ip (callsite's BL insn)
+	mov	x1, x9				// parent_ip (callsite's LR)
+	ldr_l	x2, function_trace_op		// op
+	mov	x3, sp				// regs
+
+GLOBAL(ftrace_call)
+	bl	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+GLOBAL(ftrace_graph_call)		// ftrace_graph_caller();
+	nop				// If enabled, this will be replaced
+					// "b ftrace_graph_caller"
+#endif
+
+/*
+ * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
+ * x19-x29 per the AAPCS, and we created frame records upon entry, so we need
+ * to restore x0-x8, x29, and x30.
+ */
+ftrace_common_return:
+	/* Restore function arguments */
+	ldp	x0, x1, [sp]
+	ldp	x2, x3, [sp, #S_X2]
+	ldp	x4, x5, [sp, #S_X4]
+	ldp	x6, x7, [sp, #S_X6]
+	ldr	x8, [sp, #S_X8]
+
+	/* Restore the callsite's FP, LR, PC */
+	ldr	x29, [sp, #S_FP]
+	ldr	x30, [sp, #S_LR]
+	ldr	x9, [sp, #S_PC]
+
+	/* Restore the callsite's SP */
+	add	sp, sp, #S_FRAME_SIZE + 16
+
+	ret	x9
+ENDPROC(ftrace_common)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+	ldr	x0, [sp, #S_PC]
+	sub	x0, x0, #AARCH64_INSN_SIZE	// ip (callsite's BL insn)
+	add	x1, sp, #S_LR			// parent_ip (callsite's LR)
+	ldr	x2, [sp, #S_FRAME_SIZE]		// parent fp (callsite's FP)
+	bl	prepare_ftrace_return
+	b	ftrace_common_return
+ENDPROC(ftrace_graph_caller)
+#else
+#endif
+
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
 /*
  * Gcc with -pg will put the following code in the beginning of each function:
  * 	mov	x0, x30
@@ -160,11 +287,6 @@ GLOBAL(ftrace_graph_call)		// ftrace_graph_caller();
 
 	mcount_exit
 ENDPROC(ftrace_caller)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-ENTRY(ftrace_stub)
-	ret
-ENDPROC(ftrace_stub)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 /*
@@ -184,7 +306,15 @@ ENTRY(ftrace_graph_caller)
 
 	mcount_exit
 ENDPROC(ftrace_graph_caller)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+ENTRY(ftrace_stub)
+	ret
+ENDPROC(ftrace_stub)
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 /*
  * void return_to_handler(void)
  *

arch/arm64/kernel/ftrace.c (+70 -14)

@@ -62,6 +62,19 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 	return ftrace_modify_code(pc, 0, new, false);
 }
 
+#ifdef CONFIG_ARM64_MODULE_PLTS
+static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
+{
+	struct plt_entry *plt = mod->arch.ftrace_trampolines;
+
+	if (addr == FTRACE_ADDR)
+		return &plt[FTRACE_PLT_IDX];
+	if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS))
+		return &plt[FTRACE_REGS_PLT_IDX];
+	return NULL;
+}
+#endif
+
 /*
  * Turn on the call to ftrace_caller() in instrumented function
  */
@@ -74,19 +87,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	if (offset < -SZ_128M || offset >= SZ_128M) {
 #ifdef CONFIG_ARM64_MODULE_PLTS
 		struct module *mod;
-
-		/*
-		 * There is only one ftrace trampoline per module. For now,
-		 * this is not a problem since on arm64, all dynamic ftrace
-		 * invocations are routed via ftrace_caller(). This will need
-		 * to be revisited if support for multiple ftrace entry points
-		 * is added in the future, but for now, the pr_err() below
-		 * deals with a theoretical issue only.
-		 */
-		if (addr != FTRACE_ADDR) {
-			pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
-			return -EINVAL;
-		}
+		struct plt_entry *plt;
 
 		/*
 		 * On kernels that support module PLTs, the offset between the
@@ -105,7 +106,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		if (WARN_ON(!mod))
 			return -EINVAL;
 
-		addr = (unsigned long)mod->arch.ftrace_trampoline;
+		plt = get_ftrace_plt(mod, addr);
+		if (!plt) {
+			pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
+			return -EINVAL;
+		}
+
+		addr = (unsigned long)plt;
 #else /* CONFIG_ARM64_MODULE_PLTS */
 		return -EINVAL;
 #endif /* CONFIG_ARM64_MODULE_PLTS */
@@ -117,6 +124,55 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	return ftrace_modify_code(pc, old, new, true);
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
+	unsigned long pc = rec->ip;
+	u32 old, new;
+
+	old = aarch64_insn_gen_branch_imm(pc, old_addr,
+					  AARCH64_INSN_BRANCH_LINK);
+	new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+/*
+ * The compiler has inserted two NOPs before the regular function prologue.
+ * All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live,
+ * and x9-x18 are free for our use.
+ *
+ * At runtime we want to be able to swing a single NOP <-> BL to enable or
+ * disable the ftrace call. The BL requires us to save the original LR value,
+ * so here we insert a <MOV X9, LR> over the first NOP so the instructions
+ * before the regular prologue are:
+ *
+ * | Compiled | Disabled   | Enabled    |
+ * +----------+------------+------------+
+ * | NOP      | MOV X9, LR | MOV X9, LR |
+ * | NOP      | NOP        | BL <entry> |
+ *
+ * The LR value will be recovered by ftrace_regs_entry, and restored into LR
+ * before returning to the regular function prologue. When a function is not
+ * being traced, the MOV is not harmful given x9 is not live per the AAPCS.
+ *
+ * Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of
+ * the BL.
+ */
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+	unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
+	u32 old, new;
+
+	old = aarch64_insn_gen_nop();
+	new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
+					AARCH64_INSN_REG_LR,
+					AARCH64_INSN_VARIANT_64BIT);
+	return ftrace_modify_code(pc, old, new, true);
+}
+#endif
+
 /*
  * Turn off the call to ftrace_caller() in instrumented function
  */

arch/arm64/kernel/module-plts.c (+2 -1)

@@ -4,6 +4,7 @@
  */
 
 #include <linux/elf.h>
+#include <linux/ftrace.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sort.h>
@@ -330,7 +331,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		tramp->sh_type = SHT_NOBITS;
 		tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
 		tramp->sh_addralign = __alignof__(struct plt_entry);
-		tramp->sh_size = sizeof(struct plt_entry);
+		tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
 	}
 
 	return 0;