Skip to content

Commit eec3796

Browse files
rnavmpe
authored and committed
powerpc64/ftrace: Move ftrace sequence out of line
Function profile sequence on powerpc includes two instructions at the beginning of each function: mflr r0 bl ftrace_caller The call to ftrace_caller() gets nop'ed out during kernel boot and is patched in when ftrace is enabled. Given the sequence, we cannot return from ftrace_caller with 'blr' as we need to keep LR and r0 intact. This results in link stack (return address predictor) imbalance when ftrace is enabled. To address that, we would like to use a three instruction sequence: mflr r0 bl ftrace_caller mtlr r0 Furthermore, to support DYNAMIC_FTRACE_WITH_CALL_OPS, we need to reserve two instruction slots before the function. This results in a total of five instruction slots to be reserved for ftrace use on each function that is traced. Move the function profile sequence out-of-line to minimize its impact. To do this, we reserve a single nop at function entry using -fpatchable-function-entry=1 and add a pass on vmlinux.o to determine the total number of functions that can be traced. This is then used to generate a .S file reserving the appropriate amount of space for use as ftrace stubs, which is built and linked into vmlinux. On bootup, the stub space is split into separate stubs per function and populated with the proper instruction sequence. A pointer to the associated stub is maintained in dyn_arch_ftrace. For modules, space for ftrace stubs is reserved from the generic module stub space. This is restricted to and enabled by default only on 64-bit powerpc, though there are some changes to accommodate 32-bit powerpc. This is done so that 32-bit powerpc could choose to opt into this based on further tests and benchmarks. As an example, after this patch, kernel functions will have a single nop at function entry: <kernel_clone>: addis r2,r12,467 addi r2,r2,-16028 nop mfocrf r11,8 ... 
When ftrace is enabled, the nop is converted to an unconditional branch to the stub associated with that function: <kernel_clone>: addis r2,r12,467 addi r2,r2,-16028 b ftrace_ool_stub_text_end+0x11b28 mfocrf r11,8 ... The associated stub: <ftrace_ool_stub_text_end+0x11b28>: mflr r0 bl ftrace_caller mtlr r0 b kernel_clone+0xc ... This change showed an improvement of ~10% in null_syscall benchmark on a Power 10 system with ftrace enabled. Signed-off-by: Naveen N Rao <naveen@kernel.org> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://patch.msgid.link/20241030070850.1361304-13-hbathini@linux.ibm.com
1 parent 1198c9c commit eec3796

File tree

12 files changed

+380
-38
lines changed

12 files changed

+380
-38
lines changed

arch/powerpc/Kbuild

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@ obj-$(CONFIG_KEXEC_CORE) += kexec/
1919
obj-$(CONFIG_KEXEC_FILE) += purgatory/
2020

2121
# for cleaning
22-
subdir- += boot
22+
subdir- += boot tools

arch/powerpc/Kconfig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,10 @@ config ARCH_USING_PATCHABLE_FUNCTION_ENTRY
569569
def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN
570570
def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN
571571

572+
config PPC_FTRACE_OUT_OF_LINE
573+
def_bool PPC64 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY
574+
select ARCH_WANTS_PRE_LINK_VMLINUX
575+
572576
config HOTPLUG_CPU
573577
bool "Support for enabling/disabling CPUs"
574578
depends on SMP && (PPC_PSERIES || \

arch/powerpc/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,11 @@ CC_FLAGS_NO_FPU := $(call cc-option,-msoft-float)
148148
ifdef CONFIG_FUNCTION_TRACER
149149
ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
150150
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
151+
ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
152+
CC_FLAGS_FTRACE := -fpatchable-function-entry=1
153+
else
151154
CC_FLAGS_FTRACE := -fpatchable-function-entry=2
155+
endif
152156
else
153157
CC_FLAGS_FTRACE := -pg
154158
ifdef CONFIG_MPROFILE_KERNEL

arch/powerpc/include/asm/ftrace.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
2424
struct module;
2525
struct dyn_ftrace;
2626
/*
 * Arch-specific ftrace data. With CONFIG_PPC_FTRACE_OUT_OF_LINE it carries
 * the location of the out-of-line stub that belongs to the traced function;
 * otherwise it is empty.
 */
struct dyn_arch_ftrace {
27+
#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
28+
/* pointer to the associated out-of-line stub (stored as an address) */
29+
unsigned long ool_stub;
30+
#endif
2731
};
2832

2933
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -130,6 +134,13 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; }
130134

131135
#ifdef CONFIG_FUNCTION_TRACER
132136
extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
137+
#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
138+
/*
 * One out-of-line ftrace stub: four 32-bit instruction slots. Per the commit
 * description the populated sequence is
 *   mflr r0; bl ftrace_caller; mtlr r0; b <back into the traced function>
 */
struct ftrace_ool_stub {
139+
u32 insn[4];
140+
};
141+
extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_inittext[];
142+
extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_inittext_count;
143+
#endif
133144
void ftrace_free_init_tramp(void);
134145
unsigned long ftrace_call_adjust(unsigned long addr);
135146
#else

arch/powerpc/include/asm/module.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ struct mod_arch_specific {
4747
#ifdef CONFIG_DYNAMIC_FTRACE
4848
unsigned long tramp;
4949
unsigned long tramp_regs;
50+
#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
51+
struct ftrace_ool_stub *ool_stubs;
52+
unsigned int ool_stub_count;
53+
unsigned int ool_stub_index;
54+
#endif
5055
#endif
5156
};
5257

arch/powerpc/kernel/asm-offsets.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,5 +675,9 @@ int main(void)
675675
DEFINE(BPT_SIZE, BPT_SIZE);
676676
#endif
677677

678+
#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
679+
DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub));
680+
#endif
681+
678682
return 0;
679683
}

arch/powerpc/kernel/module_64.c

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,9 @@ static int relacmp(const void *_x, const void *_y)
205205

206206
/* Get size of potential trampolines required. */
207207
static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
208-
const Elf64_Shdr *sechdrs)
208+
const Elf64_Shdr *sechdrs,
209+
char *secstrings,
210+
struct module *me)
209211
{
210212
/* One extra reloc so it's always 0-addr terminated */
211213
unsigned long relocs = 1;
@@ -244,6 +246,24 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
244246
/* stubs for ftrace_caller and ftrace_regs_caller */
245247
relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS);
246248

249+
#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
250+
/* stubs for the function tracer */
251+
for (i = 1; i < hdr->e_shnum; i++) {
252+
if (!strcmp(secstrings + sechdrs[i].sh_name, "__patchable_function_entries")) {
253+
me->arch.ool_stub_count = sechdrs[i].sh_size / sizeof(unsigned long);
254+
me->arch.ool_stub_index = 0;
255+
relocs += roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub),
256+
sizeof(struct ppc64_stub_entry)) /
257+
sizeof(struct ppc64_stub_entry);
258+
break;
259+
}
260+
}
261+
if (i == hdr->e_shnum) {
262+
pr_err("%s: doesn't contain __patchable_function_entries.\n", me->name);
263+
return -ENOEXEC;
264+
}
265+
#endif
266+
247267
pr_debug("Looks like a total of %lu stubs, max\n", relocs);
248268
return relocs * sizeof(struct ppc64_stub_entry);
249269
}
@@ -454,7 +474,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
454474
#endif
455475

456476
/* Override the stubs size */
457-
sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
477+
sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs, secstrings, me);
458478

459479
return 0;
460480
}
@@ -1079,6 +1099,37 @@ int module_trampoline_target(struct module *mod, unsigned long addr,
10791099
return 0;
10801100
}
10811101

1102+
/*
 * Claim room in the module's stub section for its out-of-line ftrace stubs.
 *
 * The stub section is treated as an array of struct ppc64_stub_entry. The
 * first entry for which stub_func_addr() reports no function address marks
 * the start of the unused space. Enough consecutive entries to hold
 * me->arch.ool_stub_count ftrace_ool_stub slots are claimed from there and
 * the start of that range is recorded in me->arch.ool_stubs.
 *
 * @sechdrs: section headers of the module being loaded
 * @addr:    not used by this function
 * @me:      module being set up
 *
 * Returns 0 on success (and always when CONFIG_PPC_FTRACE_OUT_OF_LINE is
 * disabled), -1 if no free entry exists, if the free space is too small, or
 * if patching an entry fails.
 */
static int setup_ftrace_ool_stubs(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me)
{
#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
	unsigned int i, total_stubs, num_stubs;
	struct ppc64_stub_entry *stub;

	total_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stub);
	/* Number of ppc64_stub_entry slots needed to hold all ool stubs */
	num_stubs = roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub),
			    sizeof(struct ppc64_stub_entry)) / sizeof(struct ppc64_stub_entry);

	/*
	 * Find the next available entry. The bound is checked before the
	 * entry is read so the scan never touches memory past the section.
	 */
	stub = (void *)sechdrs[me->arch.stubs_section].sh_addr;
	for (i = 0; i < total_stubs; i++)
		if (!stub_func_addr(stub[i].funcdata))
			break;

	if (WARN_ON(i >= total_stubs))
		return -1;

	if (WARN_ON(i + num_stubs > total_stubs))
		return -1;

	stub += i;
	me->arch.ool_stubs = (struct ftrace_ool_stub *)stub;

	/*
	 * Reserve stubs: mark every claimed entry, not only the first one.
	 * Indexing with i is essential here; writing through stub->funcdata
	 * would patch the same entry num_stubs times and leave the rest of
	 * the range looking unused.
	 */
	for (i = 0; i < num_stubs; i++)
		if (patch_u32((void *)&stub[i].funcdata, PPC_RAW_NOP()))
			return -1;
#endif

	return 0;
}
1132+
10821133
int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
10831134
{
10841135
mod->arch.tramp = stub_for_addr(sechdrs,
@@ -1097,6 +1148,9 @@ int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
10971148
if (!mod->arch.tramp)
10981149
return -ENOENT;
10991150

1151+
if (setup_ftrace_ool_stubs(sechdrs, mod->arch.tramp, mod))
1152+
return -ENOENT;
1153+
11001154
return 0;
11011155
}
11021156
#endif

0 commit comments

Comments
 (0)