Skip to content

Commit 2bfc6cd

Browse files
AlexGhiti authored and palmer-dabbelt committed
riscv: Move kernel mapping outside of linear mapping
This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at the PAGE_OFFSET address, so we could use the linear mapping for the kernel mapping. But the relocated kernel base address will be different from PAGE_OFFSET, and since in the linear mapping two different virtual addresses cannot point to the same physical address, the kernel mapping needs to lie outside the linear mapping so that we don't have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space. BPF is now always after the kernel, and modules use the 2GB memory range right before the kernel, so the BPF and modules regions do not overlap. A KASLR implementation will simply have to move the kernel within the last 2GB range and take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both sv39 and sv48 kernels will be exactly the same without needing to be relocated at runtime.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
[Palmer: Squash the STRICT_RWX fix, and a !MMU fix]
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
1 parent 8a07ac3 commit 2bfc6cd

File tree

12 files changed

+182
-36
lines changed

12 files changed

+182
-36
lines changed

arch/riscv/boot/loader.lds.S

+2-1
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,14 @@
11
/* SPDX-License-Identifier: GPL-2.0 */
22

33
#include <asm/page.h>
4+
#include <asm/pgtable.h>
45

56
OUTPUT_ARCH(riscv)
67
ENTRY(_start)
78

89
SECTIONS
910
{
10-
. = PAGE_OFFSET;
11+
. = KERNEL_LINK_ADDR;
1112

1213
.payload : {
1314
*(.payload)

arch/riscv/include/asm/page.h

+24-2
Original file line number | Diff line number | Diff line change
@@ -90,15 +90,37 @@ typedef struct page *pgtable_t;
9090

9191
#ifdef CONFIG_MMU
9292
extern unsigned long va_pa_offset;
93+
#ifdef CONFIG_64BIT
94+
extern unsigned long va_kernel_pa_offset;
95+
#endif
9396
extern unsigned long pfn_base;
9497
#define ARCH_PFN_OFFSET (pfn_base)
9598
#else
9699
#define va_pa_offset 0
100+
#ifdef CONFIG_64BIT
101+
#define va_kernel_pa_offset 0
102+
#endif
97103
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
98104
#endif /* CONFIG_MMU */
99105

100-
#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
101-
#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
106+
#ifdef CONFIG_64BIT
107+
extern unsigned long kernel_virt_addr;
108+
109+
#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_pa_offset))
110+
#define kernel_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_kernel_pa_offset))
111+
#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x)
112+
113+
#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
114+
#define kernel_mapping_va_to_pa(x) ((unsigned long)(x) - va_kernel_pa_offset)
115+
#define __va_to_pa_nodebug(x) ({ \
116+
unsigned long _x = x; \
117+
(_x < kernel_virt_addr) ? \
118+
linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x); \
119+
})
120+
#else
121+
#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
122+
#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
123+
#endif
102124

103125
#ifdef CONFIG_DEBUG_VIRTUAL
104126
extern phys_addr_t __virt_to_phys(unsigned long x);

arch/riscv/include/asm/pgtable.h

+31-8
Original file line number | Diff line number | Diff line change
@@ -11,23 +11,38 @@
1111

1212
#include <asm/pgtable-bits.h>
1313

14-
#ifndef __ASSEMBLY__
14+
#ifndef CONFIG_MMU
15+
#define KERNEL_LINK_ADDR PAGE_OFFSET
16+
#else
1517

16-
/* Page Upper Directory not used in RISC-V */
17-
#include <asm-generic/pgtable-nopud.h>
18-
#include <asm/page.h>
19-
#include <asm/tlbflush.h>
20-
#include <linux/mm_types.h>
18+
#define ADDRESS_SPACE_END (UL(-1))
2119

22-
#ifdef CONFIG_MMU
20+
#ifdef CONFIG_64BIT
21+
/* Leave 2GB for kernel and BPF at the end of the address space */
22+
#define KERNEL_LINK_ADDR (ADDRESS_SPACE_END - SZ_2G + 1)
23+
#else
24+
#define KERNEL_LINK_ADDR PAGE_OFFSET
25+
#endif
2326

2427
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
2528
#define VMALLOC_END (PAGE_OFFSET - 1)
2629
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
2730

2831
#define BPF_JIT_REGION_SIZE (SZ_128M)
32+
#ifdef CONFIG_64BIT
33+
/* KASLR should leave at least 128MB for BPF after the kernel */
34+
#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end)
35+
#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
36+
#else
2937
#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
3038
#define BPF_JIT_REGION_END (VMALLOC_END)
39+
#endif
40+
41+
/* Modules always live before the kernel */
42+
#ifdef CONFIG_64BIT
43+
#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
44+
#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
45+
#endif
3146

3247
/*
3348
* Roughly size the vmemmap space to be large enough to fit enough
@@ -57,9 +72,16 @@
5772
#define FIXADDR_SIZE PGDIR_SIZE
5873
#endif
5974
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
60-
6175
#endif
6276

77+
#ifndef __ASSEMBLY__
78+
79+
/* Page Upper Directory not used in RISC-V */
80+
#include <asm-generic/pgtable-nopud.h>
81+
#include <asm/page.h>
82+
#include <asm/tlbflush.h>
83+
#include <linux/mm_types.h>
84+
6385
#ifdef CONFIG_64BIT
6486
#include <asm/pgtable-64.h>
6587
#else
@@ -484,6 +506,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
484506

485507
#define kern_addr_valid(addr) (1) /* FIXME */
486508

509+
extern char _start[];
487510
extern void *dtb_early_va;
488511
extern uintptr_t dtb_early_pa;
489512
void setup_bootmem(void);

arch/riscv/include/asm/set_memory.h

+1
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ int set_memory_x(unsigned long addr, int numpages);
1717
int set_memory_nx(unsigned long addr, int numpages);
1818
int set_memory_rw_nx(unsigned long addr, int numpages);
1919
void protect_kernel_text_data(void);
20+
void protect_kernel_linear_mapping_text_rodata(void);
2021
#else
2122
static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
2223
static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }

arch/riscv/kernel/head.S

+2-1
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,8 @@ pe_head_start:
6969
#ifdef CONFIG_MMU
7070
relocate:
7171
/* Relocate return address */
72-
li a1, PAGE_OFFSET
72+
la a1, kernel_virt_addr
73+
REG_L a1, 0(a1)
7374
la a2, _start
7475
sub a1, a1, a2
7576
add ra, ra, a1

arch/riscv/kernel/module.c

+2-4
Original file line number | Diff line number | Diff line change
@@ -408,12 +408,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
408408
}
409409

410410
#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
411-
#define VMALLOC_MODULE_START \
412-
max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START)
413411
void *module_alloc(unsigned long size)
414412
{
415-
return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START,
416-
VMALLOC_END, GFP_KERNEL,
413+
return __vmalloc_node_range(size, 1, MODULES_VADDR,
414+
MODULES_END, GFP_KERNEL,
417415
PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
418416
__builtin_return_address(0));
419417
}

arch/riscv/kernel/setup.c

+6-1
Original file line number | Diff line number | Diff line change
@@ -263,8 +263,13 @@ void __init setup_arch(char **cmdline_p)
263263

264264
sbi_init();
265265

266-
if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
266+
if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
267267
protect_kernel_text_data();
268+
#if defined(CONFIG_64BIT) && defined(CONFIG_MMU)
269+
protect_kernel_linear_mapping_text_rodata();
270+
#endif
271+
}
272+
268273
#ifdef CONFIG_SWIOTLB
269274
swiotlb_init(1);
270275
#endif

arch/riscv/kernel/vmlinux.lds.S

+2-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44
* Copyright (C) 2017 SiFive
55
*/
66

7-
#define LOAD_OFFSET PAGE_OFFSET
7+
#include <asm/pgtable.h>
8+
#define LOAD_OFFSET KERNEL_LINK_ADDR
89
#include <asm/vmlinux.lds.h>
910
#include <asm/page.h>
1011
#include <asm/cache.h>

arch/riscv/mm/fault.c

+13
Original file line number | Diff line number | Diff line change
@@ -231,6 +231,19 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
231231
return;
232232
}
233233

234+
#ifdef CONFIG_64BIT
235+
/*
236+
* Modules in 64bit kernels lie in their own virtual region which is not
237+
* in the vmalloc region, but dealing with page faults in this region
238+
* or the vmalloc region amounts to doing the same thing: checking that
239+
* the mapping exists in init_mm.pgd and updating user page table, so
240+
* just use vmalloc_fault.
241+
*/
242+
if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
243+
vmalloc_fault(regs, code, addr);
244+
return;
245+
}
246+
#endif
234247
/* Enable interrupts if they were enabled in the parent context. */
235248
if (likely(regs->status & SR_PIE))
236249
local_irq_enable();

0 commit comments

Comments
 (0)