Commit 2ba3e69

joergroedel authored and torvalds committed
mm/vmalloc: track which page-table levels were modified
Track the page-table levels at which entries were modified by vmap/vunmap. After the page-table has been modified, use that information to decide whether the new arch_sync_kernel_mappings() needs to be called.

[akpm@linux-foundation.org: map_kernel_range_noflush() needs the arch_sync_kernel_mappings() call]

Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Link: http://lkml.kernel.org/r/20200515140023.25469-3-joro@8bytes.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent d862613 commit 2ba3e69
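In outline: every vmap/vunmap page-table walker now takes a pgtbl_mod_mask pointer and ORs in a PGTBL_*_MODIFIED bit for each level it touches, and the top-level entry points compare the accumulated mask against ARCH_PAGE_TABLE_SYNC_MASK. A minimal sketch of the pattern, condensed from the unmap path in the diff below (not a standalone compilable unit):

	/* Walk the kernel page-table, recording which levels change. */
	pgtbl_mod_mask mask = 0;

	do {
		next = pgd_addr_end(addr, end);
		if (pgd_bad(*pgd))
			mask |= PGTBL_PGD_MODIFIED;	/* this PGD entry will change */
		if (pgd_none_or_clear_bad(pgd))
			continue;
		vunmap_p4d_range(pgd, addr, next, &mask); /* lower levels OR in their bits */
	} while (pgd++, addr = next, addr != end);

	/* ARCH_PAGE_TABLE_SYNC_MASK is 0 unless the arch overrides it, so on
	 * most architectures the compiler can drop this call entirely. */
	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
		arch_sync_kernel_mappings(start, end);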

2 files changed, 85 insertions(+), 26 deletions(-)

include/linux/vmalloc.h (+16)

@@ -133,6 +133,22 @@ extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 void vmalloc_sync_mappings(void);
 void vmalloc_sync_unmappings(void);
 
+/*
+ * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
+ * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
+ * needs to be called.
+ */
+#ifndef ARCH_PAGE_TABLE_SYNC_MASK
+#define ARCH_PAGE_TABLE_SYNC_MASK 0
+#endif
+
+/*
+ * There is no default implementation for arch_sync_kernel_mappings(). It is
+ * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
+ * is 0.
+ */
+void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
+
 /*
  *	Lowlevel-APIs (not for driver use!)
  */
mm/vmalloc.c (+69 −26)

@@ -69,7 +69,8 @@ static void free_work(struct work_struct *w)
 
 /*** Page table manipulation functions ***/
 
-static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
+static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+			     pgtbl_mod_mask *mask)
 {
 	pte_t *pte;
 
@@ -78,59 +79,81 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
 		pte_t ptent = ptep_get_and_clear(&init_mm, addr, pte);
 		WARN_ON(!pte_none(ptent) && !pte_present(ptent));
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+	*mask |= PGTBL_PTE_MODIFIED;
 }
 
-static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end)
+static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
+			     pgtbl_mod_mask *mask)
 {
 	pmd_t *pmd;
 	unsigned long next;
+	int cleared;
 
 	pmd = pmd_offset(pud, addr);
 	do {
 		next = pmd_addr_end(addr, end);
-		if (pmd_clear_huge(pmd))
+
+		cleared = pmd_clear_huge(pmd);
+		if (cleared || pmd_bad(*pmd))
+			*mask |= PGTBL_PMD_MODIFIED;
+
+		if (cleared)
 			continue;
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		vunmap_pte_range(pmd, addr, next);
+		vunmap_pte_range(pmd, addr, next, mask);
 	} while (pmd++, addr = next, addr != end);
 }
 
-static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end)
+static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
+			     pgtbl_mod_mask *mask)
 {
 	pud_t *pud;
 	unsigned long next;
+	int cleared;
 
 	pud = pud_offset(p4d, addr);
 	do {
 		next = pud_addr_end(addr, end);
-		if (pud_clear_huge(pud))
+
+		cleared = pud_clear_huge(pud);
+		if (cleared || pud_bad(*pud))
+			*mask |= PGTBL_PUD_MODIFIED;
+
+		if (cleared)
 			continue;
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		vunmap_pmd_range(pud, addr, next);
+		vunmap_pmd_range(pud, addr, next, mask);
 	} while (pud++, addr = next, addr != end);
 }
 
-static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
+static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+			     pgtbl_mod_mask *mask)
 {
 	p4d_t *p4d;
 	unsigned long next;
+	int cleared;
 
 	p4d = p4d_offset(pgd, addr);
 	do {
 		next = p4d_addr_end(addr, end);
-		if (p4d_clear_huge(p4d))
+
+		cleared = p4d_clear_huge(p4d);
+		if (cleared || p4d_bad(*p4d))
+			*mask |= PGTBL_P4D_MODIFIED;
+
+		if (cleared)
 			continue;
 		if (p4d_none_or_clear_bad(p4d))
 			continue;
-		vunmap_pud_range(p4d, addr, next);
+		vunmap_pud_range(p4d, addr, next, mask);
 	} while (p4d++, addr = next, addr != end);
 }
 
 /**
  * unmap_kernel_range_noflush - unmap kernel VM area
- * @addr: start of the VM area to unmap
+ * @start: start of the VM area to unmap
  * @size: size of the VM area to unmap
  *
  * Unmap PFN_UP(@size) pages at @addr.  The VM area @addr and @size specify
@@ -141,24 +164,33 @@ static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end)
  * for calling flush_cache_vunmap() on to-be-mapped areas before calling this
  * function and flush_tlb_kernel_range() after.
  */
-void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
+void unmap_kernel_range_noflush(unsigned long start, unsigned long size)
 {
-	unsigned long end = addr + size;
+	unsigned long end = start + size;
 	unsigned long next;
 	pgd_t *pgd;
+	unsigned long addr = start;
+	pgtbl_mod_mask mask = 0;
 
 	BUG_ON(addr >= end);
+	start = addr;
 	pgd = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
+		if (pgd_bad(*pgd))
+			mask |= PGTBL_PGD_MODIFIED;
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		vunmap_p4d_range(pgd, addr, next);
+		vunmap_p4d_range(pgd, addr, next, &mask);
 	} while (pgd++, addr = next, addr != end);
+
+	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+		arch_sync_kernel_mappings(start, end);
 }
 
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+		pgtbl_mod_mask *mask)
 {
 	pte_t *pte;
 
@@ -167,7 +199,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
 	 * callers keep track of where we're up to.
 	 */
 
-	pte = pte_alloc_kernel(pmd, addr);
+	pte = pte_alloc_kernel_track(pmd, addr, mask);
 	if (!pte)
 		return -ENOMEM;
 	do {
@@ -180,55 +212,59 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
 		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
 		(*nr)++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+	*mask |= PGTBL_PTE_MODIFIED;
 	return 0;
 }
 
 static int vmap_pmd_range(pud_t *pud, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+		pgtbl_mod_mask *mask)
 {
 	pmd_t *pmd;
 	unsigned long next;
 
-	pmd = pmd_alloc(&init_mm, pud, addr);
+	pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
 	if (!pmd)
 		return -ENOMEM;
 	do {
 		next = pmd_addr_end(addr, end);
-		if (vmap_pte_range(pmd, addr, next, prot, pages, nr))
+		if (vmap_pte_range(pmd, addr, next, prot, pages, nr, mask))
 			return -ENOMEM;
 	} while (pmd++, addr = next, addr != end);
 	return 0;
 }
 
 static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+		pgtbl_mod_mask *mask)
 {
 	pud_t *pud;
 	unsigned long next;
 
-	pud = pud_alloc(&init_mm, p4d, addr);
+	pud = pud_alloc_track(&init_mm, p4d, addr, mask);
 	if (!pud)
 		return -ENOMEM;
 	do {
 		next = pud_addr_end(addr, end);
-		if (vmap_pmd_range(pud, addr, next, prot, pages, nr))
+		if (vmap_pmd_range(pud, addr, next, prot, pages, nr, mask))
 			return -ENOMEM;
 	} while (pud++, addr = next, addr != end);
 	return 0;
 }
 
 static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
-		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
+		unsigned long end, pgprot_t prot, struct page **pages, int *nr,
+		pgtbl_mod_mask *mask)
 {
 	p4d_t *p4d;
 	unsigned long next;
 
-	p4d = p4d_alloc(&init_mm, pgd, addr);
+	p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
 	if (!p4d)
 		return -ENOMEM;
 	do {
 		next = p4d_addr_end(addr, end);
-		if (vmap_pud_range(p4d, addr, next, prot, pages, nr))
+		if (vmap_pud_range(p4d, addr, next, prot, pages, nr, mask))
 			return -ENOMEM;
 	} while (p4d++, addr = next, addr != end);
 	return 0;
@@ -255,21 +291,28 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
 int map_kernel_range_noflush(unsigned long addr, unsigned long size,
 			     pgprot_t prot, struct page **pages)
 {
+	unsigned long start = addr;
 	unsigned long end = addr + size;
 	unsigned long next;
 	pgd_t *pgd;
 	int err = 0;
 	int nr = 0;
+	pgtbl_mod_mask mask = 0;
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset_k(addr);
 	do {
 		next = pgd_addr_end(addr, end);
-		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr);
+		if (pgd_bad(*pgd))
+			mask |= PGTBL_PGD_MODIFIED;
+		err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
 		if (err)
 			return err;
 	} while (pgd++, addr = next, addr != end);
 
+	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+		arch_sync_kernel_mappings(start, end);
+
 	return 0;
 }
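A note on the allocation side: the pte_alloc_kernel_track()/pmd_alloc_track()/pud_alloc_track()/p4d_alloc_track() helpers used in the vmap path above come from the parent commit d862613. Roughly, each wraps the plain allocator and records in the caller's mask that the entry one level up was populated. A paraphrased sketch (the exact macro lives in include/linux/mm.h and may differ in detail):

	/* Paraphrased sketch: allocating a missing PMD page populates a PUD
	 * entry, so the tracking variant flags PGTBL_PUD_MODIFIED for the
	 * caller before handing back the PMD, exactly like pmd_alloc(). */
	#define pmd_alloc_track(mm, pud, address, mask)			\
		((unlikely(pud_none(*(pud))) &&				\
		  (__pmd_alloc(mm, pud, address) ||			\
		   ({ *(mask) |= PGTBL_PUD_MODIFIED; 0; })))		\
			? NULL : pmd_offset(pud, address))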