Skip to content

Commit ba76149

Browse files
aarcange authored and torvalds committed
thp: khugepaged
Add khugepaged to relocate fragmented pages into hugepages if new hugepages become available. (This is independent of the defrag logic that will have to make new hugepages available.) The fundamental reason why khugepaged is unavoidable is that some memory can be fragmented and not everything can be relocated. So when a virtual machine quits and releases gigabytes of hugepages, we want to use those freely available hugepages to create huge-pmd in the other virtual machines that may be running on fragmented memory, to maximize the CPU efficiency at all times. The scan is slow; it takes nearly zero cpu time, except when it copies data (in which case it means we definitely want to pay for that cpu time), so it seems a good tradeoff. In addition to the hugepages being released by other processes releasing memory, we have the strong suspicion that the performance impact of potentially defragmenting hugepages during or before each page fault could lead to more performance inconsistency than allocating small pages at first and having them collapsed into large pages later... if they prove themselves to be long-lived mappings (the khugepaged scan is slow, so short-lived mappings have a low probability of running into khugepaged compared to long-lived mappings). Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Acked-by: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 7913417 commit ba76149

File tree

5 files changed

+1136
-10
lines changed

5 files changed

+1136
-10
lines changed

include/linux/huge_mm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ enum transparent_hugepage_flag {
2525
TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
2626
TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
2727
TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
28+
TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
2829
#ifdef CONFIG_DEBUG_VM
2930
TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
3031
#endif

include/linux/khugepaged.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
#ifndef _LINUX_KHUGEPAGED_H
#define _LINUX_KHUGEPAGED_H

#include <linux/sched.h> /* MMF_VM_HUGEPAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Register/unregister an mm with the khugepaged daemon's scan list. */
extern int __khugepaged_enter(struct mm_struct *mm);
extern void __khugepaged_exit(struct mm_struct *mm);
/* Called after vma merge so a newly eligible vma's mm gets registered. */
extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma);

/*
 * khugepaged runs when THP is enabled either system-wide ("always",
 * TRANSPARENT_HUGEPAGE_FLAG) or per-vma via madvise(MADV_HUGEPAGE)
 * (TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG).
 */
#define khugepaged_enabled()					       \
	(transparent_hugepage_flags &				       \
	 ((1<<TRANSPARENT_HUGEPAGE_FLAG) |		       \
	  (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)))
/* THP enabled for all mappings, no madvise needed. */
#define khugepaged_always()				\
	(transparent_hugepage_flags &			\
	 (1<<TRANSPARENT_HUGEPAGE_FLAG))
/* THP only for vmas explicitly marked with madvise(MADV_HUGEPAGE). */
#define khugepaged_req_madv()					\
	(transparent_hugepage_flags &				\
	 (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
/* Whether khugepaged may trigger defrag when allocating a hugepage. */
#define khugepaged_defrag()					\
	(transparent_hugepage_flags &				\
	 (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG))

/*
 * Propagate khugepaged registration across fork(): if the parent mm was
 * being scanned (MMF_VM_HUGEPAGE set), register the child mm as well.
 * Returns 0 on success or the error from __khugepaged_enter().
 */
static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
	if (test_bit(MMF_VM_HUGEPAGE, &oldmm->flags))
		return __khugepaged_enter(mm);
	return 0;
}

/*
 * Drop the mm from khugepaged if it was registered. Callers invoke this
 * from mmput() before exit_mmap() (see the comment at the call site).
 */
static inline void khugepaged_exit(struct mm_struct *mm)
{
	if (test_bit(MMF_VM_HUGEPAGE, &mm->flags))
		__khugepaged_exit(mm);
}

/*
 * Register the vma's mm with khugepaged the first time an eligible vma
 * appears in it: either THP is enabled for all mappings, or THP is in
 * madvise mode and this vma carries VM_HUGEPAGE. The MMF_VM_HUGEPAGE
 * test makes repeated calls cheap once the mm is registered.
 * Returns 0 on success, -ENOMEM if __khugepaged_enter() fails.
 */
static inline int khugepaged_enter(struct vm_area_struct *vma)
{
	if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
		if (khugepaged_always() ||
		    (khugepaged_req_madv() &&
		     vma->vm_flags & VM_HUGEPAGE))
			if (__khugepaged_enter(vma->vm_mm))
				return -ENOMEM;
	return 0;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
/* THP disabled: all hooks compile away to no-ops that report success. */
static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
	return 0;
}
static inline void khugepaged_exit(struct mm_struct *mm)
{
}
static inline int khugepaged_enter(struct vm_area_struct *vma)
{
	return 0;
}
static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
{
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#endif /* _LINUX_KHUGEPAGED_H */

include/linux/sched.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ extern int get_dumpable(struct mm_struct *mm);
434434
#endif
435435
/* leave room for more dump flags */
436436
#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
437+
#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
437438

438439
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
439440

kernel/fork.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
#include <linux/posix-timers.h>
6767
#include <linux/user-return-notifier.h>
6868
#include <linux/oom.h>
69+
#include <linux/khugepaged.h>
6970

7071
#include <asm/pgtable.h>
7172
#include <asm/pgalloc.h>
@@ -328,6 +329,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
328329
rb_parent = NULL;
329330
pprev = &mm->mmap;
330331
retval = ksm_fork(mm, oldmm);
332+
if (retval)
333+
goto out;
334+
retval = khugepaged_fork(mm, oldmm);
331335
if (retval)
332336
goto out;
333337

@@ -546,6 +550,7 @@ void mmput(struct mm_struct *mm)
546550
if (atomic_dec_and_test(&mm->mm_users)) {
547551
exit_aio(mm);
548552
ksm_exit(mm);
553+
khugepaged_exit(mm); /* must run before exit_mmap */
549554
exit_mmap(mm);
550555
set_mm_exe_file(mm, NULL);
551556
if (!list_empty(&mm->mmlist)) {

0 commit comments

Comments
 (0)