Skip to content

Commit

Permalink
anolis: mm, kidled: fix race when free idle age
Browse files Browse the repository at this point in the history
fix #36837630

When the kernel is built with the idle age stored outside the page flags
(KIDLED_AGE_NOT_IN_PAGE_FLAGS), it panics as below:

[   13.977004] BUG: unable to handle kernel paging request at ffffc90000eba2b9
[   13.978021] PGD 13ad35067 P4D 13ad35067 PUD 13ad36067 PMD 139b88067 PTE 0
[   13.979014] Oops: 0002 [#1] SMP PTI
[   13.979533] CPU: 12 PID: 112 Comm: kidled Not tainted 4.19.91+ torvalds#586
[   13.980450] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
[   13.982136] RIP: 0010:free_pcp_prepare+0x49/0xc0
[   13.982945] Code: 44 00 00 48 8b 15 1f 9d 13 01 48 8b 0d f8 9c 13 01 48 b8 00 00 00 00 00 16 00 00 48 01 d8 48 c1 f8 06 48 85 0
[   13.985674] RSP: 0018:ffffc900003ffe20 EFLAGS: 00010202
[   13.986429] RAX: 00000000001352b9 RBX: ffffea0004d4ae80 RCX: 0000000000000001
[   13.987468] RDX: ffffc90000d85000 RSI: 0000000000000000 RDI: ffffea0004d4ae80
[   13.988504] RBP: ffffea0004d4ae80 R08: ffffc90000ec6000 R09: 0000000000000000
[   13.989534] R10: 0000000000008e1c R11: ffffffff828c1b6d R12: ffffc90000d85000
[   13.990581] R13: ffffffff82306700 R14: 0000000000000001 R15: ffff88813adbab50
[   13.991634] FS:  0000000000000000(0000) GS:ffff88813bb00000(0000) knlGS:0000000000000000
[   13.992814] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   13.993648] CR2: ffffc90000eba2b9 CR3: 000000000220a006 CR4: 00000000003706e0
[   13.994681] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   13.995721] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   13.996763] Call Trace:
[   13.997137]  free_unref_page+0x11/0x60
[   13.997693]  __vunmap+0x4e/0xb0
[   13.998159]  kidled.cold+0x1b/0x53
[   13.998680]  ? __schedule+0x31c/0x6d0
[   13.999222]  ? finish_wait+0x80/0x80
[   13.999751]  ? kidled_mem_cgroup_move_stats+0x270/0x270
[   14.000514]  kthread+0x117/0x130
[   14.001006]  ? kthread_create_worker_on_cpu+0x70/0x70
[   14.001751]  ret_from_fork+0x35/0x40

This patch uses RCU to close this race window: callers may only access
the idle age inside an RCU read-side critical section, see
kidled_get/set/inc_page_age(). Note that kidled and the memory hotplug
path also take mem_hotplug_lock to avoid races between alloc and free.

Since kidled_free_page_age() may sleep, call it earlier to avoid
sleeping with pgdat_resize_lock held.

Signed-off-by: Gang Deng <gavin.dg@linux.alibaba.com>
Reviewed-by: zhongjiang-ali <zhongjiang-ali@linux.alibaba.com>
Reviewed-by: Xu Yu <xuyu@linux.alibaba.com>
  • Loading branch information
Gang Deng committed Sep 18, 2021
1 parent 8defdd1 commit e211d28
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 25 deletions.
4 changes: 4 additions & 0 deletions include/linux/kidled.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,10 @@ void kidled_mem_cgroup_move_stats(struct mem_cgroup *from,
unsigned int nr_pages);
#endif /* CONFIG_MEMCG */

#ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS
void kidled_free_page_age(pg_data_t *pgdat);
#endif

#else /* !CONFIG_KIDLED */

#ifdef CONFIG_MEMCG
Expand Down
34 changes: 25 additions & 9 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1155,38 +1155,54 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
#ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS
static inline int kidled_get_page_age(pg_data_t *pgdat, unsigned long pfn)
{
u8 *age = pgdat->node_page_age;
u8 *age, age_val;

if (unlikely(!age))
rcu_read_lock();
age = rcu_dereference(pgdat->node_page_age);
if (unlikely(!age)) {
rcu_read_unlock();
return -EINVAL;
}

age += (pfn - pgdat->node_start_pfn);
return *age;
age_val = *age;
rcu_read_unlock();
return age_val;
}

static inline int kidled_inc_page_age(pg_data_t *pgdat, unsigned long pfn)
{
u8 *age = pgdat->node_page_age;
u8 *age, age_val;

if (unlikely(!age))
rcu_read_lock();
age = rcu_dereference(pgdat->node_page_age);
if (unlikely(!age)) {
rcu_read_unlock();
return -EINVAL;
}

age += (pfn - pgdat->node_start_pfn);
*age += 1;
age_val = ++*age;
rcu_read_unlock();

return *age;
return age_val;
}

static inline void kidled_set_page_age(pg_data_t *pgdat,
unsigned long pfn, int val)
{
u8 *age = pgdat->node_page_age;
u8 *age;

if (unlikely(!age))
rcu_read_lock();
age = rcu_dereference(pgdat->node_page_age);
if (unlikely(!age)) {
rcu_read_unlock();
return;
}

age += (pfn - pgdat->node_start_pfn);
*age = val;
rcu_read_unlock();
}
#else
static inline int kidled_get_page_age(pg_data_t *pgdat, unsigned long pfn)
Expand Down
28 changes: 22 additions & 6 deletions mm/kidled.c
Original file line number Diff line number Diff line change
Expand Up @@ -400,13 +400,16 @@ static bool kidled_scan_node(pg_data_t *pgdat,

#ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS
if (unlikely(!pgdat->node_page_age)) {
u8 *age;

/* This node has none memory, skip it. */
if (!pgdat->node_spanned_pages)
return true;

pgdat->node_page_age = vzalloc(pgdat->node_spanned_pages);
if (unlikely(!pgdat->node_page_age))
age = vzalloc(pgdat->node_spanned_pages);
if (unlikely(!age))
return false;
rcu_assign_pointer(pgdat->node_page_age, age);
}
#endif /* KIDLED_AGE_NOT_IN_PAGE_FLAGS */

Expand All @@ -429,6 +432,20 @@ static bool kidled_scan_node(pg_data_t *pgdat,
return pfn >= node_end;
}

#ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS
/*
 * kidled_free_page_age - unpublish and free a node's page-age array.
 * @pgdat: node whose node_page_age array is to be released
 *
 * Does nothing when no array is currently installed. Otherwise the pointer
 * is cleared first, then synchronize_rcu() waits for pre-existing RCU
 * read-side critical sections to finish before the memory is vfree()d.
 *
 * This function may sleep; the commit message notes it must therefore not
 * be called with pgdat_resize_lock held.
 * NOTE(review): concurrent callers are presumably serialized by
 * mem_hotplug_lock, as the commit message states -- confirm at call sites.
 */
void kidled_free_page_age(pg_data_t *pgdat)
{
u8 *age;

/* Only the pointer value is fetched and tested here, not dereferenced. */
age = rcu_access_pointer(pgdat->node_page_age);
if (age) {
/* Unpublish first so that new readers observe NULL ... */
rcu_assign_pointer(pgdat->node_page_age, NULL);
/* ... then wait for readers that may still hold the old pointer. */
synchronize_rcu();
vfree(age);
}
}
#endif

static inline void kidled_scan_done(struct kidled_scan_period scan_period)
{
kidled_mem_cgroup_scan_done(scan_period);
Expand All @@ -448,10 +465,9 @@ static inline void kidled_reset(bool free)
if (!pgdat->node_page_age)
continue;

if (free) {
vfree(pgdat->node_page_age);
pgdat->node_page_age = NULL;
} else {
if (free)
kidled_free_page_age(pgdat);
else {
memset(pgdat->node_page_age, 0,
pgdat->node_spanned_pages);
}
Expand Down
16 changes: 6 additions & 10 deletions mm/memory_hotplug.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <linux/bootmem.h>
#include <linux/compaction.h>
#include <linux/rmap.h>
#include <linux/kidled.h>

#include <asm/tlbflush.h>

Expand Down Expand Up @@ -750,12 +751,6 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
pgdat->node_start_pfn = start_pfn;

pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
#ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS
if (pgdat->node_page_age) {
vfree(pgdat->node_page_age);
pgdat->node_page_age = NULL;
}
#endif
}

void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
Expand All @@ -765,6 +760,10 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
int nid = pgdat->node_id;
unsigned long flags;

#ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS
kidled_free_page_age(pgdat);
#endif

if (zone_is_empty(zone))
init_currently_empty_zone(zone, start_pfn, nr_pages);

Expand Down Expand Up @@ -1927,10 +1926,7 @@ void try_offline_node(int nid)
return;

#ifdef KIDLED_AGE_NOT_IN_PAGE_FLAGS
if (pgdat->node_page_age) {
vfree(pgdat->node_page_age);
pgdat->node_page_age = NULL;
}
kidled_free_page_age(pgdat);
#endif

/*
Expand Down

0 comments on commit e211d28

Please sign in to comment.