Skip to content

Commit

Permalink
mm/munlock: replace clear_page_mlock() by final clearance
Browse files Browse the repository at this point in the history
Placing munlock_vma_page() at the end of page_remove_rmap() shifts most
of the munlocking to clear_page_mlock(), since PageMlocked is typically
still set when mapcount has fallen to 0.  That is not what we want: we
want /proc/vmstat's unevictable_pgs_cleared to remain as a useful check
on the integrity of the mlock/munlock protocol - small numbers are
not surprising, but big numbers mean the protocol is not working.

That could be easily fixed by placing munlock_vma_page() at the start of
page_remove_rmap(); but later in the series we shall want to batch the
munlocking, and that too would tend to leave PageMlocked still set at
the point when it is checked.

So delete clear_page_mlock() now: leave it instead to release_pages()
(and __page_cache_release()) to do this backstop clearing of Mlocked,
when page refcount has fallen to 0.  If a pinned page occasionally gets
counted as Mlocked and Unevictable until it is unpinned, that's okay.

A slightly regrettable side-effect of this change is that, since
release_pages() and __page_cache_release() may be called at interrupt
time, those places which update NR_MLOCK with interrupts enabled
had better use mod_zone_page_state() than __mod_zone_page_state()
(but holding the lruvec lock always has interrupts disabled).

This change, forcing Mlocked off when refcount 0 instead of earlier
when mapcount 0, is not fundamental: it can be reversed if performance
or something else is found to suffer; but this is the easiest way to
separate the stats - let's not complicate that without good reason.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
  • Loading branch information
Hugh Dickins authored and Matthew Wilcox (Oracle) committed Feb 17, 2022
1 parent cea86fe commit b109b87
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 59 deletions.
12 changes: 0 additions & 12 deletions mm/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -425,17 +425,6 @@ static inline void munlock_vma_page(struct page *page,
munlock_page(page);
}

/*
* Clear the page's PageMlocked(). This can be useful in a situation where
* we want to unconditionally remove a page from the pagecache -- e.g.,
* on truncation or freeing.
*
* It is legal to call this function for any page, mlocked or not.
* If called for a page that is still mapped by mlocked vmas, all we do
* is revert to lazy LRU behaviour -- semantics are not broken.
*/
extern void clear_page_mlock(struct page *page);

extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);

/*
Expand Down Expand Up @@ -509,7 +498,6 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
}
#else /* !CONFIG_MMU */
static inline void unmap_mapping_folio(struct folio *folio) { }
static inline void clear_page_mlock(struct page *page) { }
static inline void mlock_vma_page(struct page *page,
struct vm_area_struct *vma, bool compound) { }
static inline void munlock_vma_page(struct page *page,
Expand Down
30 changes: 0 additions & 30 deletions mm/mlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,36 +48,6 @@ EXPORT_SYMBOL(can_do_mlock);
* PageUnevictable is set to indicate the unevictable state.
*/

/*
* LRU accounting for clear_page_mlock()
*
* clear_page_mlock - clear PageMlocked on @page and fix up the statistics.
* @page: the page whose Mlocked flag is to be cleared.
*        NOTE(review): thp_nr_pages() is applied to it, so this is
*        presumably a normal page or a THP head - confirm against callers.
*
* Returns immediately if the Mlocked flag was not set.  Otherwise
* subtracts the page count from the zone's NR_MLOCK, adds it to the
* UNEVICTABLE_PGCLEARED event counter, and then tries to isolate the page
* from the LRU and put it straight back.  If isolation fails and the page
* is still PageUnevictable, the pages are counted as
* UNEVICTABLE_PGSTRANDED instead.
*/
void clear_page_mlock(struct page *page)
{
int nr_pages;

/* Test-and-clear: only the caller that actually clears the flag accounts. */
if (!TestClearPageMlocked(page))
return;

/* Account in units of base pages, as reported by thp_nr_pages(). */
nr_pages = thp_nr_pages(page);
mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
/*
* The previous TestClearPageMlocked() corresponds to the smp_mb()
* in __pagevec_lru_add_fn().
*
* See __pagevec_lru_add_fn for more explanation.
*/
/* A zero return from isolate_lru_page() is treated as success here. */
if (!isolate_lru_page(page)) {
/*
* Isolated: hand the page back to the LRU.
* NOTE(review): presumably putback re-evaluates which list the
* page belongs on now that Mlocked is clear - confirm.
*/
putback_lru_page(page);
} else {
/*
* We lost the race: the page could not be isolated.  If it is
* still flagged Unevictable, record it as stranded.
*/
if (PageUnevictable(page))
count_vm_events(UNEVICTABLE_PGSTRANDED, nr_pages);
}
}

/**
* mlock_page - mlock a page
* @page: page to be mlocked, either a normal page or a THP head.
Expand Down
9 changes: 0 additions & 9 deletions mm/rmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1315,9 +1315,6 @@ static void page_remove_file_rmap(struct page *page, bool compound)
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);

if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
}

static void page_remove_anon_compound_rmap(struct page *page)
Expand Down Expand Up @@ -1357,9 +1354,6 @@ static void page_remove_anon_compound_rmap(struct page *page)
nr = thp_nr_pages(page);
}

if (unlikely(PageMlocked(page)))
clear_page_mlock(page);

if (nr)
__mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
}
Expand Down Expand Up @@ -1398,9 +1392,6 @@ void page_remove_rmap(struct page *page,
*/
__dec_lruvec_page_state(page, NR_ANON_MAPPED);

if (unlikely(PageMlocked(page)))
clear_page_mlock(page);

if (PageTransCompound(page))
deferred_split_huge_page(compound_head(page));

Expand Down
32 changes: 24 additions & 8 deletions mm/swap.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
};

/*
* This path almost never happens for VM activity - pages are normally
* freed via pagevecs. But it gets used by networking.
* This path almost never happens for VM activity - pages are normally freed
* via pagevecs. But it gets used by networking - and for compound pages.
*/
static void __page_cache_release(struct page *page)
{
Expand All @@ -89,6 +89,14 @@ static void __page_cache_release(struct page *page)
__clear_page_lru_flags(page);
unlock_page_lruvec_irqrestore(lruvec, flags);
}
/* See comment on PageMlocked in release_pages() */
if (unlikely(PageMlocked(page))) {
int nr_pages = thp_nr_pages(page);

__ClearPageMlocked(page);
mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
}
__ClearPageWaiters(page);
}

Expand Down Expand Up @@ -489,12 +497,8 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
unevictable = (vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED;
if (unlikely(unevictable) && !TestSetPageMlocked(page)) {
int nr_pages = thp_nr_pages(page);
/*
* We use the irq-unsafe __mod_zone_page_state because this
* counter is not modified from interrupt context, and the pte
* lock is held(spinlock), which implies preemption disabled.
*/
__mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);

mod_zone_page_state(page_zone(page), NR_MLOCK, nr_pages);
count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
}
lru_cache_add(page);
Expand Down Expand Up @@ -969,6 +973,18 @@ void release_pages(struct page **pages, int nr)
__clear_page_lru_flags(page);
}

/*
* In rare cases, when truncation or holepunching raced with
* munlock after VM_LOCKED was cleared, Mlocked may still be
* found set here. This does not indicate a problem, unless
* "unevictable_pgs_cleared" appears worryingly large.
*/
if (unlikely(PageMlocked(page))) {
__ClearPageMlocked(page);
dec_zone_page_state(page, NR_MLOCK);
count_vm_event(UNEVICTABLE_PGCLEARED);
}

__ClearPageWaiters(page);

list_add(&page->lru, &pages_to_free);
Expand Down

0 comments on commit b109b87

Please sign in to comment.