Skip to content

Commit

Permalink
mm: memcontrol: per-lruvec stats infrastructure
Browse files Browse the repository at this point in the history
lruvecs are at the intersection of the NUMA node and memcg, which is the
scope for most paging activity.

Introduce a convenient accounting infrastructure that maintains statistics
per node, per memcg, and the lruvec itself.

Then convert over accounting sites for statistics that are already tracked
in both nodes and memcgs and can be easily switched.

Link: http://lkml.kernel.org/r/20170530181724.27197-6-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
  • Loading branch information
hnaz committed Jun 6, 2017
1 parent 9032776 commit 365b416
Show file tree
Hide file tree
Showing 6 changed files with 225 additions and 52 deletions.
238 changes: 208 additions & 30 deletions include/linux/memcontrol.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
#include <linux/page_counter.h>
#include <linux/vmpressure.h>
#include <linux/eventfd.h>
#include <linux/mmzone.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/writeback.h>
#include <linux/page-flags.h>

Expand Down Expand Up @@ -98,11 +99,16 @@ struct mem_cgroup_reclaim_iter {
unsigned int generation;
};

/*
 * Statistics kept for one lruvec: one signed counter per node stat item
 * (the array is sized by NR_VM_NODE_STAT_ITEMS).  Referenced through a
 * __percpu pointer from struct mem_cgroup_per_node.
 */
struct lruvec_stat {
long count[NR_VM_NODE_STAT_ITEMS];
};

/*
* per-node information in memory controller.
*/
struct mem_cgroup_per_node {
struct lruvec lruvec;
struct lruvec_stat __percpu *lruvec_stat;
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];

struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
Expand Down Expand Up @@ -496,23 +502,18 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
return val;
}

static inline void mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx, int val)
static inline void __mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx, int val)
{
if (!mem_cgroup_disabled())
this_cpu_add(memcg->stat->count[idx], val);
}

static inline void inc_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx)
{
mod_memcg_state(memcg, idx, 1);
__this_cpu_add(memcg->stat->count[idx], val);
}

static inline void dec_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx)
static inline void mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx, int val)
{
mod_memcg_state(memcg, idx, -1);
if (!mem_cgroup_disabled())
this_cpu_add(memcg->stat->count[idx], val);
}

/**
Expand All @@ -532,23 +533,82 @@ static inline void dec_memcg_state(struct mem_cgroup *memcg,
*
* Kernel pages are an exception to this, since they'll never move.
*/
/*
 * Add @val to memcg statistic @idx of the cgroup attached to @page,
 * going through __mod_memcg_state().  A page without a memcg
 * (page->mem_cgroup == NULL) is left unaccounted.
 */
static inline void __mod_memcg_page_state(struct page *page,
					  enum memcg_stat_item idx, int val)
{
	struct mem_cgroup *memcg = page->mem_cgroup;

	if (!memcg)
		return;

	__mod_memcg_state(memcg, idx, val);
}

/*
 * Add @val to memcg statistic @idx of the cgroup attached to @page,
 * going through mod_memcg_state().  A page without a memcg
 * (page->mem_cgroup == NULL) is left unaccounted.
 */
static inline void mod_memcg_page_state(struct page *page,
					enum memcg_stat_item idx, int val)
{
	struct mem_cgroup *memcg = page->mem_cgroup;

	if (!memcg)
		return;

	mod_memcg_state(memcg, idx, val);
}

static inline void inc_memcg_page_state(struct page *page,
enum memcg_stat_item idx)
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
mod_memcg_page_state(page, idx, 1);
struct mem_cgroup_per_node *pn;
long val = 0;
int cpu;

if (mem_cgroup_disabled())
return node_page_state(lruvec_pgdat(lruvec), idx);

pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
for_each_possible_cpu(cpu)
val += per_cpu(pn->lruvec_stat->count[idx], cpu);

if (val < 0)
val = 0;

return val;
}

static inline void dec_memcg_page_state(struct page *page,
enum memcg_stat_item idx)
static inline void __mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
mod_memcg_page_state(page, idx, -1);
struct mem_cgroup_per_node *pn;

__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
if (mem_cgroup_disabled())
return;
pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
__mod_memcg_state(pn->memcg, idx, val);
__this_cpu_add(pn->lruvec_stat->count[idx], val);
}

/*
 * Add @val to statistic @idx at every level @lruvec belongs to.
 *
 * The node counter is always updated.  Unless the memory controller is
 * disabled, the owning memcg's counter and the lruvec's own per-cpu
 * counter are updated as well.
 */
static inline void mod_lruvec_state(struct lruvec *lruvec,
				    enum node_stat_item idx, int val)
{
	mod_node_page_state(lruvec_pgdat(lruvec), idx, val);

	if (!mem_cgroup_disabled()) {
		/* The lruvec is embedded in its per-node memcg structure. */
		struct mem_cgroup_per_node *per_node =
			container_of(lruvec, struct mem_cgroup_per_node, lruvec);

		mod_memcg_state(per_node->memcg, idx, val);
		this_cpu_add(per_node->lruvec_stat->count[idx], val);
	}
}

/*
 * Add @val to node statistic @idx for @page at every level that applies:
 * the node, the memcg the page is attached to, and that memcg's lruvec
 * on the page's node.
 *
 * A page may have no memcg attached (page->mem_cgroup == NULL); the
 * memcg page-state helpers in this file check for that before touching
 * the memcg.  Do the same here and account such pages against the node
 * only, instead of handing a NULL memcg to mem_cgroup_lruvec().
 *
 * NOTE(review): this is the __-prefixed variant and relies on
 * __mod_node_page_state()/__mod_lruvec_state(); presumably the caller
 * must already have preemption or interrupts disabled - confirm.
 */
static inline void __mod_lruvec_page_state(struct page *page,
					   enum node_stat_item idx, int val)
{
	struct lruvec *lruvec;

	/* Untracked page: no memcg, hence no lruvec.  Node counter only. */
	if (!page->mem_cgroup) {
		__mod_node_page_state(page_pgdat(page), idx, val);
		return;
	}

	lruvec = mem_cgroup_lruvec(page_pgdat(page), page->mem_cgroup);
	__mod_lruvec_state(lruvec, idx, val);
}

/*
 * Add @val to node statistic @idx for @page at every level that applies:
 * the node, the memcg the page is attached to, and that memcg's lruvec
 * on the page's node.
 *
 * A page may have no memcg attached (page->mem_cgroup == NULL); the
 * memcg page-state helpers in this file check for that before touching
 * the memcg.  Do the same here and account such pages against the node
 * only, instead of handing a NULL memcg to mem_cgroup_lruvec().
 */
static inline void mod_lruvec_page_state(struct page *page,
					 enum node_stat_item idx, int val)
{
	struct lruvec *lruvec;

	/* Untracked page: no memcg, hence no lruvec.  Node counter only. */
	if (!page->mem_cgroup) {
		mod_node_page_state(page_pgdat(page), idx, val);
		return;
	}

	lruvec = mem_cgroup_lruvec(page_pgdat(page), page->mem_cgroup);
	mod_lruvec_state(lruvec, idx, val);
}

unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
Expand Down Expand Up @@ -777,19 +837,21 @@ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
return 0;
}

static inline void mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx,
int nr)
/*
 * Stub: empty body, all arguments are ignored.
 * NOTE(review): sits ahead of "#endif CONFIG_MEMCG", so presumably this
 * is the variant built when the memory controller is compiled out -
 * confirm against the enclosing #ifdef/#else.
 */
static inline void __mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx,
int nr)
{
}

static inline void inc_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx)
/*
 * Stub: empty body, all arguments are ignored.
 * NOTE(review): sits ahead of "#endif CONFIG_MEMCG", so presumably this
 * is the variant built when the memory controller is compiled out -
 * confirm against the enclosing #ifdef/#else.
 */
static inline void mod_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx,
int nr)
{
}

static inline void dec_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx)
/*
 * Stub: empty body, all arguments are ignored.
 * NOTE(review): sits ahead of "#endif CONFIG_MEMCG", so presumably this
 * is the variant built when the memory controller is compiled out -
 * confirm against the enclosing #ifdef/#else.
 */
static inline void __mod_memcg_page_state(struct page *page,
enum memcg_stat_item idx,
int nr)
{
}

Expand All @@ -799,14 +861,34 @@ static inline void mod_memcg_page_state(struct page *page,
{
}

static inline void inc_memcg_page_state(struct page *page,
enum memcg_stat_item idx)
/*
 * Read statistic @idx for @lruvec.  This variant has no per-lruvec
 * counters to consult and returns the value of the node the lruvec
 * belongs to, as reported by node_page_state().
 * NOTE(review): presumably the !CONFIG_MEMCG variant - confirm.
 */
static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
return node_page_state(lruvec_pgdat(lruvec), idx);
}

static inline void dec_memcg_page_state(struct page *page,
enum memcg_stat_item idx)
/*
 * Add @val to statistic @idx of the node @lruvec belongs to, via
 * __mod_node_page_state().  Only the node counter is touched here.
 * NOTE(review): presumably the !CONFIG_MEMCG variant - confirm.
 */
static inline void __mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
__mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
}

/*
 * Add @val to statistic @idx of the node @lruvec belongs to, via
 * mod_node_page_state().  Only the node counter is touched here.
 * NOTE(review): presumably the !CONFIG_MEMCG variant - confirm.
 */
static inline void mod_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx, int val)
{
mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
}

/*
 * Add @val to statistic @idx of the node @page resides on, via
 * __mod_node_page_state().  Only the node counter is touched here.
 * NOTE(review): presumably the !CONFIG_MEMCG variant - confirm.
 */
static inline void __mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
__mod_node_page_state(page_pgdat(page), idx, val);
}

/*
 * Add @val to statistic @idx of the node @page resides on, via
 * mod_node_page_state().  Only the node counter is touched here.
 * NOTE(review): presumably the !CONFIG_MEMCG variant - confirm.
 */
static inline void mod_lruvec_page_state(struct page *page,
enum node_stat_item idx, int val)
{
mod_node_page_state(page_pgdat(page), idx, val);
}

static inline
Expand Down Expand Up @@ -838,6 +920,102 @@ void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
}
#endif /* CONFIG_MEMCG */

/* Add 1 to statistic @idx of @memcg; shorthand for __mod_memcg_state(). */
static inline void __inc_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx)
{
__mod_memcg_state(memcg, idx, 1);
}

/* Subtract 1 from statistic @idx of @memcg; shorthand for __mod_memcg_state(). */
static inline void __dec_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx)
{
__mod_memcg_state(memcg, idx, -1);
}

/* Add 1 to memcg statistic @idx for @page; shorthand for __mod_memcg_page_state(). */
static inline void __inc_memcg_page_state(struct page *page,
enum memcg_stat_item idx)
{
__mod_memcg_page_state(page, idx, 1);
}

/* Subtract 1 from memcg statistic @idx for @page; shorthand for __mod_memcg_page_state(). */
static inline void __dec_memcg_page_state(struct page *page,
enum memcg_stat_item idx)
{
__mod_memcg_page_state(page, idx, -1);
}

/* Add 1 to statistic @idx of @lruvec; shorthand for __mod_lruvec_state(). */
static inline void __inc_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
__mod_lruvec_state(lruvec, idx, 1);
}

/* Subtract 1 from statistic @idx of @lruvec; shorthand for __mod_lruvec_state(). */
static inline void __dec_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
__mod_lruvec_state(lruvec, idx, -1);
}

/* Add 1 to lruvec statistic @idx for @page; shorthand for __mod_lruvec_page_state(). */
static inline void __inc_lruvec_page_state(struct page *page,
enum node_stat_item idx)
{
__mod_lruvec_page_state(page, idx, 1);
}

/* Subtract 1 from lruvec statistic @idx for @page; shorthand for __mod_lruvec_page_state(). */
static inline void __dec_lruvec_page_state(struct page *page,
enum node_stat_item idx)
{
__mod_lruvec_page_state(page, idx, -1);
}

/* Add 1 to statistic @idx of @memcg; shorthand for mod_memcg_state(). */
static inline void inc_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx)
{
mod_memcg_state(memcg, idx, 1);
}

/* Subtract 1 from statistic @idx of @memcg; shorthand for mod_memcg_state(). */
static inline void dec_memcg_state(struct mem_cgroup *memcg,
enum memcg_stat_item idx)
{
mod_memcg_state(memcg, idx, -1);
}

/* Add 1 to memcg statistic @idx for @page; shorthand for mod_memcg_page_state(). */
static inline void inc_memcg_page_state(struct page *page,
enum memcg_stat_item idx)
{
mod_memcg_page_state(page, idx, 1);
}

/* Subtract 1 from memcg statistic @idx for @page; shorthand for mod_memcg_page_state(). */
static inline void dec_memcg_page_state(struct page *page,
enum memcg_stat_item idx)
{
mod_memcg_page_state(page, idx, -1);
}

/* Add 1 to statistic @idx of @lruvec; shorthand for mod_lruvec_state(). */
static inline void inc_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
mod_lruvec_state(lruvec, idx, 1);
}

/* Subtract 1 from statistic @idx of @lruvec; shorthand for mod_lruvec_state(). */
static inline void dec_lruvec_state(struct lruvec *lruvec,
enum node_stat_item idx)
{
mod_lruvec_state(lruvec, idx, -1);
}

/* Add 1 to lruvec statistic @idx for @page; shorthand for mod_lruvec_page_state(). */
static inline void inc_lruvec_page_state(struct page *page,
enum node_stat_item idx)
{
mod_lruvec_page_state(page, idx, 1);
}

/* Subtract 1 from lruvec statistic @idx for @page; shorthand for mod_lruvec_page_state(). */
static inline void dec_lruvec_page_state(struct page *page,
enum node_stat_item idx)
{
mod_lruvec_page_state(page, idx, -1);
}

#ifdef CONFIG_CGROUP_WRITEBACK

struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg);
Expand Down
1 change: 0 additions & 1 deletion include/linux/vmstat.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/vm_event_item.h>
#include <linux/atomic.h>
Expand Down
6 changes: 6 additions & 0 deletions mm/memcontrol.c
Original file line number Diff line number Diff line change
Expand Up @@ -4122,6 +4122,12 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
if (!pn)
return 1;

pn->lruvec_stat = alloc_percpu(struct lruvec_stat);
if (!pn->lruvec_stat) {
kfree(pn);
return 1;
}

lruvec_init(&pn->lruvec);
pn->usage_in_excess = 0;
pn->on_tree = false;
Expand Down
15 changes: 5 additions & 10 deletions mm/page-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -2433,8 +2433,7 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
inode_attach_wb(inode, page);
wb = inode_to_wb(inode);

inc_memcg_page_state(page, NR_FILE_DIRTY);
__inc_node_page_state(page, NR_FILE_DIRTY);
__inc_lruvec_page_state(page, NR_FILE_DIRTY);
__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
__inc_node_page_state(page, NR_DIRTIED);
__inc_wb_stat(wb, WB_RECLAIMABLE);
Expand All @@ -2455,8 +2454,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
struct bdi_writeback *wb)
{
if (mapping_cap_account_dirty(mapping)) {
dec_memcg_page_state(page, NR_FILE_DIRTY);
dec_node_page_state(page, NR_FILE_DIRTY);
dec_lruvec_page_state(page, NR_FILE_DIRTY);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
dec_wb_stat(wb, WB_RECLAIMABLE);
task_io_account_cancelled_write(PAGE_SIZE);
Expand Down Expand Up @@ -2712,8 +2710,7 @@ int clear_page_dirty_for_io(struct page *page)
*/
wb = unlocked_inode_to_wb_begin(inode, &locked);
if (TestClearPageDirty(page)) {
dec_memcg_page_state(page, NR_FILE_DIRTY);
dec_node_page_state(page, NR_FILE_DIRTY);
dec_lruvec_page_state(page, NR_FILE_DIRTY);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
dec_wb_stat(wb, WB_RECLAIMABLE);
ret = 1;
Expand Down Expand Up @@ -2759,8 +2756,7 @@ int test_clear_page_writeback(struct page *page)
ret = TestClearPageWriteback(page);
}
if (ret) {
dec_memcg_page_state(page, NR_WRITEBACK);
dec_node_page_state(page, NR_WRITEBACK);
dec_lruvec_page_state(page, NR_WRITEBACK);
dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
inc_node_page_state(page, NR_WRITTEN);
}
Expand Down Expand Up @@ -2814,8 +2810,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
ret = TestSetPageWriteback(page);
}
if (!ret) {
inc_memcg_page_state(page, NR_WRITEBACK);
inc_node_page_state(page, NR_WRITEBACK);
inc_lruvec_page_state(page, NR_WRITEBACK);
inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
}
unlock_page_memcg(page);
Expand Down
Loading

0 comments on commit 365b416

Please sign in to comment.