Skip to content

Commit faced7e

Browse files
giuseppe authored and htejun committed
mm: hugetlb controller for cgroups v2
In the effort of supporting cgroups v2 in Kubernetes, I stumbled on the lack of the hugetlb controller.

When the controller is enabled, it exposes four new files for each hugetlb size on non-root cgroups:

- hugetlb.<hugepagesize>.current
- hugetlb.<hugepagesize>.max
- hugetlb.<hugepagesize>.events
- hugetlb.<hugepagesize>.events.local

The differences with the legacy hierarchy are in the file names and using the value "max" instead of "-1" to disable a limit. The file .limit_in_bytes is renamed to .max. The file .usage_in_bytes is renamed to .current. .failcnt is not provided as a single file anymore, but its value can be read through the new flat-keyed files .events and .events.local, through the "max" key.

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
1 parent 6afa873 commit faced7e

File tree

3 files changed

+218
-12
lines changed

3 files changed

+218
-12
lines changed

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ v1 is available under Documentation/admin-guide/cgroup-v1/.
6161
5-6. Device
6262
5-7. RDMA
6363
5-7-1. RDMA Interface Files
64+
5-8. HugeTLB
65+
5-8-1. HugeTLB Interface Files
6466
5-8. Misc
6567
5-8-1. perf_event
6668
5-N. Non-normative information
@@ -2056,6 +2058,33 @@ RDMA Interface Files
20562058
mlx4_0 hca_handle=1 hca_object=20
20572059
ocrdma1 hca_handle=1 hca_object=23
20582060

2061+
HugeTLB
2062+
-------
2063+
2064+
The HugeTLB controller allows limiting the HugeTLB usage per control group and
2065+
enforces the controller limit during page fault.
2066+
2067+
HugeTLB Interface Files
2068+
~~~~~~~~~~~~~~~~~~~~~~~
2069+
2070+
hugetlb.<hugepagesize>.current
2071+
Show current usage for "hugepagesize" hugetlb. It exists for all
2072+
the cgroups except root.
2073+
2074+
hugetlb.<hugepagesize>.max
2075+
Set/show the hard limit of "hugepagesize" hugetlb usage.
2076+
The default value is "max". It exists for all the cgroups except root.
2077+
2078+
hugetlb.<hugepagesize>.events
2079+
A read-only flat-keyed file which exists on non-root cgroups.
2080+
2081+
max
2082+
The number of allocation failures due to the HugeTLB limit.
2083+
2084+
hugetlb.<hugepagesize>.events.local
2085+
Similar to hugetlb.<hugepagesize>.events but the fields in the file
2086+
are local to the cgroup i.e. not hierarchical. The file modified event
2087+
generated on this file reflects only the local events.
20592088

20602089
Misc
20612090
----

include/linux/hugetlb.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,8 @@ struct hstate {
432432
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
433433
#ifdef CONFIG_CGROUP_HUGETLB
434434
/* cgroup control files */
435-
struct cftype cgroup_files[5];
435+
struct cftype cgroup_files_dfl[5];
436+
struct cftype cgroup_files_legacy[5];
436437
#endif
437438
char name[HSTATE_NAME_LEN];
438439
};

mm/hugetlb_cgroup.c

Lines changed: 187 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
* Copyright IBM Corporation, 2012
44
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
55
*
6+
* Cgroup v2
7+
* Copyright (C) 2019 Red Hat, Inc.
8+
* Author: Giuseppe Scrivano <gscrivan@redhat.com>
9+
*
610
* This program is free software; you can redistribute it and/or modify it
711
* under the terms of version 2.1 of the GNU Lesser General Public License
812
* as published by the Free Software Foundation.
@@ -19,18 +23,36 @@
1923
#include <linux/hugetlb.h>
2024
#include <linux/hugetlb_cgroup.h>
2125

26+
/*
 * Event kinds counted per hstate. Each value is used as the second index
 * into the events[][] and events_local[][] arrays of struct hugetlb_cgroup.
 */
enum hugetlb_memory_event {
27+
HUGETLB_MAX, /* recorded when a charge attempt fails; shown under the "max" key */
28+
HUGETLB_NR_MEMORY_EVENTS, /* number of event kinds; sizes the event arrays */
29+
};
30+
2231
/*
 * Per-cgroup state of the hugetlb controller. Every array is indexed by
 * hstate index, so each huge page size gets its own counter, event counts
 * and event file handles.
 */
struct hugetlb_cgroup {
2332
struct cgroup_subsys_state css;
33+
2434
/*
2535
* the counter to account for hugepages from hugetlb.
2636
*/
2737
struct page_counter hugepage[HUGE_MAX_HSTATE];
38+
39+
/*
 * events[][] is incremented by hugetlb_event() on the cgroup where the
 * event happened and on the parents it walks; events_local[][] is
 * incremented only on the cgroup where the event happened.
 */
atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
40+
atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
41+
42+
/* Handle for "hugetlb.<hugepagesize>.events", passed to cgroup_file_notify() */
43+
struct cgroup_file events_file[HUGE_MAX_HSTATE];
44+
45+
/* Handle for "hugetlb.<hugepagesize>.events.local", passed to cgroup_file_notify() */
46+
struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
2847
};
2948

3049
#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
3150
#define MEMFILE_IDX(val) (((val) >> 16) & 0xffff)
3251
#define MEMFILE_ATTR(val) ((val) & 0xffff)
3352

53+
/*
 * Given a pointer to the hugepage[idx] page_counter embedded in a
 * struct hugetlb_cgroup, return a pointer to that enclosing structure.
 * @idx must be the hstate index the counter belongs to, because it selects
 * the array element whose offset container_of() subtracts.
 */
#define hugetlb_cgroup_from_counter(counter, idx) \
54+
container_of(counter, struct hugetlb_cgroup, hugepage[idx])
55+
3456
static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
3557

3658
static inline
@@ -178,6 +200,19 @@ static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
178200
} while (hugetlb_cgroup_have_usage(h_cg));
179201
}
180202

203+
/*
 * Record @event for hstate @idx on @hugetlb.
 *
 * The local counter is incremented only on @hugetlb itself. The hierarchical
 * counter is incremented on @hugetlb and then on each cgroup returned by
 * parent_hugetlb_cgroup(), stopping when there is no parent or when
 * hugetlb_cgroup_is_root() is true for it. cgroup_file_notify() is called on
 * the matching events file handle after every increment.
 */
static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
204+
enum hugetlb_memory_event event)
205+
{
206+
atomic_long_inc(&hugetlb->events_local[idx][event]);
207+
cgroup_file_notify(&hugetlb->events_local_file[idx]);
208+
209+
do {
210+
atomic_long_inc(&hugetlb->events[idx][event]);
211+
cgroup_file_notify(&hugetlb->events_file[idx]);
212+
} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) && /* step to the parent; NULL ends the walk */
213+
!hugetlb_cgroup_is_root(hugetlb));
214+
}
215+
181216
int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
182217
struct hugetlb_cgroup **ptr)
183218
{
@@ -202,8 +237,12 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
202237
}
203238
rcu_read_unlock();
204239

205-
if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter))
240+
if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
241+
&counter)) {
206242
ret = -ENOMEM;
243+
hugetlb_event(hugetlb_cgroup_from_counter(counter, idx), idx,
244+
HUGETLB_MAX);
245+
}
207246
css_put(&h_cg->css);
208247
done:
209248
*ptr = h_cg;
@@ -283,10 +322,45 @@ static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
283322
}
284323
}
285324

325+
/*
 * seq_show handler installed for the cgroup v2 "hugetlb.<hugepagesize>.max"
 * and "hugetlb.<hugepagesize>.current" files.
 *
 * The hstate index and the attribute (RES_LIMIT or RES_USAGE) are decoded
 * from cft->private. RES_USAGE prints page_counter_read() multiplied by
 * PAGE_SIZE. RES_LIMIT prints "max" when counter->max equals
 * PAGE_COUNTER_MAX rounded down to a multiple of (1 << huge_page_order()),
 * and counter->max multiplied by PAGE_SIZE otherwise. Any other attribute
 * hits BUG(). @v is unused. Always returns 0.
 */
static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
326+
{
327+
int idx;
328+
u64 val;
329+
struct cftype *cft = seq_cft(seq);
330+
unsigned long limit;
331+
struct page_counter *counter;
332+
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
333+
334+
idx = MEMFILE_IDX(cft->private);
335+
counter = &h_cg->hugepage[idx];
336+
337+
/*
 * NOTE(review): presumably this is the value counter->max holds when no
 * limit is configured - confirm against the limit write/initialisation
 * path. The shift is done in int; confirm huge_page_order() stays small
 * enough for that on all supported configurations.
 */
limit = round_down(PAGE_COUNTER_MAX,
338+
1 << huge_page_order(&hstates[idx]));
339+
340+
switch (MEMFILE_ATTR(cft->private)) {
341+
case RES_USAGE:
342+
val = (u64)page_counter_read(counter);
343+
seq_printf(seq, "%llu\n", val * PAGE_SIZE);
344+
break;
345+
case RES_LIMIT:
346+
val = (u64)counter->max;
347+
if (val == limit)
348+
seq_puts(seq, "max\n");
349+
else
350+
seq_printf(seq, "%llu\n", val * PAGE_SIZE);
351+
break;
352+
default:
353+
BUG(); /* only RES_USAGE and RES_LIMIT files are wired to this handler */
354+
}
355+
356+
return 0;
357+
}
358+
286359
static DEFINE_MUTEX(hugetlb_limit_mutex);
287360

288361
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
289-
char *buf, size_t nbytes, loff_t off)
362+
char *buf, size_t nbytes, loff_t off,
363+
const char *max)
290364
{
291365
int ret, idx;
292366
unsigned long nr_pages;
@@ -296,7 +370,7 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
296370
return -EINVAL;
297371

298372
buf = strstrip(buf);
299-
ret = page_counter_memparse(buf, "-1", &nr_pages);
373+
ret = page_counter_memparse(buf, max, &nr_pages);
300374
if (ret)
301375
return ret;
302376

@@ -316,6 +390,18 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
316390
return ret ?: nbytes;
317391
}
318392

393+
/*
 * Write handler for the legacy-hierarchy limit file. Forwards to
 * hugetlb_cgroup_write() with "-1" as the string that
 * page_counter_memparse() treats as "no limit".
 */
static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
394+
char *buf, size_t nbytes, loff_t off)
395+
{
396+
return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
397+
}
398+
399+
/*
 * Write handler for the default-hierarchy (cgroup v2) limit file. Forwards
 * to hugetlb_cgroup_write() with "max" as the string that
 * page_counter_memparse() treats as "no limit".
 */
static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
400+
char *buf, size_t nbytes, loff_t off)
401+
{
402+
return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
403+
}
404+
319405
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
320406
char *buf, size_t nbytes, loff_t off)
321407
{
@@ -350,7 +436,36 @@ static char *mem_fmt(char *buf, int size, unsigned long hsize)
350436
return buf;
351437
}
352438

353-
static void __init __hugetlb_cgroup_file_init(int idx)
439+
/*
 * Common body of the two events seq_show handlers.
 *
 * Decodes the hstate index from cft->private, reads the HUGETLB_MAX counter
 * from events_local[][] when @local is true or from events[][] otherwise,
 * and prints it as a single "max <count>" line. Always returns 0.
 */
static int __hugetlb_events_show(struct seq_file *seq, bool local)
440+
{
441+
int idx;
442+
long max;
443+
struct cftype *cft = seq_cft(seq);
444+
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
445+
446+
idx = MEMFILE_IDX(cft->private);
447+
448+
if (local)
449+
max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
450+
else
451+
max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);
452+
453+
/* NOTE(review): max is a signed long but is printed with %lu */
seq_printf(seq, "max %lu\n", max);
454+
455+
return 0;
456+
}
457+
458+
/*
 * seq_show handler for "hugetlb.<hugepagesize>.events": prints the
 * hierarchical event counter. @v is unused.
 */
static int hugetlb_events_show(struct seq_file *seq, void *v)
459+
{
460+
return __hugetlb_events_show(seq, false);
461+
}
462+
463+
/*
 * seq_show handler for "hugetlb.<hugepagesize>.events.local": prints the
 * event counter local to this cgroup. @v is unused.
 */
static int hugetlb_events_local_show(struct seq_file *seq, void *v)
464+
{
465+
return __hugetlb_events_show(seq, true);
466+
}
467+
468+
static void __init __hugetlb_cgroup_file_dfl_init(int idx)
354469
{
355470
char buf[32];
356471
struct cftype *cft;
@@ -360,38 +475,93 @@ static void __init __hugetlb_cgroup_file_init(int idx)
360475
mem_fmt(buf, 32, huge_page_size(h));
361476

362477
/* Add the limit file */
363-
cft = &h->cgroup_files[0];
478+
cft = &h->cgroup_files_dfl[0];
479+
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
480+
cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
481+
cft->seq_show = hugetlb_cgroup_read_u64_max;
482+
cft->write = hugetlb_cgroup_write_dfl;
483+
cft->flags = CFTYPE_NOT_ON_ROOT;
484+
485+
/* Add the current usage file */
486+
cft = &h->cgroup_files_dfl[1];
487+
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
488+
cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
489+
cft->seq_show = hugetlb_cgroup_read_u64_max;
490+
cft->flags = CFTYPE_NOT_ON_ROOT;
491+
492+
/* Add the events file */
493+
cft = &h->cgroup_files_dfl[2];
494+
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
495+
cft->private = MEMFILE_PRIVATE(idx, 0);
496+
cft->seq_show = hugetlb_events_show;
497+
cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]),
498+
cft->flags = CFTYPE_NOT_ON_ROOT;
499+
500+
/* Add the events.local file */
501+
cft = &h->cgroup_files_dfl[3];
502+
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
503+
cft->private = MEMFILE_PRIVATE(idx, 0);
504+
cft->seq_show = hugetlb_events_local_show;
505+
cft->file_offset = offsetof(struct hugetlb_cgroup,
506+
events_local_file[idx]),
507+
cft->flags = CFTYPE_NOT_ON_ROOT;
508+
509+
/* NULL terminate the last cft */
510+
cft = &h->cgroup_files_dfl[4];
511+
memset(cft, 0, sizeof(*cft));
512+
513+
WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
514+
h->cgroup_files_dfl));
515+
}
516+
517+
static void __init __hugetlb_cgroup_file_legacy_init(int idx)
518+
{
519+
char buf[32];
520+
struct cftype *cft;
521+
struct hstate *h = &hstates[idx];
522+
523+
/* format the size */
524+
mem_fmt(buf, 32, huge_page_size(h));
525+
526+
/* Add the limit file */
527+
cft = &h->cgroup_files_legacy[0];
364528
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
365529
cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
366530
cft->read_u64 = hugetlb_cgroup_read_u64;
367-
cft->write = hugetlb_cgroup_write;
531+
cft->write = hugetlb_cgroup_write_legacy;
368532

369533
/* Add the usage file */
370-
cft = &h->cgroup_files[1];
534+
cft = &h->cgroup_files_legacy[1];
371535
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
372536
cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
373537
cft->read_u64 = hugetlb_cgroup_read_u64;
374538

375539
/* Add the MAX usage file */
376-
cft = &h->cgroup_files[2];
540+
cft = &h->cgroup_files_legacy[2];
377541
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
378542
cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
379543
cft->write = hugetlb_cgroup_reset;
380544
cft->read_u64 = hugetlb_cgroup_read_u64;
381545

382546
/* Add the failcntfile */
383-
cft = &h->cgroup_files[3];
547+
cft = &h->cgroup_files_legacy[3];
384548
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
385549
cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
386550
cft->write = hugetlb_cgroup_reset;
387551
cft->read_u64 = hugetlb_cgroup_read_u64;
388552

389553
/* NULL terminate the last cft */
390-
cft = &h->cgroup_files[4];
554+
cft = &h->cgroup_files_legacy[4];
391555
memset(cft, 0, sizeof(*cft));
392556

393557
WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
394-
h->cgroup_files));
558+
h->cgroup_files_legacy));
559+
}
560+
561+
/*
 * Register the control files of hstate @idx for both hierarchies: first the
 * default-hierarchy (cgroup v2) set, then the legacy set.
 */
static void __init __hugetlb_cgroup_file_init(int idx)
562+
{
563+
__hugetlb_cgroup_file_dfl_init(idx);
564+
__hugetlb_cgroup_file_legacy_init(idx);
395565
}
396566

397567
void __init hugetlb_cgroup_file_init(void)
@@ -433,8 +603,14 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
433603
return;
434604
}
435605

606+
/*
 * Empty cftype table used for both .dfl_cftypes and .legacy_cftypes of
 * hugetlb_cgrp_subsys. The real per-hstate files are not listed here; they
 * are registered at init time through cgroup_add_dfl_cftypes() and
 * cgroup_add_legacy_cftypes().
 */
static struct cftype hugetlb_files[] = {
607+
{} /* terminate */
608+
};
609+
436610
/*
 * Subsystem descriptor of the hugetlb controller. Both cftype tables point
 * at the empty hugetlb_files array; the per-hstate control files are added
 * separately by the __hugetlb_cgroup_file_*_init() helpers.
 */
struct cgroup_subsys hugetlb_cgrp_subsys = {
437611
.css_alloc = hugetlb_cgroup_css_alloc,
438612
.css_offline = hugetlb_cgroup_css_offline,
439613
.css_free = hugetlb_cgroup_css_free,
614+
.dfl_cftypes = hugetlb_files,
615+
.legacy_cftypes = hugetlb_files,
440616
};

0 commit comments

Comments
 (0)