From b1aa4df98a0f1c0235e6665b6752bdb5bb0f71ea Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 12 Jun 2024 09:24:08 +0000 Subject: [PATCH] mm/hugetlb_cgroup: prepare cftypes based on template Unlike other cgroup subsystems, the hugetlb cgroup does not provide a static array of cftype that explicitly displays the properties, handling functions, etc., of each file. Instead, it dynamically creates the attribute of cftypes based on the hstate during the startup procedure. This reduces the readability of the code. To fix this issue, introduce two templates of cftypes, and rebuild the attributes according to the hstate to make it ready to be added to cgroup framework. Link: https://lkml.kernel.org/r/20240612092409.2027592-3-xiujianfeng@huawei.com Signed-off-by: Xiu Jianfeng Cc: Muchun Song Cc: Oscar Salvador Cc: kernel test robot From: Xiu Jianfeng Subject: mm/hugetlb_cgroup: register lockdep key for cftype Date: Tue, 18 Jun 2024 07:19:22 +0000 When CONFIG_DEBUG_LOCK_ALLOC is enabled, the following commands can trigger a bug, mount -t cgroup2 none /sys/fs/cgroup cd /sys/fs/cgroup echo "+hugetlb" > cgroup.subtree_control The log is as below: BUG: key ffff8880046d88d8 has not been registered! ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 3 PID: 226 at kernel/locking/lockdep.c:4945 lockdep_init_map_type+0x185/0x220 Modules linked in: CPU: 3 PID: 226 Comm: bash Not tainted 6.10.0-rc4-next-20240617-g76db4c64526c #544 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 RIP: 0010:lockdep_init_map_type+0x185/0x220 Code: 00 85 c0 0f 84 6c ff ff ff 8b 3d 6a d1 85 01 85 ff 0f 85 5e ff ff ff 48 c7 c6 21 99 4a 82 48 c7 c7 60 29 49 82 e8 3b 2e f5 RSP: 0018:ffffc9000083fc30 EFLAGS: 00000282 RAX: 0000000000000000 RBX: ffffffff828dd820 RCX: 0000000000000027 RDX: ffff88803cd9cac8 RSI: 0000000000000001 RDI: ffff88803cd9cac0 RBP: ffff88800674fbb0 R08: ffffffff828ce248 R09: 00000000ffffefff R10: ffffffff8285e260 R11: ffffffff828b8eb8 R12: ffff8880046d88d8 R13: 0000000000000000 R14: 0000000000000000 R15: ffff8880067281c0 FS: 00007f68601ea740(0000) GS:ffff88803cd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005614f3ebc740 CR3: 000000000773a000 CR4: 00000000000006f0 Call Trace: ? __warn+0x77/0xd0 ? lockdep_init_map_type+0x185/0x220 ? report_bug+0x189/0x1a0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? lockdep_init_map_type+0x185/0x220 __kernfs_create_file+0x79/0x100 cgroup_addrm_files+0x163/0x380 ? find_held_lock+0x2b/0x80 ? find_held_lock+0x2b/0x80 ? find_held_lock+0x2b/0x80 css_populate_dir+0x73/0x180 cgroup_apply_control_enable+0x12f/0x3a0 cgroup_subtree_control_write+0x30b/0x440 kernfs_fop_write_iter+0x13a/0x1f0 vfs_write+0x341/0x450 ksys_write+0x64/0xe0 do_syscall_64+0x4b/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f68602d9833 Code: 8b 15 61 26 0e 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 64 8b 04 25 18 00 00 00 85 c0 75 14 b8 01 00 00 00 08 RSP: 002b:00007fff9bbdf8e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007f68602d9833 RDX: 0000000000000009 RSI: 00005614f3ebc740 RDI: 0000000000000001 RBP: 00005614f3ebc740 R08: 000000000000000a R09: 0000000000000008 R10: 00005614f3db6ba0 R11: 0000000000000246 R12: 0000000000000009 R13: 00007f68603bd6a0 R14: 0000000000000009 R15: 00007f68603b8880 For lockdep, there is a sanity check in lockdep_init_map_type(), the lock-class key must either have been allocated statically or must have been registered as a dynamic key. However the commit e18df2889ff9 ("mm/hugetlb_cgroup: prepare cftypes based on template") has changed the cftypes from static allocated objects to dynamic allocated objects, so the cft->lockdep_key must be registered proactively. [xiujianfeng@huawei.com: fix BUG()] Link: https://lkml.kernel.org/r/20240619015527.2212698-1-xiujianfeng@huawei.com Link: https://lkml.kernel.org/r/20240618071922.2127289-1-xiujianfeng@huawei.com Link: https://lore.kernel.org/all/602186b3-5ce3-41b3-90a3-134792cc2a48@samsung.com/ Fixes: e18df2889ff9 ("mm/hugetlb_cgroup: prepare cftypes based on template") Signed-off-by: Xiu Jianfeng Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202406181046.8d8b2492-oliver.sang@intel.com Tested-by: Marek Szyprowski Tested-by: SeongJae Park Closes: https://lore.kernel.org/20240618233608.400367-1-sj@kernel.org Cc: Muchun Song Cc: Oscar Salvador Signed-off-by: Andrew Morton --- include/linux/cgroup-defs.h | 2 - mm/hugetlb_cgroup.c | 158 ++++++++++++++++++++++++++++++++++++ 2 files changed, 158 insertions(+), 2 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ea48c861cd3695..dc151cb0ef09b0 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -676,9 +676,7 @@ struct cftype { __poll_t (*poll)(struct kernfs_open_file *of, struct poll_table_struct *pt); -#ifdef CONFIG_DEBUG_LOCK_ALLOC struct lock_class_key lockdep_key; -#endif }; /* diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 45f94a8697762f..a4506569841944 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -27,7 +27,17 @@ #define MEMFILE_IDX(val) (((val) >> 16) & 0xffff) #define MEMFILE_ATTR(val) ((val) & 0xffff) +/* Use t->m[0] to encode the offset */ +#define MEMFILE_OFFSET(t, m0) (((offsetof(t, m0) << 16) | sizeof_field(t, m0))) +#define MEMFILE_OFFSET0(val) (((val) >> 16) & 0xffff) +#define MEMFILE_FIELD_SIZE(val) ((val) & 0xffff) + +#define DFL_TMPL_SIZE ARRAY_SIZE(hugetlb_dfl_tmpl) +#define LEGACY_TMPL_SIZE ARRAY_SIZE(hugetlb_legacy_tmpl) + static struct hugetlb_cgroup *root_h_cgroup __read_mostly; +static struct cftype *dfl_files; +static struct cftype *legacy_files; static inline struct page_counter * __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx, @@ -702,12 +712,144 @@ static int hugetlb_events_local_show(struct seq_file *seq, void *v) return __hugetlb_events_show(seq, true); } +static struct cftype hugetlb_dfl_tmpl[] = { + { + .name = "max", + .private = RES_LIMIT, + .seq_show = hugetlb_cgroup_read_u64_max, + .write = hugetlb_cgroup_write_dfl, + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "rsvd.max", + .private = RES_RSVD_LIMIT, + .seq_show = hugetlb_cgroup_read_u64_max, + .write = hugetlb_cgroup_write_dfl, + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "current", + .private = RES_USAGE, + .seq_show = hugetlb_cgroup_read_u64_max, + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "rsvd.current", + .private = RES_RSVD_USAGE, + .seq_show = hugetlb_cgroup_read_u64_max, + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "events", + .seq_show = hugetlb_events_show, + .file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_file[0]), + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "events.local", + .seq_show = hugetlb_events_local_show, + .file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_local_file[0]), + .flags = CFTYPE_NOT_ON_ROOT, + }, + { + .name = "numa_stat", + .seq_show = hugetlb_cgroup_read_numa_stat, + .flags = CFTYPE_NOT_ON_ROOT, + }, + /* don't need terminator here */ +}; + +static struct cftype hugetlb_legacy_tmpl[] = { + { + .name = "limit_in_bytes", + .private = RES_LIMIT, + .read_u64 = hugetlb_cgroup_read_u64, + .write = hugetlb_cgroup_write_legacy, + }, + { + .name = "rsvd.limit_in_bytes", + .private = RES_RSVD_LIMIT, + .read_u64 = hugetlb_cgroup_read_u64, + .write = hugetlb_cgroup_write_legacy, + }, + { + .name = "usage_in_bytes", + .private = RES_USAGE, + .read_u64 = hugetlb_cgroup_read_u64, + }, + { + .name = "rsvd.usage_in_bytes", + .private = RES_RSVD_USAGE, + .read_u64 = hugetlb_cgroup_read_u64, + }, + { + .name = "max_usage_in_bytes", + .private = RES_MAX_USAGE, + .write = hugetlb_cgroup_reset, + .read_u64 = hugetlb_cgroup_read_u64, + }, + { + .name = "rsvd.max_usage_in_bytes", + .private = RES_RSVD_MAX_USAGE, + .write = hugetlb_cgroup_reset, + .read_u64 = hugetlb_cgroup_read_u64, + }, + { + .name = "failcnt", + .private = RES_FAILCNT, + .write = hugetlb_cgroup_reset, + .read_u64 = hugetlb_cgroup_read_u64, + }, + { + .name = "rsvd.failcnt", + .private = RES_RSVD_FAILCNT, + .write = hugetlb_cgroup_reset, + .read_u64 = hugetlb_cgroup_read_u64, + }, + { + .name = "numa_stat", + .seq_show = hugetlb_cgroup_read_numa_stat, + }, + /* don't need terminator here */ +}; + +static void __init +hugetlb_cgroup_cfttypes_init(struct hstate *h, struct cftype *cft, + struct cftype *tmpl, int tmpl_size) +{ + char buf[32]; + int i, idx = hstate_index(h); + + /* format the size */ + mem_fmt(buf, sizeof(buf), huge_page_size(h)); + + for (i = 0; i < tmpl_size; cft++, tmpl++, i++) { + *cft = *tmpl; + /* rebuild the name */ + snprintf(cft->name, MAX_CFTYPE_NAME, "%s.%s", buf, tmpl->name); + /* rebuild the private */ + cft->private = MEMFILE_PRIVATE(idx, tmpl->private); + /* rebuild the file_offset */ + if (tmpl->file_offset) { + unsigned int offset = tmpl->file_offset; + + cft->file_offset = MEMFILE_OFFSET0(offset) + + MEMFILE_FIELD_SIZE(offset) * idx; + } + + lockdep_register_key(&cft->lockdep_key); + } +} + static void __init __hugetlb_cgroup_file_dfl_init(int idx) { char buf[32]; struct cftype *cft; struct hstate *h = &hstates[idx]; + hugetlb_cgroup_cfttypes_init(h, dfl_files + idx * DFL_TMPL_SIZE, + hugetlb_dfl_tmpl, DFL_TMPL_SIZE); + /* format the size */ mem_fmt(buf, sizeof(buf), huge_page_size(h)); @@ -779,6 +921,9 @@ static void __init __hugetlb_cgroup_file_legacy_init(int idx) struct cftype *cft; struct hstate *h = &hstates[idx]; + hugetlb_cgroup_cfttypes_init(h, legacy_files + idx * LEGACY_TMPL_SIZE, + hugetlb_legacy_tmpl, LEGACY_TMPL_SIZE); + /* format the size */ mem_fmt(buf, sizeof(buf), huge_page_size(h)); @@ -856,10 +1001,23 @@ static void __init __hugetlb_cgroup_file_init(int idx) __hugetlb_cgroup_file_legacy_init(idx); } +static void __init __hugetlb_cgroup_file_pre_init(void) +{ + int cft_count; + + cft_count = hugetlb_max_hstate * DFL_TMPL_SIZE + 1; /* add terminator */ + dfl_files = kcalloc(cft_count, sizeof(struct cftype), GFP_KERNEL); + BUG_ON(!dfl_files); + cft_count = hugetlb_max_hstate * LEGACY_TMPL_SIZE + 1; /* add terminator */ + legacy_files = kcalloc(cft_count, sizeof(struct cftype), GFP_KERNEL); + BUG_ON(!legacy_files); +} + void __init hugetlb_cgroup_file_init(void) { struct hstate *h; + __hugetlb_cgroup_file_pre_init(); for_each_hstate(h) __hugetlb_cgroup_file_init(hstate_index(h)); }