Skip to content

Commit 393c371

Browse files
minchank, gregkh
authored and committed
kernfs: switch global kernfs_rwsem lock to per-fs lock
The kernfs implementation has coarse lock granularity (kernfs_rwsem), so every kernfs-based filesystem (e.g., sysfs, cgroup) competes for the same lock. This causes trouble in some cases, where tasks have to wait a long time for the global lock even though they run in contexts that are totally independent of each other. A typical example: process A goes into direct reclaim while holding the lock, which it took when it accessed a file in sysfs; process B is waiting for the lock in exclusive mode; and process C then has to wait for the lock until process B, after getting the lock from process A, has finished its job. This patch switches the global kernfs_rwsem to a per-fs lock by putting the rwsem into kernfs_root. Suggested-by: Tejun Heo <tj@kernel.org> Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Minchan Kim <minchan@kernel.org> Link: https://lore.kernel.org/r/20211118230008.2679780-1-minchan@kernel.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent 1360572 commit 393c371

File tree

6 files changed

+97
-63
lines changed

6 files changed

+97
-63
lines changed

fs/kernfs/dir.c

+65-45
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@
1717

1818
#include "kernfs-internal.h"
1919

20-
DECLARE_RWSEM(kernfs_rwsem);
2120
static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
2221
static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
2322
static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
@@ -26,7 +25,7 @@ static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */
2625

2726
static bool kernfs_active(struct kernfs_node *kn)
2827
{
29-
lockdep_assert_held(&kernfs_rwsem);
28+
lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem);
3029
return atomic_read(&kn->active) >= 0;
3130
}
3231

@@ -457,14 +456,15 @@ void kernfs_put_active(struct kernfs_node *kn)
457456
* return after draining is complete.
458457
*/
459458
static void kernfs_drain(struct kernfs_node *kn)
460-
__releases(&kernfs_rwsem) __acquires(&kernfs_rwsem)
459+
__releases(&kernfs_root(kn)->kernfs_rwsem)
460+
__acquires(&kernfs_root(kn)->kernfs_rwsem)
461461
{
462462
struct kernfs_root *root = kernfs_root(kn);
463463

464-
lockdep_assert_held_write(&kernfs_rwsem);
464+
lockdep_assert_held_write(&root->kernfs_rwsem);
465465
WARN_ON_ONCE(kernfs_active(kn));
466466

467-
up_write(&kernfs_rwsem);
467+
up_write(&root->kernfs_rwsem);
468468

469469
if (kernfs_lockdep(kn)) {
470470
rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
@@ -483,7 +483,7 @@ static void kernfs_drain(struct kernfs_node *kn)
483483

484484
kernfs_drain_open_files(kn);
485485

486-
down_write(&kernfs_rwsem);
486+
down_write(&root->kernfs_rwsem);
487487
}
488488

489489
/**
@@ -718,11 +718,12 @@ struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
718718
int kernfs_add_one(struct kernfs_node *kn)
719719
{
720720
struct kernfs_node *parent = kn->parent;
721+
struct kernfs_root *root = kernfs_root(parent);
721722
struct kernfs_iattrs *ps_iattr;
722723
bool has_ns;
723724
int ret;
724725

725-
down_write(&kernfs_rwsem);
726+
down_write(&root->kernfs_rwsem);
726727

727728
ret = -EINVAL;
728729
has_ns = kernfs_ns_enabled(parent);
@@ -753,7 +754,7 @@ int kernfs_add_one(struct kernfs_node *kn)
753754
ps_iattr->ia_mtime = ps_iattr->ia_ctime;
754755
}
755756

756-
up_write(&kernfs_rwsem);
757+
up_write(&root->kernfs_rwsem);
757758

758759
/*
759760
* Activate the new node unless CREATE_DEACTIVATED is requested.
@@ -767,7 +768,7 @@ int kernfs_add_one(struct kernfs_node *kn)
767768
return 0;
768769

769770
out_unlock:
770-
up_write(&kernfs_rwsem);
771+
up_write(&root->kernfs_rwsem);
771772
return ret;
772773
}
773774

@@ -788,7 +789,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
788789
bool has_ns = kernfs_ns_enabled(parent);
789790
unsigned int hash;
790791

791-
lockdep_assert_held(&kernfs_rwsem);
792+
lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem);
792793

793794
if (has_ns != (bool)ns) {
794795
WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
@@ -820,7 +821,7 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
820821
size_t len;
821822
char *p, *name;
822823

823-
lockdep_assert_held_read(&kernfs_rwsem);
824+
lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem);
824825

825826
/* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
826827
spin_lock_irq(&kernfs_rename_lock);
@@ -859,11 +860,12 @@ struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
859860
const char *name, const void *ns)
860861
{
861862
struct kernfs_node *kn;
863+
struct kernfs_root *root = kernfs_root(parent);
862864

863-
down_read(&kernfs_rwsem);
865+
down_read(&root->kernfs_rwsem);
864866
kn = kernfs_find_ns(parent, name, ns);
865867
kernfs_get(kn);
866-
up_read(&kernfs_rwsem);
868+
up_read(&root->kernfs_rwsem);
867869

868870
return kn;
869871
}
@@ -883,11 +885,12 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
883885
const char *path, const void *ns)
884886
{
885887
struct kernfs_node *kn;
888+
struct kernfs_root *root = kernfs_root(parent);
886889

887-
down_read(&kernfs_rwsem);
890+
down_read(&root->kernfs_rwsem);
888891
kn = kernfs_walk_ns(parent, path, ns);
889892
kernfs_get(kn);
890-
up_read(&kernfs_rwsem);
893+
up_read(&root->kernfs_rwsem);
891894

892895
return kn;
893896
}
@@ -912,6 +915,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
912915
return ERR_PTR(-ENOMEM);
913916

914917
idr_init(&root->ino_idr);
918+
init_rwsem(&root->kernfs_rwsem);
915919
INIT_LIST_HEAD(&root->supers);
916920

917921
/*
@@ -1035,6 +1039,7 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
10351039
static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
10361040
{
10371041
struct kernfs_node *kn;
1042+
struct kernfs_root *root;
10381043

10391044
if (flags & LOOKUP_RCU)
10401045
return -ECHILD;
@@ -1046,18 +1051,19 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
10461051
/* If the kernfs parent node has changed discard and
10471052
* proceed to ->lookup.
10481053
*/
1049-
down_read(&kernfs_rwsem);
10501054
spin_lock(&dentry->d_lock);
10511055
parent = kernfs_dentry_node(dentry->d_parent);
10521056
if (parent) {
1057+
spin_unlock(&dentry->d_lock);
1058+
root = kernfs_root(parent);
1059+
down_read(&root->kernfs_rwsem);
10531060
if (kernfs_dir_changed(parent, dentry)) {
1054-
spin_unlock(&dentry->d_lock);
1055-
up_read(&kernfs_rwsem);
1061+
up_read(&root->kernfs_rwsem);
10561062
return 0;
10571063
}
1058-
}
1059-
spin_unlock(&dentry->d_lock);
1060-
up_read(&kernfs_rwsem);
1064+
up_read(&root->kernfs_rwsem);
1065+
} else
1066+
spin_unlock(&dentry->d_lock);
10611067

10621068
/* The kernfs parent node hasn't changed, leave the
10631069
* dentry negative and return success.
@@ -1066,7 +1072,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
10661072
}
10671073

10681074
kn = kernfs_dentry_node(dentry);
1069-
down_read(&kernfs_rwsem);
1075+
root = kernfs_root(kn);
1076+
down_read(&root->kernfs_rwsem);
10701077

10711078
/* The kernfs node has been deactivated */
10721079
if (!kernfs_active(kn))
@@ -1085,10 +1092,10 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
10851092
kernfs_info(dentry->d_sb)->ns != kn->ns)
10861093
goto out_bad;
10871094

1088-
up_read(&kernfs_rwsem);
1095+
up_read(&root->kernfs_rwsem);
10891096
return 1;
10901097
out_bad:
1091-
up_read(&kernfs_rwsem);
1098+
up_read(&root->kernfs_rwsem);
10921099
return 0;
10931100
}
10941101

@@ -1102,10 +1109,12 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
11021109
{
11031110
struct kernfs_node *parent = dir->i_private;
11041111
struct kernfs_node *kn;
1112+
struct kernfs_root *root;
11051113
struct inode *inode = NULL;
11061114
const void *ns = NULL;
11071115

1108-
down_read(&kernfs_rwsem);
1116+
root = kernfs_root(parent);
1117+
down_read(&root->kernfs_rwsem);
11091118
if (kernfs_ns_enabled(parent))
11101119
ns = kernfs_info(dir->i_sb)->ns;
11111120

@@ -1116,7 +1125,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
11161125
* create a negative.
11171126
*/
11181127
if (!kernfs_active(kn)) {
1119-
up_read(&kernfs_rwsem);
1128+
up_read(&root->kernfs_rwsem);
11201129
return NULL;
11211130
}
11221131
inode = kernfs_get_inode(dir->i_sb, kn);
@@ -1131,7 +1140,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
11311140
*/
11321141
if (!IS_ERR(inode))
11331142
kernfs_set_rev(parent, dentry);
1134-
up_read(&kernfs_rwsem);
1143+
up_read(&root->kernfs_rwsem);
11351144

11361145
/* instantiate and hash (possibly negative) dentry */
11371146
return d_splice_alias(inode, dentry);
@@ -1254,7 +1263,7 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
12541263
{
12551264
struct rb_node *rbn;
12561265

1257-
lockdep_assert_held_write(&kernfs_rwsem);
1266+
lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem);
12581267

12591268
/* if first iteration, visit leftmost descendant which may be root */
12601269
if (!pos)
@@ -1289,8 +1298,9 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
12891298
void kernfs_activate(struct kernfs_node *kn)
12901299
{
12911300
struct kernfs_node *pos;
1301+
struct kernfs_root *root = kernfs_root(kn);
12921302

1293-
down_write(&kernfs_rwsem);
1303+
down_write(&root->kernfs_rwsem);
12941304

12951305
pos = NULL;
12961306
while ((pos = kernfs_next_descendant_post(pos, kn))) {
@@ -1304,14 +1314,14 @@ void kernfs_activate(struct kernfs_node *kn)
13041314
pos->flags |= KERNFS_ACTIVATED;
13051315
}
13061316

1307-
up_write(&kernfs_rwsem);
1317+
up_write(&root->kernfs_rwsem);
13081318
}
13091319

13101320
static void __kernfs_remove(struct kernfs_node *kn)
13111321
{
13121322
struct kernfs_node *pos;
13131323

1314-
lockdep_assert_held_write(&kernfs_rwsem);
1324+
lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);
13151325

13161326
/*
13171327
* Short-circuit if non-root @kn has already finished removal.
@@ -1381,9 +1391,11 @@ static void __kernfs_remove(struct kernfs_node *kn)
13811391
*/
13821392
void kernfs_remove(struct kernfs_node *kn)
13831393
{
1384-
down_write(&kernfs_rwsem);
1394+
struct kernfs_root *root = kernfs_root(kn);
1395+
1396+
down_write(&root->kernfs_rwsem);
13851397
__kernfs_remove(kn);
1386-
up_write(&kernfs_rwsem);
1398+
up_write(&root->kernfs_rwsem);
13871399
}
13881400

13891401
/**
@@ -1469,8 +1481,9 @@ void kernfs_unbreak_active_protection(struct kernfs_node *kn)
14691481
bool kernfs_remove_self(struct kernfs_node *kn)
14701482
{
14711483
bool ret;
1484+
struct kernfs_root *root = kernfs_root(kn);
14721485

1473-
down_write(&kernfs_rwsem);
1486+
down_write(&root->kernfs_rwsem);
14741487
kernfs_break_active_protection(kn);
14751488

14761489
/*
@@ -1498,9 +1511,9 @@ bool kernfs_remove_self(struct kernfs_node *kn)
14981511
atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
14991512
break;
15001513

1501-
up_write(&kernfs_rwsem);
1514+
up_write(&root->kernfs_rwsem);
15021515
schedule();
1503-
down_write(&kernfs_rwsem);
1516+
down_write(&root->kernfs_rwsem);
15041517
}
15051518
finish_wait(waitq, &wait);
15061519
WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
@@ -1513,7 +1526,7 @@ bool kernfs_remove_self(struct kernfs_node *kn)
15131526
*/
15141527
kernfs_unbreak_active_protection(kn);
15151528

1516-
up_write(&kernfs_rwsem);
1529+
up_write(&root->kernfs_rwsem);
15171530
return ret;
15181531
}
15191532

@@ -1530,20 +1543,22 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
15301543
const void *ns)
15311544
{
15321545
struct kernfs_node *kn;
1546+
struct kernfs_root *root;
15331547

15341548
if (!parent) {
15351549
WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
15361550
name);
15371551
return -ENOENT;
15381552
}
15391553

1540-
down_write(&kernfs_rwsem);
1554+
root = kernfs_root(parent);
1555+
down_write(&root->kernfs_rwsem);
15411556

15421557
kn = kernfs_find_ns(parent, name, ns);
15431558
if (kn)
15441559
__kernfs_remove(kn);
15451560

1546-
up_write(&kernfs_rwsem);
1561+
up_write(&root->kernfs_rwsem);
15471562

15481563
if (kn)
15491564
return 0;
@@ -1562,14 +1577,16 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
15621577
const char *new_name, const void *new_ns)
15631578
{
15641579
struct kernfs_node *old_parent;
1580+
struct kernfs_root *root;
15651581
const char *old_name = NULL;
15661582
int error;
15671583

15681584
/* can't move or rename root */
15691585
if (!kn->parent)
15701586
return -EINVAL;
15711587

1572-
down_write(&kernfs_rwsem);
1588+
root = kernfs_root(kn);
1589+
down_write(&root->kernfs_rwsem);
15731590

15741591
error = -ENOENT;
15751592
if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
@@ -1623,7 +1640,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
16231640

16241641
error = 0;
16251642
out:
1626-
up_write(&kernfs_rwsem);
1643+
up_write(&root->kernfs_rwsem);
16271644
return error;
16281645
}
16291646

@@ -1694,11 +1711,14 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
16941711
struct dentry *dentry = file->f_path.dentry;
16951712
struct kernfs_node *parent = kernfs_dentry_node(dentry);
16961713
struct kernfs_node *pos = file->private_data;
1714+
struct kernfs_root *root;
16971715
const void *ns = NULL;
16981716

16991717
if (!dir_emit_dots(file, ctx))
17001718
return 0;
1701-
down_read(&kernfs_rwsem);
1719+
1720+
root = kernfs_root(parent);
1721+
down_read(&root->kernfs_rwsem);
17021722

17031723
if (kernfs_ns_enabled(parent))
17041724
ns = kernfs_info(dentry->d_sb)->ns;
@@ -1715,12 +1735,12 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
17151735
file->private_data = pos;
17161736
kernfs_get(pos);
17171737

1718-
up_read(&kernfs_rwsem);
1738+
up_read(&root->kernfs_rwsem);
17191739
if (!dir_emit(ctx, name, len, ino, type))
17201740
return 0;
1721-
down_read(&kernfs_rwsem);
1741+
down_read(&root->kernfs_rwsem);
17221742
}
1723-
up_read(&kernfs_rwsem);
1743+
up_read(&root->kernfs_rwsem);
17241744
file->private_data = NULL;
17251745
ctx->pos = INT_MAX;
17261746
return 0;

0 commit comments

Comments
 (0)