Skip to content

Commit 46eae99

Browse files
Miklos Szeredibrauner
authored andcommitted
add statmount(2) syscall
Add a way to query attributes of a single mount instead of having to parse the complete /proc/$PID/mountinfo, which might be huge. Look up the mount by the new 64-bit mount ID. If a mount needs to be queried based on path, then statx(2) can be used to first query the mount ID belonging to the path. Design is based on a suggestion by Linus: "So I'd suggest something that is very much like "statfsat()", which gets a buffer and a length, and returns an extended "struct statfs" *AND* just a string description at the end." The interface closely mimics that of statx. Handle ASCII attributes by appending after the end of the structure (as per above suggestion). Pointers to strings are stored in u64 members to make the structure the same regardless of pointer size. Strings are nul terminated. Link: https://lore.kernel.org/all/CAHk-=wh5YifP7hzKSbwJj94+DZ2czjrZsczy6GBimiogZws=rg@mail.gmail.com/ Signed-off-by: Miklos Szeredi <mszeredi@redhat.com> Link: https://lore.kernel.org/r/20231025140205.3586473-5-mszeredi@redhat.com Reviewed-by: Ian Kent <raven@themaw.net> [Christian Brauner <brauner@kernel.org>: various minor changes] Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 56c94c6 commit 46eae99

File tree

3 files changed

+339
-0
lines changed

3 files changed

+339
-0
lines changed

fs/namespace.c

Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4683,6 +4683,287 @@ int show_path(struct seq_file *m, struct dentry *root)
46834683
return 0;
46844684
}
46854685

4686+
/*
 * Find the mount in @ns whose unique ID is exactly @id.
 *
 * mnt_find_id_at() may hand back a mount with a different ID (presumably
 * the nearest following one -- confirm against its definition), so the
 * result is accepted only on an exact match.
 *
 * Returns the embedded vfsmount, or NULL if there is no such mount.
 */
static struct vfsmount *lookup_mnt_in_ns(u64 id, struct mnt_namespace *ns)
{
	struct mount *found = mnt_find_id_at(ns, id);

	if (found && found->mnt_id_unique == id)
		return &found->mnt;

	return NULL;
}
4695+
4696+
struct kstatmount {
4697+
struct statmount __user *const buf;
4698+
size_t const bufsize;
4699+
struct vfsmount *const mnt;
4700+
u64 const mask;
4701+
struct seq_file seq;
4702+
struct path root;
4703+
struct statmount sm;
4704+
size_t pos;
4705+
int err;
4706+
};
4707+
4708+
/*
 * Callback that produces one piece of the statmount reply from the state in
 * the kstatmount.  A non-zero return is passed back by statmount_string_seq()
 * and is stored in ->err by statmount_numeric().
 */
typedef int (*statmount_func_t)(struct kstatmount *);
4709+
4710+
/*
 * Make one attempt at rendering a string into a freshly allocated seq buffer.
 *
 * The buffer is sized to the smaller of the current seq->size and the space
 * left in the user buffer after the fixed structure and earlier strings.
 *
 * Returns:
 *   1           the string fit; it is in seq->buf with length seq->count
 *   0           the string overflowed; seq->size was doubled and the buffer
 *               freed, so the caller should try again
 *   -EOVERFLOW  the string overflowed although all remaining user space
 *               was offered
 *   -ENOMEM     allocation failed, or the doubled size exceeds MAX_RW_COUNT
 *   other       non-zero value returned by @func
 *
 * Except on the retry (0) path, seq->buf is left for the caller to free.
 */
static int statmount_string_seq(struct kstatmount *s, statmount_func_t func)
{
	struct seq_file *seq = &s->seq;
	size_t avail = s->bufsize - s->pos - sizeof(s->sm);
	int err;

	seq->count = 0;
	seq->size = min(seq->size, avail);
	seq->buf = kvmalloc(seq->size, GFP_KERNEL_ACCOUNT);
	if (!seq->buf)
		return -ENOMEM;

	err = func(s);
	if (err)
		return err;

	/* Done */
	if (!seq_has_overflowed(seq))
		return 1;

	/* Already offered everything the user buffer can hold. */
	if (seq->size == avail)
		return -EOVERFLOW;

	seq->size *= 2;
	if (seq->size > MAX_RW_COUNT)
		return -ENOMEM;

	/* Drop the too-small buffer; the next attempt allocates a larger one. */
	kvfree(seq->buf);
	return 0;
}
4739+
4740+
static void statmount_string(struct kstatmount *s, u64 mask, statmount_func_t func,
4741+
u32 *str)
4742+
{
4743+
int ret = s->pos + sizeof(s->sm) >= s->bufsize ? -EOVERFLOW : 0;
4744+
struct statmount *sm = &s->sm;
4745+
struct seq_file *seq = &s->seq;
4746+
4747+
if (s->err || !(s->mask & mask))
4748+
return;
4749+
4750+
seq->size = PAGE_SIZE;
4751+
while (!ret)
4752+
ret = statmount_string_seq(s, func);
4753+
4754+
if (ret < 0) {
4755+
s->err = ret;
4756+
} else {
4757+
seq->buf[seq->count++] = '\0';
4758+
if (copy_to_user(s->buf->str + s->pos, seq->buf, seq->count)) {
4759+
s->err = -EFAULT;
4760+
} else {
4761+
*str = s->pos;
4762+
s->pos += seq->count;
4763+
}
4764+
}
4765+
kvfree(seq->buf);
4766+
sm->mask |= mask;
4767+
}
4768+
4769+
/*
 * Fill in one group of numeric fields.
 *
 * Skipped when a previous helper has already failed or when @mask was not
 * requested.  Otherwise @func is run, its result becomes s->err, and @mask
 * is recorded in the reply mask.
 */
static void statmount_numeric(struct kstatmount *s, u64 mask, statmount_func_t func)
{
	if (s->err)
		return;

	if (!(s->mask & mask))
		return;

	s->err = func(s);
	s->sm.mask |= mask;
}
4777+
4778+
static u64 mnt_to_attr_flags(struct vfsmount *mnt)
4779+
{
4780+
unsigned int mnt_flags = READ_ONCE(mnt->mnt_flags);
4781+
u64 attr_flags = 0;
4782+
4783+
if (mnt_flags & MNT_READONLY)
4784+
attr_flags |= MOUNT_ATTR_RDONLY;
4785+
if (mnt_flags & MNT_NOSUID)
4786+
attr_flags |= MOUNT_ATTR_NOSUID;
4787+
if (mnt_flags & MNT_NODEV)
4788+
attr_flags |= MOUNT_ATTR_NODEV;
4789+
if (mnt_flags & MNT_NOEXEC)
4790+
attr_flags |= MOUNT_ATTR_NOEXEC;
4791+
if (mnt_flags & MNT_NODIRATIME)
4792+
attr_flags |= MOUNT_ATTR_NODIRATIME;
4793+
if (mnt_flags & MNT_NOSYMFOLLOW)
4794+
attr_flags |= MOUNT_ATTR_NOSYMFOLLOW;
4795+
4796+
if (mnt_flags & MNT_NOATIME)
4797+
attr_flags |= MOUNT_ATTR_NOATIME;
4798+
else if (mnt_flags & MNT_RELATIME)
4799+
attr_flags |= MOUNT_ATTR_RELATIME;
4800+
else
4801+
attr_flags |= MOUNT_ATTR_STRICTATIME;
4802+
4803+
if (is_idmapped_mnt(mnt))
4804+
attr_flags |= MOUNT_ATTR_IDMAP;
4805+
4806+
return attr_flags;
4807+
}
4808+
4809+
static u64 mnt_to_propagation_flags(struct mount *m)
4810+
{
4811+
u64 propagation = 0;
4812+
4813+
if (IS_MNT_SHARED(m))
4814+
propagation |= MS_SHARED;
4815+
if (IS_MNT_SLAVE(m))
4816+
propagation |= MS_SLAVE;
4817+
if (IS_MNT_UNBINDABLE(m))
4818+
propagation |= MS_UNBINDABLE;
4819+
if (!propagation)
4820+
propagation |= MS_PRIVATE;
4821+
4822+
return propagation;
4823+
}
4824+
4825+
static int statmount_sb_basic(struct kstatmount *s)
4826+
{
4827+
struct super_block *sb = s->mnt->mnt_sb;
4828+
4829+
s->sm.sb_dev_major = MAJOR(sb->s_dev);
4830+
s->sm.sb_dev_minor = MINOR(sb->s_dev);
4831+
s->sm.sb_magic = sb->s_magic;
4832+
s->sm.sb_flags = sb->s_flags & (SB_RDONLY|SB_SYNCHRONOUS|SB_DIRSYNC|SB_LAZYTIME);
4833+
4834+
return 0;
4835+
}
4836+
4837+
static int statmount_mnt_basic(struct kstatmount *s)
4838+
{
4839+
struct mount *m = real_mount(s->mnt);
4840+
4841+
s->sm.mnt_id = m->mnt_id_unique;
4842+
s->sm.mnt_parent_id = m->mnt_parent->mnt_id_unique;
4843+
s->sm.mnt_id_old = m->mnt_id;
4844+
s->sm.mnt_parent_id_old = m->mnt_parent->mnt_id;
4845+
s->sm.mnt_attr = mnt_to_attr_flags(&m->mnt);
4846+
s->sm.mnt_propagation = mnt_to_propagation_flags(m);
4847+
s->sm.mnt_peer_group = IS_MNT_SHARED(m) ? m->mnt_group_id : 0;
4848+
s->sm.mnt_master = IS_MNT_SLAVE(m) ? m->mnt_master->mnt_group_id : 0;
4849+
4850+
return 0;
4851+
}
4852+
4853+
static int statmount_propagate_from(struct kstatmount *s)
4854+
{
4855+
struct mount *m = real_mount(s->mnt);
4856+
4857+
if (!IS_MNT_SLAVE(m))
4858+
return 0;
4859+
4860+
s->sm.propagate_from = get_dominating_id(m, &current->fs->root);
4861+
4862+
return 0;
4863+
}
4864+
4865+
static int statmount_mnt_root(struct kstatmount *s)
4866+
{
4867+
struct seq_file *seq = &s->seq;
4868+
int err = show_path(seq, s->mnt->mnt_root);
4869+
4870+
if (!err && !seq_has_overflowed(seq)) {
4871+
seq->buf[seq->count] = '\0';
4872+
seq->count = string_unescape_inplace(seq->buf, UNESCAPE_OCTAL);
4873+
}
4874+
return err;
4875+
}
4876+
4877+
static int statmount_mnt_point(struct kstatmount *s)
4878+
{
4879+
struct vfsmount *mnt = s->mnt;
4880+
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
4881+
int err = seq_path_root(&s->seq, &mnt_path, &s->root, "");
4882+
4883+
return err == SEQ_SKIP ? 0 : err;
4884+
}
4885+
4886+
static int statmount_fs_type(struct kstatmount *s)
4887+
{
4888+
struct seq_file *seq = &s->seq;
4889+
struct super_block *sb = s->mnt->mnt_sb;
4890+
4891+
seq_puts(seq, sb->s_type->name);
4892+
return 0;
4893+
}
4894+
4895+
static int do_statmount(struct kstatmount *s)
4896+
{
4897+
struct statmount *sm = &s->sm;
4898+
struct mount *m = real_mount(s->mnt);
4899+
size_t copysize = min_t(size_t, s->bufsize, sizeof(*sm));
4900+
int err;
4901+
4902+
/*
4903+
* Don't trigger audit denials. We just want to determine what
4904+
* mounts to show users.
4905+
*/
4906+
if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
4907+
!ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
4908+
return -EPERM;
4909+
4910+
err = security_sb_statfs(s->mnt->mnt_root);
4911+
if (err)
4912+
return err;
4913+
4914+
statmount_numeric(s, STATMOUNT_SB_BASIC, statmount_sb_basic);
4915+
statmount_numeric(s, STATMOUNT_MNT_BASIC, statmount_mnt_basic);
4916+
statmount_numeric(s, STATMOUNT_PROPAGATE_FROM, statmount_propagate_from);
4917+
statmount_string(s, STATMOUNT_FS_TYPE, statmount_fs_type, &sm->fs_type);
4918+
statmount_string(s, STATMOUNT_MNT_ROOT, statmount_mnt_root, &sm->mnt_root);
4919+
statmount_string(s, STATMOUNT_MNT_POINT, statmount_mnt_point, &sm->mnt_point);
4920+
4921+
if (s->err)
4922+
return s->err;
4923+
4924+
/* Return the number of bytes copied to the buffer */
4925+
sm->size = copysize + s->pos;
4926+
4927+
if (copy_to_user(s->buf, sm, copysize))
4928+
return -EFAULT;
4929+
4930+
return 0;
4931+
}
4932+
4933+
/*
 * statmount(2): return attributes of the mount identified by req->mnt_id
 * in the caller's mount namespace.
 *
 * @req:     userspace request holding the mount ID and the request mask
 * @buf:     userspace buffer receiving struct statmount plus strings
 * @bufsize: size of @buf
 * @flags:   must be 0
 *
 * Returns 0 on success, -EINVAL for non-zero @flags, -EFAULT if @req cannot
 * be read, -ENOENT if no mount with that ID is found, or the error from
 * do_statmount().
 */
SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
		struct statmount __user *, buf, size_t, bufsize,
		unsigned int, flags)
{
	struct mnt_id_req request;
	struct vfsmount *mnt;
	int status = -ENOENT;

	if (flags)
		return -EINVAL;

	if (copy_from_user(&request, req, sizeof(request)))
		return -EFAULT;

	/* Lookup and query both run with namespace_sem held for reading. */
	down_read(&namespace_sem);
	mnt = lookup_mnt_in_ns(request.mnt_id, current->nsproxy->mnt_ns);
	if (mnt) {
		struct kstatmount state = {
			.mask = request.request_mask,
			.buf = buf,
			.bufsize = bufsize,
			.mnt = mnt,
		};

		get_fs_root(current->fs, &state.root);
		status = do_statmount(&state);
		path_put(&state.root);
	}
	up_read(&namespace_sem);

	return status;
}
4966+
46864967
static void __init init_mount_tree(void)
46874968
{
46884969
struct vfsmount *mnt;

include/linux/syscalls.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ struct landlock_ruleset_attr;
7474
enum landlock_rule_type;
7575
struct cachestat_range;
7676
struct cachestat;
77+
struct statmount;
78+
struct mnt_id_req;
7779

7880
#include <linux/types.h>
7981
#include <linux/aio_abi.h>
@@ -407,6 +409,9 @@ asmlinkage long sys_statfs64(const char __user *path, size_t sz,
407409
asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf);
408410
asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz,
409411
struct statfs64 __user *buf);
412+
/* statmount(2): query attributes of a single mount selected by ID */
asmlinkage long sys_statmount(const struct mnt_id_req __user *req,
			      struct statmount __user *buf,
			      size_t bufsize, unsigned int flags);
410415
asmlinkage long sys_truncate(const char __user *path, long length);
411416
asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
412417
#if BITS_PER_LONG == 32

include/uapi/linux/mount.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,4 +138,57 @@ struct mount_attr {
138138
/* List of all mount_attr versions. */
139139
#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
140140

141+
142+
/*
143+
* Structure for getting mount/superblock/filesystem info with statmount(2).
144+
*
145+
* The interface is similar to statx(2): individual fields or groups can be
146+
* selected with the @mask argument of statmount(). Kernel will set the @mask
147+
* field according to the supported fields.
148+
*
149+
* If string fields are selected, then the caller needs to pass a buffer that
150+
* has space after the fixed part of the structure. Nul terminated strings are
151+
* copied there and offsets relative to @str are stored in the relevant fields.
152+
* If the buffer is too small, then EOVERFLOW is returned. The actually used
153+
* size is returned in @size.
154+
*/
155+
struct statmount {
156+
__u32 size; /* Total size, including strings */
157+
__u32 __spare1;
158+
__u64 mask; /* What results were written */
159+
__u32 sb_dev_major; /* Device ID */
160+
__u32 sb_dev_minor;
161+
__u64 sb_magic; /* ..._SUPER_MAGIC */
162+
__u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */
163+
__u32 fs_type; /* [str] Filesystem type */
164+
__u64 mnt_id; /* Unique ID of mount */
165+
__u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */
166+
__u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */
167+
__u32 mnt_parent_id_old;
168+
__u64 mnt_attr; /* MOUNT_ATTR_... */
169+
__u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */
170+
__u64 mnt_peer_group; /* ID of shared peer group */
171+
__u64 mnt_master; /* Mount receives propagation from this ID */
172+
__u64 propagate_from; /* Propagation from in current namespace */
173+
__u32 mnt_root; /* [str] Root of mount relative to root of fs */
174+
__u32 mnt_point; /* [str] Mountpoint relative to current root */
175+
__u64 __spare2[50];
176+
char str[]; /* Variable size part containing strings */
177+
};
178+
179+
struct mnt_id_req {
180+
__u64 mnt_id;
181+
__u64 request_mask;
182+
};
183+
184+
/*
 * @mask bits for statmount(2).  The same bits are used to request results
 * and to report which results were written.
 */
#define STATMOUNT_SB_BASIC		0x00000001U	/* Want/got sb_... */
#define STATMOUNT_MNT_BASIC		0x00000002U	/* Want/got mnt_... */
#define STATMOUNT_PROPAGATE_FROM	0x00000004U	/* Want/got propagate_from */
#define STATMOUNT_MNT_ROOT		0x00000008U	/* Want/got mnt_root */
#define STATMOUNT_MNT_POINT		0x00000010U	/* Want/got mnt_point */
#define STATMOUNT_FS_TYPE		0x00000020U	/* Want/got fs_type */
193+
141194
#endif /* _UAPI_LINUX_MOUNT_H */

0 commit comments

Comments
 (0)