Skip to content

Commit d18e900

Browse files
Miklos Szeredi authored and Al Viro committed
vfs: add i_op->atomic_open()
Add a new inode operation which is called on the last component of an open. Using this the filesystem can look up, possibly create and open the file in one atomic operation. If it cannot perform this (e.g. the file type turned out to be wrong) it may signal this by returning NULL instead of an open struct file pointer. i_op->atomic_open() is only called if the last component is negative or needs lookup. Handling cached positive dentries here doesn't add much value: these can be opened using f_op->open(). If the cached file turns out to be invalid, the open can be retried, this time using ->atomic_open() with a fresh dentry. For now leave the old way of using open intents in lookup and revalidate in place. This will be removed once all the users are converted. David Howells noticed that if ->atomic_open() opens the file but does not create it, handle_truncate() will be called on it even if it is not a regular file. Fix this by checking the file type in this case too. Signed-off-by: Miklos Szeredi <mszeredi@suse.cz> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
1 parent 54ef487 commit d18e900

File tree

6 files changed

+270
-2
lines changed

6 files changed

+270
-2
lines changed

Documentation/filesystems/Locking

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ ata *);
6262
int (*removexattr) (struct dentry *, const char *);
6363
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
6464
void (*update_time)(struct inode *, struct timespec *, int);
65+
struct file * (*atomic_open)(struct inode *, struct dentry *,
66+
struct opendata *, unsigned open_flag,
67+
umode_t create_mode, bool *created);
6568

6669
locking rules:
6770
all may block
@@ -89,6 +92,7 @@ listxattr: no
8992
removexattr: yes
9093
fiemap: no
9194
update_time: no
95+
atomic_open: yes
9296

9397
Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
9498
victim.

Documentation/filesystems/vfs.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,9 @@ struct inode_operations {
364364
ssize_t (*listxattr) (struct dentry *, char *, size_t);
365365
int (*removexattr) (struct dentry *, const char *);
366366
void (*update_time)(struct inode *, struct timespec *, int);
367+
struct file * (*atomic_open)(struct inode *, struct dentry *,
368+
struct opendata *, unsigned open_flag,
369+
umode_t create_mode, bool *created);
367370
};
368371

369372
Again, all methods are called without any locks being held, unless
@@ -476,6 +479,14 @@ otherwise noted.
476479
an inode. If this is not defined the VFS will update the inode itself
477480
and call mark_inode_dirty_sync.
478481

482+
atomic_open: called on the last component of an open. Using this optional
483+
method the filesystem can look up, possibly create and open the file in
484+
one atomic operation. If it cannot perform this (e.g. the file type
485+
turned out to be wrong) it may signal this by returning NULL instead of
486+
an open struct file pointer. This method is only called if the last
487+
component is negative or needs lookup. Cached positive dentries are
488+
still handled by f_op->open().
489+
479490
The Address Space Object
480491
========================
481492

fs/internal.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ extern struct super_block *user_get_super(dev_t);
8585
struct nameidata;
8686
extern struct file *nameidata_to_filp(struct nameidata *);
8787
extern void release_open_intent(struct nameidata *);
88+
/*
 * Data handed to i_op->atomic_open(), which passes it on to finish_open()
 * or finish_no_open().
 */
struct opendata {
89+
struct dentry *dentry;	/* lookup result stored by finish_no_open() */
90+
struct vfsmount *mnt;	/* mount that finish_open() passes to do_dentry_open() */
91+
struct file **filp;	/* file that finish_open() opens; *filp is cleared on success */
92+
};
8893
struct open_flags {
8994
int open_flag;
9095
umode_t mode;

fs/namei.c

Lines changed: 201 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2196,6 +2196,176 @@ static inline int open_to_namei_flags(int flag)
21962196
return flag;
21972197
}
21982198

2199+
/*
 * Check whether @dentry may be created in the directory @dir with @mode.
 *
 * Runs three checks in order and stops at the first failure:
 * security_path_mknod(), inode_permission() on the directory inode for
 * MAY_WRITE | MAY_EXEC, and security_inode_create().
 *
 * Returns 0 if all checks pass, otherwise the error from the failing check.
 */
static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
2200+
{
2201+
int error = security_path_mknod(dir, dentry, mode, 0);
2202+
if (error)
2203+
return error;
2204+
2205+
error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
2206+
if (error)
2207+
return error;
2208+
2209+
return security_inode_create(dir->dentry->d_inode, dentry, mode);
2210+
}
2211+
2212+
/*
 * Try to look up, possibly create, and open the last component through the
 * directory's i_op->atomic_open() method.
 *
 * @nd:          nameidata; nd->path is the parent directory
 * @dentry:      negative dentry for the last component (BUG if positive)
 * @path:        filled in with the resulting dentry/mnt when NULL is returned
 * @op:          open flags, create mode and access mode of this open
 * @want_write:  set to 1 if mnt_want_write() succeeded here
 * @need_lookup: whether @dentry still needs lookup_real() on the fallback path
 * @created:     set to true for O_EXCL opens before calling the filesystem;
 *               also passed to ->atomic_open()
 *
 * Returns:
 *  - an open file, after may_open() and open_check_o_direct() succeeded;
 *  - NULL if the file was not opened; *path then holds the dentry to
 *    continue with;
 *  - ERR_PTR() on failure.
 *
 * The reference on @dentry is dropped on the error and opened-file paths;
 * on the NULL path the dentry (possibly replaced) is handed over in @path.
 */
static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
2213+
struct path *path, const struct open_flags *op,
2214+
int *want_write, bool need_lookup,
2215+
bool *created)
2216+
{
2217+
struct inode *dir = nd->path.dentry->d_inode;
2218+
unsigned open_flag = open_to_namei_flags(op->open_flag);
2219+
umode_t mode;
2220+
int error;
2221+
int acc_mode;
2222+
struct opendata od;
2223+
struct file *filp;
2224+
int create_error = 0;
2225+
struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
2226+
2227+
BUG_ON(dentry->d_inode);
2228+
2229+
/* Don't create child dentry for a dead directory. */
2230+
if (unlikely(IS_DEADDIR(dir))) {
2231+
filp = ERR_PTR(-ENOENT);
2232+
goto out;
2233+
}
2234+
2235+
mode = op->mode & S_IALLUGO;
2236+
if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
2237+
mode &= ~current_umask();
2238+
2239+
if (open_flag & O_EXCL) {
2240+
open_flag &= ~O_TRUNC;
2241+
*created = true;
2242+
}
2243+
2244+
/*
2245+
* Checking write permission is tricky, because we don't know if we are
2246+
* going to actually need it: O_CREAT opens should work as long as the
2247+
* file exists. But checking existence breaks atomicity. The trick is
2248+
* to check access and if not granted clear O_CREAT from the flags.
2249+
*
2250+
* Another problem is returning the "right" error value (e.g. for an
2251+
* O_EXCL open we want to return EEXIST not EROFS).
2252+
*/
2253+
if ((open_flag & (O_CREAT | O_TRUNC)) ||
2254+
(open_flag & O_ACCMODE) != O_RDONLY) {
2255+
error = mnt_want_write(nd->path.mnt);
2256+
if (!error) {
2257+
*want_write = 1;
2258+
} else if (!(open_flag & O_CREAT)) {
2259+
/*
2260+
* No O_CREAT -> atomicity not a requirement -> fall
2261+
* back to lookup + open
2262+
*/
2263+
goto no_open;
2264+
} else if (open_flag & (O_EXCL | O_TRUNC)) {
2265+
/* Fall back and fail with the right error */
2266+
create_error = error;
2267+
goto no_open;
2268+
} else {
2269+
/* No side effects, safe to clear O_CREAT */
2270+
create_error = error;
2271+
open_flag &= ~O_CREAT;
2272+
}
2273+
}
2274+
2275+
if (open_flag & O_CREAT) {
2276+
error = may_o_create(&nd->path, dentry, op->mode);
2277+
if (error) {
/*
 * Creation is not permitted: remember why, then either fall
 * back (O_EXCL) or retry as a plain open without O_CREAT.
 */
2278+
create_error = error;
2279+
if (open_flag & O_EXCL)
2280+
goto no_open;
2281+
open_flag &= ~O_CREAT;
2282+
}
2283+
}
2284+
2285+
if (nd->flags & LOOKUP_DIRECTORY)
2286+
open_flag |= O_DIRECTORY;
2287+
/*
 * DENTRY_NOT_SET is a sentinel: finish_no_open() overwrites od.dentry,
 * so afterwards we can tell whether the filesystem called it.
 */
2288+
od.dentry = DENTRY_NOT_SET;
2289+
od.mnt = nd->path.mnt;
2290+
od.filp = &nd->intent.open.file;
2291+
filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode,
2292+
created);
2293+
if (IS_ERR(filp)) {
/* An error return must not also have set a lookup result. */
2294+
if (WARN_ON(od.dentry != DENTRY_NOT_SET))
2295+
dput(od.dentry);
2296+
/*
 * If O_CREAT was cleared above, report the saved creation error
 * instead of the resulting -ENOENT.
 */
2297+
if (create_error && PTR_ERR(filp) == -ENOENT)
2298+
filp = ERR_PTR(create_error);
2299+
goto out;
2300+
}
2301+
2302+
acc_mode = op->acc_mode;
2303+
if (*created) {
/* Newly created file: only MAY_OPEN is passed to may_open() below. */
2304+
fsnotify_create(dir, dentry);
2305+
acc_mode = MAY_OPEN;
2306+
}
2307+
/*
 * NULL: the filesystem did not open the file. It must have recorded a
 * lookup result in od.dentry (NULL there means "keep using dentry").
 */
2308+
if (!filp) {
2309+
if (WARN_ON(od.dentry == DENTRY_NOT_SET)) {
2310+
filp = ERR_PTR(-EIO);
2311+
goto out;
2312+
}
2313+
if (od.dentry) {
2314+
dput(dentry);
2315+
dentry = od.dentry;
2316+
}
2317+
goto looked_up;
2318+
}
2319+
2320+
/*
2321+
* We didn't have the inode before the open, so check open permission
2322+
* here.
2323+
*/
2324+
error = may_open(&filp->f_path, acc_mode, open_flag);
2325+
if (error)
2326+
goto out_fput;
2327+
2328+
error = open_check_o_direct(filp);
2329+
if (error)
2330+
goto out_fput;
2331+
/* Common exit for errors and for a successfully opened file. */
2332+
out:
2333+
dput(dentry);
2334+
return filp;
2335+
2336+
out_fput:
2337+
fput(filp);
2338+
filp = ERR_PTR(error);
2339+
goto out;
2340+
/* Fallback: do not call ->atomic_open(); hand a dentry back to the caller. */
2341+
no_open:
2342+
if (need_lookup) {
2343+
dentry = lookup_real(dir, dentry, nd);
2344+
if (IS_ERR(dentry))
2345+
return ERR_CAST(dentry);
2346+
2347+
if (create_error) {
2348+
int open_flag = op->open_flag;
2349+
2350+
filp = ERR_PTR(create_error);
2351+
if ((open_flag & O_EXCL)) {
2352+
if (!dentry->d_inode)
2353+
goto out;
2354+
} else if (!dentry->d_inode) {
2355+
goto out;
2356+
} else if ((open_flag & O_TRUNC) &&
2357+
S_ISREG(dentry->d_inode->i_mode)) {
2358+
goto out;
2359+
}
2360+
/* will fail later, go on to get the right error */
2361+
}
2362+
}
2363+
looked_up:
2364+
path->dentry = dentry;
2365+
path->mnt = nd->path.mnt;
2366+
return NULL;
2367+
}
2368+
21992369
/*
22002370
* Lookup, maybe create and open the last component
22012371
*
@@ -2219,6 +2389,15 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
22192389
if (IS_ERR(dentry))
22202390
return ERR_CAST(dentry);
22212391

2392+
/* Cached positive dentry: will open in f_op->open */
2393+
if (!need_lookup && dentry->d_inode)
2394+
goto out_no_open;
2395+
2396+
if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
2397+
return atomic_open(nd, dentry, path, op, want_write,
2398+
need_lookup, created);
2399+
}
2400+
22222401
if (need_lookup) {
22232402
BUG_ON(dentry->d_inode);
22242403

@@ -2251,6 +2430,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
22512430
if (error)
22522431
goto out_dput;
22532432
}
2433+
out_no_open:
22542434
path->dentry = dentry;
22552435
path->mnt = nd->path.mnt;
22562436
return NULL;
@@ -2344,8 +2524,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
23442524
filp = lookup_open(nd, path, op, &want_write, &created);
23452525
mutex_unlock(&dir->d_inode->i_mutex);
23462526

2347-
if (IS_ERR(filp))
2348-
goto out;
2527+
if (filp) {
2528+
if (IS_ERR(filp))
2529+
goto out;
2530+
2531+
if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
2532+
will_truncate = 0;
2533+
2534+
audit_inode(pathname, filp->f_path.dentry);
2535+
goto opened;
2536+
}
23492537

23502538
if (created) {
23512539
/* Don't check for write permission, don't truncate */
@@ -2361,6 +2549,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
23612549
*/
23622550
audit_inode(pathname, path->dentry);
23632551

2552+
/*
2553+
* If atomic_open() acquired write access it is dropped now due to
2554+
* possible mount and symlink following (this might be optimized away if
2555+
* necessary...)
2556+
*/
2557+
if (want_write) {
2558+
mnt_drop_write(nd->path.mnt);
2559+
want_write = 0;
2560+
}
2561+
23642562
error = -EEXIST;
23652563
if (open_flag & O_EXCL)
23662564
goto exit_dput;
@@ -2444,6 +2642,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
24442642
retried = true;
24452643
goto retry_lookup;
24462644
}
2645+
opened:
24472646
if (!IS_ERR(filp)) {
24482647
error = ima_file_check(filp, op->acc_mode);
24492648
if (error) {

fs/open.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,48 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
810810
}
811811
EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
812812

813+
/**
814+
* finish_open - finish opening a file
815+
* @od: opaque open data
816+
* @dentry: pointer to dentry
817+
* @open: open callback
818+
*
819+
* This can be used to finish opening a file passed to i_op->atomic_open().
820+
*
821+
* If the open callback is set to NULL, then the standard f_op->open()
822+
* filesystem callback is substituted.
 *
 * A reference is taken on od->mnt and on @dentry before do_dentry_open()
 * is called.
 *
 * Returns the result of do_dentry_open(). If that result is not an error
 * pointer, *od->filp is set to NULL.
823+
*/
824+
struct file *finish_open(struct opendata *od, struct dentry *dentry,
825+
int (*open)(struct inode *, struct file *))
826+
{
827+
struct file *res;
828+
/* NOTE(review): these references are presumably consumed by do_dentry_open() - confirm. */
829+
mntget(od->mnt);
830+
dget(dentry);
831+
832+
res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred());
833+
if (!IS_ERR(res))
834+
*od->filp = NULL;
835+
836+
return res;
837+
}
838+
EXPORT_SYMBOL(finish_open);
839+
840+
/**
841+
* finish_no_open - finish ->atomic_open() without opening the file
842+
*
843+
* @od: opaque open data
844+
* @dentry: dentry or NULL (as returned from ->lookup())
845+
*
846+
* This can be used to set the result of a successful lookup in ->atomic_open().
847+
* The filesystem's atomic_open() method shall return NULL after calling this.
 *
 * Only stores @dentry in od->dentry; no reference is taken here. The VFS
 * atomic_open() helper in fs/namei.c reads od->dentry when the method
 * returns NULL: a non-NULL value replaces the original dentry.
848+
*/
849+
void finish_no_open(struct opendata *od, struct dentry *dentry)
850+
{
851+
od->dentry = dentry;
852+
}
853+
EXPORT_SYMBOL(finish_no_open);
854+
813855
/**
814856
* nameidata_to_filp - convert a nameidata to an open filp.
815857
* @nd: pointer to nameidata

include/linux/fs.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,7 @@ struct kstatfs;
427427
struct vm_area_struct;
428428
struct vfsmount;
429429
struct cred;
430+
struct opendata;
430431

431432
extern void __init inode_init(void);
432433
extern void __init inode_init_early(void);
@@ -1693,6 +1694,9 @@ struct inode_operations {
16931694
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
16941695
u64 len);
16951696
int (*update_time)(struct inode *, struct timespec *, int);
1697+
struct file * (*atomic_open)(struct inode *, struct dentry *,
1698+
struct opendata *, unsigned open_flag,
1699+
umode_t create_mode, bool *created);
16961700
} ____cacheline_aligned;
16971701

16981702
struct seq_file;
@@ -2061,6 +2065,9 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
20612065
const struct cred *);
20622066
extern int filp_close(struct file *, fl_owner_t id);
20632067
extern char * getname(const char __user *);
2068+
extern struct file *finish_open(struct opendata *od, struct dentry *dentry,
2069+
int (*open)(struct inode *, struct file *));
2070+
extern void finish_no_open(struct opendata *od, struct dentry *dentry);
20642071

20652072
/* fs/ioctl.c */
20662073

0 commit comments

Comments
 (0)