Message ID | 20230707132746.1892211-3-bschubert@ddn.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | FUSE: Implement full atomic open | expand |
On Fri, 7 Jul 2023 at 15:28, Bernd Schubert <bschubert@ddn.com> wrote: > > From: Dharmendra Singh <dsingh@ddn.com> > > This adds full atomic open support, to avoid lookup before open/create. > If the implementation (fuse server/daemon) does not support atomic open > it falls back to non-atomic open. > > Signed-off-by: Dharmendra Singh <dsingh@ddn.com> > Signed-off-by: Horst Birthelmer <hbirthelmer@ddn.com> > Signed-off-by: Bernd Schubert <bschubert@ddn.com> > --- > fs/fuse/dir.c | 170 +++++++++++++++++++++++++++++++++++++- > fs/fuse/fuse_i.h | 3 + > include/uapi/linux/fuse.h | 3 + > 3 files changed, 175 insertions(+), 1 deletion(-) > > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > index 6ffc573de470..8145bbfc7a40 100644 > --- a/fs/fuse/dir.c > +++ b/fs/fuse/dir.c > @@ -724,7 +724,7 @@ static int _fuse_create_open(struct inode *dir, struct dentry *entry, > > static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, > umode_t, dev_t); > -static int fuse_atomic_open(struct inode *dir, struct dentry *entry, > +static int fuse_create_open(struct inode *dir, struct dentry *entry, > struct file *file, unsigned flags, > umode_t mode) > { > @@ -770,6 +770,174 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, > return finish_no_open(file, res); > } > > +static int _fuse_atomic_open(struct inode *dir, struct dentry *entry, > + struct file *file, unsigned flags, > + umode_t mode) > +{ > + > + int err; > + struct inode *inode; > + struct fuse_mount *fm = get_fuse_mount(dir); > + struct fuse_conn *fc = fm->fc; > + FUSE_ARGS(args); > + struct fuse_forget_link *forget; > + struct fuse_create_in inarg; > + struct fuse_open_out outopen; > + struct fuse_entry_out outentry; > + struct fuse_inode *fi; > + struct fuse_file *ff; > + struct dentry *res = NULL; > + > + /* Userspace expects S_IFREG in create mode */ > + if ((flags & O_CREAT) && (mode & S_IFMT) != S_IFREG) { > + WARN_ON(1); > + err = -EINVAL; > + goto out_err; > + } > + forget = fuse_alloc_forget(); > + err = -ENOMEM; > + if (!forget) > + goto out_err; > + > + err = -ENOMEM; > + ff = fuse_file_alloc(fm); > + if (!ff) > + goto out_put_forget_req; > + > + if (!fc->dont_mask) > + mode &= ~current_umask(); > + > + flags &= ~O_NOCTTY; > + memset(&inarg, 0, sizeof(inarg)); > + memset(&outentry, 0, sizeof(outentry)); > + inarg.flags = flags; > + inarg.mode = mode; > + inarg.umask = current_umask(); > + > + if (fc->handle_killpriv_v2 && (flags & O_TRUNC) && > + !(flags & O_EXCL) && !capable(CAP_FSETID)) { > + inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; > + } > + > + args.opcode = FUSE_OPEN_ATOMIC; > + args.nodeid = get_node_id(dir); > + args.in_numargs = 2; > + args.in_args[0].size = sizeof(inarg); > + args.in_args[0].value = &inarg; > + args.in_args[1].size = entry->d_name.len + 1; > + args.in_args[1].value = entry->d_name.name; > + args.out_numargs = 2; > + args.out_args[0].size = sizeof(outentry); > + args.out_args[0].value = &outentry; > + args.out_args[1].size = sizeof(outopen); > + args.out_args[1].value = &outopen; > + > + if (flags & O_CREAT) { > + err = get_create_ext(&args, dir, entry, mode); > + if (err) > + goto out_free_ff; > + } > + > + err = fuse_simple_request(fm, &args); free_ext_value() missing. Which also begs the question: can't _fuse_create_open() and _fuse_atomic_open() be consolidated into a common helper? There's just too much duplication between them to warrant completely separate implementations. > + if (err == -ENOSYS) { > + fc->no_open_atomic = 1; > + fuse_file_free(ff); > + kfree(forget); > + goto fallback; > + } > + if (err) { > + if (err == -ENOENT) > + fuse_invalidate_entry_cache(entry); > + goto out_free_ff; > + } > + > + err = -EIO; > + if (invalid_nodeid(outentry.nodeid) || fuse_invalid_attr(&outentry.attr)) > + goto out_free_ff; > + > + ff->fh = outopen.fh; > + ff->nodeid = outentry.nodeid; > + ff->open_flags = outopen.open_flags; > + inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, > + &outentry.attr, entry_attr_timeout(&outentry), 0); > + if (!inode) { > + flags &= ~(O_CREAT | O_EXCL | O_TRUNC); > + fuse_sync_release(NULL, ff, flags); > + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); > + err = -ENOMEM; > + goto out_err; > + } > + if (d_in_lookup(entry)) { > + res = d_splice_alias(inode, entry); > + if (res) { > + if (IS_ERR(res)) { > + /* > + * Close the file in user space, but do not unlink it, > + * if it was created - with network file systems other > + * clients might have already accessed it. > + */ > + fi = get_fuse_inode(inode); > + fuse_sync_release(fi, ff, flags); > + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); > + err = PTR_ERR(res); > + goto out_err; > + } > + entry = res; > + } > + } else > + d_instantiate(entry, inode); > + fuse_change_entry_timeout(entry, &outentry); > + > + if (outopen.open_flags & FOPEN_FILE_CREATED) { > + if (!(flags & O_CREAT)) { > + pr_debug("Server side bug, FOPEN_FILE_CREATED set " > + "without O_CREAT, ignoring."); > + } else { > + /* This should be always set when the file is created */ > + fuse_dir_changed(dir); > + file->f_mode |= FMODE_CREATED; > + } > + } > + > + if (!(flags & O_CREAT)) > + fuse_advise_use_readdirplus(dir); We advise to use readdirplus from lookup, because readdirplus can substitute for a lookup. But readdirplus cannot substitute for the atomic open, so it's not a good idea to advise using readdirpuls in this case. At least that's how I see this. Thanks, Miklos
On 8/8/23 14:29, Miklos Szeredi wrote: > On Fri, 7 Jul 2023 at 15:28, Bernd Schubert <bschubert@ddn.com> wrote: >> >> From: Dharmendra Singh <dsingh@ddn.com> >> >> This adds full atomic open support, to avoid lookup before open/create. >> If the implementation (fuse server/daemon) does not support atomic open >> it falls back to non-atomic open. >> >> Signed-off-by: Dharmendra Singh <dsingh@ddn.com> >> Signed-off-by: Horst Birthelmer <hbirthelmer@ddn.com> >> Signed-off-by: Bernd Schubert <bschubert@ddn.com> >> --- >> fs/fuse/dir.c | 170 +++++++++++++++++++++++++++++++++++++- >> fs/fuse/fuse_i.h | 3 + >> include/uapi/linux/fuse.h | 3 + >> 3 files changed, 175 insertions(+), 1 deletion(-) >> >> diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c >> index 6ffc573de470..8145bbfc7a40 100644 >> --- a/fs/fuse/dir.c >> +++ b/fs/fuse/dir.c >> @@ -724,7 +724,7 @@ static int _fuse_create_open(struct inode *dir, struct dentry *entry, >> >> static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, >> umode_t, dev_t); >> -static int fuse_atomic_open(struct inode *dir, struct dentry *entry, >> +static int fuse_create_open(struct inode *dir, struct dentry *entry, >> struct file *file, unsigned flags, >> umode_t mode) >> { >> @@ -770,6 +770,174 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, >> return finish_no_open(file, res); >> } >> >> +static int _fuse_atomic_open(struct inode *dir, struct dentry *entry, >> + struct file *file, unsigned flags, >> + umode_t mode) >> +{ >> + >> + int err; >> + struct inode *inode; >> + struct fuse_mount *fm = get_fuse_mount(dir); >> + struct fuse_conn *fc = fm->fc; >> + FUSE_ARGS(args); >> + struct fuse_forget_link *forget; >> + struct fuse_create_in inarg; >> + struct fuse_open_out outopen; >> + struct fuse_entry_out outentry; >> + struct fuse_inode *fi; >> + struct fuse_file *ff; >> + struct dentry *res = NULL; >> + >> + /* Userspace expects S_IFREG in create mode */ >> + if ((flags & O_CREAT) && (mode & S_IFMT) != S_IFREG) { >> + WARN_ON(1); >> + err = -EINVAL; >> + goto out_err; >> + } >> + forget = fuse_alloc_forget(); >> + err = -ENOMEM; >> + if (!forget) >> + goto out_err; >> + >> + err = -ENOMEM; >> + ff = fuse_file_alloc(fm); >> + if (!ff) >> + goto out_put_forget_req; >> + >> + if (!fc->dont_mask) >> + mode &= ~current_umask(); >> + >> + flags &= ~O_NOCTTY; >> + memset(&inarg, 0, sizeof(inarg)); >> + memset(&outentry, 0, sizeof(outentry)); >> + inarg.flags = flags; >> + inarg.mode = mode; >> + inarg.umask = current_umask(); >> + >> + if (fc->handle_killpriv_v2 && (flags & O_TRUNC) && >> + !(flags & O_EXCL) && !capable(CAP_FSETID)) { >> + inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; >> + } >> + >> + args.opcode = FUSE_OPEN_ATOMIC; >> + args.nodeid = get_node_id(dir); >> + args.in_numargs = 2; >> + args.in_args[0].size = sizeof(inarg); >> + args.in_args[0].value = &inarg; >> + args.in_args[1].size = entry->d_name.len + 1; >> + args.in_args[1].value = entry->d_name.name; >> + args.out_numargs = 2; >> + args.out_args[0].size = sizeof(outentry); >> + args.out_args[0].value = &outentry; >> + args.out_args[1].size = sizeof(outopen); >> + args.out_args[1].value = &outopen; >> + >> + if (flags & O_CREAT) { >> + err = get_create_ext(&args, dir, entry, mode); >> + if (err) >> + goto out_free_ff; >> + } >> + >> + err = fuse_simple_request(fm, &args); > > free_ext_value() missing. > > Which also begs the question: can't _fuse_create_open() and > _fuse_atomic_open() be consolidated into a common helper? There's > just too much duplication between them to warrant completely separate > implementations. Thanks a lot for your review! I'm going to sent out v7 either today or tomorrow, which also adds in revalidate (which includes a modified version of your vfs patch). v7 also adds in a change similar to commit c94c09535c4debcc439f55b5b6d9ebe57bd4665a (what Al had done for NFS) and revalidate also makes things more complex. I think it now doesn't look that similar to _fuse_create_open anymore. We can then decide if we still want to have a common helper function. > >> + if (err == -ENOSYS) { >> + fc->no_open_atomic = 1; >> + fuse_file_free(ff); >> + kfree(forget); >> + goto fallback; >> + } >> + if (err) { >> + if (err == -ENOENT) >> + fuse_invalidate_entry_cache(entry); >> + goto out_free_ff; >> + } >> + >> + err = -EIO; >> + if (invalid_nodeid(outentry.nodeid) || fuse_invalid_attr(&outentry.attr)) >> + goto out_free_ff; >> + >> + ff->fh = outopen.fh; >> + ff->nodeid = outentry.nodeid; >> + ff->open_flags = outopen.open_flags; >> + inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, >> + &outentry.attr, entry_attr_timeout(&outentry), 0); >> + if (!inode) { >> + flags &= ~(O_CREAT | O_EXCL | O_TRUNC); >> + fuse_sync_release(NULL, ff, flags); >> + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); >> + err = -ENOMEM; >> + goto out_err; >> + } >> + if (d_in_lookup(entry)) { >> + res = d_splice_alias(inode, entry); >> + if (res) { >> + if (IS_ERR(res)) { >> + /* >> + * Close the file in user space, but do not unlink it, >> + * if it was created - with network file systems other >> + * clients might have already accessed it. >> + */ >> + fi = get_fuse_inode(inode); >> + fuse_sync_release(fi, ff, flags); >> + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); >> + err = PTR_ERR(res); >> + goto out_err; >> + } >> + entry = res; >> + } >> + } else >> + d_instantiate(entry, inode); >> + fuse_change_entry_timeout(entry, &outentry); >> + >> + if (outopen.open_flags & FOPEN_FILE_CREATED) { >> + if (!(flags & O_CREAT)) { >> + pr_debug("Server side bug, FOPEN_FILE_CREATED set " >> + "without O_CREAT, ignoring."); >> + } else { >> + /* This should be always set when the file is created */ >> + fuse_dir_changed(dir); >> + file->f_mode |= FMODE_CREATED; >> + } >> + } >> + >> + if (!(flags & O_CREAT)) >> + fuse_advise_use_readdirplus(dir); > > We advise to use readdirplus from lookup, because readdirplus can > substitute for a lookup. But readdirplus cannot substitute for the > atomic open, so it's not a good idea to advise using readdirpuls in > this case. At least that's how I see this. Yes thanks, it is a long time since the initial patch version and I *think* it came in here, as lookup is now avoided for open and idea was probably that we thought this advise would not be done anymore. I guess the right code path to set it getattr (which currently still does an additional lookup) and not open. Thanks for spotting it! Bernd
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6ffc573de470..8145bbfc7a40 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -724,7 +724,7 @@ static int _fuse_create_open(struct inode *dir, struct dentry *entry, static int fuse_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t, dev_t); -static int fuse_atomic_open(struct inode *dir, struct dentry *entry, +static int fuse_create_open(struct inode *dir, struct dentry *entry, struct file *file, unsigned flags, umode_t mode) { @@ -770,6 +770,174 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, return finish_no_open(file, res); } +static int _fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct file *file, unsigned flags, + umode_t mode) +{ + + int err; + struct inode *inode; + struct fuse_mount *fm = get_fuse_mount(dir); + struct fuse_conn *fc = fm->fc; + FUSE_ARGS(args); + struct fuse_forget_link *forget; + struct fuse_create_in inarg; + struct fuse_open_out outopen; + struct fuse_entry_out outentry; + struct fuse_inode *fi; + struct fuse_file *ff; + struct dentry *res = NULL; + + /* Userspace expects S_IFREG in create mode */ + if ((flags & O_CREAT) && (mode & S_IFMT) != S_IFREG) { + WARN_ON(1); + err = -EINVAL; + goto out_err; + } + forget = fuse_alloc_forget(); + err = -ENOMEM; + if (!forget) + goto out_err; + + err = -ENOMEM; + ff = fuse_file_alloc(fm); + if (!ff) + goto out_put_forget_req; + + if (!fc->dont_mask) + mode &= ~current_umask(); + + flags &= ~O_NOCTTY; + memset(&inarg, 0, sizeof(inarg)); + memset(&outentry, 0, sizeof(outentry)); + inarg.flags = flags; + inarg.mode = mode; + inarg.umask = current_umask(); + + if (fc->handle_killpriv_v2 && (flags & O_TRUNC) && + !(flags & O_EXCL) && !capable(CAP_FSETID)) { + inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; + } + + args.opcode = FUSE_OPEN_ATOMIC; + args.nodeid = get_node_id(dir); + args.in_numargs = 2; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.in_args[1].size = entry->d_name.len + 1; + args.in_args[1].value = entry->d_name.name; + args.out_numargs = 2; + args.out_args[0].size = sizeof(outentry); + args.out_args[0].value = &outentry; + args.out_args[1].size = sizeof(outopen); + args.out_args[1].value = &outopen; + + if (flags & O_CREAT) { + err = get_create_ext(&args, dir, entry, mode); + if (err) + goto out_free_ff; + } + + err = fuse_simple_request(fm, &args); + if (err == -ENOSYS) { + fc->no_open_atomic = 1; + fuse_file_free(ff); + kfree(forget); + goto fallback; + } + if (err) { + if (err == -ENOENT) + fuse_invalidate_entry_cache(entry); + goto out_free_ff; + } + + err = -EIO; + if (invalid_nodeid(outentry.nodeid) || fuse_invalid_attr(&outentry.attr)) + goto out_free_ff; + + ff->fh = outopen.fh; + ff->nodeid = outentry.nodeid; + ff->open_flags = outopen.open_flags; + inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation, + &outentry.attr, entry_attr_timeout(&outentry), 0); + if (!inode) { + flags &= ~(O_CREAT | O_EXCL | O_TRUNC); + fuse_sync_release(NULL, ff, flags); + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); + err = -ENOMEM; + goto out_err; + } + if (d_in_lookup(entry)) { + res = d_splice_alias(inode, entry); + if (res) { + if (IS_ERR(res)) { + /* + * Close the file in user space, but do not unlink it, + * if it was created - with network file systems other + * clients might have already accessed it. + */ + fi = get_fuse_inode(inode); + fuse_sync_release(fi, ff, flags); + fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); + err = PTR_ERR(res); + goto out_err; + } + entry = res; + } + } else + d_instantiate(entry, inode); + fuse_change_entry_timeout(entry, &outentry); + + if (outopen.open_flags & FOPEN_FILE_CREATED) { + if (!(flags & O_CREAT)) { + pr_debug("Server side bug, FOPEN_FILE_CREATED set " + "without O_CREAT, ignoring."); + } else { + /* This should be always set when the file is created */ + fuse_dir_changed(dir); + file->f_mode |= FMODE_CREATED; + } + } + + if (!(flags & O_CREAT)) + fuse_advise_use_readdirplus(dir); + if (S_ISDIR(mode)) + ff->open_flags &= ~FOPEN_DIRECT_IO; + err = finish_open(file, entry, generic_file_open); + if (err) { + fi = get_fuse_inode(inode); + fuse_sync_release(fi, ff, flags); + } else { + file->private_data = ff; + fuse_finish_open(inode, file); + } + kfree(forget); + dput(res); + return err; + +out_free_ff: + fuse_file_free(ff); +out_put_forget_req: + kfree(forget); +out_err: + return err; + +fallback: + return fuse_create_open(dir, entry, file, flags, mode); +} + +static int fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct file *file, unsigned flags, + umode_t mode) +{ + struct fuse_conn *fc = get_fuse_conn(dir); + + if (fc->no_open_atomic) + return fuse_create_open(dir, entry, file, flags, mode); + else + return _fuse_atomic_open(dir, entry, file, flags, mode); +} + /* * Code shared between mknod, mkdir, symlink and link */ diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 9b7fc7d3c7f1..4e2ebcc28912 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -672,6 +672,9 @@ struct fuse_conn { /** Is open/release not implemented by fs? */ unsigned no_open:1; + /** Is open atomic not impelmented by fs? */ + unsigned no_open_atomic:1; + /** Is opendir/releasedir not implemented by fs? */ unsigned no_opendir:1; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 1b9d0dfae72d..ee36263c0f3a 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -314,6 +314,7 @@ struct fuse_file_lock { * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode + * FOPEN_FILE_CREATED: the file was indeed created */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -322,6 +323,7 @@ struct fuse_file_lock { #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) +#define FOPEN_FILE_CREATED (1 << 7) /** * INIT request/reply flags @@ -572,6 +574,7 @@ enum fuse_opcode { FUSE_REMOVEMAPPING = 49, FUSE_SYNCFS = 50, FUSE_TMPFILE = 51, + FUSE_OPEN_ATOMIC = 52, /* CUSE specific operations */ CUSE_INIT = 4096,