Message ID | 20240726083752.302301-3-yangyun50@huawei.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fuse: add no forget support | expand |
On Fri, Jul 26, 2024 at 04:37:52PM +0800, yangyun wrote: > FUSE_FORGET requests are not used if the fuse file system does not > implement the forget operation in userspace (e.g., fuse file system > does not cache any inodes). > > However, the kernel is invisible to the userspace implementation and > always sends FUSE_FORGET requests, which can lead to performance > degradation because of useless contex switch and memory copy in some > cases (e.g., many inodes are evicted from icache which was described > in commit 07e77dca8a1f ("fuse: separate queue for FORGET requests")). > > Just like 'no_interrupt' in 'struct fuse_conn', we add 'no_forget'. > But since FUSE_FORGET request does not have a reply from userspce, > we can not use ENOSYS to reflect the 'no_forget' assignment. So add > the FUSE_NO_FORGET_SUPPORT init flag. > > Besides, if no_forget is enabled, 'nlookup' in 'struct fuse_inode' > does not used and its value change can be disabled which are protected > by spin_lock to reduce lock contention. > > Signed-off-by: yangyun <yangyun50@huawei.com> > --- > fs/fuse/dev.c | 6 ++++++ > fs/fuse/dir.c | 4 +--- > fs/fuse/fuse_i.h | 24 ++++++++++++++++++++++++ > fs/fuse/inode.c | 10 +++++----- > fs/fuse/readdir.c | 8 ++------ > include/uapi/linux/fuse.h | 3 +++ > 6 files changed, 41 insertions(+), 14 deletions(-) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index 932356833b0d..10890db9426b 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -238,6 +238,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, > { > struct fuse_iqueue *fiq = &fc->iq; > > + if (fc->no_forget) > + return; > + > forget->forget_one.nodeid = nodeid; > forget->forget_one.nlookup = nlookup; > > @@ -257,6 +260,9 @@ void fuse_force_forget(struct fuse_mount *fm, u64 nodeid) > struct fuse_forget_in inarg; > FUSE_ARGS(args); > > + if (fm->fc->no_forget) > + return; > + > memset(&inarg, 0, sizeof(inarg)); > inarg.nlookup = 1; > args.opcode = FUSE_FORGET; > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > index 6bfb3a128658..833225ed1d4f 100644 > --- a/fs/fuse/dir.c > +++ b/fs/fuse/dir.c > @@ -236,9 +236,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) > fuse_force_forget(fm, outarg.nodeid); > goto invalid; > } > - spin_lock(&fi->lock); > - fi->nlookup++; > - spin_unlock(&fi->lock); > + fuse_nlookup_inc_if_enabled(fm->fc, fi); > } > if (ret == -ENOMEM || ret == -EINTR) > goto out; > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index b9a5b8ec0de5..924d6b0ad700 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -860,6 +860,9 @@ struct fuse_conn { > /** Passthrough support for read/write IO */ > unsigned int passthrough:1; > > + /** Do not send FORGET request */ > + unsigned int no_forget:1; > + > /** Maximum stack depth for passthrough backing files */ > int max_stack_depth; > > @@ -1029,6 +1032,27 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) > rcu_read_unlock(); > } > > +static inline void fuse_nlookup_inc_if_enabled(struct fuse_conn *fc, struct fuse_inode *fi) > +{ > + if (fc->no_forget) > + return; > + > + spin_lock(&fi->lock); > + fi->nlookup++; > + spin_unlock(&fi->lock); > +} > + > +static inline void fuse_nlookup_dec_if_enabled(struct fuse_conn *fc, struct fuse_inode *fi) > +{ > + if (fc->no_forget) > + return; > + > + spin_lock(&fi->lock); > + fi->nlookup--; > + spin_lock(&fi->lock); > +} This naming scheme is overly verbose, you can simply have fuse_inc_nlookup() fuse_dec_nlookup() Thanks, Josef
On Fri, Jul 26, 2024 at 11:40:17AM -0400, Josef Bacik wrote: > On Fri, Jul 26, 2024 at 04:37:52PM +0800, yangyun wrote: > > FUSE_FORGET requests are not used if the fuse file system does not > > implement the forget operation in userspace (e.g., fuse file system > > does not cache any inodes). > > > > However, the kernel is invisible to the userspace implementation and > > always sends FUSE_FORGET requests, which can lead to performance > > degradation because of useless contex switch and memory copy in some > > cases (e.g., many inodes are evicted from icache which was described > > in commit 07e77dca8a1f ("fuse: separate queue for FORGET requests")). > > > > Just like 'no_interrupt' in 'struct fuse_conn', we add 'no_forget'. > > But since FUSE_FORGET request does not have a reply from userspce, > > we can not use ENOSYS to reflect the 'no_forget' assignment. So add > > the FUSE_NO_FORGET_SUPPORT init flag. > > > > Besides, if no_forget is enabled, 'nlookup' in 'struct fuse_inode' > > does not used and its value change can be disabled which are protected > > by spin_lock to reduce lock contention. > > > > Signed-off-by: yangyun <yangyun50@huawei.com> > > --- > > fs/fuse/dev.c | 6 ++++++ > > fs/fuse/dir.c | 4 +--- > > fs/fuse/fuse_i.h | 24 ++++++++++++++++++++++++ > > fs/fuse/inode.c | 10 +++++----- > > fs/fuse/readdir.c | 8 ++------ > > include/uapi/linux/fuse.h | 3 +++ > > 6 files changed, 41 insertions(+), 14 deletions(-) > > > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > > index 932356833b0d..10890db9426b 100644 > > --- a/fs/fuse/dev.c > > +++ b/fs/fuse/dev.c > > @@ -238,6 +238,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, > > { > > struct fuse_iqueue *fiq = &fc->iq; > > > > + if (fc->no_forget) > > + return; > > + > > forget->forget_one.nodeid = nodeid; > > forget->forget_one.nlookup = nlookup; > > > > @@ -257,6 +260,9 @@ void fuse_force_forget(struct fuse_mount *fm, u64 nodeid) > > struct fuse_forget_in inarg; > > FUSE_ARGS(args); > > > > + if (fm->fc->no_forget) > > + return; > > + > > memset(&inarg, 0, sizeof(inarg)); > > inarg.nlookup = 1; > > args.opcode = FUSE_FORGET; > > diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c > > index 6bfb3a128658..833225ed1d4f 100644 > > --- a/fs/fuse/dir.c > > +++ b/fs/fuse/dir.c > > @@ -236,9 +236,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) > > fuse_force_forget(fm, outarg.nodeid); > > goto invalid; > > } > > - spin_lock(&fi->lock); > > - fi->nlookup++; > > - spin_unlock(&fi->lock); > > + fuse_nlookup_inc_if_enabled(fm->fc, fi); > > } > > if (ret == -ENOMEM || ret == -EINTR) > > goto out; > > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > > index b9a5b8ec0de5..924d6b0ad700 100644 > > --- a/fs/fuse/fuse_i.h > > +++ b/fs/fuse/fuse_i.h > > @@ -860,6 +860,9 @@ struct fuse_conn { > > /** Passthrough support for read/write IO */ > > unsigned int passthrough:1; > > > > + /** Do not send FORGET request */ > > + unsigned int no_forget:1; > > + > > /** Maximum stack depth for passthrough backing files */ > > int max_stack_depth; > > > > @@ -1029,6 +1032,27 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) > > rcu_read_unlock(); > > } > > > > +static inline void fuse_nlookup_inc_if_enabled(struct fuse_conn *fc, struct fuse_inode *fi) > > +{ > > + if (fc->no_forget) > > + return; > > + > > + spin_lock(&fi->lock); > > + fi->nlookup++; > > + spin_unlock(&fi->lock); > > +} > > + > > +static inline void fuse_nlookup_dec_if_enabled(struct fuse_conn *fc, struct fuse_inode *fi) > > +{ > > + if (fc->no_forget) > > + return; > > + > > + spin_lock(&fi->lock); > > + fi->nlookup--; > > + spin_lock(&fi->lock); > > +} > > This naming scheme is overly verbose, you can simply have > > fuse_inc_nlookup() > fuse_dec_nlookup() Thanks for your advice. I will modify this in the next version. > > Thanks, > > Josef
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 932356833b0d..10890db9426b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -238,6 +238,9 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, { struct fuse_iqueue *fiq = &fc->iq; + if (fc->no_forget) + return; + forget->forget_one.nodeid = nodeid; forget->forget_one.nlookup = nlookup; @@ -257,6 +260,9 @@ void fuse_force_forget(struct fuse_mount *fm, u64 nodeid) struct fuse_forget_in inarg; FUSE_ARGS(args); + if (fm->fc->no_forget) + return; + memset(&inarg, 0, sizeof(inarg)); inarg.nlookup = 1; args.opcode = FUSE_FORGET; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6bfb3a128658..833225ed1d4f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -236,9 +236,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) fuse_force_forget(fm, outarg.nodeid); goto invalid; } - spin_lock(&fi->lock); - fi->nlookup++; - spin_unlock(&fi->lock); + fuse_nlookup_inc_if_enabled(fm->fc, fi); } if (ret == -ENOMEM || ret == -EINTR) goto out; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b9a5b8ec0de5..924d6b0ad700 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -860,6 +860,9 @@ struct fuse_conn { /** Passthrough support for read/write IO */ unsigned int passthrough:1; + /** Do not send FORGET request */ + unsigned int no_forget:1; + /** Maximum stack depth for passthrough backing files */ int max_stack_depth; @@ -1029,6 +1032,27 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) rcu_read_unlock(); } +static inline void fuse_nlookup_inc_if_enabled(struct fuse_conn *fc, struct fuse_inode *fi) +{ + if (fc->no_forget) + return; + + spin_lock(&fi->lock); + fi->nlookup++; + spin_unlock(&fi->lock); +} + +static inline void fuse_nlookup_dec_if_enabled(struct fuse_conn *fc, struct fuse_inode *fi) +{ + if (fc->no_forget) + return; + + spin_lock(&fi->lock); + fi->nlookup--; + spin_lock(&fi->lock); +} + + /** Device operations */ extern const struct file_operations fuse_dev_operations; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 99e44ea7d875..277dc9479505 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -483,9 +483,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, } } fi = get_fuse_inode(inode); - spin_lock(&fi->lock); - fi->nlookup++; - spin_unlock(&fi->lock); + fuse_nlookup_inc_if_enabled(fc, fi); done: fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version); @@ -1331,6 +1329,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, } if (flags & FUSE_NO_EXPORT_SUPPORT) fm->sb->s_export_op = &fuse_export_fid_operations; + if (flags & FUSE_NO_FORGET_SUPPORT) + fc->no_forget = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1378,7 +1378,7 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP | - FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND; + FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_NO_FORGET_SUPPORT; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; @@ -1593,7 +1593,7 @@ static int fuse_fill_super_submount(struct super_block *sb, * that, though, so undo it here. */ fi = get_fuse_inode(root); - fi->nlookup--; + fuse_nlookup_dec_if_enabled(fm->fc, get_fuse_inode(root)); sb->s_d_op = &fuse_dentry_operations; sb->s_root = d_make_root(root); diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 39f01ac31f7c..13922662e07a 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -218,9 +218,7 @@ static int fuse_direntplus_link(struct file *file, } fi = get_fuse_inode(inode); - spin_lock(&fi->lock); - fi->nlookup++; - spin_unlock(&fi->lock); + fuse_nlookup_inc_if_enabled(fc, fi); forget_all_cached_acls(inode); fuse_change_attributes(inode, &o->attr, NULL, @@ -247,9 +245,7 @@ static int fuse_direntplus_link(struct file *file, if (!IS_ERR(inode)) { struct fuse_inode *fi = get_fuse_inode(inode); - spin_lock(&fi->lock); - fi->nlookup--; - spin_unlock(&fi->lock); + fuse_nlookup_dec_if_enabled(fc, fi); } return PTR_ERR(dentry); } diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d08b99d60f6f..bf660880bc7a 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -217,6 +217,7 @@ * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag * - add FUSE_NO_EXPORT_SUPPORT init flag * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag + * - add FUSE_NO_FORGET_SUPPORT init flag */ #ifndef _LINUX_FUSE_H @@ -421,6 +422,7 @@ struct fuse_file_lock { * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit * of the request ID indicates resend requests + * FUSE_NO_FORGET_SUPPORT: disable forget requests */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -463,6 +465,7 @@ struct fuse_file_lock { #define FUSE_PASSTHROUGH (1ULL << 37) #define FUSE_NO_EXPORT_SUPPORT (1ULL << 38) #define FUSE_HAS_RESEND (1ULL << 39) +#define FUSE_NO_FORGET_SUPPORT (1ULL << 40) /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP
FUSE_FORGET requests are not used if the fuse file system does not implement the forget operation in userspace (e.g., fuse file system does not cache any inodes). However, the kernel is invisible to the userspace implementation and always sends FUSE_FORGET requests, which can lead to performance degradation because of useless contex switch and memory copy in some cases (e.g., many inodes are evicted from icache which was described in commit 07e77dca8a1f ("fuse: separate queue for FORGET requests")). Just like 'no_interrupt' in 'struct fuse_conn', we add 'no_forget'. But since FUSE_FORGET request does not have a reply from userspce, we can not use ENOSYS to reflect the 'no_forget' assignment. So add the FUSE_NO_FORGET_SUPPORT init flag. Besides, if no_forget is enabled, 'nlookup' in 'struct fuse_inode' does not used and its value change can be disabled which are protected by spin_lock to reduce lock contention. Signed-off-by: yangyun <yangyun50@huawei.com> --- fs/fuse/dev.c | 6 ++++++ fs/fuse/dir.c | 4 +--- fs/fuse/fuse_i.h | 24 ++++++++++++++++++++++++ fs/fuse/inode.c | 10 +++++----- fs/fuse/readdir.c | 8 ++------ include/uapi/linux/fuse.h | 3 +++ 6 files changed, 41 insertions(+), 14 deletions(-)