Message ID | 20250403-fuse-io-uring-trace-points-v2-1-bd04f2b22f91@ddn.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fuse: Improve ftraces, per-cpu req unique and code dup removal | expand |
On Thu, Apr 3, 2025 at 6:05 AM Bernd Schubert <bschubert@ddn.com> wrote: > > No need to take lock, we can have that per cpu and > add in the current cpu as offset. > > fuse-io-uring and virtiofs especially benefit from it > as they don't need the fiq lock at all. > > Signed-off-by: Bernd Schubert <bschubert@ddn.com> > --- > fs/fuse/dev.c | 24 +++--------------------- > fs/fuse/fuse_dev_i.h | 4 ---- > fs/fuse/fuse_i.h | 23 ++++++++++++++++++----- > fs/fuse/inode.c | 1 + > 4 files changed, 22 insertions(+), 30 deletions(-) > > diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c > index 51e31df4c54613280a9c295f530b18e1d461a974..e9592ab092b948bacb5034018bd1f32c917d5c9f 100644 > --- a/fs/fuse/dev.c > +++ b/fs/fuse/dev.c > @@ -204,24 +204,6 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args) > } > EXPORT_SYMBOL_GPL(fuse_len_args); > > -static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq) > -{ > - fiq->reqctr += FUSE_REQ_ID_STEP; > - return fiq->reqctr; > -} > - > -u64 fuse_get_unique(struct fuse_iqueue *fiq) > -{ > - u64 ret; > - > - spin_lock(&fiq->lock); > - ret = fuse_get_unique_locked(fiq); > - spin_unlock(&fiq->lock); > - > - return ret; > -} > -EXPORT_SYMBOL_GPL(fuse_get_unique); > - > unsigned int fuse_req_hash(u64 unique) > { > return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS); > @@ -278,7 +260,7 @@ static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req) > spin_lock(&fiq->lock); > if (fiq->connected) { > if (req->in.h.opcode != FUSE_NOTIFY_REPLY) > - req->in.h.unique = fuse_get_unique_locked(fiq); > + req->in.h.unique = fuse_get_unique(fiq); > list_add_tail(&req->list, &fiq->pending); > fuse_dev_wake_and_unlock(fiq); > } else { > @@ -1177,7 +1159,7 @@ __releases(fiq->lock) > struct fuse_in_header ih = { > .opcode = FUSE_FORGET, > .nodeid = forget->forget_one.nodeid, > - .unique = fuse_get_unique_locked(fiq), > + .unique = fuse_get_unique(fiq), > .len = sizeof(ih) + sizeof(arg), > }; > > @@ -1208,7 +1190,7 @@ 
__releases(fiq->lock) > struct fuse_batch_forget_in arg = { .count = 0 }; > struct fuse_in_header ih = { > .opcode = FUSE_BATCH_FORGET, > - .unique = fuse_get_unique_locked(fiq), > + .unique = fuse_get_unique(fiq), > .len = sizeof(ih) + sizeof(arg), > }; > > diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h > index 3b2bfe1248d3573abe3b144a6d4bf6a502f56a40..e0afd837a8024450bab77312c7eebdcc7a39bd36 100644 > --- a/fs/fuse/fuse_dev_i.h > +++ b/fs/fuse/fuse_dev_i.h > @@ -8,10 +8,6 @@ > > #include <linux/types.h> > > -/* Ordinary requests have even IDs, while interrupts IDs are odd */ > -#define FUSE_INT_REQ_BIT (1ULL << 0) > -#define FUSE_REQ_ID_STEP (1ULL << 1) > - > struct fuse_arg; > struct fuse_args; > struct fuse_pqueue; > diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h > index fee96fe7887b30cd57b8a6bbda11447a228cf446..73c612dd58e45ecde0b8f72fd58ac603d12cf202 100644 > --- a/fs/fuse/fuse_i.h > +++ b/fs/fuse/fuse_i.h > @@ -9,6 +9,8 @@ > #ifndef _FS_FUSE_I_H > #define _FS_FUSE_I_H > > +#include "linux/percpu-defs.h" Think the convention is #include <linux/percpu-defs.h> though I wonder if you even need this. I see other filesystems using percpu counters but they don't explicitly include this header. Compilation seems fine without it. > +#include "linux/threads.h" Do you need threads.h? 
> #ifndef pr_fmt > # define pr_fmt(fmt) "fuse: " fmt > #endif > @@ -44,6 +46,10 @@ > /** Number of dentries for each connection in the control filesystem */ > #define FUSE_CTL_NUM_DENTRIES 5 > > +/* Ordinary requests have even IDs, while interrupts IDs are odd */ > +#define FUSE_INT_REQ_BIT (1ULL << 0) > +#define FUSE_REQ_ID_STEP (1ULL << 1) > + > /** Maximum of max_pages received in init_out */ > extern unsigned int fuse_max_pages_limit; > > @@ -490,7 +496,7 @@ struct fuse_iqueue { > wait_queue_head_t waitq; > > /** The next unique request id */ > - u64 reqctr; > + u64 __percpu *reqctr; > > /** The list of pending requests */ > struct list_head pending; > @@ -1065,6 +1071,17 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) > rcu_read_unlock(); > } > > +/** > + * Get the next unique ID for a request > + */ > +static inline u64 fuse_get_unique(struct fuse_iqueue *fiq) > +{ > + int step = FUSE_REQ_ID_STEP * (task_cpu(current) + 1); I don't think you need the + 1 here. This works fine even if task_cpu() returns 0. 
> + u64 cntr = this_cpu_inc_return(*fiq->reqctr); > + > + return cntr * FUSE_REQ_ID_STEP * NR_CPUS + step; if you want to save a multiplication, I think we could just do static inline u64 fuse_get_unique(struct fuse_iqueue *fiq) { u64 cntr = this_cpu_inc_return(*fiq->reqctr); return (cntr * NR_CPUS + task_cpu(current)) * FUSE_REQ_ID_STEP; } > +} > + > /** Device operations */ > extern const struct file_operations fuse_dev_operations; > > @@ -1415,10 +1432,6 @@ int fuse_readdir(struct file *file, struct dir_context *ctx); > */ > unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); > > -/** > - * Get the next unique ID for a request > - */ > -u64 fuse_get_unique(struct fuse_iqueue *fiq); > void fuse_free_conn(struct fuse_conn *fc); > > /* dax.c */ > diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c > index e9db2cb8c150878634728685af0fa15e7ade628f..12012bfbf59a93deb9d27e0e0641e4ea2ec4c233 100644 > --- a/fs/fuse/inode.c > +++ b/fs/fuse/inode.c > @@ -930,6 +930,7 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq, > memset(fiq, 0, sizeof(struct fuse_iqueue)); > spin_lock_init(&fiq->lock); > init_waitqueue_head(&fiq->waitq); > + fiq->reqctr = alloc_percpu(u64); > INIT_LIST_HEAD(&fiq->pending); > INIT_LIST_HEAD(&fiq->interrupts); > fiq->forget_list_tail = &fiq->forget_list_head; > I think we need a free_percpu(fiq->reqctr); as well when the last ref on the connection is dropped or else this is leaked Thanks, Joanne > -- > 2.43.0 >
Thanks for your review Joanne! On 4/3/25 20:27, Joanne Koong wrote: > On Thu, Apr 3, 2025 at 6:05 AM Bernd Schubert <bschubert@ddn.com> wrote: >> >> No need to take lock, we can have that per cpu and >> add in the current cpu as offset. >> >> fuse-io-uring and virtiofs especially benefit from it >> as they don't need the fiq lock at all. >> >> Signed-off-by: Bernd Schubert <bschubert@ddn.com> >> --- >> fs/fuse/dev.c | 24 +++--------------------- >> fs/fuse/fuse_dev_i.h | 4 ---- >> fs/fuse/fuse_i.h | 23 ++++++++++++++++++----- >> fs/fuse/inode.c | 1 + >> 4 files changed, 22 insertions(+), 30 deletions(-) >> >> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c >> index 51e31df4c54613280a9c295f530b18e1d461a974..e9592ab092b948bacb5034018bd1f32c917d5c9f 100644 >> --- a/fs/fuse/dev.c >> +++ b/fs/fuse/dev.c >> @@ -204,24 +204,6 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args) >> } >> EXPORT_SYMBOL_GPL(fuse_len_args); >> >> -static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq) >> -{ >> - fiq->reqctr += FUSE_REQ_ID_STEP; >> - return fiq->reqctr; >> -} >> - >> -u64 fuse_get_unique(struct fuse_iqueue *fiq) >> -{ >> - u64 ret; >> - >> - spin_lock(&fiq->lock); >> - ret = fuse_get_unique_locked(fiq); >> - spin_unlock(&fiq->lock); >> - >> - return ret; >> -} >> -EXPORT_SYMBOL_GPL(fuse_get_unique); >> - >> unsigned int fuse_req_hash(u64 unique) >> { >> return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS); >> @@ -278,7 +260,7 @@ static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req) >> spin_lock(&fiq->lock); >> if (fiq->connected) { >> if (req->in.h.opcode != FUSE_NOTIFY_REPLY) >> - req->in.h.unique = fuse_get_unique_locked(fiq); >> + req->in.h.unique = fuse_get_unique(fiq); >> list_add_tail(&req->list, &fiq->pending); >> fuse_dev_wake_and_unlock(fiq); >> } else { >> @@ -1177,7 +1159,7 @@ __releases(fiq->lock) >> struct fuse_in_header ih = { >> .opcode = FUSE_FORGET, >> .nodeid = forget->forget_one.nodeid, >> - .unique = 
fuse_get_unique_locked(fiq), >> + .unique = fuse_get_unique(fiq), >> .len = sizeof(ih) + sizeof(arg), >> }; >> >> @@ -1208,7 +1190,7 @@ __releases(fiq->lock) >> struct fuse_batch_forget_in arg = { .count = 0 }; >> struct fuse_in_header ih = { >> .opcode = FUSE_BATCH_FORGET, >> - .unique = fuse_get_unique_locked(fiq), >> + .unique = fuse_get_unique(fiq), >> .len = sizeof(ih) + sizeof(arg), >> }; >> >> diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h >> index 3b2bfe1248d3573abe3b144a6d4bf6a502f56a40..e0afd837a8024450bab77312c7eebdcc7a39bd36 100644 >> --- a/fs/fuse/fuse_dev_i.h >> +++ b/fs/fuse/fuse_dev_i.h >> @@ -8,10 +8,6 @@ >> >> #include <linux/types.h> >> >> -/* Ordinary requests have even IDs, while interrupts IDs are odd */ >> -#define FUSE_INT_REQ_BIT (1ULL << 0) >> -#define FUSE_REQ_ID_STEP (1ULL << 1) >> - >> struct fuse_arg; >> struct fuse_args; >> struct fuse_pqueue; >> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h >> index fee96fe7887b30cd57b8a6bbda11447a228cf446..73c612dd58e45ecde0b8f72fd58ac603d12cf202 100644 >> --- a/fs/fuse/fuse_i.h >> +++ b/fs/fuse/fuse_i.h >> @@ -9,6 +9,8 @@ >> #ifndef _FS_FUSE_I_H >> #define _FS_FUSE_I_H >> >> +#include "linux/percpu-defs.h" > > Think the convention is #include <linux/percpu-defs.h> though I wonder > if you even need this. I see other filesystems using percpu counters > but they don't explicitly include this header. Compilation seems fine > without it. > >> +#include "linux/threads.h" > > Do you need threads.h? Oh, I fixed my .clangd settings, it had added headers itself. 
> >> #ifndef pr_fmt >> # define pr_fmt(fmt) "fuse: " fmt >> #endif >> @@ -44,6 +46,10 @@ >> /** Number of dentries for each connection in the control filesystem */ >> #define FUSE_CTL_NUM_DENTRIES 5 >> >> +/* Ordinary requests have even IDs, while interrupts IDs are odd */ >> +#define FUSE_INT_REQ_BIT (1ULL << 0) >> +#define FUSE_REQ_ID_STEP (1ULL << 1) >> + >> /** Maximum of max_pages received in init_out */ >> extern unsigned int fuse_max_pages_limit; >> >> @@ -490,7 +496,7 @@ struct fuse_iqueue { >> wait_queue_head_t waitq; >> >> /** The next unique request id */ >> - u64 reqctr; >> + u64 __percpu *reqctr; >> >> /** The list of pending requests */ >> struct list_head pending; >> @@ -1065,6 +1071,17 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) >> rcu_read_unlock(); >> } >> >> +/** >> + * Get the next unique ID for a request >> + */ >> +static inline u64 fuse_get_unique(struct fuse_iqueue *fiq) >> +{ >> + int step = FUSE_REQ_ID_STEP * (task_cpu(current) + 1); > > I don't think you need the + 1 here. This works fine even if > task_cpu() returns 0. Yeah right, I had a version that was multiplying by the step > >> + u64 cntr = this_cpu_inc_return(*fiq->reqctr); >> + >> + return cntr * FUSE_REQ_ID_STEP * NR_CPUS + step; > > if you want to save a multiplication, I think we could just do > > static inline u64 fuse_get_unique(struct fuse_iqueue *fiq) { > u64 cntr = this_cpu_inc_return(*fiq->reqctr); > return (cntr * NR_CPUS + task_cpu(current)) * FUSE_REQ_ID_STEP; > } > I find this harder to read - the compiler will optimize that anyway? 
>> +} >> + >> /** Device operations */ >> extern const struct file_operations fuse_dev_operations; >> >> @@ -1415,10 +1432,6 @@ int fuse_readdir(struct file *file, struct dir_context *ctx); >> */ >> unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); >> >> -/** >> - * Get the next unique ID for a request >> - */ >> -u64 fuse_get_unique(struct fuse_iqueue *fiq); >> void fuse_free_conn(struct fuse_conn *fc); >> >> /* dax.c */ >> diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c >> index e9db2cb8c150878634728685af0fa15e7ade628f..12012bfbf59a93deb9d27e0e0641e4ea2ec4c233 100644 >> --- a/fs/fuse/inode.c >> +++ b/fs/fuse/inode.c >> @@ -930,6 +930,7 @@ static void fuse_iqueue_init(struct fuse_iqueue *fiq, >> memset(fiq, 0, sizeof(struct fuse_iqueue)); >> spin_lock_init(&fiq->lock); >> init_waitqueue_head(&fiq->waitq); >> + fiq->reqctr = alloc_percpu(u64); >> INIT_LIST_HEAD(&fiq->pending); >> INIT_LIST_HEAD(&fiq->interrupts); >> fiq->forget_list_tail = &fiq->forget_list_head; >> > > I think we need a free_percpu(fiq->reqctr); as well when the last ref > on the connection is dropped or else this is leaked Right, totally forgot. Thanks, Bernd
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 51e31df4c54613280a9c295f530b18e1d461a974..e9592ab092b948bacb5034018bd1f32c917d5c9f 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -204,24 +204,6 @@ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args) } EXPORT_SYMBOL_GPL(fuse_len_args); -static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq) -{ - fiq->reqctr += FUSE_REQ_ID_STEP; - return fiq->reqctr; -} - -u64 fuse_get_unique(struct fuse_iqueue *fiq) -{ - u64 ret; - - spin_lock(&fiq->lock); - ret = fuse_get_unique_locked(fiq); - spin_unlock(&fiq->lock); - - return ret; -} -EXPORT_SYMBOL_GPL(fuse_get_unique); - unsigned int fuse_req_hash(u64 unique) { return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS); @@ -278,7 +260,7 @@ static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req) spin_lock(&fiq->lock); if (fiq->connected) { if (req->in.h.opcode != FUSE_NOTIFY_REPLY) - req->in.h.unique = fuse_get_unique_locked(fiq); + req->in.h.unique = fuse_get_unique(fiq); list_add_tail(&req->list, &fiq->pending); fuse_dev_wake_and_unlock(fiq); } else { @@ -1177,7 +1159,7 @@ __releases(fiq->lock) struct fuse_in_header ih = { .opcode = FUSE_FORGET, .nodeid = forget->forget_one.nodeid, - .unique = fuse_get_unique_locked(fiq), + .unique = fuse_get_unique(fiq), .len = sizeof(ih) + sizeof(arg), }; @@ -1208,7 +1190,7 @@ __releases(fiq->lock) struct fuse_batch_forget_in arg = { .count = 0 }; struct fuse_in_header ih = { .opcode = FUSE_BATCH_FORGET, - .unique = fuse_get_unique_locked(fiq), + .unique = fuse_get_unique(fiq), .len = sizeof(ih) + sizeof(arg), }; diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h index 3b2bfe1248d3573abe3b144a6d4bf6a502f56a40..e0afd837a8024450bab77312c7eebdcc7a39bd36 100644 --- a/fs/fuse/fuse_dev_i.h +++ b/fs/fuse/fuse_dev_i.h @@ -8,10 +8,6 @@ #include <linux/types.h> -/* Ordinary requests have even IDs, while interrupts IDs are odd */ -#define FUSE_INT_REQ_BIT (1ULL << 0) -#define FUSE_REQ_ID_STEP 
(1ULL << 1) - struct fuse_arg; struct fuse_args; struct fuse_pqueue; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fee96fe7887b30cd57b8a6bbda11447a228cf446..73c612dd58e45ecde0b8f72fd58ac603d12cf202 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -9,6 +9,8 @@ #ifndef _FS_FUSE_I_H #define _FS_FUSE_I_H +#include "linux/percpu-defs.h" +#include "linux/threads.h" #ifndef pr_fmt # define pr_fmt(fmt) "fuse: " fmt #endif @@ -44,6 +46,10 @@ /** Number of dentries for each connection in the control filesystem */ #define FUSE_CTL_NUM_DENTRIES 5 +/* Ordinary requests have even IDs, while interrupts IDs are odd */ +#define FUSE_INT_REQ_BIT (1ULL << 0) +#define FUSE_REQ_ID_STEP (1ULL << 1) + /** Maximum of max_pages received in init_out */ extern unsigned int fuse_max_pages_limit; @@ -490,7 +496,7 @@ struct fuse_iqueue { wait_queue_head_t waitq; /** The next unique request id */ - u64 reqctr; + u64 __percpu *reqctr; /** The list of pending requests */ struct list_head pending; @@ -1065,6 +1071,17 @@ static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket) rcu_read_unlock(); } +/** + * Get the next unique ID for a request + */ +static inline u64 fuse_get_unique(struct fuse_iqueue *fiq) +{ + int step = FUSE_REQ_ID_STEP * (task_cpu(current) + 1); + u64 cntr = this_cpu_inc_return(*fiq->reqctr); + + return cntr * FUSE_REQ_ID_STEP * NR_CPUS + step; +} + /** Device operations */ extern const struct file_operations fuse_dev_operations; @@ -1415,10 +1432,6 @@ int fuse_readdir(struct file *file, struct dir_context *ctx); */ unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); -/** - * Get the next unique ID for a request - */ -u64 fuse_get_unique(struct fuse_iqueue *fiq); void fuse_free_conn(struct fuse_conn *fc); /* dax.c */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index e9db2cb8c150878634728685af0fa15e7ade628f..12012bfbf59a93deb9d27e0e0641e4ea2ec4c233 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -930,6 +930,7 @@ static 
void fuse_iqueue_init(struct fuse_iqueue *fiq, memset(fiq, 0, sizeof(struct fuse_iqueue)); spin_lock_init(&fiq->lock); init_waitqueue_head(&fiq->waitq); + fiq->reqctr = alloc_percpu(u64); INIT_LIST_HEAD(&fiq->pending); INIT_LIST_HEAD(&fiq->interrupts); fiq->forget_list_tail = &fiq->forget_list_head;
There is no need to take the fiq lock to generate unique request IDs: the request counter can be kept per CPU, with the current CPU added in as an offset. fuse-io-uring and virtiofs especially benefit from this, as they don't need the fiq lock at all. Signed-off-by: Bernd Schubert <bschubert@ddn.com> --- fs/fuse/dev.c | 24 +++--------------------- fs/fuse/fuse_dev_i.h | 4 ---- fs/fuse/fuse_i.h | 23 ++++++++++++++++++----- fs/fuse/inode.c | 1 + 4 files changed, 22 insertions(+), 30 deletions(-)