diff mbox series

[1/4] fuse: Make the fuse_send_one request counter atomic

Message ID 20250402-fuse-io-uring-trace-points-v1-1-11b0211fa658@ddn.com (mailing list archive)
State New
Headers show
Series fuse: Improve ftraces, atomic req unique and code dup removal | expand

Commit Message

Bernd Schubert April 2, 2025, 5:40 p.m. UTC
There is no need to take the lock; the request counter can be
incremented atomically. fuse-io-uring and virtiofs especially benefit
from this, as they don't need the fiq lock at all.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
 fs/fuse/dev.c        | 24 +++---------------------
 fs/fuse/fuse_dev_i.h |  4 ----
 fs/fuse/fuse_i.h     | 18 +++++++++++++-----
 3 files changed, 16 insertions(+), 30 deletions(-)

Comments

Miklos Szeredi April 2, 2025, 6:29 p.m. UTC | #1
On Wed, 2 Apr 2025 at 19:41, Bernd Schubert <bschubert@ddn.com> wrote:
>
> No need to take lock, we can have that in atomic way.
> fuse-io-uring and virtiofs especially benefit from it
> as they don't need the fiq lock at all.

This is good.

It would be even better to have per-cpu counters, each initialized to
a cpuid * FUSE_REQ_ID_STEP and jumping by NR_CPU * FUSE_REQ_ID_STEP.

Hmm?

Thanks,
Miklos
Bernd Schubert April 3, 2025, 9:16 a.m. UTC | #2
Hi Miklos,

thanks for the quick reply.

On 4/2/25 20:29, Miklos Szeredi wrote:
> On Wed, 2 Apr 2025 at 19:41, Bernd Schubert <bschubert@ddn.com> wrote:
>>
>> No need to take lock, we can have that in atomic way.
>> fuse-io-uring and virtiofs especially benefit from it
>> as they don't need the fiq lock at all.
> 
> This is good.
> 
> It would be even better to have per-cpu counters, each initialized to
> a cpuid * FUSE_REQ_ID_STEP and jumping by NR_CPU * FUSE_REQ_ID_STEP.
> 
> Hmm?

/**
 * Get the next unique ID for a request
 */
static inline u64 fuse_get_unique(struct fuse_iqueue *fiq)
{
	int step = FUSE_REQ_ID_STEP * (task_cpu(current) + 1);
	u64 cntr = this_cpu_inc_return(*fiq->reqctr);

	return cntr * step;
}



  passthrough_hp-10113   [028] ...1. 79978.381908: fuse_request_bg_enqueue: connection 43 req 58 opcode 26 (FUSE_INIT) len 0 
  passthrough_hp-10113   [028] ...2. 79978.382032: fuse_request_enqueue: connection 43 req 58 opcode 26 (FUSE_INIT) len 104 
     fuse_worker-10115   [008] ...1. 79978.485348: fuse_request_send: connection 43 req 58 opcode 26 (FUSE_INIT) len 104 
     fuse_worker-10115   [008] ...1. 79978.489948: fuse_request_end: connection 43 req 58 len 80 error 0
              df-10153   [012] ...1. 79981.776173: fuse_request_enqueue: connection 43 req 26 opcode 3 (FUSE_GETATTR) len 56 
    fuse-ring-12-10131   [012] ...1. 79981.776345: fuse_request_send: connection 43 req 26 opcode 3 (FUSE_GETATTR) len 56 
    fuse-ring-12-10131   [012] ...1. 79981.776628: fuse_request_end: connection 43 req 26 len 56 error 0
              df-10153   [012] ...1. 79981.778866: fuse_request_enqueue: connection 43 req 52 opcode 17 (FUSE_STATFS) len 40 
    fuse-ring-12-10131   [012] ...1. 79981.778887: fuse_request_send: connection 43 req 52 opcode 17 (FUSE_STATFS) len 40 
    fuse-ring-12-10131   [012] ...1. 79981.779050: fuse_request_end: connection 43 req 52 len 40 error 0
              ls-10154   [013] ...1. 79986.145078: fuse_request_enqueue: connection 43 req 28 opcode 22 (FUSE_GETXATTR) len 65 
    fuse-ring-13-10132   [013] ...1. 79986.145440: fuse_request_send: connection 43 req 28 opcode 22 (FUSE_GETXATTR) len 65 
    fuse-ring-13-10132   [013] ...1. 79986.146932: fuse_request_end: connection 43 req 28 len 65 error -95
              ls-10154   [013] ...1. 79986.147172: fuse_request_enqueue: connection 43 req 56 opcode 22 (FUSE_GETXATTR) len 72 
    fuse-ring-13-10132   [013] ...1. 79986.147219: fuse_request_send: connection 43 req 56 opcode 22 (FUSE_GETXATTR) len 72 
    fuse-ring-13-10132   [013] ...1. 79986.148048: fuse_request_end: connection 43 req 56 len 72 error -95
              ls-10154   [013] ...1. 79986.152345: fuse_request_enqueue: connection 43 req 84 opcode 27 (FUSE_OPENDIR) len 48 
    fuse-ring-13-10132   [013] ...1. 79986.152385: fuse_request_send: connection 43 req 84 opcode 27 (FUSE_OPENDIR) len 48 
    fuse-ring-13-10132   [013] ...1. 79986.153214: fuse_request_end: connection 43 req 84 len 48 error 0
              ls-10154   [013] ...1. 79986.154291: fuse_request_enqueue: connection 43 req 112 opcode 44 (FUSE_READDIRPLUS) len 80 
    fuse-ring-13-10132   [013] ...1. 79986.154405: fuse_request_send: connection 43 req 112 opcode 44 (FUSE_READDIRPLUS) len 80 
    fuse-ring-13-10132   [013] ...1. 79986.171515: fuse_request_end: connection 43 req 112 len 80 error 0
              ls-10154   [013] ...1. 79986.174221: fuse_request_enqueue: connection 43 req 140 opcode 44 (FUSE_READDIRPLUS) len 80 
    fuse-ring-13-10132   [013] ...1. 79986.174264: fuse_request_send: connection 43 req 140 opcode 44 (FUSE_READDIRPLUS) len 80 
    fuse-ring-13-10132   [013] ...1. 79986.174510: fuse_request_end: connection 43 req 140 len 80 error 0
              ls-10154   [013] ...1. 79986.174739: fuse_request_bg_enqueue: connection 43 req 168 opcode 29 (FUSE_RELEASEDIR) len 0 
    fuse-ring-13-10132   [013] ...1. 79986.179691: fuse_request_send: connection 43 req 168 opcode 29 (FUSE_RELEASEDIR) len 64 
    fuse-ring-13-10132   [013] ...1. 79986.180011: fuse_request_end: connection 43 req 168 len 64 error 0



A slight issue is that request IDs now jump up and down quite a bit,
even more than with patch 2/4. Is that OK with you?


Thanks,
Bernd
Miklos Szeredi April 3, 2025, 12:15 p.m. UTC | #3
On Thu, 3 Apr 2025 at 11:16, Bernd Schubert <bernd@bsbernd.com> wrote:
>
> Hi Miklos,
>
> thanks for the quick reply.
>
> On 4/2/25 20:29, Miklos Szeredi wrote:
> > On Wed, 2 Apr 2025 at 19:41, Bernd Schubert <bschubert@ddn.com> wrote:
> >>
> >> No need to take lock, we can have that in atomic way.
> >> fuse-io-uring and virtiofs especially benefit from it
> >> as they don't need the fiq lock at all.
> >
> > This is good.
> >
> > It would be even better to have per-cpu counters, each initialized to
> > a cpuid * FUSE_REQ_ID_STEP and jumping by NR_CPU * FUSE_REQ_ID_STEP.
> >
> > Hmm?
>
> /**
>  * Get the next unique ID for a request
>  */
> static inline u64 fuse_get_unique(struct fuse_iqueue *fiq)
> {
>         int step = FUSE_REQ_ID_STEP * (task_cpu(current) + 1);
>         u64 cntr = this_cpu_inc_return(*fiq->reqctr);
>
>         return cntr * step;

return cntr  * FUSE_REQ_ID_STEP * NR_CPU + step;

?

> Slight issue is that request IDs now have quite an up down,
> even more than patch 2/4. Ok with you?

Being more obvious is an advantage, since any issues will come to light sooner.

Thanks,
Miklos
Bernd Schubert April 3, 2025, 1:06 p.m. UTC | #4
On 4/3/25 14:15, Miklos Szeredi wrote:
> On Thu, 3 Apr 2025 at 11:16, Bernd Schubert <bernd@bsbernd.com> wrote:
>>
>> Hi Miklos,
>>
>> thanks for the quick reply.
>>
>> On 4/2/25 20:29, Miklos Szeredi wrote:
>>> On Wed, 2 Apr 2025 at 19:41, Bernd Schubert <bschubert@ddn.com> wrote:
>>>>
>>>> No need to take lock, we can have that in atomic way.
>>>> fuse-io-uring and virtiofs especially benefit from it
>>>> as they don't need the fiq lock at all.
>>>
>>> This is good.
>>>
>>> It would be even better to have per-cpu counters, each initialized to
>>> a cpuid * FUSE_REQ_ID_STEP and jumping by NR_CPU * FUSE_REQ_ID_STEP.
>>>
>>> Hmm?
>>
>> /**
>>  * Get the next unique ID for a request
>>  */
>> static inline u64 fuse_get_unique(struct fuse_iqueue *fiq)
>> {
>>         int step = FUSE_REQ_ID_STEP * (task_cpu(current) + 1);
>>         u64 cntr = this_cpu_inc_return(*fiq->reqctr);
>>
>>         return cntr * step;
> 
> return cntr  * FUSE_REQ_ID_STEP * NR_CPU + step;

Thanks, updated.

> 
> ?
> 
>> Slight issue is that request IDs now have quite an up down,
>> even more than patch 2/4. Ok with you?
> 
> Being more obvious is an advantage, since any issues will come to light sooner.

I sent v2. Should non-linear values between cores turn out to be an issue,
we could feel free to go back to v1.


Thanks,
Bernd
diff mbox series

Patch

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 51e31df4c54613280a9c295f530b18e1d461a974..e9592ab092b948bacb5034018bd1f32c917d5c9f 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -204,24 +204,6 @@  unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
 }
 EXPORT_SYMBOL_GPL(fuse_len_args);
 
-static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
-{
-	fiq->reqctr += FUSE_REQ_ID_STEP;
-	return fiq->reqctr;
-}
-
-u64 fuse_get_unique(struct fuse_iqueue *fiq)
-{
-	u64 ret;
-
-	spin_lock(&fiq->lock);
-	ret = fuse_get_unique_locked(fiq);
-	spin_unlock(&fiq->lock);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(fuse_get_unique);
-
 unsigned int fuse_req_hash(u64 unique)
 {
 	return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
@@ -278,7 +260,7 @@  static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
 	spin_lock(&fiq->lock);
 	if (fiq->connected) {
 		if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
-			req->in.h.unique = fuse_get_unique_locked(fiq);
+			req->in.h.unique = fuse_get_unique(fiq);
 		list_add_tail(&req->list, &fiq->pending);
 		fuse_dev_wake_and_unlock(fiq);
 	} else {
@@ -1177,7 +1159,7 @@  __releases(fiq->lock)
 	struct fuse_in_header ih = {
 		.opcode = FUSE_FORGET,
 		.nodeid = forget->forget_one.nodeid,
-		.unique = fuse_get_unique_locked(fiq),
+		.unique = fuse_get_unique(fiq),
 		.len = sizeof(ih) + sizeof(arg),
 	};
 
@@ -1208,7 +1190,7 @@  __releases(fiq->lock)
 	struct fuse_batch_forget_in arg = { .count = 0 };
 	struct fuse_in_header ih = {
 		.opcode = FUSE_BATCH_FORGET,
-		.unique = fuse_get_unique_locked(fiq),
+		.unique = fuse_get_unique(fiq),
 		.len = sizeof(ih) + sizeof(arg),
 	};
 
diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h
index 3b2bfe1248d3573abe3b144a6d4bf6a502f56a40..e0afd837a8024450bab77312c7eebdcc7a39bd36 100644
--- a/fs/fuse/fuse_dev_i.h
+++ b/fs/fuse/fuse_dev_i.h
@@ -8,10 +8,6 @@ 
 
 #include <linux/types.h>
 
-/* Ordinary requests have even IDs, while interrupts IDs are odd */
-#define FUSE_INT_REQ_BIT (1ULL << 0)
-#define FUSE_REQ_ID_STEP (1ULL << 1)
-
 struct fuse_arg;
 struct fuse_args;
 struct fuse_pqueue;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fee96fe7887b30cd57b8a6bbda11447a228cf446..8aea23ffaf2fa44b284d4efef1e009fb1ca876a0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -44,6 +44,10 @@ 
 /** Number of dentries for each connection in the control filesystem */
 #define FUSE_CTL_NUM_DENTRIES 5
 
+/* Ordinary requests have even IDs, while interrupts IDs are odd */
+#define FUSE_INT_REQ_BIT (1ULL << 0)
+#define FUSE_REQ_ID_STEP (1ULL << 1)
+
 /** Maximum of max_pages received in init_out */
 extern unsigned int fuse_max_pages_limit;
 
@@ -490,7 +494,7 @@  struct fuse_iqueue {
 	wait_queue_head_t waitq;
 
 	/** The next unique request id */
-	u64 reqctr;
+	atomic64_t reqctr;
 
 	/** The list of pending requests */
 	struct list_head pending;
@@ -1065,6 +1069,14 @@  static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
 	rcu_read_unlock();
 }
 
+/**
+ * Get the next unique ID for a request
+ */
+static inline u64 fuse_get_unique(struct fuse_iqueue *fiq)
+{
+	return atomic64_add_return(FUSE_REQ_ID_STEP, &fiq->reqctr);
+}
+
 /** Device operations */
 extern const struct file_operations fuse_dev_operations;
 
@@ -1415,10 +1427,6 @@  int fuse_readdir(struct file *file, struct dir_context *ctx);
  */
 unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args);
 
-/**
- * Get the next unique ID for a request
- */
-u64 fuse_get_unique(struct fuse_iqueue *fiq);
 void fuse_free_conn(struct fuse_conn *fc);
 
 /* dax.c */