
[RFC,v4,2/3] io_uring: add api to set / get napi configuration.

Message ID 20221121172953.4030697-3-shr@devkernel.io
State New
Series io_uring: add napi busy polling support

Commit Message

Stefan Roesch Nov. 21, 2022, 5:29 p.m. UTC
This adds an api to register the busy poll timeout from liburing. To be
able to use this functionality, the corresponding liburing patch is needed.

Signed-off-by: Stefan Roesch <shr@devkernel.io>
---
 include/linux/io_uring_types.h |  2 +-
 include/uapi/linux/io_uring.h  | 11 +++++++
 io_uring/io_uring.c            | 54 ++++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)

Comments

Jens Axboe Nov. 21, 2022, 5:35 p.m. UTC | #1
On 11/21/22 10:29 AM, Stefan Roesch wrote:
> This adds an api to register the busy poll timeout from liburing. To be
> able to use this functionality, the corresponding liburing patch is needed.
> 
> Signed-off-by: Stefan Roesch <shr@devkernel.io>
> ---
>  include/linux/io_uring_types.h |  2 +-
>  include/uapi/linux/io_uring.h  | 11 +++++++
>  io_uring/io_uring.c            | 54 ++++++++++++++++++++++++++++++++++
>  3 files changed, 66 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index 23993b5d3186..67b861305d97 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -274,8 +274,8 @@ struct io_ring_ctx {
>  	struct list_head	napi_list;	/* track busy poll napi_id */
>  	spinlock_t		napi_lock;	/* napi_list lock */
>  
> -	unsigned int		napi_busy_poll_to; /* napi busy poll default timeout */
>  	bool			napi_prefer_busy_poll;
> +	unsigned int		napi_busy_poll_to;
>  #endif

Why is this being moved? Seems unrelated, and it actually creates another
hole rather than filling one as it did before.
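
A quick way to see the hole is to reduce the two field orders to a standalone struct and print the offsets. This is only a sketch: the actual padding depends on the neighbouring io_ring_ctx members, and the struct and field names below are made up for the example.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* hypothetical reduction of the field order proposed in this patch */
struct order_v4 {
	bool		prefer;		/* 1 byte, then a 3-byte hole */
	unsigned int	timeout;	/* needs 4-byte alignment */
};

/* hypothetical reduction of the original field order */
struct order_v3 {
	unsigned int	timeout;
	bool		prefer;		/* packs into the tail padding */
};

int main(void)
{
	/* prints 4: the hole sits in front of the timeout field */
	printf("v4 order: timeout at offset %zu\n",
	       offsetof(struct order_v4, timeout));
	/* prints 0: no internal hole in the original order */
	printf("v3 order: timeout at offset %zu\n",
	       offsetof(struct order_v3, timeout));
	return 0;
}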
Jens Axboe Nov. 21, 2022, 5:37 p.m. UTC | #2
On 11/21/22 10:29 AM, Stefan Roesch wrote:
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 4f432694cbed..cf0e7cc8ad2e 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -4122,6 +4122,48 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
>  	return ret;
>  }
>  
> +static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
> +{
> +#ifdef CONFIG_NET_RX_BUSY_POLL
> +	const struct io_uring_napi curr = {
> +		.busy_poll_to = ctx->napi_busy_poll_to,
> +	};
> +	struct io_uring_napi *napi;
> +
> +	napi = memdup_user(arg, sizeof(*napi));
> +	if (IS_ERR(napi))
> +		return PTR_ERR(napi);
> +
> +	WRITE_ONCE(ctx->napi_busy_poll_to, napi->busy_poll_to);
> +
> +	kfree(napi);
> +
> +	if (copy_to_user(arg, &curr, sizeof(curr)))
> +		return -EFAULT;
> +
> +	return 0;
> +#else
> +	return -EINVAL;
> +#endif
> +}

This should return -EINVAL if any of the padding or reserved fields are
non-zero. If you don't do that, then it's not extendable in the future.
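
For reference, the check being asked for could look like the following. This is a sketch against the io_uring_napi fields added by this patch, not the code that eventually landed:

	/* reject non-zero padding/reserved fields so they can be given
	 * a meaning later without breaking existing binaries */
	if (napi->pad || napi->resv) {
		kfree(napi);
		return -EINVAL;
	}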
Ammar Faizi Nov. 21, 2022, 5:45 p.m. UTC | #3
On 11/22/22 12:29 AM, Stefan Roesch wrote:
> +static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
> +{
> +#ifdef CONFIG_NET_RX_BUSY_POLL
> +	const struct io_uring_napi curr = {
> +		.busy_poll_to = ctx->napi_busy_poll_to,
> +	};
> +	struct io_uring_napi *napi;
> +
> +	napi = memdup_user(arg, sizeof(*napi));
> +	if (IS_ERR(napi))
> +		return PTR_ERR(napi);
> +
> +	WRITE_ONCE(ctx->napi_busy_poll_to, napi->busy_poll_to);
> +
> +	kfree(napi);
> +
> +	if (copy_to_user(arg, &curr, sizeof(curr)))
> +		return -EFAULT;
> +
> +	return 0;

Considering:

    1) `struct io_uring_napi` is 16 bytes in size.

    2) The lifetime of `struct io_uring_napi *napi;` is brief.

There is no need to use memdup_user() and kfree(). You can place it
on the stack and use copy_from_user() instead.
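
Applied to the function above, the suggestion would look roughly like this. This is a sketch, and it also folds in the reserved-field check from Jens's earlier comment:

static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
	const struct io_uring_napi curr = {
		.busy_poll_to = ctx->napi_busy_poll_to,
	};
	struct io_uring_napi napi;	/* 16 bytes, fine on the stack */

	if (copy_from_user(&napi, arg, sizeof(napi)))
		return -EFAULT;
	if (napi.pad || napi.resv)
		return -EINVAL;

	WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);

	if (copy_to_user(arg, &curr, sizeof(curr)))
		return -EFAULT;

	return 0;
#else
	return -EINVAL;
#endif
}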
Stefan Roesch Nov. 21, 2022, 6:55 p.m. UTC | #4
Jens Axboe <axboe@kernel.dk> writes:

> On 11/21/22 10:29 AM, Stefan Roesch wrote:
>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>> index 4f432694cbed..cf0e7cc8ad2e 100644
>> --- a/io_uring/io_uring.c
>> +++ b/io_uring/io_uring.c
>> @@ -4122,6 +4122,48 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
>>  	return ret;
>>  }
>>
>> +static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
>> +{
>> +#ifdef CONFIG_NET_RX_BUSY_POLL
>> +	const struct io_uring_napi curr = {
>> +		.busy_poll_to = ctx->napi_busy_poll_to,
>> +	};
>> +	struct io_uring_napi *napi;
>> +
>> +	napi = memdup_user(arg, sizeof(*napi));
>> +	if (IS_ERR(napi))
>> +		return PTR_ERR(napi);
>> +
>> +	WRITE_ONCE(ctx->napi_busy_poll_to, napi->busy_poll_to);
>> +
>> +	kfree(napi);
>> +
>> +	if (copy_to_user(arg, &curr, sizeof(curr)))
>> +		return -EFAULT;
>> +
>> +	return 0;
>> +#else
>> +	return -EINVAL;
>> +#endif
>> +}
>
> This should return -EINVAL if any of the padding or reserved fields are
> non-zero. If you don't do that, then it's not extendable in the future.

The next version will have that change.
Stefan Roesch Nov. 21, 2022, 6:55 p.m. UTC | #5
Jens Axboe <axboe@kernel.dk> writes:

> On 11/21/22 10:29 AM, Stefan Roesch wrote:
>> This adds an api to register the busy poll timeout from liburing. To be
>> able to use this functionality, the corresponding liburing patch is needed.
>>
>> Signed-off-by: Stefan Roesch <shr@devkernel.io>
>> ---
>>  include/linux/io_uring_types.h |  2 +-
>>  include/uapi/linux/io_uring.h  | 11 +++++++
>>  io_uring/io_uring.c            | 54 ++++++++++++++++++++++++++++++++++
>>  3 files changed, 66 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
>> index 23993b5d3186..67b861305d97 100644
>> --- a/include/linux/io_uring_types.h
>> +++ b/include/linux/io_uring_types.h
>> @@ -274,8 +274,8 @@ struct io_ring_ctx {
>>  	struct list_head	napi_list;	/* track busy poll napi_id */
>>  	spinlock_t		napi_lock;	/* napi_list lock */
>>
>> -	unsigned int		napi_busy_poll_to; /* napi busy poll default timeout */
>>  	bool			napi_prefer_busy_poll;
>> +	unsigned int		napi_busy_poll_to;
>>  #endif
>
> Why is this being moved? Seems unrelated, and it actually creates another
> hole rather than filling one as it did before.

That was not intended. The next version of the patch will restore the
previous order.
Stefan Roesch Nov. 21, 2022, 6:56 p.m. UTC | #6
Ammar Faizi <ammarfaizi2@gnuweeb.org> writes:

> On 11/22/22 12:29 AM, Stefan Roesch wrote:
>> +static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
>> +{
>> +#ifdef CONFIG_NET_RX_BUSY_POLL
>> +	const struct io_uring_napi curr = {
>> +		.busy_poll_to = ctx->napi_busy_poll_to,
>> +	};
>> +	struct io_uring_napi *napi;
>> +
>> +	napi = memdup_user(arg, sizeof(*napi));
>> +	if (IS_ERR(napi))
>> +		return PTR_ERR(napi);
>> +
>> +	WRITE_ONCE(ctx->napi_busy_poll_to, napi->busy_poll_to);
>> +
>> +	kfree(napi);
>> +
>> +	if (copy_to_user(arg, &curr, sizeof(curr)))
>> +		return -EFAULT;
>> +
>> +	return 0;
>
> Considering:
>
>    1) `struct io_uring_napi` is 16 bytes in size.
>
>    2) The lifetime of `struct io_uring_napi *napi;` is brief.
>
> There is no need to use memdup_user() and kfree(). You can place it
> on the stack and use copy_from_user() instead.

The next version of the patch will use copy_from_user().

Patch

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 23993b5d3186..67b861305d97 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -274,8 +274,8 @@  struct io_ring_ctx {
 	struct list_head	napi_list;	/* track busy poll napi_id */
 	spinlock_t		napi_lock;	/* napi_list lock */
 
-	unsigned int		napi_busy_poll_to; /* napi busy poll default timeout */
 	bool			napi_prefer_busy_poll;
+	unsigned int		napi_busy_poll_to;
 #endif
 
 	struct {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 2df3225b562f..1a713bbafaee 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -490,6 +490,10 @@  enum {
 	/* register a range of fixed file slots for automatic slot allocation */
 	IORING_REGISTER_FILE_ALLOC_RANGE	= 25,
 
+	/* set/clear busy poll settings */
+	IORING_REGISTER_NAPI			= 26,
+	IORING_UNREGISTER_NAPI			= 27,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
@@ -612,6 +616,13 @@  struct io_uring_buf_reg {
 	__u64	resv[3];
 };
 
+/* argument for IORING_(UN)REGISTER_NAPI */
+struct io_uring_napi {
+	__u32	busy_poll_to;
+	__u32	pad;
+	__u64	resv;
+};
+
 /*
  * io_uring_restriction->opcode values
  */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 4f432694cbed..cf0e7cc8ad2e 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -4122,6 +4122,48 @@  static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	return ret;
 }
 
+static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	const struct io_uring_napi curr = {
+		.busy_poll_to = ctx->napi_busy_poll_to,
+	};
+	struct io_uring_napi *napi;
+
+	napi = memdup_user(arg, sizeof(*napi));
+	if (IS_ERR(napi))
+		return PTR_ERR(napi);
+
+	WRITE_ONCE(ctx->napi_busy_poll_to, napi->busy_poll_to);
+
+	kfree(napi);
+
+	if (copy_to_user(arg, &curr, sizeof(curr)))
+		return -EFAULT;
+
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
+static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	const struct io_uring_napi curr = {
+		.busy_poll_to = ctx->napi_busy_poll_to,
+	};
+
+	if (copy_to_user(arg, &curr, sizeof(curr)))
+		return -EFAULT;
+
+	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			       void __user *arg, unsigned nr_args)
 	__releases(ctx->uring_lock)
@@ -4282,6 +4324,18 @@  static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_register_file_alloc_range(ctx, arg);
 		break;
+	case IORING_REGISTER_NAPI:
+		ret = -EINVAL;
+		if (!arg)
+			break;
+		ret = io_register_napi(ctx, arg);
+		break;
+	case IORING_UNREGISTER_NAPI:
+		ret = -EINVAL;
+		if (!arg)
+			break;
+		ret = io_unregister_napi(ctx, arg);
+		break;
 	default:
 		ret = -EINVAL;
 		break;