diff mbox series

[v5,2/3] io_uring: add api to set / get napi configuration.

Message ID 20221121191437.996297-3-shr@devkernel.io (mailing list archive)
State New
Headers show
Series io_uring: add napi busy polling support | expand

Commit Message

Stefan Roesch Nov. 21, 2022, 7:14 p.m. UTC
This adds an api to register the busy poll timeout from liburing. To be
able to use this functionality, the corresponding liburing patch is needed.

Signed-off-by: Stefan Roesch <shr@devkernel.io>
---
 include/uapi/linux/io_uring.h | 11 ++++++++
 io_uring/io_uring.c           | 53 +++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

Comments

Jens Axboe Nov. 21, 2022, 7:46 p.m. UTC | #1
On 11/21/22 12:14?PM, Stefan Roesch wrote:
> +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
> +{
> +#ifdef CONFIG_NET_RX_BUSY_POLL
> +	const struct io_uring_napi curr = {
> +		.busy_poll_to = ctx->napi_busy_poll_to,
> +	};
> +
> +	if (copy_to_user(arg, &curr, sizeof(curr)))
> +		return -EFAULT;
> +
> +	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
> +	return 0;
> +#else
> +	return -EINVAL;
> +#endif
> +}

Should probably check resv/pad here as well, maybe even the
'busy_poll_to' being zero?
Ammar Faizi Nov. 22, 2022, 1:13 p.m. UTC | #2
On 11/22/22 2:46 AM, Jens Axboe wrote:
> On 11/21/22 12:14?PM, Stefan Roesch wrote:
>> +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
>> +{
>> +#ifdef CONFIG_NET_RX_BUSY_POLL
>> +	const struct io_uring_napi curr = {
>> +		.busy_poll_to = ctx->napi_busy_poll_to,
>> +	};
>> +
>> +	if (copy_to_user(arg, &curr, sizeof(curr)))
>> +		return -EFAULT;
>> +
>> +	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
>> +	return 0;
>> +#else
>> +	return -EINVAL;
>> +#endif
>> +}
> 
> Should probably check resv/pad here as well, maybe even the
> 'busy_poll_to' being zero?

Jens, this function doesn't read from __user memory, it writes to
__user memory.

@curr.resv and @curr.pad are on the kernel's stack. Both are already
implicitly initialized to zero by the partial struct initializer.
Jens Axboe Nov. 22, 2022, 1:19 p.m. UTC | #3
On 11/22/22 6:13 AM, Ammar Faizi wrote:
> On 11/22/22 2:46 AM, Jens Axboe wrote:
>> On 11/21/22 12:14?PM, Stefan Roesch wrote:
>>> +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
>>> +{
>>> +#ifdef CONFIG_NET_RX_BUSY_POLL
>>> +    const struct io_uring_napi curr = {
>>> +        .busy_poll_to = ctx->napi_busy_poll_to,
>>> +    };
>>> +
>>> +    if (copy_to_user(arg, &curr, sizeof(curr)))
>>> +        return -EFAULT;
>>> +
>>> +    WRITE_ONCE(ctx->napi_busy_poll_to, 0);
>>> +    return 0;
>>> +#else
>>> +    return -EINVAL;
>>> +#endif
>>> +}
>>
>> Should probably check resv/pad here as well, maybe even the
>> 'busy_poll_to' being zero?
> 
> Jens, this function doesn't read from __user memory, it writes to
> __user memory.
> 
> @curr.resv and @curr.pad are on the kernel's stack. Both are already
> implicitly initialized to zero by the partial struct initializer.

Oh yes, guess I totally missed that we don't care about the value
at all (just zero the target) and copy back the old values.
Ammar Faizi Nov. 25, 2022, 9:43 p.m. UTC | #4
On 11/22/22 2:14 AM, Stefan Roesch wrote:
> +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
> +{
> +#ifdef CONFIG_NET_RX_BUSY_POLL
> +	const struct io_uring_napi curr = {
> +		.busy_poll_to = ctx->napi_busy_poll_to,
> +	};
> +
> +	if (copy_to_user(arg, &curr, sizeof(curr)))
> +		return -EFAULT;
> +
> +	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
> +	return 0;
> +#else
> +	return -EINVAL;
> +#endif
> +}
> +
I suggest allowing users to pass a NULL as the arg in case they
don't want to care about the old values.

Something like:

    io_uring_unregister_napi(ring, NULL);

What do you think?
Stefan Roesch Nov. 28, 2022, 8:22 p.m. UTC | #5
Ammar Faizi <ammarfaizi2@gnuweeb.org> writes:

> On 11/22/22 2:14 AM, Stefan Roesch wrote:
>> +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
>> +{
>> +#ifdef CONFIG_NET_RX_BUSY_POLL
>> +	const struct io_uring_napi curr = {
>> +		.busy_poll_to = ctx->napi_busy_poll_to,
>> +	};
>> +
>> +	if (copy_to_user(arg, &curr, sizeof(curr)))
>> +		return -EFAULT;
>> +
>> +	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
>> +	return 0;
>> +#else
>> +	return -EINVAL;
>> +#endif
>> +}
>> +
> I suggest allowing users to pass a NULL as the arg in case they
> don't want to care about the old values.
>
> Something like:
>
>    io_uring_unregister_napi(ring, NULL);
>
> What do you think?

Sounds good, i can make that change in the next version.
diff mbox series

Patch

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 2df3225b562f..1a713bbafaee 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -490,6 +490,10 @@  enum {
 	/* register a range of fixed file slots for automatic slot allocation */
 	IORING_REGISTER_FILE_ALLOC_RANGE	= 25,
 
+	/* set/clear busy poll settings */
+	IORING_REGISTER_NAPI			= 26,
+	IORING_UNREGISTER_NAPI			= 27,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
@@ -612,6 +616,13 @@  struct io_uring_buf_reg {
 	__u64	resv[3];
 };
 
+/* argument for IORING_(UN)REGISTER_NAPI */
+struct io_uring_napi {
+	__u32	busy_poll_to;
+	__u32	pad;
+	__u64	resv;
+};
+
 /*
  * io_uring_restriction->opcode values
  */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 6d7c05fa3c80..d8790c1b1cfb 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -4122,6 +4122,47 @@  static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	return ret;
 }
 
+static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	const struct io_uring_napi curr = {
+		.busy_poll_to = ctx->napi_busy_poll_to,
+	};
+	struct io_uring_napi napi;
+
+	if (copy_from_user(&napi, arg, sizeof(napi)))
+		return -EFAULT;
+	if (napi.pad || napi.resv)
+		return -EINVAL;
+
+	WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);
+
+	if (copy_to_user(arg, &curr, sizeof(curr)))
+		return -EFAULT;
+
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
+static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	const struct io_uring_napi curr = {
+		.busy_poll_to = ctx->napi_busy_poll_to,
+	};
+
+	if (copy_to_user(arg, &curr, sizeof(curr)))
+		return -EFAULT;
+
+	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			       void __user *arg, unsigned nr_args)
 	__releases(ctx->uring_lock)
@@ -4282,6 +4323,18 @@  static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_register_file_alloc_range(ctx, arg);
 		break;
+	case IORING_REGISTER_NAPI:
+		ret = -EINVAL;
+		if (!arg)
+			break;
+		ret = io_register_napi(ctx, arg);
+		break;
+	case IORING_UNREGISTER_NAPI:
+		ret = -EINVAL;
+		if (!arg)
+			break;
+		ret = io_unregister_napi(ctx, arg);
+		break;
 	default:
 		ret = -EINVAL;
 		break;