Message ID | 20221121191437.996297-3-shr@devkernel.io (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | io_uring: add napi busy polling support | expand |
On 11/21/22 12:14?PM, Stefan Roesch wrote: > +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) > +{ > +#ifdef CONFIG_NET_RX_BUSY_POLL > + const struct io_uring_napi curr = { > + .busy_poll_to = ctx->napi_busy_poll_to, > + }; > + > + if (copy_to_user(arg, &curr, sizeof(curr))) > + return -EFAULT; > + > + WRITE_ONCE(ctx->napi_busy_poll_to, 0); > + return 0; > +#else > + return -EINVAL; > +#endif > +} Should probably check resv/pad here as well, maybe even the 'busy_poll_to' being zero?
On 11/22/22 2:46 AM, Jens Axboe wrote: > On 11/21/22 12:14?PM, Stefan Roesch wrote: >> +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) >> +{ >> +#ifdef CONFIG_NET_RX_BUSY_POLL >> + const struct io_uring_napi curr = { >> + .busy_poll_to = ctx->napi_busy_poll_to, >> + }; >> + >> + if (copy_to_user(arg, &curr, sizeof(curr))) >> + return -EFAULT; >> + >> + WRITE_ONCE(ctx->napi_busy_poll_to, 0); >> + return 0; >> +#else >> + return -EINVAL; >> +#endif >> +} > > Should probably check resv/pad here as well, maybe even the > 'busy_poll_to' being zero? Jens, this function doesn't read from __user memory, it writes to __user memory. @curr.resv and @curr.pad are on the kernel's stack. Both are already implicitly initialized to zero by the partial struct initializer.
On 11/22/22 6:13 AM, Ammar Faizi wrote: > On 11/22/22 2:46 AM, Jens Axboe wrote: >> On 11/21/22 12:14?PM, Stefan Roesch wrote: >>> +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) >>> +{ >>> +#ifdef CONFIG_NET_RX_BUSY_POLL >>> + const struct io_uring_napi curr = { >>> + .busy_poll_to = ctx->napi_busy_poll_to, >>> + }; >>> + >>> + if (copy_to_user(arg, &curr, sizeof(curr))) >>> + return -EFAULT; >>> + >>> + WRITE_ONCE(ctx->napi_busy_poll_to, 0); >>> + return 0; >>> +#else >>> + return -EINVAL; >>> +#endif >>> +} >> >> Should probably check resv/pad here as well, maybe even the >> 'busy_poll_to' being zero? > > Jens, this function doesn't read from __user memory, it writes to > __user memory. > > @curr.resv and @curr.pad are on the kernel's stack. Both are already > implicitly initialized to zero by the partial struct initializer. Oh yes, guess I totally missed that we don't care about the value at all (just zero the target) and copy back the old values.
On 11/22/22 2:14 AM, Stefan Roesch wrote: > +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) > +{ > +#ifdef CONFIG_NET_RX_BUSY_POLL > + const struct io_uring_napi curr = { > + .busy_poll_to = ctx->napi_busy_poll_to, > + }; > + > + if (copy_to_user(arg, &curr, sizeof(curr))) > + return -EFAULT; > + > + WRITE_ONCE(ctx->napi_busy_poll_to, 0); > + return 0; > +#else > + return -EINVAL; > +#endif > +} > + I suggest allowing users to pass a NULL as the arg in case they don't want to care about the old values. Something like: io_uring_unregister_napi(ring, NULL); What do you think?
Ammar Faizi <ammarfaizi2@gnuweeb.org> writes: > On 11/22/22 2:14 AM, Stefan Roesch wrote: >> +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) >> +{ >> +#ifdef CONFIG_NET_RX_BUSY_POLL >> + const struct io_uring_napi curr = { >> + .busy_poll_to = ctx->napi_busy_poll_to, >> + }; >> + >> + if (copy_to_user(arg, &curr, sizeof(curr))) >> + return -EFAULT; >> + >> + WRITE_ONCE(ctx->napi_busy_poll_to, 0); >> + return 0; >> +#else >> + return -EINVAL; >> +#endif >> +} >> + > I suggest allowing users to pass a NULL as the arg in case they > don't want to care about the old values. > > Something like: > > io_uring_unregister_napi(ring, NULL); > > What do you think? Sounds good, i can make that change in the next version.
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2df3225b562f..1a713bbafaee 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -490,6 +490,10 @@ enum { /* register a range of fixed file slots for automatic slot allocation */ IORING_REGISTER_FILE_ALLOC_RANGE = 25, + /* set/clear busy poll settings */ + IORING_REGISTER_NAPI = 26, + IORING_UNREGISTER_NAPI = 27, + /* this goes last */ IORING_REGISTER_LAST }; @@ -612,6 +616,13 @@ struct io_uring_buf_reg { __u64 resv[3]; }; +/* argument for IORING_(UN)REGISTER_NAPI */ +struct io_uring_napi { + __u32 busy_poll_to; + __u32 pad; + __u64 resv; +}; + /* * io_uring_restriction->opcode values */ diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 6d7c05fa3c80..d8790c1b1cfb 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -4122,6 +4122,47 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, return ret; } +static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + const struct io_uring_napi curr = { + .busy_poll_to = ctx->napi_busy_poll_to, + }; + struct io_uring_napi napi; + + if (copy_from_user(&napi, arg, sizeof(napi))) + return -EFAULT; + if (napi.pad || napi.resv) + return -EINVAL; + + WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to); + + if (copy_to_user(arg, &curr, sizeof(curr))) + return -EFAULT; + + return 0; +#else + return -EINVAL; +#endif +} + +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + const struct io_uring_napi curr = { + .busy_poll_to = ctx->napi_busy_poll_to, + }; + + if (copy_to_user(arg, &curr, sizeof(curr))) + return -EFAULT; + + WRITE_ONCE(ctx->napi_busy_poll_to, 0); + return 0; +#else + return -EINVAL; +#endif +} + static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, void __user *arg, unsigned nr_args) __releases(ctx->uring_lock) @@ -4282,6 +4323,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_register_file_alloc_range(ctx, arg); break; + case IORING_REGISTER_NAPI: + ret = -EINVAL; + if (!arg) + break; + ret = io_register_napi(ctx, arg); + break; + case IORING_UNREGISTER_NAPI: + ret = -EINVAL; + if (!arg) + break; + ret = io_unregister_napi(ctx, arg); + break; default: ret = -EINVAL; break;
This adds an api to register the busy poll timeout from liburing. To be able to use this functionality, the corresponding liburing patch is needed. Signed-off-by: Stefan Roesch <shr@devkernel.io> --- include/uapi/linux/io_uring.h | 11 ++++++++ io_uring/io_uring.c | 53 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+)