Message ID | 20221121172953.4030697-3-shr@devkernel.io (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | io_uring: add napi busy polling support | expand |
On 11/21/22 10:29 AM, Stefan Roesch wrote: > This adds an api to register the busy poll timeout from liburing. To be > able to use this functionality, the corresponding liburing patch is needed. > > Signed-off-by: Stefan Roesch <shr@devkernel.io> > --- > include/linux/io_uring_types.h | 2 +- > include/uapi/linux/io_uring.h | 11 +++++++ > io_uring/io_uring.c | 54 ++++++++++++++++++++++++++++++++++ > 3 files changed, 66 insertions(+), 1 deletion(-) > > diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h > index 23993b5d3186..67b861305d97 100644 > --- a/include/linux/io_uring_types.h > +++ b/include/linux/io_uring_types.h > @@ -274,8 +274,8 @@ struct io_ring_ctx { > struct list_head napi_list; /* track busy poll napi_id */ > spinlock_t napi_lock; /* napi_list lock */ > > - unsigned int napi_busy_poll_to; /* napi busy poll default timeout */ > bool napi_prefer_busy_poll; > + unsigned int napi_busy_poll_to; > #endif Why is this being moved? Seems unrelated, and it actually creates another hole rather than filling one as it did before.
On 11/21/22 10:29 AM, Stefan Roesch wrote: > diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c > index 4f432694cbed..cf0e7cc8ad2e 100644 > --- a/io_uring/io_uring.c > +++ b/io_uring/io_uring.c > @@ -4122,6 +4122,48 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, > return ret; > } > > +static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) > +{ > +#ifdef CONFIG_NET_RX_BUSY_POLL > + const struct io_uring_napi curr = { > + .busy_poll_to = ctx->napi_busy_poll_to, > + }; > + struct io_uring_napi *napi; > + > + napi = memdup_user(arg, sizeof(*napi)); > + if (IS_ERR(napi)) > + return PTR_ERR(napi); > + > + WRITE_ONCE(ctx->napi_busy_poll_to, napi->busy_poll_to); > + > + kfree(napi); > + > + if (copy_to_user(arg, &curr, sizeof(curr))) > + return -EFAULT; > + > + return 0; > +#else > + return -EINVAL; > +#endif > +} This should return -EINVAL if any of the padding or reserved fields are non-zero. If you don't do that, then it's not expandable in the future.
On 11/22/22 12:29 AM, Stefan Roesch wrote: > +static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) > +{ > +#ifdef CONFIG_NET_RX_BUSY_POLL > + const struct io_uring_napi curr = { > + .busy_poll_to = ctx->napi_busy_poll_to, > + }; > + struct io_uring_napi *napi; > + > + napi = memdup_user(arg, sizeof(*napi)); > + if (IS_ERR(napi)) > + return PTR_ERR(napi); > + > + WRITE_ONCE(ctx->napi_busy_poll_to, napi->busy_poll_to); > + > + kfree(napi); > + > + if (copy_to_user(arg, &curr, sizeof(curr))) > + return -EFAULT; > + > + return 0; Considering: 1) `struct io_uring_napi` is 16 bytes in size. 2) The lifetime of `struct io_uring_napi *napi;` is brief. There is no need to use memdup_user() and kfree(). You can place it on the stack and use copy_from_user() instead.
Jens Axboe <axboe@kernel.dk> writes: > On 11/21/22 10:29 AM, Stefan Roesch wrote: >> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c >> index 4f432694cbed..cf0e7cc8ad2e 100644 >> --- a/io_uring/io_uring.c >> +++ b/io_uring/io_uring.c >> @@ -4122,6 +4122,48 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, >> return ret; >> } >> >> +static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) >> +{ >> +#ifdef CONFIG_NET_RX_BUSY_POLL >> + const struct io_uring_napi curr = { >> + .busy_poll_to = ctx->napi_busy_poll_to, >> + }; >> + struct io_uring_napi *napi; >> + >> + napi = memdup_user(arg, sizeof(*napi)); >> + if (IS_ERR(napi)) >> + return PTR_ERR(napi); >> + >> + WRITE_ONCE(ctx->napi_busy_poll_to, napi->busy_poll_to); >> + >> + kfree(napi); >> + >> + if (copy_to_user(arg, &curr, sizeof(curr))) >> + return -EFAULT; >> + >> + return 0; >> +#else >> + return -EINVAL; >> +#endif >> +} > > This should return -EINVAL if any of the padding or reserved fields are > non-zero. If you don't do that, then it's not expandable in the future. The next version will have that change.
Jens Axboe <axboe@kernel.dk> writes: > On 11/21/22 10:29 AM, Stefan Roesch wrote: >> This adds an api to register the busy poll timeout from liburing. To be >> able to use this functionality, the corresponding liburing patch is needed. >> >> Signed-off-by: Stefan Roesch <shr@devkernel.io> >> --- >> include/linux/io_uring_types.h | 2 +- >> include/uapi/linux/io_uring.h | 11 +++++++ >> io_uring/io_uring.c | 54 ++++++++++++++++++++++++++++++++++ >> 3 files changed, 66 insertions(+), 1 deletion(-) >> >> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h >> index 23993b5d3186..67b861305d97 100644 >> --- a/include/linux/io_uring_types.h >> +++ b/include/linux/io_uring_types.h >> @@ -274,8 +274,8 @@ struct io_ring_ctx { >> struct list_head napi_list; /* track busy poll napi_id */ >> spinlock_t napi_lock; /* napi_list lock */ >> >> - unsigned int napi_busy_poll_to; /* napi busy poll default timeout */ >> bool napi_prefer_busy_poll; >> + unsigned int napi_busy_poll_to; >> #endif > > Why is this being moved? Seems unrelated, and it actually creates another > hole rather than filling one as it did before. That was not intended. The next version of the patch will restore the previous order.
Ammar Faizi <ammarfaizi2@gnuweeb.org> writes: > On 11/22/22 12:29 AM, Stefan Roesch wrote: >> +static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) >> +{ >> +#ifdef CONFIG_NET_RX_BUSY_POLL >> + const struct io_uring_napi curr = { >> + .busy_poll_to = ctx->napi_busy_poll_to, >> + }; >> + struct io_uring_napi *napi; >> + >> + napi = memdup_user(arg, sizeof(*napi)); >> + if (IS_ERR(napi)) >> + return PTR_ERR(napi); >> + >> + WRITE_ONCE(ctx->napi_busy_poll_to, napi->busy_poll_to); >> + >> + kfree(napi); >> + >> + if (copy_to_user(arg, &curr, sizeof(curr))) >> + return -EFAULT; >> + >> + return 0; > > Considering: > > 1) `struct io_uring_napi` is 16 bytes in size. > > 2) The lifetime of `struct io_uring_napi *napi;` is brief. > > There is no need to use memdup_user() and kfree(). You can place it > on the stack and use copy_from_user() instead. The next version of the patch will use copy_from_user.
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 23993b5d3186..67b861305d97 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -274,8 +274,8 @@ struct io_ring_ctx { struct list_head napi_list; /* track busy poll napi_id */ spinlock_t napi_lock; /* napi_list lock */ - unsigned int napi_busy_poll_to; /* napi busy poll default timeout */ bool napi_prefer_busy_poll; + unsigned int napi_busy_poll_to; #endif struct { diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2df3225b562f..1a713bbafaee 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -490,6 +490,10 @@ enum { /* register a range of fixed file slots for automatic slot allocation */ IORING_REGISTER_FILE_ALLOC_RANGE = 25, + /* set/clear busy poll settings */ + IORING_REGISTER_NAPI = 26, + IORING_UNREGISTER_NAPI = 27, + /* this goes last */ IORING_REGISTER_LAST }; @@ -612,6 +616,13 @@ struct io_uring_buf_reg { __u64 resv[3]; }; +/* argument for IORING_(UN)REGISTER_NAPI */ +struct io_uring_napi { + __u32 busy_poll_to; + __u32 pad; + __u64 resv; +}; + /* * io_uring_restriction->opcode values */ diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 4f432694cbed..cf0e7cc8ad2e 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -4122,6 +4122,48 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, return ret; } +static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + const struct io_uring_napi curr = { + .busy_poll_to = ctx->napi_busy_poll_to, + }; + struct io_uring_napi *napi; + + napi = memdup_user(arg, sizeof(*napi)); + if (IS_ERR(napi)) + return PTR_ERR(napi); + + WRITE_ONCE(ctx->napi_busy_poll_to, napi->busy_poll_to); + + kfree(napi); + + if (copy_to_user(arg, &curr, sizeof(curr))) + return -EFAULT; + + return 0; +#else + return -EINVAL; +#endif +} + +static int io_unregister_napi(struct io_ring_ctx 
*ctx, void __user *arg) +{ +#ifdef CONFIG_NET_RX_BUSY_POLL + const struct io_uring_napi curr = { + .busy_poll_to = ctx->napi_busy_poll_to, + }; + + if (copy_to_user(arg, &curr, sizeof(curr))) + return -EFAULT; + + WRITE_ONCE(ctx->napi_busy_poll_to, 0); + return 0; +#else + return -EINVAL; +#endif +} + static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, void __user *arg, unsigned nr_args) __releases(ctx->uring_lock) @@ -4282,6 +4324,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_register_file_alloc_range(ctx, arg); break; + case IORING_REGISTER_NAPI: + ret = -EINVAL; + if (!arg) + break; + ret = io_register_napi(ctx, arg); + break; + case IORING_UNREGISTER_NAPI: + ret = -EINVAL; + if (!arg) + break; + ret = io_unregister_napi(ctx, arg); + break; default: ret = -EINVAL; break;
This adds an api to register the busy poll timeout from liburing. To be able to use this functionality, the corresponding liburing patch is needed. Signed-off-by: Stefan Roesch <shr@devkernel.io> --- include/linux/io_uring_types.h | 2 +- include/uapi/linux/io_uring.h | 11 +++++++ io_uring/io_uring.c | 54 ++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-)