Message ID | 20230209230144.465620-5-shr@devkernel.io (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | io_uring: add napi busy polling support | expand |
I'd fold 2+3 into this patch again, having them standalone don't really make a lot of sense without this patch. This looks a lot better and gets rid of the ifdef infestation! Minor comments below, mostly just because I think we should fold those 3 patches anyway. > diff --git a/io_uring/napi.c b/io_uring/napi.c > new file mode 100644 > index 000000000000..c9e2afae382d > --- /dev/null > +++ b/io_uring/napi.c > @@ -0,0 +1,281 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +#include "io_uring.h" > +#include "napi.h" > + > +#ifdef CONFIG_NET_RX_BUSY_POLL > + > +/* Timeout for cleanout of stale entries. */ > +#define NAPI_TIMEOUT (60 * SEC_CONVERSION) > + > +struct io_napi_ht_entry { > + unsigned int napi_id; > + struct list_head list; > + > + /* Covered by napi lock spinlock. */ > + unsigned long timeout; > + struct hlist_node node; > +}; > + > +static inline bool io_napi_busy_loop_on(struct io_ring_ctx *ctx) > +{ > + return READ_ONCE(ctx->napi_busy_poll_to); > +} I'd probably get rid of this helper, to be honest. > +static bool io_napi_busy_loop(struct list_head *napi_list, bool prefer_busy_poll) > +{ > + struct io_napi_ht_entry *e; > + struct io_napi_ht_entry *n; > + > + list_for_each_entry_safe(e, n, napi_list, list) { > + napi_busy_loop(e->napi_id, NULL, NULL, prefer_busy_poll, > + BUSY_POLL_BUDGET); > + } Looks like 8 spaces before that BUSY_POLL_BUDGET, should be a tab? 
> +static void io_napi_blocking_busy_loop(struct list_head *napi_list, > + struct io_wait_queue *iowq) > +{ > + unsigned long start_time = 0; > + > + if (!list_is_singular(napi_list)) > + start_time = busy_loop_current_time(); > + > + while (!list_is_singular(napi_list) && > + io_napi_busy_loop(napi_list, iowq->napi_prefer_busy_poll) && > + !io_napi_busy_loop_should_end(iowq, start_time)) { > + ; > + } > + > + if (list_is_singular(napi_list)) { > + struct io_napi_ht_entry *ne = list_first_entry(napi_list, > + struct io_napi_ht_entry, list); > + > + napi_busy_loop(ne->napi_id, io_napi_busy_loop_should_end, iowq, > + iowq->napi_prefer_busy_poll, BUSY_POLL_BUDGET); > + } > +} This does look a LOT better! I do think a helper for the first while would make sense, and then have a comment in that helper on what this is doing exactly. static void io_napi_multi_busy_loop(napi_list, iowq) { unsigned long start_time = busy_loop_current_time(); do { if (list_is_singular(napi_list)) break; if (!io_napi_busy_loop(napi_list, iowq->napi_prefer_busy_poll)) break; } while (!io_napi_busy_loop_should_end(iowq, start_time)); } static void io_napi_blocking_busy_loop(struct list_head *napi_list, struct io_wait_queue *iowq) { if (!list_is_singular(napi_list)) io_napi_multi_busy_loop(napi_list, iowq); if (list_is_singular(napi_list)) { struct io_napi_ht_entry *ne; ne = list_first_entry(napi_list, struct io_napi_ht_entry, list); napi_busy_loop(ne->napi_id, io_napi_busy_loop_should_end, iowq, iowq->napi_prefer_busy_poll, BUSY_POLL_BUDGET); } } I think that is still much easier to read rather than all of these combined statements. What do you think? > +static void io_napi_merge_lists(struct io_ring_ctx *ctx, struct list_head *napi_list) > +{ > + spin_lock(&ctx->napi_lock); > + list_splice(napi_list, &ctx->napi_list); > + io_napi_remove_stale(ctx); > + spin_unlock(&ctx->napi_lock); > +} First line too long, split it into two. 
Did you look into the locking side like I mentioned in the previous review? > +/* > + * io_napi_adjust_busy_loop_timeout() - Adjust the busy loop timeout > + * @ctx: pointer to io-uring context structure > + * @iowq: pointer to io wait queue > + * @napi_list: pointer to head of napi list > + * @ts: pointer to timespec or NULL > + * > + * Adjust the busy loop timeout according to timespec and busy poll timeout. > + */ > +void io_napi_adjust_busy_loop_timeout(struct io_ring_ctx *ctx, > + struct io_wait_queue *iowq, > + struct list_head *napi_list, > + struct timespec64 *ts) > +{ > + if (!list_empty(napi_list)) { > + if (ts) > + __io_napi_adjust_busy_loop_timeout( > + READ_ONCE(ctx->napi_busy_poll_to), > + ts, &iowq->napi_busy_poll_to); > + else > + iowq->napi_busy_poll_to = READ_ONCE(ctx->napi_busy_poll_to); > + } > +} I'd make this: void io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, struct list_head *napi_list, struct timespec64 *ts) { if (list_empty(napi_list)) return; __io_napi_adjust_timeout(ctx, iowq, napi_list, ts); } and put it in the header. That leaves the fast path mostly untouched, rather than forcing a function call here. Also note the alignment of the variables in the function header, this applies in a bunch of spots. And just drop the busy_loop thing from the naming where it isn't strictly needed, lots of these function names are really long. > +/* > + * io_napi_setup_busy_loop() - setup the busy poll loop > + * @ctx: pointer to io-uring context structure > + * @iowq: pointer to io wait queue > + * @napi_list: pointer to head of napi list > + * > + * Capture busy poll timeout and prefer busy poll setting. Splice off the napi list. 
> + */ > +void io_napi_setup_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, > + struct list_head *napi_list) > +{ > + iowq->napi_busy_poll_to = 0; > + iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); > + > + if (!(ctx->flags & IORING_SETUP_SQPOLL)) { > + spin_lock(&ctx->napi_lock); > + list_splice_init(&ctx->napi_list, napi_list); > + spin_unlock(&ctx->napi_lock); > + } > +} Might need a comment here on why SQPOLL needs something extra? > +/* > + * io_napi_end_busy_loop() - execute busy poll loop > + * @ctx: pointer to io-uring context structure > + * @iowq: pointer to io wait queue > + * @napi_list: pointer to head of napi list > + * > + * Execute the busy poll loop and merge the spliced off list. > + */ > +void io_napi_end_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, > + struct list_head *napi_list) > +{ > + if (iowq->napi_busy_poll_to) > + io_napi_blocking_busy_loop(napi_list, iowq); > + > + if (!list_empty(napi_list)) > + io_napi_merge_lists(ctx, napi_list); > +} This should go above the users in this file. Maybe others are like that too, didn't check. 
> +++ b/io_uring/napi.h > @@ -0,0 +1,49 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > + > +#ifndef IOU_NAPI_H > +#define IOU_NAPI_H > + > +#include <linux/kernel.h> > +#include <linux/io_uring.h> > +#include <net/busy_poll.h> > + > +#ifdef CONFIG_NET_RX_BUSY_POLL > + > +#define NAPI_LIST_HEAD(l) LIST_HEAD(l) > + > +void io_napi_init(struct io_ring_ctx *ctx); > +void io_napi_free(struct io_ring_ctx *ctx); > + > +void io_napi_add(struct io_kiocb *req); > + > +void io_napi_setup_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, > + struct list_head *napi_list); > +void io_napi_adjust_busy_loop_timeout(struct io_ring_ctx *ctx, > + struct io_wait_queue *iowq, struct list_head *napi_list, > + struct timespec64 *ts); > +void io_napi_end_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, > + struct list_head *napi_list); > + > +#else > + > +#define NAPI_LIST_HEAD(l) > + > +static inline void io_napi_init(struct io_ring_ctx *ctx) > +{ > +} > + > +static inline void io_napi_free(struct io_ring_ctx *ctx) > +{ > +} > + > +static inline void io_napi_add(struct io_kiocb *req) > +{ > +} > + > +#define io_napi_setup_busy_loop(ctx, iowq, napi_list) do {} while (0) > +#define io_napi_adjust_busy_loop_timeout(ctx, iowq, napi_list, ts) do {} while (0) > +#define io_napi_end_busy_loop(ctx, iowq, napi_list) do {} while (0) This looks way better!
Jens Axboe <axboe@kernel.dk> writes: > I'd fold 2+3 into this patch again, having them standalone don't really > make a lot of sense without this patch. > > This looks a lot better and gets rid of the ifdef infestation! Minor > comments below, mostly just because I think we should fold those 3 > patches anyway. > >> diff --git a/io_uring/napi.c b/io_uring/napi.c >> new file mode 100644 >> index 000000000000..c9e2afae382d >> --- /dev/null >> +++ b/io_uring/napi.c >> @@ -0,0 +1,281 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> + >> +#include "io_uring.h" >> +#include "napi.h" >> + >> +#ifdef CONFIG_NET_RX_BUSY_POLL >> + >> +/* Timeout for cleanout of stale entries. */ >> +#define NAPI_TIMEOUT (60 * SEC_CONVERSION) >> + >> +struct io_napi_ht_entry { >> + unsigned int napi_id; >> + struct list_head list; >> + >> + /* Covered by napi lock spinlock. */ >> + unsigned long timeout; >> + struct hlist_node node; >> +}; >> + >> +static inline bool io_napi_busy_loop_on(struct io_ring_ctx *ctx) >> +{ >> + return READ_ONCE(ctx->napi_busy_poll_to); >> +} > > I'd probably get rid of this helper, to be honest. > I removed the helper in the next version. >> +static bool io_napi_busy_loop(struct list_head *napi_list, bool prefer_busy_poll) >> +{ >> + struct io_napi_ht_entry *e; >> + struct io_napi_ht_entry *n; >> + >> + list_for_each_entry_safe(e, n, napi_list, list) { >> + napi_busy_loop(e->napi_id, NULL, NULL, prefer_busy_poll, >> + BUSY_POLL_BUDGET); >> + } > > Looks like 8 spaces before that BUSY_POLL_BUDGET, should be a tab? > Fixed. 
>> +static void io_napi_blocking_busy_loop(struct list_head *napi_list, >> + struct io_wait_queue *iowq) >> +{ >> + unsigned long start_time = 0; >> + >> + if (!list_is_singular(napi_list)) >> + start_time = busy_loop_current_time(); >> + >> + while (!list_is_singular(napi_list) && >> + io_napi_busy_loop(napi_list, iowq->napi_prefer_busy_poll) && >> + !io_napi_busy_loop_should_end(iowq, start_time)) { >> + ; >> + } >> + >> + if (list_is_singular(napi_list)) { >> + struct io_napi_ht_entry *ne = list_first_entry(napi_list, >> + struct io_napi_ht_entry, list); >> + >> + napi_busy_loop(ne->napi_id, io_napi_busy_loop_should_end, iowq, >> + iowq->napi_prefer_busy_poll, BUSY_POLL_BUDGET); >> + } >> +} > > This does look a LOT better! I do think a helper for the first while > would make sense, and then have a comment in that helper on what this is > doing exactly. > > static void io_napi_multi_busy_loop(napi_list, iowq) > { > unsigned long start_time = busy_loop_current_time(); > > do { > if (list_is_singular(napi_list)) > break; > if (!io_napi_busy_loop(napi_list, iowq->napi_prefer_busy_poll)) > break; > } while (!io_napi_busy_loop_should_end(iowq, start_time)); > } > > static void io_napi_blocking_busy_loop(struct list_head *napi_list, > struct io_wait_queue *iowq) > { > if (!list_is_singular(napi_list)) > io_napi_multi_busy_loop(napi_list, iowq); > if (list_is_singular(napi_list)) { > struct io_napi_ht_entry *ne; > > ne = list_first_entry(napi_list, struct io_napi_ht_entry, list); > napi_busy_loop(ne->napi_id, io_napi_busy_loop_should_end, iowq, > iowq->napi_prefer_busy_poll, BUSY_POLL_BUDGET); > } > } > > I think that is still much easier to read rather than all of these > combined statements. What do you think? > I personally prefer the while loop, but I made the above change in the next version. 
>> +static void io_napi_merge_lists(struct io_ring_ctx *ctx, struct list_head *napi_list) >> +{ >> + spin_lock(&ctx->napi_lock); >> + list_splice(napi_list, &ctx->napi_list); >> + io_napi_remove_stale(ctx); >> + spin_unlock(&ctx->napi_lock); >> +} > > First line too long, split it into two. Did you look into the locking > side like I mentioned in the previous review? > Fixed. I looked at the locking; however, not all code paths where io_napi_add is called guarantee that the io-uring lock is taken. >> +/* >> + * io_napi_adjust_busy_loop_timeout() - Adjust the busy loop timeout >> + * @ctx: pointer to io-uring context structure >> + * @iowq: pointer to io wait queue >> + * @napi_list: pointer to head of napi list >> + * @ts: pointer to timespec or NULL >> + * >> + * Adjust the busy loop timeout according to timespec and busy poll timeout. >> + */ >> +void io_napi_adjust_busy_loop_timeout(struct io_ring_ctx *ctx, >> + struct io_wait_queue *iowq, >> + struct list_head *napi_list, >> + struct timespec64 *ts) >> +{ >> + if (!list_empty(napi_list)) { >> + if (ts) >> + __io_napi_adjust_busy_loop_timeout( >> + READ_ONCE(ctx->napi_busy_poll_to), >> + ts, &iowq->napi_busy_poll_to); >> + else >> + iowq->napi_busy_poll_to = READ_ONCE(ctx->napi_busy_poll_to); >> + } >> +} > > I'd make this: > > void io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, > struct list_head *napi_list, struct timespec64 *ts) > { > if (list_empty(napi_list)) > return; > > __io_napi_adjust_timeout(ctx, iowq, napi_list, ts); > } > > and put it in the header. That leaves the fast path mostly untouched, > rather than forcing a function call here. > > Also note the alignment of the variables in the function header, this > applies in a bunch of spots. And just drop the busy_loop thing from the > naming where it isn't strictly needed, lots of these function names are > really long. > > Unfortunately the function doesn't get inlined. I added a new helper io_napi to avoid this case. 
>> +/* >> + * io_napi_setup_busy_loop() - setup the busy poll loop >> + * @ctx: pointer to io-uring context structure >> + * @iowq: pointer to io wait queue >> + * @napi_list: pointer to head of napi list >> + * >> + * Capture busy poll timeout and prefer busy poll setting. Splice off the napi list. >> + */ >> +void io_napi_setup_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, >> + struct list_head *napi_list) >> +{ >> + iowq->napi_busy_poll_to = 0; >> + iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); >> + >> + if (!(ctx->flags & IORING_SETUP_SQPOLL)) { >> + spin_lock(&ctx->napi_lock); >> + list_splice_init(&ctx->napi_list, napi_list); >> + spin_unlock(&ctx->napi_lock); >> + } >> +} > > Might need a comment here on why SQPOLL needs something extra? > I added a comment. >> +/* >> + * io_napi_end_busy_loop() - execute busy poll loop >> + * @ctx: pointer to io-uring context structure >> + * @iowq: pointer to io wait queue >> + * @napi_list: pointer to head of napi list >> + * >> + * Execute the busy poll loop and merge the spliced off list. >> + */ >> +void io_napi_end_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, >> + struct list_head *napi_list) >> +{ >> + if (iowq->napi_busy_poll_to) >> + io_napi_blocking_busy_loop(napi_list, iowq); >> + >> + if (!list_empty(napi_list)) >> + io_napi_merge_lists(ctx, napi_list); >> +} > > This should go above the users in this file. Maybe others are like that > too, didn't check. > There are no users in this file. 
>> +++ b/io_uring/napi.h >> @@ -0,0 +1,49 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> + >> +#ifndef IOU_NAPI_H >> +#define IOU_NAPI_H >> + >> +#include <linux/kernel.h> >> +#include <linux/io_uring.h> >> +#include <net/busy_poll.h> >> + >> +#ifdef CONFIG_NET_RX_BUSY_POLL >> + >> +#define NAPI_LIST_HEAD(l) LIST_HEAD(l) >> + >> +void io_napi_init(struct io_ring_ctx *ctx); >> +void io_napi_free(struct io_ring_ctx *ctx); >> + >> +void io_napi_add(struct io_kiocb *req); >> + >> +void io_napi_setup_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, >> + struct list_head *napi_list); >> +void io_napi_adjust_busy_loop_timeout(struct io_ring_ctx *ctx, >> + struct io_wait_queue *iowq, struct list_head *napi_list, >> + struct timespec64 *ts); >> +void io_napi_end_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, >> + struct list_head *napi_list); >> + >> +#else >> + >> +#define NAPI_LIST_HEAD(l) >> + >> +static inline void io_napi_init(struct io_ring_ctx *ctx) >> +{ >> +} >> + >> +static inline void io_napi_free(struct io_ring_ctx *ctx) >> +{ >> +} >> + >> +static inline void io_napi_add(struct io_kiocb *req) >> +{ >> +} >> + >> +#define io_napi_setup_busy_loop(ctx, iowq, napi_list) do {} while (0) >> +#define io_napi_adjust_busy_loop_timeout(ctx, iowq, napi_list, ts) do {} while (0) >> +#define io_napi_end_busy_loop(ctx, iowq, napi_list) do {} while (0) > > This looks way better!
diff --git a/io_uring/Makefile b/io_uring/Makefile index 8cc8e5387a75..2efe7c5f07ba 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \ sqpoll.o fdinfo.o tctx.o poll.o \ cancel.o kbuf.o rsrc.o rw.o opdef.o notif.o obj-$(CONFIG_IO_WQ) += io-wq.o +obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5d6aa783fb97..7074379a9bd0 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -90,6 +90,7 @@ #include "rsrc.h" #include "cancel.h" #include "net.h" +#include "napi.h" #include "notif.h" #include "timeout.h" @@ -335,6 +336,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) INIT_WQ_LIST(&ctx->locked_free_list); INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); INIT_WQ_LIST(&ctx->submit_state.compl_reqs); + io_napi_init(ctx); + return ctx; err: kfree(ctx->dummy_ubuf); @@ -2534,6 +2537,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, struct io_wait_queue iowq; struct io_rings *rings = ctx->rings; int ret; + NAPI_LIST_HEAD(local_napi_list); if (!io_allowed_run_tw(ctx)) return -EEXIST; @@ -2566,15 +2570,22 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; iowq.timeout = KTIME_MAX; + io_napi_setup_busy_loop(ctx, &iowq, &local_napi_list); if (uts) { struct timespec64 ts; if (get_timespec64(&ts, uts)) return -EFAULT; + + io_napi_adjust_busy_loop_timeout(ctx, &iowq, &local_napi_list, &ts); iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); + } else { + io_napi_adjust_busy_loop_timeout(ctx, &iowq, &local_napi_list, NULL); } trace_io_uring_cqring_wait(ctx, min_events); + io_napi_end_busy_loop(ctx, &iowq, &local_napi_list); + do { unsigned long check_cq; @@ -2806,6 +2817,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) io_req_caches_free(ctx); if 
(ctx->hash_map) io_wq_put_hash(ctx->hash_map); + io_napi_free(ctx); kfree(ctx->cancel_table.hbs); kfree(ctx->cancel_table_locked.hbs); kfree(ctx->dummy_ubuf); diff --git a/io_uring/napi.c b/io_uring/napi.c new file mode 100644 index 000000000000..c9e2afae382d --- /dev/null +++ b/io_uring/napi.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "io_uring.h" +#include "napi.h" + +#ifdef CONFIG_NET_RX_BUSY_POLL + +/* Timeout for cleanout of stale entries. */ +#define NAPI_TIMEOUT (60 * SEC_CONVERSION) + +struct io_napi_ht_entry { + unsigned int napi_id; + struct list_head list; + + /* Covered by napi lock spinlock. */ + unsigned long timeout; + struct hlist_node node; +}; + +static inline bool io_napi_busy_loop_on(struct io_ring_ctx *ctx) +{ + return READ_ONCE(ctx->napi_busy_poll_to); +} + +static inline void __io_napi_add(struct io_ring_ctx *ctx, struct file *file) +{ + unsigned int napi_id; + struct socket *sock; + struct sock *sk; + struct io_napi_ht_entry *he; + + if (!io_napi_busy_loop_on(ctx)) + return; + + sock = sock_from_file(file); + if (!sock) + return; + + sk = sock->sk; + if (!sk) + return; + + napi_id = READ_ONCE(sk->sk_napi_id); + + /* Non-NAPI IDs can be rejected. 
*/ + if (napi_id < MIN_NAPI_ID) + return; + + spin_lock(&ctx->napi_lock); + hash_for_each_possible(ctx->napi_ht, he, node, napi_id) { + if (he->napi_id == napi_id) { + he->timeout = jiffies + NAPI_TIMEOUT; + goto out; + } + } + + he = kmalloc(sizeof(*he), GFP_NOWAIT); + if (!he) + goto out; + + he->napi_id = napi_id; + he->timeout = jiffies + NAPI_TIMEOUT; + hash_add(ctx->napi_ht, &he->node, napi_id); + + list_add_tail(&he->list, &ctx->napi_list); + +out: + spin_unlock(&ctx->napi_lock); +} + +static void __io_napi_adjust_busy_loop_timeout(unsigned int poll_to, + struct timespec64 *ts, + unsigned int *new_poll_to) +{ + struct timespec64 pollto = ns_to_timespec64(1000 * (s64)poll_to); + + if (timespec64_compare(ts, &pollto) > 0) { + *ts = timespec64_sub(*ts, pollto); + *new_poll_to = poll_to; + } else { + u64 to = timespec64_to_ns(ts); + + do_div(to, 1000); + *new_poll_to = to; + ts->tv_sec = 0; + ts->tv_nsec = 0; + } +} + +static inline bool io_napi_busy_loop_timeout(unsigned long start_time, + unsigned long bp_usec) +{ + if (bp_usec) { + unsigned long end_time = start_time + bp_usec; + unsigned long now = busy_loop_current_time(); + + return time_after(now, end_time); + } + + return true; +} + +static bool io_napi_busy_loop_should_end(void *p, unsigned long start_time) +{ + struct io_wait_queue *iowq = p; + + return signal_pending(current) || + io_should_wake(iowq) || + io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to); +} + +static bool io_napi_busy_loop(struct list_head *napi_list, bool prefer_busy_poll) +{ + struct io_napi_ht_entry *e; + struct io_napi_ht_entry *n; + + list_for_each_entry_safe(e, n, napi_list, list) { + napi_busy_loop(e->napi_id, NULL, NULL, prefer_busy_poll, + BUSY_POLL_BUDGET); + } + + return !list_empty(napi_list); +} + +static void io_napi_blocking_busy_loop(struct list_head *napi_list, + struct io_wait_queue *iowq) +{ + unsigned long start_time = 0; + + if (!list_is_singular(napi_list)) + start_time = busy_loop_current_time(); 
+ + while (!list_is_singular(napi_list) && + io_napi_busy_loop(napi_list, iowq->napi_prefer_busy_poll) && + !io_napi_busy_loop_should_end(iowq, start_time)) { + ; + } + + if (list_is_singular(napi_list)) { + struct io_napi_ht_entry *ne = list_first_entry(napi_list, + struct io_napi_ht_entry, list); + + napi_busy_loop(ne->napi_id, io_napi_busy_loop_should_end, iowq, + iowq->napi_prefer_busy_poll, BUSY_POLL_BUDGET); + } +} + +static void io_napi_remove_stale(struct io_ring_ctx *ctx) +{ + unsigned int i; + struct io_napi_ht_entry *he; + + hash_for_each(ctx->napi_ht, i, he, node) { + if (time_after(jiffies, he->timeout)) { + list_del(&he->list); + hash_del(&he->node); + } + } + +} + +static void io_napi_merge_lists(struct io_ring_ctx *ctx, struct list_head *napi_list) +{ + spin_lock(&ctx->napi_lock); + list_splice(napi_list, &ctx->napi_list); + io_napi_remove_stale(ctx); + spin_unlock(&ctx->napi_lock); +} + +/* + * io_napi_init() - Init napi settings + * @ctx: pointer to io-uring context structure + * + * Init napi settings in the io-uring context. + */ +void io_napi_init(struct io_ring_ctx *ctx) +{ + INIT_LIST_HEAD(&ctx->napi_list); + spin_lock_init(&ctx->napi_lock); + ctx->napi_prefer_busy_poll = false; + ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll); +} + +/* + * io_napi_free() - Deallocate napi + * @ctx: pointer to io-uring context structure + * + * Free the napi list and the hash table in the io-uring context. + */ +void io_napi_free(struct io_ring_ctx *ctx) +{ + unsigned int i; + struct io_napi_ht_entry *he; + LIST_HEAD(napi_list); + + spin_lock(&ctx->napi_lock); + hash_for_each(ctx->napi_ht, i, he, node) + hash_del(&he->node); + spin_unlock(&ctx->napi_lock); +} + +/* + * io_napi_add() - Add napi id to the busy poll list + * @req: pointer to io_kiocb request + * + * Add the napi id of the socket to the napi busy poll list and hash table. 
+ */ +void io_napi_add(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + + if (!io_napi_busy_loop_on(ctx)) + return; + + __io_napi_add(ctx, req->file); +} + +/* + * io_napi_adjust_busy_loop_timeout() - Adjust the busy loop timeout + * @ctx: pointer to io-uring context structure + * @iowq: pointer to io wait queue + * @napi_list: pointer to head of napi list + * @ts: pointer to timespec or NULL + * + * Adjust the busy loop timeout according to timespec and busy poll timeout. + */ +void io_napi_adjust_busy_loop_timeout(struct io_ring_ctx *ctx, + struct io_wait_queue *iowq, + struct list_head *napi_list, + struct timespec64 *ts) +{ + if (!list_empty(napi_list)) { + if (ts) + __io_napi_adjust_busy_loop_timeout( + READ_ONCE(ctx->napi_busy_poll_to), + ts, &iowq->napi_busy_poll_to); + else + iowq->napi_busy_poll_to = READ_ONCE(ctx->napi_busy_poll_to); + } +} + +/* + * io_napi_setup_busy_loop() - setup the busy poll loop + * @ctx: pointer to io-uring context structure + * @iowq: pointer to io wait queue + * @napi_list: pointer to head of napi list + * + * Capture busy poll timeout and prefer busy poll setting. Splice off the napi list. + */ +void io_napi_setup_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, + struct list_head *napi_list) +{ + iowq->napi_busy_poll_to = 0; + iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); + + if (!(ctx->flags & IORING_SETUP_SQPOLL)) { + spin_lock(&ctx->napi_lock); + list_splice_init(&ctx->napi_list, napi_list); + spin_unlock(&ctx->napi_lock); + } +} + +/* + * io_napi_end_busy_loop() - execute busy poll loop + * @ctx: pointer to io-uring context structure + * @iowq: pointer to io wait queue + * @napi_list: pointer to head of napi list + * + * Execute the busy poll loop and merge the spliced off list. 
+ */ +void io_napi_end_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, + struct list_head *napi_list) +{ + if (iowq->napi_busy_poll_to) + io_napi_blocking_busy_loop(napi_list, iowq); + + if (!list_empty(napi_list)) + io_napi_merge_lists(ctx, napi_list); +} + +#endif diff --git a/io_uring/napi.h b/io_uring/napi.h new file mode 100644 index 000000000000..0672592cfb79 --- /dev/null +++ b/io_uring/napi.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef IOU_NAPI_H +#define IOU_NAPI_H + +#include <linux/kernel.h> +#include <linux/io_uring.h> +#include <net/busy_poll.h> + +#ifdef CONFIG_NET_RX_BUSY_POLL + +#define NAPI_LIST_HEAD(l) LIST_HEAD(l) + +void io_napi_init(struct io_ring_ctx *ctx); +void io_napi_free(struct io_ring_ctx *ctx); + +void io_napi_add(struct io_kiocb *req); + +void io_napi_setup_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, + struct list_head *napi_list); +void io_napi_adjust_busy_loop_timeout(struct io_ring_ctx *ctx, + struct io_wait_queue *iowq, struct list_head *napi_list, + struct timespec64 *ts); +void io_napi_end_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, + struct list_head *napi_list); + +#else + +#define NAPI_LIST_HEAD(l) + +static inline void io_napi_init(struct io_ring_ctx *ctx) +{ +} + +static inline void io_napi_free(struct io_ring_ctx *ctx) +{ +} + +static inline void io_napi_add(struct io_kiocb *req) +{ +} + +#define io_napi_setup_busy_loop(ctx, iowq, napi_list) do {} while (0) +#define io_napi_adjust_busy_loop_timeout(ctx, iowq, napi_list, ts) do {} while (0) +#define io_napi_end_busy_loop(ctx, iowq, napi_list) do {} while (0) + +#endif + +#endif diff --git a/io_uring/poll.c b/io_uring/poll.c index 8339a92b4510..ab08299eb341 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -15,6 +15,7 @@ #include "io_uring.h" #include "refs.h" +#include "napi.h" #include "opdef.h" #include "kbuf.h" #include "poll.h" @@ -629,6 +630,7 @@ static int __io_arm_poll_handler(struct 
io_kiocb *req, __io_poll_execute(req, mask); return 0; } + io_napi_add(req); if (ipt->owning) { /*