Message ID | 8945b01756d902f5d5b0667f20b957ad3f742e5e.1666895626.git.metze@samba.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [1/1] io_uring/net: introduce IORING_SEND_ZC_REPORT_USAGE flag | expand |
On 10/27/22 19:34, Stefan Metzmacher wrote: > It might be useful for applications to detect if a zero copy > transfer with SEND[MSG]_ZC was actually possible or not. > The application can fallback to plain SEND[MSG] in order > to avoid the overhead of two cqes per request. > Or it can generate a log message that could indicate > to an administrator that no zero copy was possible > and could explain degraded performance. From a quick look seems good, I'll test and double check when I'm back on tuesday > Link: https://lore.kernel.org/io-uring/fb6a7599-8a9b-15e5-9b64-6cd9d01c6ff4@gmail.com/T/#m2b0d9df94ce43b0e69e6c089bdff0ce6babbdfaa > Cc: Pavel Begunkov <asml.silence@gmail.com> > Signed-off-by: Stefan Metzmacher <metze@samba.org> > --- > include/uapi/linux/io_uring.h | 18 ++++++++++++++++++ > io_uring/net.c | 6 +++++- > io_uring/notif.c | 12 ++++++++++++ > io_uring/notif.h | 3 +++ > 4 files changed, 38 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h > index ab7458033ee3..423f98781a20 100644 > --- a/include/uapi/linux/io_uring.h > +++ b/include/uapi/linux/io_uring.h > @@ -296,10 +296,28 @@ enum io_uring_op { > * > * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in > * the buf_index field. > + * > + * IORING_SEND_ZC_REPORT_USAGE > + * If set, SEND[MSG]_ZC should report > + * the zerocopy usage in cqe.res > + * for the IORING_CQE_F_NOTIF cqe. > + * 0 is reported if zerocopy was actually possible. > + * IORING_NOTIF_USAGE_ZC_COPIED if data was copied > + * (at least partially). > */ > #define IORING_RECVSEND_POLL_FIRST (1U << 0) > #define IORING_RECV_MULTISHOT (1U << 1) > #define IORING_RECVSEND_FIXED_BUF (1U << 2) > +#define IORING_SEND_ZC_REPORT_USAGE (1U << 3) > + > +/* > + * cqe.res for IORING_CQE_F_NOTIF if > + * IORING_SEND_ZC_REPORT_USAGE was requested > + * > + * It should be treated as a flag, all other > + * bits of cqe.res should be treated as reserved! 
> + */ > +#define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31) > > /* > * accept flags stored in sqe->ioprio > diff --git a/io_uring/net.c b/io_uring/net.c > index 15dea91625e2..0a8cdc5ae7af 100644 > --- a/io_uring/net.c > +++ b/io_uring/net.c > @@ -939,7 +939,8 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) > > zc->flags = READ_ONCE(sqe->ioprio); > if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | > - IORING_RECVSEND_FIXED_BUF)) > + IORING_RECVSEND_FIXED_BUF | > + IORING_SEND_ZC_REPORT_USAGE)) > return -EINVAL; > notif = zc->notif = io_alloc_notif(ctx); > if (!notif) > @@ -957,6 +958,9 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) > req->imu = READ_ONCE(ctx->user_bufs[idx]); > io_req_set_rsrc_node(notif, ctx, 0); > } > + if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) { > + io_notif_to_data(notif)->zc_report = true; > + } > > if (req->opcode == IORING_OP_SEND_ZC) { > if (READ_ONCE(sqe->__pad3[0])) > diff --git a/io_uring/notif.c b/io_uring/notif.c > index e37c6569d82e..4bfef10161fa 100644 > --- a/io_uring/notif.c > +++ b/io_uring/notif.c > @@ -18,6 +18,10 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked) > __io_unaccount_mem(ctx->user, nd->account_pages); > nd->account_pages = 0; > } > + > + if (nd->zc_report && (nd->zc_copied || !nd->zc_used)) > + notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED; > + > io_req_task_complete(notif, locked); > } > > @@ -28,6 +32,13 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb, > struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg); > struct io_kiocb *notif = cmd_to_io_kiocb(nd); > > + if (nd->zc_report) { > + if (success && !nd->zc_used && skb) > + WRITE_ONCE(nd->zc_used, true); > + else if (!success && !nd->zc_copied) > + WRITE_ONCE(nd->zc_copied, true); > + } > + > if (refcount_dec_and_test(&uarg->refcnt)) { > notif->io_task_work.func = __io_notif_complete_tw; > io_req_task_work_add(notif); > @@ -55,6 +66,7 @@ struct 
io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx) > nd->account_pages = 0; > nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; > nd->uarg.callback = io_uring_tx_zerocopy_callback; > + nd->zc_report = nd->zc_used = nd->zc_copied = false; > refcount_set(&nd->uarg.refcnt, 1); > return notif; > } > diff --git a/io_uring/notif.h b/io_uring/notif.h > index 5b4d710c8ca5..4ae696273c78 100644 > --- a/io_uring/notif.h > +++ b/io_uring/notif.h > @@ -13,6 +13,9 @@ struct io_notif_data { > struct file *file; > struct ubuf_info uarg; > unsigned long account_pages; > + bool zc_report; > + bool zc_used; > + bool zc_copied; > }; > > void io_notif_flush(struct io_kiocb *notif);
On 10/27/22 19:34, Stefan Metzmacher wrote: > It might be useful for applications to detect if a zero copy > transfer with SEND[MSG]_ZC was actually possible or not. > The application can fallback to plain SEND[MSG] in order > to avoid the overhead of two cqes per request. > Or it can generate a log message that could indicate > to an administrator that no zero copy was possible > and could explain degraded performance. Looks good, Reviewed-by: Pavel Begunkov <asml.silence@gmail.com> > Link: https://lore.kernel.org/io-uring/fb6a7599-8a9b-15e5-9b64-6cd9d01c6ff4@gmail.com/T/#m2b0d9df94ce43b0e69e6c089bdff0ce6babbdfaa > Cc: Pavel Begunkov <asml.silence@gmail.com> > Signed-off-by: Stefan Metzmacher <metze@samba.org> > --- > include/uapi/linux/io_uring.h | 18 ++++++++++++++++++ > io_uring/net.c | 6 +++++- > io_uring/notif.c | 12 ++++++++++++ > io_uring/notif.h | 3 +++ > 4 files changed, 38 insertions(+), 1 deletion(-) > > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h > index ab7458033ee3..423f98781a20 100644 > --- a/include/uapi/linux/io_uring.h > +++ b/include/uapi/linux/io_uring.h > @@ -296,10 +296,28 @@ enum io_uring_op { > * > * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in > * the buf_index field. > + * > + * IORING_SEND_ZC_REPORT_USAGE > + * If set, SEND[MSG]_ZC should report > + * the zerocopy usage in cqe.res > + * for the IORING_CQE_F_NOTIF cqe. > + * 0 is reported if zerocopy was actually possible. > + * IORING_NOTIF_USAGE_ZC_COPIED if data was copied > + * (at least partially). > */ > #define IORING_RECVSEND_POLL_FIRST (1U << 0) > #define IORING_RECV_MULTISHOT (1U << 1) > #define IORING_RECVSEND_FIXED_BUF (1U << 2) > +#define IORING_SEND_ZC_REPORT_USAGE (1U << 3) > + > +/* > + * cqe.res for IORING_CQE_F_NOTIF if > + * IORING_SEND_ZC_REPORT_USAGE was requested > + * > + * It should be treated as a flag, all other > + * bits of cqe.res should be treated as reserved! 
> + */ > +#define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31) > > /* > * accept flags stored in sqe->ioprio > diff --git a/io_uring/net.c b/io_uring/net.c > index 15dea91625e2..0a8cdc5ae7af 100644 > --- a/io_uring/net.c > +++ b/io_uring/net.c > @@ -939,7 +939,8 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) > > zc->flags = READ_ONCE(sqe->ioprio); > if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | > - IORING_RECVSEND_FIXED_BUF)) > + IORING_RECVSEND_FIXED_BUF | > + IORING_SEND_ZC_REPORT_USAGE)) > return -EINVAL; > notif = zc->notif = io_alloc_notif(ctx); > if (!notif) > @@ -957,6 +958,9 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) > req->imu = READ_ONCE(ctx->user_bufs[idx]); > io_req_set_rsrc_node(notif, ctx, 0); > } > + if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) { > + io_notif_to_data(notif)->zc_report = true; > + } > > if (req->opcode == IORING_OP_SEND_ZC) { > if (READ_ONCE(sqe->__pad3[0])) > diff --git a/io_uring/notif.c b/io_uring/notif.c > index e37c6569d82e..4bfef10161fa 100644 > --- a/io_uring/notif.c > +++ b/io_uring/notif.c > @@ -18,6 +18,10 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked) > __io_unaccount_mem(ctx->user, nd->account_pages); > nd->account_pages = 0; > } > + > + if (nd->zc_report && (nd->zc_copied || !nd->zc_used)) > + notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED; > + > io_req_task_complete(notif, locked); > } > > @@ -28,6 +32,13 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb, > struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg); > struct io_kiocb *notif = cmd_to_io_kiocb(nd); > > + if (nd->zc_report) { > + if (success && !nd->zc_used && skb) > + WRITE_ONCE(nd->zc_used, true); > + else if (!success && !nd->zc_copied) > + WRITE_ONCE(nd->zc_copied, true); > + } > + > if (refcount_dec_and_test(&uarg->refcnt)) { > notif->io_task_work.func = __io_notif_complete_tw; > io_req_task_work_add(notif); > @@ -55,6 +66,7 @@ struct 
io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx) > nd->account_pages = 0; > nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; > nd->uarg.callback = io_uring_tx_zerocopy_callback; > + nd->zc_report = nd->zc_used = nd->zc_copied = false; > refcount_set(&nd->uarg.refcnt, 1); > return notif; > } > diff --git a/io_uring/notif.h b/io_uring/notif.h > index 5b4d710c8ca5..4ae696273c78 100644 > --- a/io_uring/notif.h > +++ b/io_uring/notif.h > @@ -13,6 +13,9 @@ struct io_notif_data { > struct file *file; > struct ubuf_info uarg; > unsigned long account_pages; > + bool zc_report; > + bool zc_used; > + bool zc_copied; > }; > > void io_notif_flush(struct io_kiocb *notif);
On Thu, 27 Oct 2022 20:34:45 +0200, Stefan Metzmacher wrote:
> It might be useful for applications to detect if a zero copy
> transfer with SEND[MSG]_ZC was actually possible or not.
> The application can fallback to plain SEND[MSG] in order
> to avoid the overhead of two cqes per request.
> Or it can generate a log message that could indicate
> to an administrator that no zero copy was possible
> and could explain degraded performance.
>
> [...]

Applied, thanks!

[1/1] io_uring/net: introduce IORING_SEND_ZC_REPORT_USAGE flag
      commit: 4847a0eae62976ac27f192cd59b9de72b390eff3

Best regards,
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ab7458033ee3..423f98781a20 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -296,10 +296,28 @@ enum io_uring_op { * * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in * the buf_index field. + * + * IORING_SEND_ZC_REPORT_USAGE + * If set, SEND[MSG]_ZC should report + * the zerocopy usage in cqe.res + * for the IORING_CQE_F_NOTIF cqe. + * 0 is reported if zerocopy was actually possible. + * IORING_NOTIF_USAGE_ZC_COPIED if data was copied + * (at least partially). */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) #define IORING_RECVSEND_FIXED_BUF (1U << 2) +#define IORING_SEND_ZC_REPORT_USAGE (1U << 3) + +/* + * cqe.res for IORING_CQE_F_NOTIF if + * IORING_SEND_ZC_REPORT_USAGE was requested + * + * It should be treated as a flag, all other + * bits of cqe.res should be treated as reserved! + */ +#define IORING_NOTIF_USAGE_ZC_COPIED (1U << 31) /* * accept flags stored in sqe->ioprio diff --git a/io_uring/net.c b/io_uring/net.c index 15dea91625e2..0a8cdc5ae7af 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -939,7 +939,8 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) zc->flags = READ_ONCE(sqe->ioprio); if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | - IORING_RECVSEND_FIXED_BUF)) + IORING_RECVSEND_FIXED_BUF | + IORING_SEND_ZC_REPORT_USAGE)) return -EINVAL; notif = zc->notif = io_alloc_notif(ctx); if (!notif) @@ -957,6 +958,9 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) req->imu = READ_ONCE(ctx->user_bufs[idx]); io_req_set_rsrc_node(notif, ctx, 0); } + if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) { + io_notif_to_data(notif)->zc_report = true; + } if (req->opcode == IORING_OP_SEND_ZC) { if (READ_ONCE(sqe->__pad3[0])) diff --git a/io_uring/notif.c b/io_uring/notif.c index e37c6569d82e..4bfef10161fa 100644 --- a/io_uring/notif.c +++ 
b/io_uring/notif.c @@ -18,6 +18,10 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked) __io_unaccount_mem(ctx->user, nd->account_pages); nd->account_pages = 0; } + + if (nd->zc_report && (nd->zc_copied || !nd->zc_used)) + notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED; + io_req_task_complete(notif, locked); } @@ -28,6 +32,13 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb, struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg); struct io_kiocb *notif = cmd_to_io_kiocb(nd); + if (nd->zc_report) { + if (success && !nd->zc_used && skb) + WRITE_ONCE(nd->zc_used, true); + else if (!success && !nd->zc_copied) + WRITE_ONCE(nd->zc_copied, true); + } + if (refcount_dec_and_test(&uarg->refcnt)) { notif->io_task_work.func = __io_notif_complete_tw; io_req_task_work_add(notif); @@ -55,6 +66,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx) nd->account_pages = 0; nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; nd->uarg.callback = io_uring_tx_zerocopy_callback; + nd->zc_report = nd->zc_used = nd->zc_copied = false; refcount_set(&nd->uarg.refcnt, 1); return notif; } diff --git a/io_uring/notif.h b/io_uring/notif.h index 5b4d710c8ca5..4ae696273c78 100644 --- a/io_uring/notif.h +++ b/io_uring/notif.h @@ -13,6 +13,9 @@ struct io_notif_data { struct file *file; struct ubuf_info uarg; unsigned long account_pages; + bool zc_report; + bool zc_used; + bool zc_copied; }; void io_notif_flush(struct io_kiocb *notif);
It might be useful for applications to detect if a zero copy
transfer with SEND[MSG]_ZC was actually possible or not.
The application can fall back to plain SEND[MSG] in order
to avoid the overhead of two cqes per request.
Or it can generate a log message that could indicate
to an administrator that no zero copy was possible
and could explain degraded performance.

Link: https://lore.kernel.org/io-uring/fb6a7599-8a9b-15e5-9b64-6cd9d01c6ff4@gmail.com/T/#m2b0d9df94ce43b0e69e6c089bdff0ce6babbdfaa
Cc: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Stefan Metzmacher <metze@samba.org>
---
 include/uapi/linux/io_uring.h | 18 ++++++++++++++++++
 io_uring/net.c                |  6 +++++-
 io_uring/notif.c              | 12 ++++++++++++
 io_uring/notif.h              |  3 +++
 4 files changed, 38 insertions(+), 1 deletion(-)