Message ID | 20220819103021.240340-3-joshi.k@samsung.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fixed-buffer for uring-cmd/passthrough | expand |
On 8/19/22 11:30, Kanchan Joshi wrote: > From: Anuj Gupta <anuj20.g@samsung.com> > > Add IORING_OP_URING_CMD_FIXED opcode that enables sending io_uring > command with previously registered buffers. User-space passes the buffer > index in sqe->buf_index, same as done in read/write variants that uses > fixed buffers. > > Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> > Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> > --- > include/linux/io_uring.h | 5 ++++- > include/uapi/linux/io_uring.h | 1 + > io_uring/opdef.c | 10 ++++++++++ > io_uring/rw.c | 3 ++- > io_uring/uring_cmd.c | 18 +++++++++++++++++- > 5 files changed, 34 insertions(+), 3 deletions(-) > > diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h > index 60aba10468fc..40961d7c3827 100644 > --- a/include/linux/io_uring.h > +++ b/include/linux/io_uring.h > @@ -5,6 +5,8 @@ > #include <linux/sched.h> > #include <linux/xarray.h> > > +#include<uapi/linux/io_uring.h> > + > enum io_uring_cmd_flags { > IO_URING_F_COMPLETE_DEFER = 1, > IO_URING_F_UNLOCKED = 2, > @@ -15,6 +17,7 @@ enum io_uring_cmd_flags { > IO_URING_F_SQE128 = 4, > IO_URING_F_CQE32 = 8, > IO_URING_F_IOPOLL = 16, > + IO_URING_F_FIXEDBUFS = 32, > }; > > struct io_uring_cmd { > @@ -33,7 +36,7 @@ struct io_uring_cmd { > > #if defined(CONFIG_IO_URING) > int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, > - struct iov_iter *iter, void *ioucmd) > + struct iov_iter *iter, void *ioucmd); Please try to compile the first patch separately > void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); > void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, > void (*task_work_cb)(struct io_uring_cmd *)); > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h > index 1463cfecb56b..80ea35d1ed5c 100644 > --- a/include/uapi/linux/io_uring.h > +++ b/include/uapi/linux/io_uring.h > @@ -203,6 +203,7 @@ enum io_uring_op { > IORING_OP_SOCKET, > IORING_OP_URING_CMD, > IORING_OP_SENDZC_NOTIF, > + IORING_OP_URING_CMD_FIXED, I don't think it should be another opcode, is there any control flags we can fit it in? > /* this goes last, obviously */ > IORING_OP_LAST, > diff --git a/io_uring/opdef.c b/io_uring/opdef.c > index 9a0df19306fe..7d5731b84c92 100644 > --- a/io_uring/opdef.c > +++ b/io_uring/opdef.c > @@ -472,6 +472,16 @@ const struct io_op_def io_op_defs[] = { > .issue = io_uring_cmd, > .prep_async = io_uring_cmd_prep_async, > }, > + [IORING_OP_URING_CMD_FIXED] = { > + .needs_file = 1, > + .plug = 1, > + .name = "URING_CMD_FIXED", > + .iopoll = 1, > + .async_size = uring_cmd_pdu_size(1), > + .prep = io_uring_cmd_prep, > + .issue = io_uring_cmd, > + .prep_async = io_uring_cmd_prep_async, > + }, > [IORING_OP_SENDZC_NOTIF] = { > .name = "SENDZC_NOTIF", > .needs_file = 1, > diff --git a/io_uring/rw.c b/io_uring/rw.c > index 1a4fb8a44b9a..3c7b94bffa62 100644 > --- a/io_uring/rw.c > +++ b/io_uring/rw.c > @@ -1005,7 +1005,8 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) > if (READ_ONCE(req->iopoll_completed)) > break; > > - if (req->opcode == IORING_OP_URING_CMD) { > + if (req->opcode == IORING_OP_URING_CMD || > + req->opcode == IORING_OP_URING_CMD_FIXED) { I don't see the changed chunk upstream > struct io_uring_cmd *ioucmd = (struct io_uring_cmd *)rw; > > ret = req->file->f_op->uring_cmd_iopoll(ioucmd); [...]
On Mon, Aug 22, 2022 at 11:58:24AM +0100, Pavel Begunkov wrote: >On 8/19/22 11:30, Kanchan Joshi wrote: >>From: Anuj Gupta <anuj20.g@samsung.com> >> >>Add IORING_OP_URING_CMD_FIXED opcode that enables sending io_uring >>command with previously registered buffers. User-space passes the buffer >>index in sqe->buf_index, same as done in read/write variants that uses >>fixed buffers. >> >>Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> >>Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> >>--- >> include/linux/io_uring.h | 5 ++++- >> include/uapi/linux/io_uring.h | 1 + >> io_uring/opdef.c | 10 ++++++++++ >> io_uring/rw.c | 3 ++- >> io_uring/uring_cmd.c | 18 +++++++++++++++++- >> 5 files changed, 34 insertions(+), 3 deletions(-) >> >>diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h >>index 60aba10468fc..40961d7c3827 100644 >>--- a/include/linux/io_uring.h >>+++ b/include/linux/io_uring.h >>@@ -5,6 +5,8 @@ >> #include <linux/sched.h> >> #include <linux/xarray.h> >>+#include<uapi/linux/io_uring.h> >>+ >> enum io_uring_cmd_flags { >> IO_URING_F_COMPLETE_DEFER = 1, >> IO_URING_F_UNLOCKED = 2, >>@@ -15,6 +17,7 @@ enum io_uring_cmd_flags { >> IO_URING_F_SQE128 = 4, >> IO_URING_F_CQE32 = 8, >> IO_URING_F_IOPOLL = 16, >>+ IO_URING_F_FIXEDBUFS = 32, >> }; >> struct io_uring_cmd { >>@@ -33,7 +36,7 @@ struct io_uring_cmd { >> #if defined(CONFIG_IO_URING) >> int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, >>- struct iov_iter *iter, void *ioucmd) >>+ struct iov_iter *iter, void *ioucmd); > >Please try to compile the first patch separately Indeed, this should have been part of that patch. Thanks. >> void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); >> void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, >> void (*task_work_cb)(struct io_uring_cmd *)); >>diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >>index 1463cfecb56b..80ea35d1ed5c 100644 >>--- a/include/uapi/linux/io_uring.h >>+++ b/include/uapi/linux/io_uring.h >>@@ -203,6 +203,7 @@ enum io_uring_op { >> IORING_OP_SOCKET, >> IORING_OP_URING_CMD, >> IORING_OP_SENDZC_NOTIF, >>+ IORING_OP_URING_CMD_FIXED, > >I don't think it should be another opcode, is there any >control flags we can fit it in? using sqe->rw_flags could be another way. But I think that may create bit of disharmony in user-space. Current choice (IORING_OP_URING_CMD_FIXED) is along the same lines as IORING_OP_READ/WRITE_FIXED. User-space uses new opcode, and sends the buffer by filling sqe->buf_index. So must we take a different way? >> /* this goes last, obviously */ >> IORING_OP_LAST, >>diff --git a/io_uring/opdef.c b/io_uring/opdef.c >>index 9a0df19306fe..7d5731b84c92 100644 >>--- a/io_uring/opdef.c >>+++ b/io_uring/opdef.c >>@@ -472,6 +472,16 @@ const struct io_op_def io_op_defs[] = { >> .issue = io_uring_cmd, >> .prep_async = io_uring_cmd_prep_async, >> }, >>+ [IORING_OP_URING_CMD_FIXED] = { >>+ .needs_file = 1, >>+ .plug = 1, >>+ .name = "URING_CMD_FIXED", >>+ .iopoll = 1, >>+ .async_size = uring_cmd_pdu_size(1), >>+ .prep = io_uring_cmd_prep, >>+ .issue = io_uring_cmd, >>+ .prep_async = io_uring_cmd_prep_async, >>+ }, >> [IORING_OP_SENDZC_NOTIF] = { >> .name = "SENDZC_NOTIF", >> .needs_file = 1, >>diff --git a/io_uring/rw.c b/io_uring/rw.c >>index 1a4fb8a44b9a..3c7b94bffa62 100644 >>--- a/io_uring/rw.c >>+++ b/io_uring/rw.c >>@@ -1005,7 +1005,8 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) >> if (READ_ONCE(req->iopoll_completed)) >> break; >>- if (req->opcode == IORING_OP_URING_CMD) { >>+ if (req->opcode == IORING_OP_URING_CMD || >>+ req->opcode == IORING_OP_URING_CMD_FIXED) { > >I don't see the changed chunk upstream Right, it is on top of iopoll support (plus one more series mentioned in covered letter). Here is the link - https://lore.kernel.org/linux-block/20220807183607.352351-1-joshi.k@samsung.com/ It would be great if you could review that.
On 8/22/22 12:33, Kanchan Joshi wrote: > On Mon, Aug 22, 2022 at 11:58:24AM +0100, Pavel Begunkov wrote: [...] >>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >>> index 1463cfecb56b..80ea35d1ed5c 100644 >>> --- a/include/uapi/linux/io_uring.h >>> +++ b/include/uapi/linux/io_uring.h >>> @@ -203,6 +203,7 @@ enum io_uring_op { >>> IORING_OP_SOCKET, >>> IORING_OP_URING_CMD, >>> IORING_OP_SENDZC_NOTIF, >>> + IORING_OP_URING_CMD_FIXED, >> >> I don't think it should be another opcode, is there any >> control flags we can fit it in? > > using sqe->rw_flags could be another way. We also use ->ioprio for io_uring opcode specific flags, e.g. like in io_sendmsg_prep() for IORING_RECVSEND_POLL_FIRST, might be even better better. > But I think that may create bit of disharmony in user-space. > Current choice (IORING_OP_URING_CMD_FIXED) is along the same lines as > IORING_OP_READ/WRITE_FIXED. And I still believe it was a bad choice, I don't like this encoding of independent options/features by linearising toggles into opcodes. A consistent way to add vectored fixed bufs would be to have a 4th opcode, e.g. READV_FIXED, which is not great. > User-space uses new opcode, and sends the > buffer by filling sqe->buf_index. So must we take a different way? I do think so >>> /* this goes last, obviously */ >>> IORING_OP_LAST, >>> diff --git a/io_uring/opdef.c b/io_uring/opdef.c >>> index 9a0df19306fe..7d5731b84c92 100644 >>> --- a/io_uring/opdef.c >>> +++ b/io_uring/opdef.c >>> @@ -472,6 +472,16 @@ const struct io_op_def io_op_defs[] = { >>> .issue = io_uring_cmd, >>> .prep_async = io_uring_cmd_prep_async, >>> }, >>> + [IORING_OP_URING_CMD_FIXED] = { >>> + .needs_file = 1, >>> + .plug = 1, >>> + .name = "URING_CMD_FIXED", >>> + .iopoll = 1, >>> + .async_size = uring_cmd_pdu_size(1), >>> + .prep = io_uring_cmd_prep, >>> + .issue = io_uring_cmd, >>> + .prep_async = io_uring_cmd_prep_async, >>> + }, >>> [IORING_OP_SENDZC_NOTIF] = { >>> .name = "SENDZC_NOTIF", >>> .needs_file = 1, >>> diff --git a/io_uring/rw.c b/io_uring/rw.c >>> index 1a4fb8a44b9a..3c7b94bffa62 100644 >>> --- a/io_uring/rw.c >>> +++ b/io_uring/rw.c >>> @@ -1005,7 +1005,8 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) >>> if (READ_ONCE(req->iopoll_completed)) >>> break; >>> - if (req->opcode == IORING_OP_URING_CMD) { >>> + if (req->opcode == IORING_OP_URING_CMD || >>> + req->opcode == IORING_OP_URING_CMD_FIXED) { >> >> I don't see the changed chunk upstream > > Right, it is on top of iopoll support (plus one more series mentioned in > covered letter). Here is the link - https://lore.kernel.org/linux-block/20220807183607.352351-1-joshi.k@samsung.com/ > It would be great if you could review that. >
On Thu, Aug 25, 2022 at 10:34:11AM +0100, Pavel Begunkov wrote: >On 8/22/22 12:33, Kanchan Joshi wrote: >>On Mon, Aug 22, 2022 at 11:58:24AM +0100, Pavel Begunkov wrote: >[...] >>>>diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >>>>index 1463cfecb56b..80ea35d1ed5c 100644 >>>>--- a/include/uapi/linux/io_uring.h >>>>+++ b/include/uapi/linux/io_uring.h >>>>@@ -203,6 +203,7 @@ enum io_uring_op { >>>> IORING_OP_SOCKET, >>>> IORING_OP_URING_CMD, >>>> IORING_OP_SENDZC_NOTIF, >>>>+ IORING_OP_URING_CMD_FIXED, >>> >>>I don't think it should be another opcode, is there any >>>control flags we can fit it in? >> >>using sqe->rw_flags could be another way. > >We also use ->ioprio for io_uring opcode specific flags, >e.g. like in io_sendmsg_prep() for IORING_RECVSEND_POLL_FIRST, >might be even better better. > >>But I think that may create bit of disharmony in user-space. >>Current choice (IORING_OP_URING_CMD_FIXED) is along the same lines as >>IORING_OP_READ/WRITE_FIXED. > >And I still believe it was a bad choice, I don't like this encoding >of independent options/features by linearising toggles into opcodes. >A consistent way to add vectored fixed bufs would be to have a 4th >opcode, e.g. READV_FIXED, which is not great. > >>User-space uses new opcode, and sends the >>buffer by filling sqe->buf_index. So must we take a different way? > >I do think so I see. Will change this in next iteration.
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 60aba10468fc..40961d7c3827 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -5,6 +5,8 @@ #include <linux/sched.h> #include <linux/xarray.h> +#include<uapi/linux/io_uring.h> + enum io_uring_cmd_flags { IO_URING_F_COMPLETE_DEFER = 1, IO_URING_F_UNLOCKED = 2, @@ -15,6 +17,7 @@ enum io_uring_cmd_flags { IO_URING_F_SQE128 = 4, IO_URING_F_CQE32 = 8, IO_URING_F_IOPOLL = 16, + IO_URING_F_FIXEDBUFS = 32, }; struct io_uring_cmd { @@ -33,7 +36,7 @@ struct io_uring_cmd { #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, - struct iov_iter *iter, void *ioucmd) + struct iov_iter *iter, void *ioucmd); void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, void (*task_work_cb)(struct io_uring_cmd *)); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 1463cfecb56b..80ea35d1ed5c 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -203,6 +203,7 @@ enum io_uring_op { IORING_OP_SOCKET, IORING_OP_URING_CMD, IORING_OP_SENDZC_NOTIF, + IORING_OP_URING_CMD_FIXED, /* this goes last, obviously */ IORING_OP_LAST, diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 9a0df19306fe..7d5731b84c92 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -472,6 +472,16 @@ const struct io_op_def io_op_defs[] = { .issue = io_uring_cmd, .prep_async = io_uring_cmd_prep_async, }, + [IORING_OP_URING_CMD_FIXED] = { + .needs_file = 1, + .plug = 1, + .name = "URING_CMD_FIXED", + .iopoll = 1, + .async_size = uring_cmd_pdu_size(1), + .prep = io_uring_cmd_prep, + .issue = io_uring_cmd, + .prep_async = io_uring_cmd_prep_async, + }, [IORING_OP_SENDZC_NOTIF] = { .name = "SENDZC_NOTIF", .needs_file = 1, diff --git a/io_uring/rw.c b/io_uring/rw.c index 1a4fb8a44b9a..3c7b94bffa62 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -1005,7 +1005,8 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) if (READ_ONCE(req->iopoll_completed)) break; - if (req->opcode == IORING_OP_URING_CMD) { + if (req->opcode == IORING_OP_URING_CMD || + req->opcode == IORING_OP_URING_CMD_FIXED) { struct io_uring_cmd *ioucmd = (struct io_uring_cmd *)rw; ret = req->file->f_op->uring_cmd_iopoll(ioucmd); diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index ff65cc8ab6cc..9383150b2949 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -3,11 +3,13 @@ #include <linux/errno.h> #include <linux/file.h> #include <linux/io_uring.h> +#include <linux/nospec.h> #include <uapi/linux/io_uring.h> #include "io_uring.h" #include "uring_cmd.h" +#include "rsrc.h" static void io_uring_cmd_work(struct io_kiocb *req, bool *locked) { @@ -74,6 +76,18 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (sqe->rw_flags || sqe->__pad1) return -EINVAL; + + req->buf_index = READ_ONCE(sqe->buf_index); + if (req->opcode == IORING_OP_URING_CMD_FIXED) { + struct io_ring_ctx *ctx = req->ctx; + u16 index; + + if (unlikely(req->buf_index >= ctx->nr_user_bufs)) + return -EFAULT; + index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); + req->imu = ctx->user_bufs[index]; + io_req_set_rsrc_node(req, ctx, 0); + } ioucmd->cmd = sqe->cmd; ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); return 0; @@ -98,6 +112,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) req->iopoll_completed = 0; WRITE_ONCE(ioucmd->cookie, NULL); } + if (req->opcode == IORING_OP_URING_CMD_FIXED) + issue_flags |= IO_URING_F_FIXEDBUFS; if (req_has_async_data(req)) ioucmd->cmd = req->async_data; @@ -125,7 +141,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd) { - struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd); + struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); struct io_mapped_ubuf *imu = req->imu; return io_import_fixed(rw, iter, imu, ubuf, len);