diff mbox series

[for-next,2/4] io_uring: introduce fixed buffer support for io_uring_cmd

Message ID 20220819103021.240340-3-joshi.k@samsung.com (mailing list archive)
State New
Headers show
Series fixed-buffer for uring-cmd/passthrough | expand

Commit Message

Kanchan Joshi Aug. 19, 2022, 10:30 a.m. UTC
From: Anuj Gupta <anuj20.g@samsung.com>

Add IORING_OP_URING_CMD_FIXED opcode that enables sending io_uring
command with previously registered buffers. User-space passes the buffer
index in sqe->buf_index, same as done in read/write variants that uses
fixed buffers.

Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
---
 include/linux/io_uring.h      |  5 ++++-
 include/uapi/linux/io_uring.h |  1 +
 io_uring/opdef.c              | 10 ++++++++++
 io_uring/rw.c                 |  3 ++-
 io_uring/uring_cmd.c          | 18 +++++++++++++++++-
 5 files changed, 34 insertions(+), 3 deletions(-)

Comments

Pavel Begunkov Aug. 22, 2022, 10:58 a.m. UTC | #1
On 8/19/22 11:30, Kanchan Joshi wrote:
> From: Anuj Gupta <anuj20.g@samsung.com>
> 
> Add IORING_OP_URING_CMD_FIXED opcode that enables sending io_uring
> command with previously registered buffers. User-space passes the buffer
> index in sqe->buf_index, same as done in read/write variants that uses
> fixed buffers.
> 
> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
> ---
>   include/linux/io_uring.h      |  5 ++++-
>   include/uapi/linux/io_uring.h |  1 +
>   io_uring/opdef.c              | 10 ++++++++++
>   io_uring/rw.c                 |  3 ++-
>   io_uring/uring_cmd.c          | 18 +++++++++++++++++-
>   5 files changed, 34 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
> index 60aba10468fc..40961d7c3827 100644
> --- a/include/linux/io_uring.h
> +++ b/include/linux/io_uring.h
> @@ -5,6 +5,8 @@
>   #include <linux/sched.h>
>   #include <linux/xarray.h>
>   
> +#include<uapi/linux/io_uring.h>
> +
>   enum io_uring_cmd_flags {
>   	IO_URING_F_COMPLETE_DEFER	= 1,
>   	IO_URING_F_UNLOCKED		= 2,
> @@ -15,6 +17,7 @@ enum io_uring_cmd_flags {
>   	IO_URING_F_SQE128		= 4,
>   	IO_URING_F_CQE32		= 8,
>   	IO_URING_F_IOPOLL		= 16,
> +	IO_URING_F_FIXEDBUFS		= 32,
>   };
>   
>   struct io_uring_cmd {
> @@ -33,7 +36,7 @@ struct io_uring_cmd {
>   
>   #if defined(CONFIG_IO_URING)
>   int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
> -		struct iov_iter *iter, void *ioucmd)
> +		struct iov_iter *iter, void *ioucmd);

Please try to compile the first patch separately

>   void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
>   void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
>   			void (*task_work_cb)(struct io_uring_cmd *));
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 1463cfecb56b..80ea35d1ed5c 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -203,6 +203,7 @@ enum io_uring_op {
>   	IORING_OP_SOCKET,
>   	IORING_OP_URING_CMD,
>   	IORING_OP_SENDZC_NOTIF,
> +	IORING_OP_URING_CMD_FIXED,

I don't think it should be another opcode, is there any
control flags we can fit it in?


>   	/* this goes last, obviously */
>   	IORING_OP_LAST,
> diff --git a/io_uring/opdef.c b/io_uring/opdef.c
> index 9a0df19306fe..7d5731b84c92 100644
> --- a/io_uring/opdef.c
> +++ b/io_uring/opdef.c
> @@ -472,6 +472,16 @@ const struct io_op_def io_op_defs[] = {
>   		.issue			= io_uring_cmd,
>   		.prep_async		= io_uring_cmd_prep_async,
>   	},
> +	[IORING_OP_URING_CMD_FIXED] = {
> +		.needs_file		= 1,
> +		.plug			= 1,
> +		.name			= "URING_CMD_FIXED",
> +		.iopoll			= 1,
> +		.async_size		= uring_cmd_pdu_size(1),
> +		.prep			= io_uring_cmd_prep,
> +		.issue			= io_uring_cmd,
> +		.prep_async		= io_uring_cmd_prep_async,
> +	},
>   	[IORING_OP_SENDZC_NOTIF] = {
>   		.name			= "SENDZC_NOTIF",
>   		.needs_file		= 1,
> diff --git a/io_uring/rw.c b/io_uring/rw.c
> index 1a4fb8a44b9a..3c7b94bffa62 100644
> --- a/io_uring/rw.c
> +++ b/io_uring/rw.c
> @@ -1005,7 +1005,8 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
>   		if (READ_ONCE(req->iopoll_completed))
>   			break;
>   
> -		if (req->opcode == IORING_OP_URING_CMD) {
> +		if (req->opcode == IORING_OP_URING_CMD ||
> +				req->opcode == IORING_OP_URING_CMD_FIXED) {

I don't see the changed chunk upstream

>   			struct io_uring_cmd *ioucmd = (struct io_uring_cmd *)rw;
>   
>   			ret = req->file->f_op->uring_cmd_iopoll(ioucmd);
[...]
Kanchan Joshi Aug. 22, 2022, 11:33 a.m. UTC | #2
On Mon, Aug 22, 2022 at 11:58:24AM +0100, Pavel Begunkov wrote:
>On 8/19/22 11:30, Kanchan Joshi wrote:
>>From: Anuj Gupta <anuj20.g@samsung.com>
>>
>>Add IORING_OP_URING_CMD_FIXED opcode that enables sending io_uring
>>command with previously registered buffers. User-space passes the buffer
>>index in sqe->buf_index, same as done in read/write variants that uses
>>fixed buffers.
>>
>>Signed-off-by: Anuj Gupta <anuj20.g@samsung.com>
>>Signed-off-by: Kanchan Joshi <joshi.k@samsung.com>
>>---
>>  include/linux/io_uring.h      |  5 ++++-
>>  include/uapi/linux/io_uring.h |  1 +
>>  io_uring/opdef.c              | 10 ++++++++++
>>  io_uring/rw.c                 |  3 ++-
>>  io_uring/uring_cmd.c          | 18 +++++++++++++++++-
>>  5 files changed, 34 insertions(+), 3 deletions(-)
>>
>>diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
>>index 60aba10468fc..40961d7c3827 100644
>>--- a/include/linux/io_uring.h
>>+++ b/include/linux/io_uring.h
>>@@ -5,6 +5,8 @@
>>  #include <linux/sched.h>
>>  #include <linux/xarray.h>
>>+#include<uapi/linux/io_uring.h>
>>+
>>  enum io_uring_cmd_flags {
>>  	IO_URING_F_COMPLETE_DEFER	= 1,
>>  	IO_URING_F_UNLOCKED		= 2,
>>@@ -15,6 +17,7 @@ enum io_uring_cmd_flags {
>>  	IO_URING_F_SQE128		= 4,
>>  	IO_URING_F_CQE32		= 8,
>>  	IO_URING_F_IOPOLL		= 16,
>>+	IO_URING_F_FIXEDBUFS		= 32,
>>  };
>>  struct io_uring_cmd {
>>@@ -33,7 +36,7 @@ struct io_uring_cmd {
>>  #if defined(CONFIG_IO_URING)
>>  int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
>>-		struct iov_iter *iter, void *ioucmd)
>>+		struct iov_iter *iter, void *ioucmd);
>
>Please try to compile the first patch separately

Indeed, this should have been part of that patch. Thanks.

>>  void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
>>  void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
>>  			void (*task_work_cb)(struct io_uring_cmd *));
>>diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>>index 1463cfecb56b..80ea35d1ed5c 100644
>>--- a/include/uapi/linux/io_uring.h
>>+++ b/include/uapi/linux/io_uring.h
>>@@ -203,6 +203,7 @@ enum io_uring_op {
>>  	IORING_OP_SOCKET,
>>  	IORING_OP_URING_CMD,
>>  	IORING_OP_SENDZC_NOTIF,
>>+	IORING_OP_URING_CMD_FIXED,
>
>I don't think it should be another opcode, is there any
>control flags we can fit it in?

using sqe->rw_flags could be another way.
But I think that may create bit of disharmony in user-space.
Current choice (IORING_OP_URING_CMD_FIXED) is along the same lines as
IORING_OP_READ/WRITE_FIXED. User-space uses new opcode, and sends the
buffer by filling sqe->buf_index. 
So must we take a different way?

>>  	/* this goes last, obviously */
>>  	IORING_OP_LAST,
>>diff --git a/io_uring/opdef.c b/io_uring/opdef.c
>>index 9a0df19306fe..7d5731b84c92 100644
>>--- a/io_uring/opdef.c
>>+++ b/io_uring/opdef.c
>>@@ -472,6 +472,16 @@ const struct io_op_def io_op_defs[] = {
>>  		.issue			= io_uring_cmd,
>>  		.prep_async		= io_uring_cmd_prep_async,
>>  	},
>>+	[IORING_OP_URING_CMD_FIXED] = {
>>+		.needs_file		= 1,
>>+		.plug			= 1,
>>+		.name			= "URING_CMD_FIXED",
>>+		.iopoll			= 1,
>>+		.async_size		= uring_cmd_pdu_size(1),
>>+		.prep			= io_uring_cmd_prep,
>>+		.issue			= io_uring_cmd,
>>+		.prep_async		= io_uring_cmd_prep_async,
>>+	},
>>  	[IORING_OP_SENDZC_NOTIF] = {
>>  		.name			= "SENDZC_NOTIF",
>>  		.needs_file		= 1,
>>diff --git a/io_uring/rw.c b/io_uring/rw.c
>>index 1a4fb8a44b9a..3c7b94bffa62 100644
>>--- a/io_uring/rw.c
>>+++ b/io_uring/rw.c
>>@@ -1005,7 +1005,8 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
>>  		if (READ_ONCE(req->iopoll_completed))
>>  			break;
>>-		if (req->opcode == IORING_OP_URING_CMD) {
>>+		if (req->opcode == IORING_OP_URING_CMD ||
>>+				req->opcode == IORING_OP_URING_CMD_FIXED) {
>
>I don't see the changed chunk upstream

Right, it is on top of iopoll support (plus one more series mentioned in
covered letter). Here is the link - 
https://lore.kernel.org/linux-block/20220807183607.352351-1-joshi.k@samsung.com/
It would be great if you could review that.
Pavel Begunkov Aug. 25, 2022, 9:34 a.m. UTC | #3
On 8/22/22 12:33, Kanchan Joshi wrote:
> On Mon, Aug 22, 2022 at 11:58:24AM +0100, Pavel Begunkov wrote:
[...]
>>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>>> index 1463cfecb56b..80ea35d1ed5c 100644
>>> --- a/include/uapi/linux/io_uring.h
>>> +++ b/include/uapi/linux/io_uring.h
>>> @@ -203,6 +203,7 @@ enum io_uring_op {
>>>      IORING_OP_SOCKET,
>>>      IORING_OP_URING_CMD,
>>>      IORING_OP_SENDZC_NOTIF,
>>> +    IORING_OP_URING_CMD_FIXED,
>>
>> I don't think it should be another opcode, is there any
>> control flags we can fit it in?
> 
> using sqe->rw_flags could be another way.

We also use ->ioprio for io_uring opcode specific flags,
e.g. like in io_sendmsg_prep() for IORING_RECVSEND_POLL_FIRST,
might be even better better.

> But I think that may create bit of disharmony in user-space.
> Current choice (IORING_OP_URING_CMD_FIXED) is along the same lines as
> IORING_OP_READ/WRITE_FIXED.

And I still believe it was a bad choice, I don't like this encoding
of independent options/features by linearising toggles into opcodes.
A consistent way to add vectored fixed bufs would be to have a 4th
opcode, e.g. READV_FIXED, which is not great.

> User-space uses new opcode, and sends the
> buffer by filling sqe->buf_index. So must we take a different way?

I do think so


>>>      /* this goes last, obviously */
>>>      IORING_OP_LAST,
>>> diff --git a/io_uring/opdef.c b/io_uring/opdef.c
>>> index 9a0df19306fe..7d5731b84c92 100644
>>> --- a/io_uring/opdef.c
>>> +++ b/io_uring/opdef.c
>>> @@ -472,6 +472,16 @@ const struct io_op_def io_op_defs[] = {
>>>          .issue            = io_uring_cmd,
>>>          .prep_async        = io_uring_cmd_prep_async,
>>>      },
>>> +    [IORING_OP_URING_CMD_FIXED] = {
>>> +        .needs_file        = 1,
>>> +        .plug            = 1,
>>> +        .name            = "URING_CMD_FIXED",
>>> +        .iopoll            = 1,
>>> +        .async_size        = uring_cmd_pdu_size(1),
>>> +        .prep            = io_uring_cmd_prep,
>>> +        .issue            = io_uring_cmd,
>>> +        .prep_async        = io_uring_cmd_prep_async,
>>> +    },
>>>      [IORING_OP_SENDZC_NOTIF] = {
>>>          .name            = "SENDZC_NOTIF",
>>>          .needs_file        = 1,
>>> diff --git a/io_uring/rw.c b/io_uring/rw.c
>>> index 1a4fb8a44b9a..3c7b94bffa62 100644
>>> --- a/io_uring/rw.c
>>> +++ b/io_uring/rw.c
>>> @@ -1005,7 +1005,8 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
>>>          if (READ_ONCE(req->iopoll_completed))
>>>              break;
>>> -        if (req->opcode == IORING_OP_URING_CMD) {
>>> +        if (req->opcode == IORING_OP_URING_CMD ||
>>> +                req->opcode == IORING_OP_URING_CMD_FIXED) {
>>
>> I don't see the changed chunk upstream
> 
> Right, it is on top of iopoll support (plus one more series mentioned in
> covered letter). Here is the link - https://lore.kernel.org/linux-block/20220807183607.352351-1-joshi.k@samsung.com/
> It would be great if you could review that.
>
Kanchan Joshi Aug. 25, 2022, 4:02 p.m. UTC | #4
On Thu, Aug 25, 2022 at 10:34:11AM +0100, Pavel Begunkov wrote:
>On 8/22/22 12:33, Kanchan Joshi wrote:
>>On Mon, Aug 22, 2022 at 11:58:24AM +0100, Pavel Begunkov wrote:
>[...]
>>>>diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>>>>index 1463cfecb56b..80ea35d1ed5c 100644
>>>>--- a/include/uapi/linux/io_uring.h
>>>>+++ b/include/uapi/linux/io_uring.h
>>>>@@ -203,6 +203,7 @@ enum io_uring_op {
>>>>     IORING_OP_SOCKET,
>>>>     IORING_OP_URING_CMD,
>>>>     IORING_OP_SENDZC_NOTIF,
>>>>+    IORING_OP_URING_CMD_FIXED,
>>>
>>>I don't think it should be another opcode, is there any
>>>control flags we can fit it in?
>>
>>using sqe->rw_flags could be another way.
>
>We also use ->ioprio for io_uring opcode specific flags,
>e.g. like in io_sendmsg_prep() for IORING_RECVSEND_POLL_FIRST,
>might be even better better.
>
>>But I think that may create bit of disharmony in user-space.
>>Current choice (IORING_OP_URING_CMD_FIXED) is along the same lines as
>>IORING_OP_READ/WRITE_FIXED.
>
>And I still believe it was a bad choice, I don't like this encoding
>of independent options/features by linearising toggles into opcodes.
>A consistent way to add vectored fixed bufs would be to have a 4th
>opcode, e.g. READV_FIXED, which is not great.
>
>>User-space uses new opcode, and sends the
>>buffer by filling sqe->buf_index. So must we take a different way?
>
>I do think so

I see. Will change this in next iteration.
diff mbox series

Patch

diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 60aba10468fc..40961d7c3827 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -5,6 +5,8 @@ 
 #include <linux/sched.h>
 #include <linux/xarray.h>
 
+#include<uapi/linux/io_uring.h>
+
 enum io_uring_cmd_flags {
 	IO_URING_F_COMPLETE_DEFER	= 1,
 	IO_URING_F_UNLOCKED		= 2,
@@ -15,6 +17,7 @@  enum io_uring_cmd_flags {
 	IO_URING_F_SQE128		= 4,
 	IO_URING_F_CQE32		= 8,
 	IO_URING_F_IOPOLL		= 16,
+	IO_URING_F_FIXEDBUFS		= 32,
 };
 
 struct io_uring_cmd {
@@ -33,7 +36,7 @@  struct io_uring_cmd {
 
 #if defined(CONFIG_IO_URING)
 int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
-		struct iov_iter *iter, void *ioucmd)
+		struct iov_iter *iter, void *ioucmd);
 void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
 void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
 			void (*task_work_cb)(struct io_uring_cmd *));
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1463cfecb56b..80ea35d1ed5c 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -203,6 +203,7 @@  enum io_uring_op {
 	IORING_OP_SOCKET,
 	IORING_OP_URING_CMD,
 	IORING_OP_SENDZC_NOTIF,
+	IORING_OP_URING_CMD_FIXED,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 9a0df19306fe..7d5731b84c92 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -472,6 +472,16 @@  const struct io_op_def io_op_defs[] = {
 		.issue			= io_uring_cmd,
 		.prep_async		= io_uring_cmd_prep_async,
 	},
+	[IORING_OP_URING_CMD_FIXED] = {
+		.needs_file		= 1,
+		.plug			= 1,
+		.name			= "URING_CMD_FIXED",
+		.iopoll			= 1,
+		.async_size		= uring_cmd_pdu_size(1),
+		.prep			= io_uring_cmd_prep,
+		.issue			= io_uring_cmd,
+		.prep_async		= io_uring_cmd_prep_async,
+	},
 	[IORING_OP_SENDZC_NOTIF] = {
 		.name			= "SENDZC_NOTIF",
 		.needs_file		= 1,
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1a4fb8a44b9a..3c7b94bffa62 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -1005,7 +1005,8 @@  int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		if (READ_ONCE(req->iopoll_completed))
 			break;
 
-		if (req->opcode == IORING_OP_URING_CMD) {
+		if (req->opcode == IORING_OP_URING_CMD ||
+				req->opcode == IORING_OP_URING_CMD_FIXED) {
 			struct io_uring_cmd *ioucmd = (struct io_uring_cmd *)rw;
 
 			ret = req->file->f_op->uring_cmd_iopoll(ioucmd);
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index ff65cc8ab6cc..9383150b2949 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -3,11 +3,13 @@ 
 #include <linux/errno.h>
 #include <linux/file.h>
 #include <linux/io_uring.h>
+#include <linux/nospec.h>
 
 #include <uapi/linux/io_uring.h>
 
 #include "io_uring.h"
 #include "uring_cmd.h"
+#include "rsrc.h"
 
 static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
 {
@@ -74,6 +76,18 @@  int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	if (sqe->rw_flags || sqe->__pad1)
 		return -EINVAL;
+
+	req->buf_index = READ_ONCE(sqe->buf_index);
+	if (req->opcode == IORING_OP_URING_CMD_FIXED) {
+		struct io_ring_ctx *ctx = req->ctx;
+		u16 index;
+
+		if (unlikely(req->buf_index >= ctx->nr_user_bufs))
+			return -EFAULT;
+		index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
+		req->imu = ctx->user_bufs[index];
+		io_req_set_rsrc_node(req, ctx, 0);
+	}
 	ioucmd->cmd = sqe->cmd;
 	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
 	return 0;
@@ -98,6 +112,8 @@  int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 		req->iopoll_completed = 0;
 		WRITE_ONCE(ioucmd->cookie, NULL);
 	}
+	if (req->opcode == IORING_OP_URING_CMD_FIXED)
+		issue_flags |= IO_URING_F_FIXEDBUFS;
 
 	if (req_has_async_data(req))
 		ioucmd->cmd = req->async_data;
@@ -125,7 +141,7 @@  int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len,
 		int rw, struct iov_iter *iter, void *ioucmd)
 {
-	struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd);
+	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
 	struct io_mapped_ubuf *imu = req->imu;
 
 	return io_import_fixed(rw, iter, imu, ubuf, len);