
io_uring: cancelable uring_cmd

Message ID 20230921042434.2500190-1-ming.lei@redhat.com (mailing list archive)
State New
Series io_uring: cancelable uring_cmd

Commit Message

Ming Lei Sept. 21, 2023, 4:24 a.m. UTC
A uring_cmd may never complete on its own; ublk is one example, where a
uring_cmd isn't completed until a new block request arrives from the ublk
block device.

Add cancelable uring_cmd support to provide a mechanism for drivers to
cancel pending commands in their own way.

Add an io_uring_cmd_mark_cancelable() API for drivers to mark a command
as cancelable; io_uring will then cancel such commands in
io_uring_cancel_generic(). A driver callback is provided for canceling
the command in the driver's own way, and it also notifies the driver
that the io_uring task or context is exiting.

Suggested-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---

ublk patches:

	https://github.com/ming1/linux/commits/uring_exit_and_ublk
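
For reference, the driver side would use the new API roughly like this
(a minimal sketch; the mydrv_* names are illustrative only and not part
of this series):

	static void mydrv_uring_cmd_cancel(struct io_uring_cmd *cmd,
			unsigned int issue_flags, struct task_struct *task)
	{
		/*
		 * The driver owns the race between normal completion and
		 * cancellation; here the pending command is simply failed.
		 */
		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
	}

	static int mydrv_uring_cmd(struct io_uring_cmd *cmd,
			unsigned int issue_flags)
	{
		int ret;

		/* register this command for cancellation at task/ctx exit */
		ret = io_uring_cmd_mark_cancelable(cmd, issue_flags,
						   mydrv_uring_cmd_cancel);
		if (ret)
			return ret;

		/* queue the command in the driver; it completes later */
		return -EIOCBQUEUED;
	}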

 include/linux/io_uring.h       | 22 +++++++++++++++++-
 include/linux/io_uring_types.h |  6 +++++
 include/uapi/linux/io_uring.h  |  7 ++++--
 io_uring/io_uring.c            | 30 ++++++++++++++++++++++++
 io_uring/uring_cmd.c           | 42 ++++++++++++++++++++++++++++++++++
 5 files changed, 104 insertions(+), 3 deletions(-)

Comments

Gabriel Krisman Bertazi Sept. 21, 2023, 6:46 p.m. UTC | #1
Ming Lei <ming.lei@redhat.com> writes:

> A uring_cmd may never complete on its own; ublk is one example, where a
> uring_cmd isn't completed until a new block request arrives from the ublk
> block device.
>
> Add cancelable uring_cmd support to provide a mechanism for drivers to
> cancel pending commands in their own way.
>
> Add an io_uring_cmd_mark_cancelable() API for drivers to mark a command
> as cancelable; io_uring will then cancel such commands in
> io_uring_cancel_generic(). A driver callback is provided for canceling
> the command in the driver's own way, and it also notifies the driver
> that the io_uring task or context is exiting.
>
> Suggested-by: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>
> ublk patches:
>
> 	https://github.com/ming1/linux/commits/uring_exit_and_ublk
>
>  include/linux/io_uring.h       | 22 +++++++++++++++++-
>  include/linux/io_uring_types.h |  6 +++++
>  include/uapi/linux/io_uring.h  |  7 ++++--
>  io_uring/io_uring.c            | 30 ++++++++++++++++++++++++
>  io_uring/uring_cmd.c           | 42 ++++++++++++++++++++++++++++++++++
>  5 files changed, 104 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
> index 106cdc55ff3b..5b98308a154f 100644
> --- a/include/linux/io_uring.h
> +++ b/include/linux/io_uring.h
> @@ -22,6 +22,9 @@ enum io_uring_cmd_flags {
>  	IO_URING_F_IOPOLL		= (1 << 10),
>  };
>  
> +typedef void (uring_cmd_cancel_fn)(struct io_uring_cmd *,
> +		unsigned int issue_flags, struct task_struct *task);
> +

Hi Ming,

I wonder if uring_cmd_cancel shouldn't just be a new file operation,
pairing with f_op->uring_cmd.  It would, of course, also mean you don't
need to pass the callback here, occupying the pdu, or explicitly register
it.  IIUC, it would also allow you to drop the flag and just assume the
command is cancelable if the operation exists, further simplifying the
code.
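
For illustration, I am thinking of something roughly like this (a sketch
only; neither the ->uring_cmd_cancel() member nor the call site exists
today):

	/* hypothetical new member in struct file_operations */
	int (*uring_cmd_cancel)(struct io_uring_cmd *ioucmd,
				unsigned int issue_flags);

	/* io_uring_try_cancel_uring_cmd() below could then call into the
	 * driver directly, without a per-command flag or registered callback:
	 */
	if (req->file->f_op->uring_cmd_cancel) {
		req->file->f_op->uring_cmd_cancel(cmd, 0);
		ret = true;
	}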

> +static bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
> +					  struct task_struct *task,
> +					  bool cancel_all)
> +{
> +	struct hlist_node *tmp;
> +	struct io_kiocb *req;
> +	bool ret = false;
> +
> +	mutex_lock(&ctx->uring_lock);
> +	hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd,
> +			hash_node) {
> +		struct io_uring_cmd *cmd = io_kiocb_to_cmd(req,
> +				struct io_uring_cmd);
> +
> +		if (!cancel_all && req->task != task)
> +			continue;
> +
> +		/* safe to call ->cancel_fn() since cmd isn't done yet */
> +		if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
> +			cmd->cancel_fn(cmd, 0, task);

I find it weird to pass task here.  Also, it seems you use it only to
sanity check it is the same as ubq->ubq_daemon.  Can you just recover it
from cmd_to_io_kiocb(cmd)->task? it should be guaranteed to be the same
as task by the check immediately before.

Thanks,
Ming Lei Sept. 22, 2023, 12:50 a.m. UTC | #2
Hello Gabriel,

On Thu, Sep 21, 2023 at 02:46:31PM -0400, Gabriel Krisman Bertazi wrote:
> Ming Lei <ming.lei@redhat.com> writes:
> 
> > A uring_cmd may never complete on its own; ublk is one example, where a
> > uring_cmd isn't completed until a new block request arrives from the ublk
> > block device.
> >
> > Add cancelable uring_cmd support to provide a mechanism for drivers to
> > cancel pending commands in their own way.
> >
> > Add an io_uring_cmd_mark_cancelable() API for drivers to mark a command
> > as cancelable; io_uring will then cancel such commands in
> > io_uring_cancel_generic(). A driver callback is provided for canceling
> > the command in the driver's own way, and it also notifies the driver
> > that the io_uring task or context is exiting.
> >
> > Suggested-by: Jens Axboe <axboe@kernel.dk>
> > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > ---
> >
> > ublk patches:
> >
> > 	https://github.com/ming1/linux/commits/uring_exit_and_ublk
> >
> >  include/linux/io_uring.h       | 22 +++++++++++++++++-
> >  include/linux/io_uring_types.h |  6 +++++
> >  include/uapi/linux/io_uring.h  |  7 ++++--
> >  io_uring/io_uring.c            | 30 ++++++++++++++++++++++++
> >  io_uring/uring_cmd.c           | 42 ++++++++++++++++++++++++++++++++++
> >  5 files changed, 104 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
> > index 106cdc55ff3b..5b98308a154f 100644
> > --- a/include/linux/io_uring.h
> > +++ b/include/linux/io_uring.h
> > @@ -22,6 +22,9 @@ enum io_uring_cmd_flags {
> >  	IO_URING_F_IOPOLL		= (1 << 10),
> >  };
> >  
> > +typedef void (uring_cmd_cancel_fn)(struct io_uring_cmd *,
> > +		unsigned int issue_flags, struct task_struct *task);
> > +
> 
> Hi Ming,
> 
> I wonder if uring_cmd_cancel shouldn't just be a new file operation,
> pairing with f_op->uring_cmd.  It would, of course, also mean you don't
> need to pass the callback here, occupying the pdu, or explicitly register
> it.  IIUC, it would also allow you to drop the flag and just assume the
> command is cancelable if the operation exists, further simplifying the
> code.

If there were more such use cases, it probably wouldn't be a bad idea to
add a new file operation for canceling commands.

But there aren't, so it isn't good to add a new operation now, since the
new operation field would effectively be added to all drivers/FSs, and
99% of them shouldn't have to pay that cost.

Also, I don't see how much simplification a new operation brings: the
approach in this patch just needs one flag and one callback for
canceling, both of which are freely available, and only a driver with
this feature pays the extra callback cost (8 bytes in uring_cmd->pdu[]).

> 
> > +static bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
> > +					  struct task_struct *task,
> > +					  bool cancel_all)
> > +{
> > +	struct hlist_node *tmp;
> > +	struct io_kiocb *req;
> > +	bool ret = false;
> > +
> > +	mutex_lock(&ctx->uring_lock);
> > +	hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd,
> > +			hash_node) {
> > +		struct io_uring_cmd *cmd = io_kiocb_to_cmd(req,
> > +				struct io_uring_cmd);
> > +
> > +		if (!cancel_all && req->task != task)
> > +			continue;
> > +
> > +		/* safe to call ->cancel_fn() since cmd isn't done yet */
> > +		if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
> > +			cmd->cancel_fn(cmd, 0, task);
> 
> I find it weird to pass task here.  Also, it seems you use it only to
> sanity check it is the same as ubq->ubq_daemon.  Can you just recover it
> from cmd_to_io_kiocb(cmd)->task? it should be guaranteed to be the same
> as task by the check immediately before.

The 'task' parameter is very important for the ublk use case. In the
future I plan to support multiple tasks per queue (io_uring_ctx) for
ublk, relaxing the current (stricter) limit of a single task/context per
queue. So when one task is exiting, the ublk driver only needs to cancel
the commands queued from that task, not necessarily the whole
queue/device.

Also, cmd_to_io_kiocb(cmd)->task shouldn't be used, since strictly
speaking io_kiocb isn't exported to drivers.
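
Just for illustration, such a per-task cancel callback could look like
the following (a sketch only; mydrv_* and the pdu layout are
placeholders, not real ublk code):

	/* must fit in the part of pdu[] not used by ->cancel_fn */
	struct mydrv_cmd_pdu {
		struct task_struct *submit_task;  /* recorded at submit time */
	};

	static void mydrv_cancel_fn(struct io_uring_cmd *cmd,
			unsigned int issue_flags, struct task_struct *task)
	{
		struct mydrv_cmd_pdu *pdu = (struct mydrv_cmd_pdu *)cmd->pdu;

		/*
		 * With multiple submitting tasks per queue, only cancel the
		 * commands that were queued from the exiting task.
		 */
		if (pdu->submit_task != task)
			return;

		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
	}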


Thanks,
Ming
Ming Lei Sept. 22, 2023, 2:25 a.m. UTC | #3
On Fri, Sep 22, 2023 at 08:50:48AM +0800, Ming Lei wrote:
> Hello Gabriel,
> 
> On Thu, Sep 21, 2023 at 02:46:31PM -0400, Gabriel Krisman Bertazi wrote:
> > Ming Lei <ming.lei@redhat.com> writes:
> > 
> > > A uring_cmd may never complete on its own; ublk is one example, where a
> > > uring_cmd isn't completed until a new block request arrives from the ublk
> > > block device.
> > >
> > > Add cancelable uring_cmd support to provide a mechanism for drivers to
> > > cancel pending commands in their own way.
> > >
> > > Add an io_uring_cmd_mark_cancelable() API for drivers to mark a command
> > > as cancelable; io_uring will then cancel such commands in
> > > io_uring_cancel_generic(). A driver callback is provided for canceling
> > > the command in the driver's own way, and it also notifies the driver
> > > that the io_uring task or context is exiting.
> > >
> > > Suggested-by: Jens Axboe <axboe@kernel.dk>
> > > Signed-off-by: Ming Lei <ming.lei@redhat.com>
> > > ---
> > >
> > > ublk patches:
> > >
> > > 	https://github.com/ming1/linux/commits/uring_exit_and_ublk
> > >
> > >  include/linux/io_uring.h       | 22 +++++++++++++++++-
> > >  include/linux/io_uring_types.h |  6 +++++
> > >  include/uapi/linux/io_uring.h  |  7 ++++--
> > >  io_uring/io_uring.c            | 30 ++++++++++++++++++++++++
> > >  io_uring/uring_cmd.c           | 42 ++++++++++++++++++++++++++++++++++
> > >  5 files changed, 104 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
> > > index 106cdc55ff3b..5b98308a154f 100644
> > > --- a/include/linux/io_uring.h
> > > +++ b/include/linux/io_uring.h
> > > @@ -22,6 +22,9 @@ enum io_uring_cmd_flags {
> > >  	IO_URING_F_IOPOLL		= (1 << 10),
> > >  };
> > >  
> > > +typedef void (uring_cmd_cancel_fn)(struct io_uring_cmd *,
> > > +		unsigned int issue_flags, struct task_struct *task);
> > > +
> > 
> > Hi Ming,
> > 
> > I wonder if uring_cmd_cancel shouldn't just be a new file operation,
> > pairing with f_op->uring_cmd.  It would, of course, also mean you don't
> > need to pass the callback here, occupying the pdu, or explicitly register
> > it.  IIUC, it would also allow you to drop the flag and just assume the
> > command is cancelable if the operation exists, further simplifying the
> > code.
> 
> If there were more such use cases, it probably wouldn't be a bad idea to
> add a new file operation for canceling commands.
>
> But there aren't, so it isn't good to add a new operation now, since the
> new operation field would effectively be added to all drivers/FSs, and
> 99% of them shouldn't have to pay that cost.
>
> Also, I don't see how much simplification a new operation brings: the
> approach in this patch just needs one flag and one callback for
> canceling, both of which are freely available, and only a driver with
> this feature pays the extra callback cost (8 bytes in uring_cmd->pdu[]).

Another way is to reserve a cmd op range for io_uring's own use; then we
could define a CANCEL_CMD and pass it to ->uring_cmd(), for example by
reserving the following range:

	_IOWR(0xFF, 0, 0xFF) ~ _IOWR(0xFF, 0x7F, 0xFF)	//user visible
	_IOWR(0xFF, 0x80, 0xFF) ~ _IOWR(0xFF, 0xFF, 0xFF)	//io_uring internal

Even SOCKET_URING_OP_SIOCINQ/SOCKET_URING_OP_SIOCOUTQ could be covered,
since they were only just merged in v6.6-rc1; then we would have a fixed
cmd op range reserved for io_uring for future use cases.
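
Spelled out a bit more concretely, the reservation could look like this
(a sketch only; none of these macro names exist upstream):

	/* 0xFF/0x00..0x7F: generic, user-visible uring_cmd opcodes */
	#define IORING_CMD_GENERIC(nr) \
		_IOC(_IOC_READ | _IOC_WRITE, 0xFF, (nr), 0xFF)

	/* 0xFF/0x80..0xFF: io_uring-internal opcodes, never set by userspace */
	#define IORING_CMD_INTERNAL(nr) \
		_IOC(_IOC_READ | _IOC_WRITE, 0xFF, 0x80 + (nr), 0xFF)

	/* hypothetical internal opcode passed to ->uring_cmd() at exit time */
	#define URING_CMD_OP_CANCEL	IORING_CMD_INTERNAL(0x00)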

Jens & Gabriel, which way do you think is better?

> 
> > 
> > > +static bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
> > > +					  struct task_struct *task,
> > > +					  bool cancel_all)
> > > +{
> > > +	struct hlist_node *tmp;
> > > +	struct io_kiocb *req;
> > > +	bool ret = false;
> > > +
> > > +	mutex_lock(&ctx->uring_lock);
> > > +	hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd,
> > > +			hash_node) {
> > > +		struct io_uring_cmd *cmd = io_kiocb_to_cmd(req,
> > > +				struct io_uring_cmd);
> > > +
> > > +		if (!cancel_all && req->task != task)
> > > +			continue;
> > > +
> > > +		/* safe to call ->cancel_fn() since cmd isn't done yet */
> > > +		if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
> > > +			cmd->cancel_fn(cmd, 0, task);
> > 
> > I find it weird to pass task here.  Also, it seems you use it only to
> > sanity check it is the same as ubq->ubq_daemon.  Can you just recover it
> > from cmd_to_io_kiocb(cmd)->task? it should be guaranteed to be the same
> > as task by the check immediately before.
> 
> The 'task' parameter is very important for the ublk use case. In the
> future I plan to support multiple tasks per queue (io_uring_ctx) for
> ublk, relaxing the current (stricter) limit of a single task/context per
> queue. So when one task is exiting, the ublk driver only needs to cancel
> the commands queued from that task, not necessarily the whole
> queue/device.
>
> Also, cmd_to_io_kiocb(cmd)->task shouldn't be used, since strictly
> speaking io_kiocb isn't exported to drivers.

A new io_uring_cmd_get_task() API can be added, to be used only for
handling CANCEL_CMD if we take the ->uring_cmd() approach.
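
Something like this would be enough (a sketch; the helper is a proposal,
not part of this patch):

	struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
	{
		return cmd_to_io_kiocb(cmd)->task;
	}
	EXPORT_SYMBOL_GPL(io_uring_cmd_get_task);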

Thanks,
Ming

Patch

diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 106cdc55ff3b..5b98308a154f 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -22,6 +22,9 @@  enum io_uring_cmd_flags {
 	IO_URING_F_IOPOLL		= (1 << 10),
 };
 
+typedef void (uring_cmd_cancel_fn)(struct io_uring_cmd *,
+		unsigned int issue_flags, struct task_struct *task);
+
 struct io_uring_cmd {
 	struct file	*file;
 	const struct io_uring_sqe *sqe;
@@ -33,7 +36,17 @@  struct io_uring_cmd {
 	};
 	u32		cmd_op;
 	u32		flags;
-	u8		pdu[32]; /* available inline for free use */
+
+	/* less than 32 is available for cancelable cmd */
+	union {
+		u8		pdu[32]; /* available inline for free use */
+
+		struct {
+			/* available inline for free use */
+			u8	__pdu[32 - sizeof(uring_cmd_cancel_fn  *)];
+			uring_cmd_cancel_fn  *cancel_fn;
+		};
+	};
 };
 
 static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
@@ -82,6 +95,8 @@  static inline void io_uring_free(struct task_struct *tsk)
 		__io_uring_free(tsk);
 }
 int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
+int io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
+		unsigned int issue_flags, uring_cmd_cancel_fn *fn);
 #else
 static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
 			      struct iov_iter *iter, void *ioucmd)
@@ -122,6 +137,11 @@  static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
 {
 	return -EOPNOTSUPP;
 }
+static inline int io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
+		unsigned int issue_flags, uring_cmd_cancel_fn *fn)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #endif
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 13d19b9be9f4..1571db76bec1 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -265,6 +265,12 @@  struct io_ring_ctx {
 		 */
 		struct io_wq_work_list	iopoll_list;
 		bool			poll_multi_queue;
+
+		/*
+		 * Any cancelable uring_cmd is added to this list in
+		 * ->uring_cmd() by io_uring_cmd_mark_cancelable()
+		 */
+		struct hlist_head	cancelable_uring_cmd;
 	} ____cacheline_aligned_in_smp;
 
 	struct {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 8e61f8b7c2ce..29a7a7e71f57 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -249,10 +249,13 @@  enum io_uring_op {
  * sqe->uring_cmd_flags
  * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
  *				along with setting sqe->buf_index.
+ * IORING_URING_CMD_CANCELABLE	not for userspace
  * IORING_URING_CMD_POLLED	driver use only
  */
-#define IORING_URING_CMD_FIXED	(1U << 0)
-#define IORING_URING_CMD_POLLED	(1U << 31)
+#define IORING_URING_CMD_FIXED		(1U << 0)
+/* set by driver, and handled by io_uring to cancel this cmd */
+#define IORING_URING_CMD_CANCELABLE	(1U << 30)
+#define IORING_URING_CMD_POLLED		(1U << 31)
 
 
 /*
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 783ed0fff71b..428cffb1a7e1 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3256,6 +3256,35 @@  static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
 	return ret;
 }
 
+static bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
+					  struct task_struct *task,
+					  bool cancel_all)
+{
+	struct hlist_node *tmp;
+	struct io_kiocb *req;
+	bool ret = false;
+
+	mutex_lock(&ctx->uring_lock);
+	hlist_for_each_entry_safe(req, tmp, &ctx->cancelable_uring_cmd,
+			hash_node) {
+		struct io_uring_cmd *cmd = io_kiocb_to_cmd(req,
+				struct io_uring_cmd);
+
+		if (!cancel_all && req->task != task)
+			continue;
+
+		/* safe to call ->cancel_fn() since cmd isn't done yet */
+		if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
+			cmd->cancel_fn(cmd, 0, task);
+			ret = true;
+		}
+	}
+	io_submit_flush_completions(ctx);
+	mutex_unlock(&ctx->uring_lock);
+
+	return ret;
+}
+
 static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 						struct task_struct *task,
 						bool cancel_all)
@@ -3307,6 +3336,7 @@  static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	ret |= io_kill_timeouts(ctx, task, cancel_all);
 	if (task)
 		ret |= io_run_task_work() > 0;
+	ret |= io_uring_try_cancel_uring_cmd(ctx, task, cancel_all);
 	return ret;
 }
 
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 537795fddc87..47a6c84fd7f9 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -13,6 +13,46 @@ 
 #include "rsrc.h"
 #include "uring_cmd.h"
 
+static void io_uring_cmd_del_cancelable(struct io_uring_cmd *cmd,
+		unsigned int issue_flags)
+{
+	if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
+		struct io_kiocb *req = cmd_to_io_kiocb(cmd);
+		struct io_ring_ctx *ctx = req->ctx;
+
+		io_ring_submit_lock(ctx, issue_flags);
+		cmd->flags &= ~IORING_URING_CMD_CANCELABLE;
+		hlist_del(&req->hash_node);
+		io_ring_submit_unlock(ctx, issue_flags);
+	}
+}
+
+/*
+ * The cancel callback is called in io_uring_cancel_generic() for canceling
+ * this uring_cmd, and it is the driver's responsibility to cover the race
+ * between normal completion and canceling.
+ */
+int io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
+		unsigned int issue_flags, uring_cmd_cancel_fn *fn)
+{
+	struct io_kiocb *req = cmd_to_io_kiocb(cmd);
+	struct io_ring_ctx *ctx = req->ctx;
+
+	if (!fn)
+		return -EINVAL;
+
+	io_ring_submit_lock(ctx, issue_flags);
+	if (!(cmd->flags & IORING_URING_CMD_CANCELABLE)) {
+		cmd->cancel_fn = fn;
+		cmd->flags |= IORING_URING_CMD_CANCELABLE;
+		hlist_add_head(&req->hash_node, &ctx->cancelable_uring_cmd);
+	}
+	io_ring_submit_unlock(ctx, issue_flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
+
 static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
@@ -56,6 +96,8 @@  void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
 {
 	struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
 
+	io_uring_cmd_del_cancelable(ioucmd, issue_flags);
+
 	if (ret < 0)
 		req_set_fail(req);