Message ID | 20241107-fuse-uring-for-6-10-rfc4-v5-15-e8660a991499@ddn.com |
---|---|
State | New |
Series | fuse: fuse-over-io-uring |
On Thu, Nov 7, 2024 at 9:04 AM Bernd Schubert <bschubert@ddn.com> wrote:
>
> When the fuse-server terminates while the fuse-client or kernel
> still has queued URING_CMDs, these commands retain references
> to the struct file used by the fuse connection. This prevents
> fuse_dev_release() from being invoked, resulting in a hung mount
> point.

Could you explain the flow of what happens after a fuse server
terminates? How does that trigger the IO_URING_F_CANCEL uring cmd?

> This patch addresses the issue by making queued URING_CMDs
> cancelable, allowing fuse_dev_release() to proceed as expected
> and preventing the mount point from hanging.
>
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> ---
>  fs/fuse/dev_uring.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 70 insertions(+), 6 deletions(-)
>
> [...]
>
> --
> 2.43.0
On 11/18/24 20:32, Joanne Koong wrote:
> On Thu, Nov 7, 2024 at 9:04 AM Bernd Schubert <bschubert@ddn.com> wrote:
>>
>> When the fuse-server terminates while the fuse-client or kernel
>> still has queued URING_CMDs, these commands retain references
>> to the struct file used by the fuse connection. This prevents
>> fuse_dev_release() from being invoked, resulting in a hung mount
>> point.
>
> Could you explain the flow of what happens after a fuse server
> terminates? How does that trigger the IO_URING_F_CANCEL uring cmd?

This is all about daemon termination, when the mount point is still
alive. Basically without this patch even a plain (non-forced) umount
hangs.

Without queued IORING_OP_URING_CMDs there is a call into
fuse_dev_release() on daemon termination; with queued
IORING_OP_URING_CMDs this doesn't happen, as each of these commands
holds a file reference.

IO_URING_F_CANCEL is triggered from io-uring, I guess when
the io-uring file descriptor is released
(note: 'io-uring fd' != '/dev/fuse fd').

I guess I need to improve the commit message a bit.


Cheers,
Bernd
On Mon, Nov 18, 2024 at 11:55 AM Bernd Schubert
<bernd.schubert@fastmail.fm> wrote:
>
> On 11/18/24 20:32, Joanne Koong wrote:
> > On Thu, Nov 7, 2024 at 9:04 AM Bernd Schubert <bschubert@ddn.com> wrote:
> >>
> >> When the fuse-server terminates while the fuse-client or kernel
> >> still has queued URING_CMDs, these commands retain references
> >> to the struct file used by the fuse connection. This prevents
> >> fuse_dev_release() from being invoked, resulting in a hung mount
> >> point.
> >
> > Could you explain the flow of what happens after a fuse server
> > terminates? How does that trigger the IO_URING_F_CANCEL uring cmd?
>
> This is all about daemon termination, when the mount point is still
> alive. Basically without this patch even a plain (non-forced) umount
> hangs.
>
> Without queued IORING_OP_URING_CMDs there is a call into
> fuse_dev_release() on daemon termination; with queued
> IORING_OP_URING_CMDs this doesn't happen, as each of these commands
> holds a file reference.
>
> IO_URING_F_CANCEL is triggered from io-uring, I guess when
> the io-uring file descriptor is released
> (note: 'io-uring fd' != '/dev/fuse fd').

Gotcha. I took a look at the io_uring code and it looks like the call
chain is something like this:

  io_uring_release()
    io_ring_ctx_wait_and_kill()
      io_ring_exit_work()
        io_uring_try_cancel_requests()
          io_uring_try_cancel_uring_cmd()
            file->f_op->uring_cmd() w/ IO_URING_F_CANCEL issue_flags

> I guess I need to improve the commit message a bit.
>
> Cheers,
> Bernd
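For readers unfamiliar with the cancel hook discussed above, the general shape of the pattern the patch relies on is sketched below. This is a minimal illustration, not the fuse code: my_pdu, my_ent, my_park_cmd and my_uring_cmd are hypothetical names; only io_uring_cmd_mark_cancelable() and io_uring_cmd_done() are real io_uring helpers (they are the ones the patch itself calls).

/* Minimal sketch of the cancelable uring_cmd pattern, assuming a recent
 * kernel where <linux/io_uring/cmd.h> provides the helpers used here.
 */
#include <linux/io_uring/cmd.h>

struct my_ent;				/* hypothetical per-command state */

struct my_pdu {
	struct my_ent *ent;
};

/* Called once a command is parked and may sit in the kernel indefinitely. */
static void my_park_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags,
			struct my_ent *ent)
{
	struct my_pdu *pdu = (struct my_pdu *)cmd->pdu;

	pdu->ent = ent;
	/* io_uring may now redeliver this cmd with IO_URING_F_CANCEL on exit */
	io_uring_cmd_mark_cancelable(cmd, issue_flags);
}

static int my_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	if (unlikely(issue_flags & IO_URING_F_CANCEL)) {
		/* Complete the parked cmd so it drops its file reference */
		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
		return 0;
	}

	/* ... normal command handling (FETCH/COMMIT in the fuse case) ... */
	return 0;
}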
On Thu, Nov 7, 2024 at 9:04 AM Bernd Schubert <bschubert@ddn.com> wrote:
>
> When the fuse-server terminates while the fuse-client or kernel
> still has queued URING_CMDs, these commands retain references
> to the struct file used by the fuse connection. This prevents
> fuse_dev_release() from being invoked, resulting in a hung mount
> point.
>
> This patch addresses the issue by making queued URING_CMDs
> cancelable, allowing fuse_dev_release() to proceed as expected
> and preventing the mount point from hanging.
>
> Signed-off-by: Bernd Schubert <bschubert@ddn.com>
> ---
>  fs/fuse/dev_uring.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 70 insertions(+), 6 deletions(-)
>
> [...]
>
> +/*
> + * Handle IO_URING_F_CANCEL, typically should come on daemon termination
> + */
> +static void fuse_uring_cancel(struct io_uring_cmd *cmd,
> +			      unsigned int issue_flags, struct fuse_conn *fc)
> +{
> +	struct fuse_uring_cmd_pdu *pdu = (struct fuse_uring_cmd_pdu *)cmd->pdu;
> +	struct fuse_ring_queue *queue = pdu->queue;
> +	struct fuse_ring_ent *ent;
> +	bool found = false;
> +	bool need_cmd_done = false;
> +
> +	spin_lock(&queue->lock);
> +
> +	/* XXX: This is cumbersome for large queues. */
> +	list_for_each_entry(ent, &queue->ent_avail_queue, list) {
> +		if (pdu->ring_ent == ent) {
> +			found = true;
> +			break;
> +		}
> +	}

Do we have to check that the entry is on the ent_avail_queue, or can
we assume that if the ent->state is FRRS_WAIT, the only queue it'll be
on is the ent_avail_queue? I see only one case where this isn't true,
for teardown in fuse_uring_stop_list_entries() - if we had a
workaround for this, eg having some teardown state signifying that
io_uring_cmd_done() needs to be called on the cmd and clearing
FRRS_WAIT, then we could get rid of iteration through ent_avail_queue
for every cancelled cmd.

> [...]
>
> --
> 2.43.0
On 11/19/24 00:30, Joanne Koong wrote:
> On Thu, Nov 7, 2024 at 9:04 AM Bernd Schubert <bschubert@ddn.com> wrote:
>> [...]
>> +	/* XXX: This is cumbersome for large queues. */
>> +	list_for_each_entry(ent, &queue->ent_avail_queue, list) {
>> +		if (pdu->ring_ent == ent) {
>> +			found = true;
>> +			break;
>> +		}
>> +	}
>
> Do we have to check that the entry is on the ent_avail_queue, or can
> we assume that if the ent->state is FRRS_WAIT, the only queue it'll be
> on is the ent_avail_queue? I see only one case where this isn't true,
> for teardown in fuse_uring_stop_list_entries() - if we had a
> workaround for this, eg having some teardown state signifying that
> io_uring_cmd_done() needs to be called on the cmd and clearing
> FRRS_WAIT, then we could get rid of iteration through ent_avail_queue
> for every cancelled cmd.

I'm scared that we would run into races - I don't want to access memory
pointed to by pdu->ring_ent before I'm sure it is on the list.

Remember the long discussion Miklos and I had about the tiny 'tag'
variable and finding requests using existing hash lists [0]?
Personally I would prefer an array of

struct queue_entries {
	struct fuse_ring_ent *ring_ent;
	bool valid;
}

in struct fuse_ring_queue {
	...
	struct queue_entries *entries[]
}

And that array would only get freed on queue destruction. Besides
avoiding hash lists, it would also allow to use 'valid' to know if
we can access the ring_entry and then check the state.


Thanks,
Bernd


[0] https://lore.kernel.org/linux-fsdevel/CAJfpegu_UQ1BNp0UDHeOZFWwUoXbJ_LP4W=o+UX6MB3DsJbH8g@mail.gmail.com/T/#t
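A rough sketch of how the cancel path could use the array idea from the mail above. This follows the proposal, not code from the posted series; the _sketch struct, the slot index in the pdu, and cancel_lookup() are illustrative names.

/* Sketch of the proposed per-queue entry array, not code from the series. */
struct queue_entries {
	struct fuse_ring_ent *ring_ent;
	bool valid;	/* cleared once the entry has been torn down */
};

struct fuse_ring_queue_sketch {
	spinlock_t lock;
	/* one slot per ring entry; the array is only freed on queue destruction */
	struct queue_entries *entries;
	unsigned int nr_entries;
};

/*
 * On IO_URING_F_CANCEL the pdu would carry a slot index instead of a raw
 * pointer, so no list iteration is needed and 'valid' guards the access.
 */
static struct fuse_ring_ent *
cancel_lookup(struct fuse_ring_queue_sketch *queue, unsigned int slot)
	__must_hold(&queue->lock)
{
	struct queue_entries *qe = &queue->entries[slot];

	return qe->valid ? qe->ring_ent : NULL;
}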
On Mon, Nov 18, 2024 at 3:47 PM Bernd Schubert
<bernd.schubert@fastmail.fm> wrote:
>
> On 11/19/24 00:30, Joanne Koong wrote:
> > [...]
> > Do we have to check that the entry is on the ent_avail_queue, or can
> > we assume that if the ent->state is FRRS_WAIT, the only queue it'll be
> > on is the ent_avail_queue? I see only one case where this isn't true,
> > for teardown in fuse_uring_stop_list_entries() - if we had a
> > workaround for this, eg having some teardown state signifying that
> > io_uring_cmd_done() needs to be called on the cmd and clearing
> > FRRS_WAIT, then we could get rid of iteration through ent_avail_queue
> > for every cancelled cmd.
>
> I'm scared that we would run into races - I don't want to access memory
> pointed to by pdu->ring_ent before I'm sure it is on the list.

Oh, I was seeing that we mark the cmd as cancellable (eg in
fuse_uring_prepare_cancel()) only after the ring_ent is moved to the
ent_avail_queue (in fuse_uring_ent_avail()) and that this is done in
the scope of the queue->lock, so we would only call into
fuse_uring_cancel() when the ring_ent is on the list. Could there
still be a race condition here?

Alternatively, inspired by your "bool valid;" idea below, maybe
another solution would be having a bit in "struct fuse_ring_ent"
tracking if io_uring_cmd_done() needs to be called on it?

This is fairly unimportant though - this part could always be
optimized in a future patchset if you think it needs to be optimized,
but was just curious if these would work.

Thanks,
Joanne

> Remember the long discussion Miklos and I had about the tiny 'tag'
> variable and finding requests using existing hash lists [0]?
> Personally I would prefer an array of
>
> struct queue_entries {
>	struct fuse_ring_ent *ring_ent;
>	bool valid;
> }
>
> in struct fuse_ring_queue {
>	...
>	struct queue_entries *entries[]
> }
>
> And that array would only get freed on queue destruction. Besides
> avoiding hash lists, it would also allow to use 'valid' to know if
> we can access the ring_entry and then check the state.
>
> Thanks,
> Bernd
>
> [0] https://lore.kernel.org/linux-fsdevel/CAJfpegu_UQ1BNp0UDHeOZFWwUoXbJ_LP4W=o+UX6MB3DsJbH8g@mail.gmail.com/T/#t
On 11/19/24 03:02, Joanne Koong wrote:
> On Mon, Nov 18, 2024 at 3:47 PM Bernd Schubert
> <bernd.schubert@fastmail.fm> wrote:
>>
>>> [...]
>>> Do we have to check that the entry is on the ent_avail_queue, or can
>>> we assume that if the ent->state is FRRS_WAIT, the only queue it'll be
>>> on is the ent_avail_queue? I see only one case where this isn't true,
>>> for teardown in fuse_uring_stop_list_entries() - if we had a
>>> workaround for this, eg having some teardown state signifying that
>>> io_uring_cmd_done() needs to be called on the cmd and clearing
>>> FRRS_WAIT, then we could get rid of iteration through ent_avail_queue
>>> for every cancelled cmd.
>>
>> I'm scared that we would run into races - I don't want to access memory
>> pointed to by pdu->ring_ent before I'm sure it is on the list.
>
> Oh, I was seeing that we mark the cmd as cancellable (eg in
> fuse_uring_prepare_cancel()) only after the ring_ent is moved to the
> ent_avail_queue (in fuse_uring_ent_avail()) and that this is done in
> the scope of the queue->lock, so we would only call into
> fuse_uring_cancel() when the ring_ent is on the list. Could there
> still be a race condition here?
>
> Alternatively, inspired by your "bool valid;" idea below, maybe
> another solution would be having a bit in "struct fuse_ring_ent"
> tracking if io_uring_cmd_done() needs to be called on it?

What I mean is that daemon termination might race with normal umount.
Umount does everything cleanly and iterates through lists, but might
free 'struct fuse_ring_ent', see fuse_uring_entry_teardown(). On the
other hand, daemon termination with IO_URING_F_CANCEL has the pointer
to ring_ent - but that pointer might already have been freed by umount.
That also means another bit in "struct fuse_ring_ent" won't help.

> This is fairly unimportant though - this part could always be
> optimized in a future patchset if you think it needs to be optimized,
> but was just curious if these would work.

I'm going to change the logic a bit and will introduce another list,
'freeable_ring_ent'. Entries will be moved to that new list and only
freed in fuse_uring_destruct(). After that, IO_URING_F_CANCEL can check
the state of ring_ent directly.

Thanks for the discussion!
Bernd
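A sketch of how the announced change could look. The 'freeable_ring_ent' list and fuse_uring_destruct() are named in the mail above; the struct layout and the helper names (entry_teardown_sketch, queue_destruct_sketch) are illustrative, not code from the series.

/* Sketch of the plan above: teardown unlinks entries instead of freeing
 * them, so a later IO_URING_F_CANCEL can still look at ent->state safely. */
struct fuse_ring_queue /* sketch: only the relevant members shown */ {
	spinlock_t lock;
	struct list_head ent_avail_queue;
	struct list_head ent_in_userspace;
	/* entries removed during teardown, freed only in fuse_uring_destruct() */
	struct list_head freeable_ring_ent;
};

static void entry_teardown_sketch(struct fuse_ring_queue *queue,
				  struct fuse_ring_ent *ent)
	__must_hold(&queue->lock)
{
	/* keep the memory valid, just park it on the to-be-freed list */
	list_move(&ent->list, &queue->freeable_ring_ent);
}

/* Entries are finally freed only when the queue itself goes away. */
static void queue_destruct_sketch(struct fuse_ring_queue *queue)
{
	struct fuse_ring_ent *ent, *next;

	list_for_each_entry_safe(ent, next, &queue->freeable_ring_ent, list)
		kfree(ent);
}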
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 6af515458695ccb2e32cc8c62c45471e6710c15f..b465da41c42c47eaf69f09bab1423061bc8fcc68 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -23,6 +23,7 @@ MODULE_PARM_DESC(enable_uring,
 
 struct fuse_uring_cmd_pdu {
 	struct fuse_ring_ent *ring_ent;
+	struct fuse_ring_queue *queue;
 };
 
 /*
@@ -382,6 +383,61 @@ void fuse_uring_stop_queues(struct fuse_ring *ring)
 	}
 }
 
+/*
+ * Handle IO_URING_F_CANCEL, typically should come on daemon termination
+ */
+static void fuse_uring_cancel(struct io_uring_cmd *cmd,
+			      unsigned int issue_flags, struct fuse_conn *fc)
+{
+	struct fuse_uring_cmd_pdu *pdu = (struct fuse_uring_cmd_pdu *)cmd->pdu;
+	struct fuse_ring_queue *queue = pdu->queue;
+	struct fuse_ring_ent *ent;
+	bool found = false;
+	bool need_cmd_done = false;
+
+	spin_lock(&queue->lock);
+
+	/* XXX: This is cumbersome for large queues. */
+	list_for_each_entry(ent, &queue->ent_avail_queue, list) {
+		if (pdu->ring_ent == ent) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		pr_info("qid=%d Did not find ent=%p", queue->qid, ent);
+		spin_unlock(&queue->lock);
+		return;
+	}
+
+	if (ent->state == FRRS_WAIT) {
+		ent->state = FRRS_USERSPACE;
+		list_move(&ent->list, &queue->ent_in_userspace);
+		need_cmd_done = true;
+	}
+	spin_unlock(&queue->lock);
+
+	if (need_cmd_done)
+		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
+
+	/*
+	 * releasing the last entry should trigger fuse_dev_release() if
+	 * the daemon was terminated
+	 */
+}
+
+static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
+				      struct fuse_ring_ent *ring_ent)
+{
+	struct fuse_uring_cmd_pdu *pdu = (struct fuse_uring_cmd_pdu *)cmd->pdu;
+
+	pdu->ring_ent = ring_ent;
+	pdu->queue = ring_ent->queue;
+
+	io_uring_cmd_mark_cancelable(cmd, issue_flags);
+}
+
 /*
  * Checks for errors and stores it into the request
  */
@@ -606,7 +662,8 @@ static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ring_ent)
  * Put a ring request onto hold, it is no longer used for now.
  */
 static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
-				 struct fuse_ring_queue *queue)
+				 struct fuse_ring_queue *queue,
+				 unsigned int issue_flags)
 	__must_hold(&queue->lock)
 {
 	struct fuse_ring *ring = queue->ring;
@@ -626,6 +683,7 @@ static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
 	list_move(&ring_ent->list, &queue->ent_avail_queue);
 
 	ring_ent->state = FRRS_WAIT;
+	fuse_uring_prepare_cancel(ring_ent->cmd, issue_flags, ring_ent);
 }
 
 /* Used to find the request on SQE commit */
@@ -729,7 +787,8 @@ static void fuse_uring_commit(struct fuse_ring_ent *ring_ent,
  * Get the next fuse req and send it
  */
 static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ring_ent,
-				     struct fuse_ring_queue *queue)
+				     struct fuse_ring_queue *queue,
+				     unsigned int issue_flags)
 {
 	int has_next, err;
 	int prev_state = ring_ent->state;
@@ -738,7 +797,7 @@ static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ring_ent,
 		spin_lock(&queue->lock);
 		has_next = fuse_uring_ent_assign_req(ring_ent);
 		if (!has_next) {
-			fuse_uring_ent_avail(ring_ent, queue);
+			fuse_uring_ent_avail(ring_ent, queue, issue_flags);
 			spin_unlock(&queue->lock);
 			break; /* no request left */
 		}
@@ -813,7 +872,7 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
 	 * and fetching is done in one step vs legacy fuse, which has separated
 	 * read (fetch request) and write (commit result).
 	 */
-	fuse_uring_next_fuse_req(ring_ent, queue);
+	fuse_uring_next_fuse_req(ring_ent, queue, issue_flags);
 	return 0;
 }
 
@@ -853,7 +912,7 @@ static void _fuse_uring_fetch(struct fuse_ring_ent *ring_ent,
 	struct fuse_ring *ring = queue->ring;
 
 	spin_lock(&queue->lock);
-	fuse_uring_ent_avail(ring_ent, queue);
+	fuse_uring_ent_avail(ring_ent, queue, issue_flags);
 	ring_ent->cmd = cmd;
 	spin_unlock(&queue->lock);
 
@@ -1021,6 +1080,11 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 	if (fc->aborted)
 		goto out;
 
+	if ((unlikely(issue_flags & IO_URING_F_CANCEL))) {
+		fuse_uring_cancel(cmd, issue_flags, fc);
+		return 0;
+	}
+
 	switch (cmd_op) {
 	case FUSE_URING_REQ_FETCH:
 		err = fuse_uring_fetch(cmd, issue_flags, fc);
@@ -1080,7 +1144,7 @@ fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
 
 	return;
 err:
-	fuse_uring_next_fuse_req(ring_ent, queue);
+	fuse_uring_next_fuse_req(ring_ent, queue, issue_flags);
 }
 
 /* queue a fuse request and send it if a ring entry is available */
When the fuse-server terminates while the fuse-client or kernel
still has queued URING_CMDs, these commands retain references
to the struct file used by the fuse connection. This prevents
fuse_dev_release() from being invoked, resulting in a hung mount
point.

This patch addresses the issue by making queued URING_CMDs
cancelable, allowing fuse_dev_release() to proceed as expected
and preventing the mount point from hanging.

Signed-off-by: Bernd Schubert <bschubert@ddn.com>
---
 fs/fuse/dev_uring.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 70 insertions(+), 6 deletions(-)