diff mbox series

[1/5] virtiofs: Do not end request in submission context

Message ID 20191015174626.11593-2-vgoyal@redhat.com (mailing list archive)
State New, archived
Headers show
Series virtiofs: Fix couple of deadlocks | expand

Commit Message

Vivek Goyal Oct. 15, 2019, 5:46 p.m. UTC
The submission context can hold locks that the request-ending code tries to
acquire again, which can cause a deadlock. One example is fc->bg_lock: if a
background request is being submitted, the submitter might hold fc->bg_lock,
and if we could not submit the request (because the device went away) and
tried to end the request, a deadlock happens. During testing, I also got a
warning from the deadlock detection code.

So put requests on a list and end requests from a worker thread.

I got the following warning from the deadlock detector.

[  603.137138] WARNING: possible recursive locking detected
[  603.137142] --------------------------------------------
[  603.137144] blogbench/2036 is trying to acquire lock:
[  603.137149] 00000000f0f51107 (&(&fc->bg_lock)->rlock){+.+.}, at: fuse_request_end+0xdf/0x1c0 [fuse]
[  603.140701]
[  603.140701] but task is already holding lock:
[  603.140703] 00000000f0f51107 (&(&fc->bg_lock)->rlock){+.+.}, at: fuse_simple_background+0x92/0x1d0 [fuse]
[  603.140713]
[  603.140713] other info that might help us debug this:
[  603.140714]  Possible unsafe locking scenario:
[  603.140714]
[  603.140715]        CPU0
[  603.140716]        ----
[  603.140716]   lock(&(&fc->bg_lock)->rlock);
[  603.140718]   lock(&(&fc->bg_lock)->rlock);
[  603.140719]
[  603.140719]  *** DEADLOCK ***

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
 fs/fuse/virtio_fs.c | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

Comments

Miklos Szeredi Oct. 21, 2019, 8:03 a.m. UTC | #1
On Tue, Oct 15, 2019 at 7:46 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> Submission context can hold some locks which end request code tries to
> hold again and deadlock can occur. For example, fc->bg_lock. If a background
> request is being submitted, it might hold fc->bg_lock and if we could not
> submit request (because device went away) and tried to end request,
> then deadlock happens. During testing, I also got a warning from deadlock
> detection code.
>
> So put requests on a list and end requests from a worker thread.
>
> I got following warning from deadlock detector.
>
> [  603.137138] WARNING: possible recursive locking detected
> [  603.137142] --------------------------------------------
> [  603.137144] blogbench/2036 is trying to acquire lock:
> [  603.137149] 00000000f0f51107 (&(&fc->bg_lock)->rlock){+.+.}, at: fuse_request_end+0xdf/0x1c0 [fuse]
> [  603.140701]
> [  603.140701] but task is already holding lock:
> [  603.140703] 00000000f0f51107 (&(&fc->bg_lock)->rlock){+.+.}, at: fuse_simple_background+0x92/0x1d0 [fuse]
> [  603.140713]
> [  603.140713] other info that might help us debug this:
> [  603.140714]  Possible unsafe locking scenario:
> [  603.140714]
> [  603.140715]        CPU0
> [  603.140716]        ----
> [  603.140716]   lock(&(&fc->bg_lock)->rlock);
> [  603.140718]   lock(&(&fc->bg_lock)->rlock);
> [  603.140719]
> [  603.140719]  *** DEADLOCK ***
>
> Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
> ---
>  fs/fuse/virtio_fs.c | 38 ++++++++++++++++++++++++++++++++++----
>  1 file changed, 34 insertions(+), 4 deletions(-)
>
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 6af3f131e468..24ac6f8bf3f7 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -30,6 +30,7 @@ struct virtio_fs_vq {
>         struct virtqueue *vq;     /* protected by ->lock */
>         struct work_struct done_work;
>         struct list_head queued_reqs;
> +       struct list_head end_reqs;      /* End these requests */
>         struct delayed_work dispatch_work;
>         struct fuse_dev *fud;
>         bool connected;
> @@ -259,8 +260,27 @@ static void virtio_fs_hiprio_done_work(struct work_struct *work)
>         spin_unlock(&fsvq->lock);
>  }
>
> -static void virtio_fs_dummy_dispatch_work(struct work_struct *work)
> +static void virtio_fs_request_dispatch_work(struct work_struct *work)
>  {
> +       struct fuse_req *req;
> +       struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
> +                                                dispatch_work.work);
> +       struct fuse_conn *fc = fsvq->fud->fc;
> +
> +       pr_debug("virtio-fs: worker %s called.\n", __func__);
> +       while (1) {
> +               spin_lock(&fsvq->lock);
> +               req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
> +                                              list);
> +               if (!req) {
> +                       spin_unlock(&fsvq->lock);
> +                       return;
> +               }
> +
> +               list_del_init(&req->list);
> +               spin_unlock(&fsvq->lock);
> +               fuse_request_end(fc, req);
> +       }
>  }
>
>  static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
> @@ -502,6 +522,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
>         names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
>         INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
>         INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
> +       INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
>         INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
>                         virtio_fs_hiprio_dispatch_work);
>         spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
> @@ -511,8 +532,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
>                 spin_lock_init(&fs->vqs[i].lock);
>                 INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
>                 INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
> -                                       virtio_fs_dummy_dispatch_work);
> +                                 virtio_fs_request_dispatch_work);
>                 INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
> +               INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
>                 snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
>                          "requests.%u", i - VQ_REQUEST);
>                 callbacks[i] = virtio_fs_vq_done;
> @@ -918,6 +940,7 @@ __releases(fiq->lock)
>         struct fuse_conn *fc;
>         struct fuse_req *req;
>         struct fuse_pqueue *fpq;
> +       struct virtio_fs_vq *fsvq;
>         int ret;
>
>         WARN_ON(list_empty(&fiq->pending));
> @@ -951,7 +974,8 @@ __releases(fiq->lock)
>         smp_mb__after_atomic();
>
>  retry:
> -       ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
> +       fsvq = &fs->vqs[queue_id];
> +       ret = virtio_fs_enqueue_req(fsvq, req);
>         if (ret < 0) {
>                 if (ret == -ENOMEM || ret == -ENOSPC) {
>                         /* Virtqueue full. Retry submission */
> @@ -965,7 +989,13 @@ __releases(fiq->lock)
>                 clear_bit(FR_SENT, &req->flags);
>                 list_del_init(&req->list);
>                 spin_unlock(&fpq->lock);
> -               fuse_request_end(fc, req);
> +
> +               /* Can't end request in submission context. Use a worker */
> +               spin_lock(&fsvq->lock);
> +               list_add_tail(&req->list, &fsvq->end_reqs);
> +               schedule_delayed_work(&fsvq->dispatch_work,
> +                                     msecs_to_jiffies(1));

What's the reason to delay by one msec?  If this is purely for
deadlock avoidance, then a zero delay would work better, no?

Thanks,
Miklos
Vivek Goyal Oct. 21, 2019, 11:52 a.m. UTC | #2
On Mon, Oct 21, 2019 at 10:03:39AM +0200, Miklos Szeredi wrote:

[..]
> >  static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
> > @@ -502,6 +522,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
> >         names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
> >         INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
> >         INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
> > +       INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
> >         INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
> >                         virtio_fs_hiprio_dispatch_work);
> >         spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
> > @@ -511,8 +532,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
> >                 spin_lock_init(&fs->vqs[i].lock);
> >                 INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
> >                 INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
> > -                                       virtio_fs_dummy_dispatch_work);
> > +                                 virtio_fs_request_dispatch_work);
> >                 INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
> > +               INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
> >                 snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
> >                          "requests.%u", i - VQ_REQUEST);
> >                 callbacks[i] = virtio_fs_vq_done;
> > @@ -918,6 +940,7 @@ __releases(fiq->lock)
> >         struct fuse_conn *fc;
> >         struct fuse_req *req;
> >         struct fuse_pqueue *fpq;
> > +       struct virtio_fs_vq *fsvq;
> >         int ret;
> >
> >         WARN_ON(list_empty(&fiq->pending));
> > @@ -951,7 +974,8 @@ __releases(fiq->lock)
> >         smp_mb__after_atomic();
> >
> >  retry:
> > -       ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
> > +       fsvq = &fs->vqs[queue_id];
> > +       ret = virtio_fs_enqueue_req(fsvq, req);
> >         if (ret < 0) {
> >                 if (ret == -ENOMEM || ret == -ENOSPC) {
> >                         /* Virtqueue full. Retry submission */
> > @@ -965,7 +989,13 @@ __releases(fiq->lock)
> >                 clear_bit(FR_SENT, &req->flags);
> >                 list_del_init(&req->list);
> >                 spin_unlock(&fpq->lock);
> > -               fuse_request_end(fc, req);
> > +
> > +               /* Can't end request in submission context. Use a worker */
> > +               spin_lock(&fsvq->lock);
> > +               list_add_tail(&req->list, &fsvq->end_reqs);
> > +               schedule_delayed_work(&fsvq->dispatch_work,
> > +                                     msecs_to_jiffies(1));
> 
> What's the reason to delay by one msec?  If this is purely for
> deadlock avoidance, then a zero delay would work better, no?

Hi Miklos,

I have no good reason to do that. Will change it to zero delay.

Thanks
Vivek
Miklos Szeredi Oct. 21, 2019, 1:58 p.m. UTC | #3
On Mon, Oct 21, 2019 at 1:52 PM Vivek Goyal <vgoyal@redhat.com> wrote:
>
> On Mon, Oct 21, 2019 at 10:03:39AM +0200, Miklos Szeredi wrote:
>
> [..]
> > >  static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
> > > @@ -502,6 +522,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
> > >         names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
> > >         INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
> > >         INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
> > > +       INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
> > >         INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
> > >                         virtio_fs_hiprio_dispatch_work);
> > >         spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
> > > @@ -511,8 +532,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
> > >                 spin_lock_init(&fs->vqs[i].lock);
> > >                 INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
> > >                 INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
> > > -                                       virtio_fs_dummy_dispatch_work);
> > > +                                 virtio_fs_request_dispatch_work);
> > >                 INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
> > > +               INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
> > >                 snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
> > >                          "requests.%u", i - VQ_REQUEST);
> > >                 callbacks[i] = virtio_fs_vq_done;
> > > @@ -918,6 +940,7 @@ __releases(fiq->lock)
> > >         struct fuse_conn *fc;
> > >         struct fuse_req *req;
> > >         struct fuse_pqueue *fpq;
> > > +       struct virtio_fs_vq *fsvq;
> > >         int ret;
> > >
> > >         WARN_ON(list_empty(&fiq->pending));
> > > @@ -951,7 +974,8 @@ __releases(fiq->lock)
> > >         smp_mb__after_atomic();
> > >
> > >  retry:
> > > -       ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
> > > +       fsvq = &fs->vqs[queue_id];
> > > +       ret = virtio_fs_enqueue_req(fsvq, req);
> > >         if (ret < 0) {
> > >                 if (ret == -ENOMEM || ret == -ENOSPC) {
> > >                         /* Virtqueue full. Retry submission */
> > > @@ -965,7 +989,13 @@ __releases(fiq->lock)
> > >                 clear_bit(FR_SENT, &req->flags);
> > >                 list_del_init(&req->list);
> > >                 spin_unlock(&fpq->lock);
> > > -               fuse_request_end(fc, req);
> > > +
> > > +               /* Can't end request in submission context. Use a worker */
> > > +               spin_lock(&fsvq->lock);
> > > +               list_add_tail(&req->list, &fsvq->end_reqs);
> > > +               schedule_delayed_work(&fsvq->dispatch_work,
> > > +                                     msecs_to_jiffies(1));
> >
> > What's the reason to delay by one msec?  If this is purely for
> > deadlock avoidance, then a zero delay would work better, no?
>
> Hi Miklos,
>
> I have no good reason to do that. Will change it to zero delay.

Okay, fixed and pushed out to fuse.git#for-next.

Thanks,
Miklos
diff mbox series

Patch

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 6af3f131e468..24ac6f8bf3f7 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -30,6 +30,7 @@  struct virtio_fs_vq {
 	struct virtqueue *vq;     /* protected by ->lock */
 	struct work_struct done_work;
 	struct list_head queued_reqs;
+	struct list_head end_reqs;	/* End these requests */
 	struct delayed_work dispatch_work;
 	struct fuse_dev *fud;
 	bool connected;
@@ -259,8 +260,27 @@  static void virtio_fs_hiprio_done_work(struct work_struct *work)
 	spin_unlock(&fsvq->lock);
 }
 
-static void virtio_fs_dummy_dispatch_work(struct work_struct *work)
+static void virtio_fs_request_dispatch_work(struct work_struct *work)
 {
+	struct fuse_req *req;
+	struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+						 dispatch_work.work);
+	struct fuse_conn *fc = fsvq->fud->fc;
+
+	pr_debug("virtio-fs: worker %s called.\n", __func__);
+	while (1) {
+		spin_lock(&fsvq->lock);
+		req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
+					       list);
+		if (!req) {
+			spin_unlock(&fsvq->lock);
+			return;
+		}
+
+		list_del_init(&req->list);
+		spin_unlock(&fsvq->lock);
+		fuse_request_end(fc, req);
+	}
 }
 
 static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
@@ -502,6 +522,7 @@  static int virtio_fs_setup_vqs(struct virtio_device *vdev,
 	names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
 	INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
 	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
+	INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
 	INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
 			virtio_fs_hiprio_dispatch_work);
 	spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
@@ -511,8 +532,9 @@  static int virtio_fs_setup_vqs(struct virtio_device *vdev,
 		spin_lock_init(&fs->vqs[i].lock);
 		INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
 		INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
-					virtio_fs_dummy_dispatch_work);
+				  virtio_fs_request_dispatch_work);
 		INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
+		INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
 		snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
 			 "requests.%u", i - VQ_REQUEST);
 		callbacks[i] = virtio_fs_vq_done;
@@ -918,6 +940,7 @@  __releases(fiq->lock)
 	struct fuse_conn *fc;
 	struct fuse_req *req;
 	struct fuse_pqueue *fpq;
+	struct virtio_fs_vq *fsvq;
 	int ret;
 
 	WARN_ON(list_empty(&fiq->pending));
@@ -951,7 +974,8 @@  __releases(fiq->lock)
 	smp_mb__after_atomic();
 
 retry:
-	ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
+	fsvq = &fs->vqs[queue_id];
+	ret = virtio_fs_enqueue_req(fsvq, req);
 	if (ret < 0) {
 		if (ret == -ENOMEM || ret == -ENOSPC) {
 			/* Virtqueue full. Retry submission */
@@ -965,7 +989,13 @@  __releases(fiq->lock)
 		clear_bit(FR_SENT, &req->flags);
 		list_del_init(&req->list);
 		spin_unlock(&fpq->lock);
-		fuse_request_end(fc, req);
+
+		/* Can't end request in submission context. Use a worker */
+		spin_lock(&fsvq->lock);
+		list_add_tail(&req->list, &fsvq->end_reqs);
+		schedule_delayed_work(&fsvq->dispatch_work,
+				      msecs_to_jiffies(1));
+		spin_unlock(&fsvq->lock);
 		return;
 	}
 }