Message ID | 20191115205705.2046-5-vgoyal@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | virtiofs: Add a notification queue | expand |
On Fri, Nov 15, 2019 at 03:57:05PM -0500, Vivek Goyal wrote: > As of now we don't support blocking variant of posix locks and daemon returns > -EOPNOTSUPP. Reason being that it can lead to deadlocks. Virtqueue size is > limited and it is possible we fill virtqueue with all the requests of > fcntl(F_SETLKW) and wait for reply. And later a subsequent unlock request > can't make progress because virtqueue is full. And that means F_SETLKW can't > make progress and we are deadlocked. > > Use notification queue to solve this problem. After submitting lock request > device will send a reply asking requester to wait. Once lock is available, > requester will get a notification saying locking is available. That way > we don't keep the request virtqueue busy while we are waiting for lock > and further unlock requests can make progress. > > When we get a reply in response to lock request, we need a way to know if > we need to wait for notification or not. I have overloaded the > fuse_out_header->error field. If the value of ->error is 1, that's a signal > to caller to wait for lock notification.
> > Signed-off-by: Vivek Goyal <vgoyal@redhat.com> > --- > fs/fuse/virtio_fs.c | 78 ++++++++++++++++++++++++++++++++++++++- > include/uapi/linux/fuse.h | 7 ++++ > 2 files changed, 84 insertions(+), 1 deletion(-) > > diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c > index 21d8d9d7d317..8aa9fc996556 100644 > --- a/fs/fuse/virtio_fs.c > +++ b/fs/fuse/virtio_fs.c > @@ -35,6 +35,7 @@ struct virtio_fs_vq { > struct work_struct done_work; > struct list_head queued_reqs; > struct list_head end_reqs; /* End these requests */ > + struct list_head wait_reqs; /* requests waiting for notification */ > struct virtio_fs_notify_node *notify_nodes; > struct list_head notify_reqs; /* List for queuing notify requests */ > struct delayed_work dispatch_work; > @@ -85,7 +86,6 @@ struct virtio_fs_notify_node { > > static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq); > > - > static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) > { > struct virtio_fs *fs = vq->vdev->priv; > @@ -513,13 +513,75 @@ static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq) > return 0; > } > > +static int notify_complete_waiting_req(struct virtio_fs *vfs, > + struct fuse_notify_lock_out *out_args) > +{ > + struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_REQUEST]; > + struct fuse_req *req, *next; > + bool found = false; > + struct fuse_conn *fc = fsvq->fud->fc; > + > + /* Find waiting request with the unique number and end it */ > + spin_lock(&fsvq->lock); > + list_for_each_entry_safe(req, next, &fsvq->wait_reqs, list) { > + if (req->in.h.unique == out_args->id) { > + list_del_init(&req->list); > + clear_bit(FR_SENT, &req->flags); > + /* Transfer error code from notify */ > + req->out.h.error = out_args->error; > + found = true; > + break; > + } > + } > + spin_unlock(&fsvq->lock); > + > + /* > + * TODO: It is possible that some re-ordering happens in notify > + * comes before request is complete. Deal with it. 
> + */ > + if (found) { > + fuse_request_end(fc, req); > + spin_lock(&fsvq->lock); > + dec_in_flight_req(fsvq); > + spin_unlock(&fsvq->lock); > + } else > + pr_debug("virtio-fs: Did not find waiting request with" > + " unique=0x%llx\n", out_args->id); > + > + return 0; > +} > + > +static int virtio_fs_handle_notify(struct virtio_fs *vfs, > + struct virtio_fs_notify *notify) > +{ > + int ret = 0; > + struct fuse_out_header *oh = &notify->out_hdr; > + struct fuse_notify_lock_out *lo; > + > + /* > + * For notifications, oh.unique is 0 and oh->error contains code > + * for which notification as arrived. > + */ > + switch(oh->error) { > + case FUSE_NOTIFY_LOCK: > + lo = (struct fuse_notify_lock_out *) &notify->outarg; > + notify_complete_waiting_req(vfs, lo); > + break; > + default: > + printk("virtio-fs: Unexpected notification %d\n", oh->error); > + } > + return ret; > +} Is this specific to virtio or can it be handled in common code? > + > static void virtio_fs_notify_done_work(struct work_struct *work) > { > struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, > done_work); > struct virtqueue *vq = fsvq->vq; > + struct virtio_fs *vfs = vq->vdev->priv; > LIST_HEAD(reqs); > struct virtio_fs_notify_node *notify, *next; > + struct fuse_out_header *oh; > > spin_lock(&fsvq->lock); > do { > @@ -535,6 +597,10 @@ static void virtio_fs_notify_done_work(struct work_struct *work) > > /* Process notify */ > list_for_each_entry_safe(notify, next, &reqs, list) { > + oh = &notify->notify.out_hdr; > + WARN_ON(oh->unique); > + /* Handle notification */ > + virtio_fs_handle_notify(vfs, &notify->notify); > spin_lock(&fsvq->lock); > dec_in_flight_req(fsvq); > list_del_init(&notify->list); > @@ -656,6 +722,15 @@ static void virtio_fs_requests_done_work(struct work_struct *work) > * TODO verify that server properly follows FUSE protocol > * (oh.uniq, oh.len) > */ > + if (req->out.h.error == 1) { > + /* Wait for notification to complete request */ > + list_del_init(&req->list); > + 
spin_lock(&fsvq->lock); > + list_add_tail(&req->list, &fsvq->wait_reqs); > + spin_unlock(&fsvq->lock); > + continue; > + } > + > args = req->args; > copy_args_from_argbuf(args, req); > > @@ -705,6 +780,7 @@ static int virtio_fs_init_vq(struct virtio_fs *fs, struct virtio_fs_vq *fsvq, > strncpy(fsvq->name, name, VQ_NAME_LEN); > spin_lock_init(&fsvq->lock); > INIT_LIST_HEAD(&fsvq->queued_reqs); > + INIT_LIST_HEAD(&fsvq->wait_reqs); > INIT_LIST_HEAD(&fsvq->end_reqs); > INIT_LIST_HEAD(&fsvq->notify_reqs); > init_completion(&fsvq->in_flight_zero); > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h > index 373cada89815..45f0c4efec8e 100644 > --- a/include/uapi/linux/fuse.h > +++ b/include/uapi/linux/fuse.h > @@ -481,6 +481,7 @@ enum fuse_notify_code { > FUSE_NOTIFY_STORE = 4, > FUSE_NOTIFY_RETRIEVE = 5, > FUSE_NOTIFY_DELETE = 6, > + FUSE_NOTIFY_LOCK = 7, > FUSE_NOTIFY_CODE_MAX, > }; > > @@ -868,6 +869,12 @@ struct fuse_notify_retrieve_in { > uint64_t dummy4; > }; > > +struct fuse_notify_lock_out { > + uint64_t id; Please call this field "unique" or "lock_unique" so it's clear this identifier is the fuse_header_in->unique value of the lock request. > + int32_t error; > + int32_t padding; > +}; > + > /* Device ioctls: */ > #define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t) > > -- > 2.20.1 >
On Thu, Nov 21, 2019 at 05:00:20PM +0000, Stefan Hajnoczi wrote: [..] > > +static int virtio_fs_handle_notify(struct virtio_fs *vfs, > > + struct virtio_fs_notify *notify) > > +{ > > + int ret = 0; > > + struct fuse_out_header *oh = &notify->out_hdr; > > + struct fuse_notify_lock_out *lo; > > + > > + /* > > + * For notifications, oh.unique is 0 and oh->error contains code > > + * for which notification as arrived. > > + */ > > + switch(oh->error) { > > + case FUSE_NOTIFY_LOCK: > > + lo = (struct fuse_notify_lock_out *) &notify->outarg; > > + notify_complete_waiting_req(vfs, lo); > > + break; > > + default: > > + printk("virtio-fs: Unexpected notification %d\n", oh->error); > > + } > > + return ret; > > +} > > Is this specific to virtio or can it be handled in common code? This is not specific to virtio_fs. In principle, regular fuse daemon could implement something similar. Though they might not have to because client can just block without introducing deadlock possibilities. Anyway, I will look into moving this code into fuse common. [..] > > diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h > > index 373cada89815..45f0c4efec8e 100644 > > --- a/include/uapi/linux/fuse.h > > +++ b/include/uapi/linux/fuse.h > > @@ -481,6 +481,7 @@ enum fuse_notify_code { > > FUSE_NOTIFY_STORE = 4, > > FUSE_NOTIFY_RETRIEVE = 5, > > FUSE_NOTIFY_DELETE = 6, > > + FUSE_NOTIFY_LOCK = 7, > > FUSE_NOTIFY_CODE_MAX, > > }; > > > > @@ -868,6 +869,12 @@ struct fuse_notify_retrieve_in { > > uint64_t dummy4; > > }; > > > > +struct fuse_notify_lock_out { > > + uint64_t id; > > Please call this field "unique" or "lock_unique" so it's clear this > identifier is the fuse_header_in->unique value of the lock request. Ok, will do. Vivek
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 21d8d9d7d317..8aa9fc996556 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -35,6 +35,7 @@ struct virtio_fs_vq { struct work_struct done_work; struct list_head queued_reqs; struct list_head end_reqs; /* End these requests */ + struct list_head wait_reqs; /* requests waiting for notification */ struct virtio_fs_notify_node *notify_nodes; struct list_head notify_reqs; /* List for queuing notify requests */ struct delayed_work dispatch_work; @@ -85,7 +86,6 @@ struct virtio_fs_notify_node { static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq); - static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) { struct virtio_fs *fs = vq->vdev->priv; @@ -513,13 +513,75 @@ static int virtio_fs_enqueue_all_notify(struct virtio_fs_vq *fsvq) return 0; } +static int notify_complete_waiting_req(struct virtio_fs *vfs, + struct fuse_notify_lock_out *out_args) +{ + struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_REQUEST]; + struct fuse_req *req, *next; + bool found = false; + struct fuse_conn *fc = fsvq->fud->fc; + + /* Find waiting request with the unique number and end it */ + spin_lock(&fsvq->lock); + list_for_each_entry_safe(req, next, &fsvq->wait_reqs, list) { + if (req->in.h.unique == out_args->id) { + list_del_init(&req->list); + clear_bit(FR_SENT, &req->flags); + /* Transfer error code from notify */ + req->out.h.error = out_args->error; + found = true; + break; + } + } + spin_unlock(&fsvq->lock); + + /* + * TODO: It is possible that some re-ordering happens in notify + * comes before request is complete. Deal with it. 
+ */ + if (found) { + fuse_request_end(fc, req); + spin_lock(&fsvq->lock); + dec_in_flight_req(fsvq); + spin_unlock(&fsvq->lock); + } else + pr_debug("virtio-fs: Did not find waiting request with" + " unique=0x%llx\n", out_args->id); + + return 0; +} + +static int virtio_fs_handle_notify(struct virtio_fs *vfs, + struct virtio_fs_notify *notify) +{ + int ret = 0; + struct fuse_out_header *oh = &notify->out_hdr; + struct fuse_notify_lock_out *lo; + + /* + * For notifications, oh.unique is 0 and oh->error contains code + * for which notification as arrived. + */ + switch(oh->error) { + case FUSE_NOTIFY_LOCK: + lo = (struct fuse_notify_lock_out *) &notify->outarg; + notify_complete_waiting_req(vfs, lo); + break; + default: + printk("virtio-fs: Unexpected notification %d\n", oh->error); + } + return ret; +} + static void virtio_fs_notify_done_work(struct work_struct *work) { struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, done_work); struct virtqueue *vq = fsvq->vq; + struct virtio_fs *vfs = vq->vdev->priv; LIST_HEAD(reqs); struct virtio_fs_notify_node *notify, *next; + struct fuse_out_header *oh; spin_lock(&fsvq->lock); do { @@ -535,6 +597,10 @@ static void virtio_fs_notify_done_work(struct work_struct *work) /* Process notify */ list_for_each_entry_safe(notify, next, &reqs, list) { + oh = &notify->notify.out_hdr; + WARN_ON(oh->unique); + /* Handle notification */ + virtio_fs_handle_notify(vfs, &notify->notify); spin_lock(&fsvq->lock); dec_in_flight_req(fsvq); list_del_init(&notify->list); @@ -656,6 +722,15 @@ static void virtio_fs_requests_done_work(struct work_struct *work) * TODO verify that server properly follows FUSE protocol * (oh.uniq, oh.len) */ + if (req->out.h.error == 1) { + /* Wait for notification to complete request */ + list_del_init(&req->list); + spin_lock(&fsvq->lock); + list_add_tail(&req->list, &fsvq->wait_reqs); + spin_unlock(&fsvq->lock); + continue; + } + args = req->args; copy_args_from_argbuf(args, req); @@ -705,6 +780,7 @@ static int 
virtio_fs_init_vq(struct virtio_fs *fs, struct virtio_fs_vq *fsvq, strncpy(fsvq->name, name, VQ_NAME_LEN); spin_lock_init(&fsvq->lock); INIT_LIST_HEAD(&fsvq->queued_reqs); + INIT_LIST_HEAD(&fsvq->wait_reqs); INIT_LIST_HEAD(&fsvq->end_reqs); INIT_LIST_HEAD(&fsvq->notify_reqs); init_completion(&fsvq->in_flight_zero); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 373cada89815..45f0c4efec8e 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -481,6 +481,7 @@ enum fuse_notify_code { FUSE_NOTIFY_STORE = 4, FUSE_NOTIFY_RETRIEVE = 5, FUSE_NOTIFY_DELETE = 6, + FUSE_NOTIFY_LOCK = 7, FUSE_NOTIFY_CODE_MAX, }; @@ -868,6 +869,12 @@ struct fuse_notify_retrieve_in { uint64_t dummy4; }; +struct fuse_notify_lock_out { + uint64_t id; + int32_t error; + int32_t padding; +}; + /* Device ioctls: */ #define FUSE_DEV_IOC_CLONE _IOR(229, 0, uint32_t)
As of now we don't support blocking variant of posix locks and daemon returns -EOPNOTSUPP. Reason being that it can lead to deadlocks. Virtqueue size is limited and it is possible we fill virtqueue with all the requests of fcntl(F_SETLKW) and wait for reply. And later a subsequent unlock request can't make progress because virtqueue is full. And that means F_SETLKW can't make progress and we are deadlocked. Use notification queue to solve this problem. After submitting lock request device will send a reply asking requester to wait. Once lock is available, requester will get a notification saying locking is available. That way we don't keep the request virtqueue busy while we are waiting for lock and further unlock requests can make progress. When we get a reply in response to lock request, we need a way to know if we need to wait for notification or not. I have overloaded the fuse_out_header->error field. If the value of ->error is 1, that's a signal to caller to wait for lock notification. Signed-off-by: Vivek Goyal <vgoyal@redhat.com> --- fs/fuse/virtio_fs.c | 78 ++++++++++++++++++++++++++++++++++++++- include/uapi/linux/fuse.h | 7 ++++ 2 files changed, 84 insertions(+), 1 deletion(-)