[RFC,5/8] vhost: allow userspace to bind vqs to CPUs

Message ID	1607068593-16932-6-git-send-email-michael.christie@oracle.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <target-devel-owner@kernel.org> From: Mike Christie <michael.christie@oracle.com> To: sgarzare@redhat.com, stefanha@redhat.com, linux-scsi@vger.kernel.org, target-devel@vger.kernel.org, mst@redhat.com, jasowang@redhat.com, pbonzini@redhat.com, virtualization@lists.linux-foundation.org Subject: [RFC PATCH 5/8] vhost: allow userspace to bind vqs to CPUs Date: Fri, 4 Dec 2020 01:56:30 -0600 Message-Id: <1607068593-16932-6-git-send-email-michael.christie@oracle.com> In-Reply-To: <1607068593-16932-1-git-send-email-michael.christie@oracle.com> References: <1607068593-16932-1-git-send-email-michael.christie@oracle.com> Precedence: bulk
Series	vhost: allow userspace to control vq cpu affinity \| expand [RFC,0/8] vhost: allow userspace to control vq cpu affinity [RFC,1/8] vhost: remove work arg from vhost_work_flush [RFC,2/8] vhost-scsi: remove extra flushes [RFC,3/8] vhost poll: fix coding style [RFC,4/8] vhost: move msg_handler to new ops struct [RFC,5/8] vhost: allow userspace to bind vqs to CPUs [RFC,6/8] vhost-scsi: make SCSI cmd completion per vq [RFC,7/8] vhost, vhost-scsi: flush IO vqs then send TMF rsp [RFC,8/8] vhost-scsi: hook vhost-scsi into vring set cpu support

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 531a00d..6a27fe6 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1330,8 +1330,10 @@ static int vhost_net_open(struct inode *inode, struct file *f) VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true, NULL); - vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); - vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev); + vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev, + vqs[VHOST_NET_VQ_TX]); + vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev, + vqs[VHOST_NET_VQ_RX]); f->private_data = n; n->page_frag.page = NULL; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ee2551c..f425d0f 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -187,13 +187,15 @@ void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) /* Init poll structure */ void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, - __poll_t mask, struct vhost_dev *dev) + __poll_t mask, struct vhost_dev *dev, + struct vhost_virtqueue *vq) { init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); init_poll_funcptr(&poll->table, vhost_poll_func); poll->mask = mask; poll->dev = dev; poll->wqh = NULL; + poll->vq = vq; vhost_work_init(&poll->work, fn); } @@ -242,6 +244,9 @@ void vhost_work_dev_flush(struct vhost_dev *dev) vhost_work_queue(dev, &flush.work); wait_for_completion(&flush.wait_event); } + + if (dev->wq) + flush_workqueue(dev->wq); } EXPORT_SYMBOL_GPL(vhost_work_dev_flush); @@ -253,7 +258,46 @@ void vhost_poll_flush(struct vhost_poll *poll) } EXPORT_SYMBOL_GPL(vhost_poll_flush); -void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) +static bool vhost_run_work_list(struct vhost_dev *dev, + struct llist_head *work_list) +{ + struct vhost_work *work, *work_next; + struct llist_node *node; + + node = llist_del_all(work_list); + if (!node) + return false; + + node = llist_reverse_order(node); + /* make sure flag is seen after deletion */ + smp_wmb(); + llist_for_each_entry_safe(work, work_next, node, node) { + clear_bit(VHOST_WORK_QUEUED, &work->flags); + __set_current_state(TASK_RUNNING); + kcov_remote_start_common(dev->kcov_handle); + work->fn(work); + kcov_remote_stop(); + if (need_resched()) + schedule(); + } + + return true; +} + +static void vhost_vq_work(struct work_struct *work) +{ + struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, + work); + struct vhost_dev *dev = vq->dev; + + kthread_use_mm(dev->mm); + vhost_run_work_list(dev, &vq->work_list); + kthread_unuse_mm(dev->mm); +} + +static void __vhost_work_queue(struct vhost_dev *dev, + struct vhost_virtqueue *vq, + struct vhost_work *work) { if (!dev->worker) return; @@ -263,12 +307,28 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) * sure it was not in the list. * test_and_set_bit() implies a memory barrier. */ - llist_add(&work->node, &dev->work_list); - wake_up_process(dev->worker); + if (!vq || vq->cpu == -1) { + llist_add(&work->node, &dev->work_list); + wake_up_process(dev->worker); + } else { + llist_add(&work->node, &vq->work_list); + queue_work_on(vq->cpu, dev->wq, &vq->work); + } } } + +void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) +{ + __vhost_work_queue(dev, NULL, work); +} EXPORT_SYMBOL_GPL(vhost_work_queue); +void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work) +{ + __vhost_work_queue(vq->dev, vq, work); +} +EXPORT_SYMBOL_GPL(vhost_vq_work_queue); + /* A lockless hint for busy polling code to exit the loop */ bool vhost_has_work(struct vhost_dev *dev) { @@ -278,7 +338,7 @@ bool vhost_has_work(struct vhost_dev *dev) void vhost_poll_queue(struct vhost_poll *poll) { - vhost_work_queue(poll->dev, &poll->work); + vhost_vq_work_queue(poll->vq, &poll->work); } EXPORT_SYMBOL_GPL(vhost_poll_queue); @@ -344,8 +404,6 @@ static void vhost_vq_reset(struct vhost_dev *dev, static int vhost_worker(void *data) { struct vhost_dev *dev = data; - struct vhost_work *work, *work_next; - struct llist_node *node; kthread_use_mm(dev->mm); @@ -358,22 +416,8 @@ static int vhost_worker(void *data) break; } - node = llist_del_all(&dev->work_list); - if (!node) + if (!vhost_run_work_list(dev, &dev->work_list)) schedule(); - - node = llist_reverse_order(node); - /* make sure flag is seen after deletion */ - smp_wmb(); - llist_for_each_entry_safe(work, work_next, node, node) { - clear_bit(VHOST_WORK_QUEUED, &work->flags); - __set_current_state(TASK_RUNNING); - kcov_remote_start_common(dev->kcov_handle); - work->fn(work); - kcov_remote_stop(); - if (need_resched()) - schedule(); - } } kthread_unuse_mm(dev->mm); return 0; @@ -480,6 +524,7 @@ void vhost_dev_init(struct vhost_dev *dev, dev->iotlb = NULL; dev->mm = NULL; dev->worker = NULL; + dev->wq = NULL; dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; @@ -497,12 +542,15 @@ void vhost_dev_init(struct vhost_dev *dev, vq->log = NULL; vq->indirect = NULL; vq->heads = NULL; + vq->cpu = -1; vq->dev = dev; + init_llist_head(&vq->work_list); + INIT_WORK(&vq->work, vhost_vq_work); mutex_init(&vq->mutex); vhost_vq_reset(dev, vq); if (vq->handle_kick) vhost_poll_init(&vq->poll, vq->handle_kick, - EPOLLIN, dev); + EPOLLIN, dev, vq); } } EXPORT_SYMBOL_GPL(vhost_dev_init); @@ -1572,6 +1620,39 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d, return r; } + +static long vhost_vring_set_cpu(struct vhost_dev *d, struct vhost_virtqueue *vq, + void __user *argp) +{ + struct vhost_vring_state s; + int ret = 0; + + if (vq->private_data) + return -EBUSY; + + if (copy_from_user(&s, argp, sizeof s)) + return -EFAULT; + + if (s.num == -1) { + vq->cpu = s.num; + return 0; + } + + if (s.num >= nr_cpu_ids) + return -EINVAL; + + if (!d->ops || !d->ops->get_workqueue) + return -EINVAL; + + if (!d->wq) + d->wq = d->ops->get_workqueue(); + if (!d->wq) + return -EINVAL; + + vq->cpu = s.num; + return ret; +} + long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) { struct file *eventfp, *filep = NULL; @@ -1601,6 +1682,9 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg mutex_lock(&vq->mutex); switch (ioctl) { + case VHOST_SET_VRING_CPU: + r = vhost_vring_set_cpu(d, vq, argp); + break; case VHOST_SET_VRING_BASE: /* Moving base with an active backend? * You don't want to do that. */ diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 64fa638..28ff4a2 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -26,7 +26,6 @@ struct vhost_work { }; /* Poll a file (eventfd or socket) */ -/* Note: there's nothing vhost specific about this structure. */ struct vhost_poll { poll_table table; wait_queue_head_t *wqh; @@ -34,14 +33,17 @@ struct vhost_poll { struct vhost_work work; __poll_t mask; struct vhost_dev *dev; + struct vhost_virtqueue *vq; }; void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); bool vhost_has_work(struct vhost_dev *dev); +void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work); void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, - __poll_t mask, struct vhost_dev *dev); + __poll_t mask, struct vhost_dev *dev, + struct vhost_virtqueue *vq); int vhost_poll_start(struct vhost_poll *poll, struct file *file); void vhost_poll_stop(struct vhost_poll *poll); void vhost_poll_flush(struct vhost_poll *poll); @@ -82,6 +84,9 @@ struct vhost_virtqueue { struct eventfd_ctx *error_ctx; struct eventfd_ctx *log_ctx; + unsigned int cpu; + struct work_struct work; + struct llist_head work_list; struct vhost_poll poll; /* The routine to call when the Guest pings us, or timeout. */ @@ -145,6 +150,11 @@ struct vhost_msg_node { struct vhost_dev_ops { int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg); + /* + * If the driver supports the VHOST_SET_VRING_CPU ioctl this must + * return the workqueue to use. + */ + struct workqueue_struct *(*get_workqueue)(void); }; struct vhost_dev { @@ -166,6 +176,7 @@ struct vhost_dev { int byte_weight; u64 kcov_handle; bool use_worker; + struct workqueue_struct *wq; struct vhost_dev_ops *ops; }; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index c998860..78b31d8 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -70,6 +70,11 @@ #define VHOST_VRING_BIG_ENDIAN 1 #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) +/* Bind a vring to a CPU. vhost_vring_state.num is -1 then the default worker + * and it's cgroup will be used. If vhost_vring_state.num != -1, the vring will + * be bound to the CPU in vhost_vring_state.num. + */ +#define VHOST_SET_VRING_CPU _IOW(VHOST_VIRTIO, 0x15, struct vhost_vring_state) /* The following ioctls use eventfd file descriptors to signal and poll * for events. */

[RFC,5/8] vhost: allow userspace to bind vqs to CPUs

Commit Message

Comments

Patch