@@ -493,7 +493,6 @@ void vhost_dev_init(struct vhost_dev *dev,
dev->umem = NULL;
dev->iotlb = NULL;
dev->mm = NULL;
- dev->worker = NULL;
dev->iov_limit = iov_limit;
dev->weight = weight;
dev->byte_weight = byte_weight;
@@ -503,6 +502,7 @@ void vhost_dev_init(struct vhost_dev *dev,
INIT_LIST_HEAD(&dev->read_list);
INIT_LIST_HEAD(&dev->pending_list);
spin_lock_init(&dev->iotlb_lock);
+ idr_init(&dev->worker_idr);
for (i = 0; i < dev->nvqs; ++i) {
@@ -576,31 +576,59 @@ static void vhost_worker_stop(struct vhost_worker *worker)
wait_for_completion(worker->exit_done);
}
-static void vhost_worker_free(struct vhost_dev *dev)
+static void vhost_worker_put(struct vhost_dev *dev, struct vhost_worker *worker)
{
- struct vhost_worker *worker = dev->worker;
-
if (!worker)
return;
- dev->worker = NULL;
+ if (!refcount_dec_and_test(&worker->refcount))
+ return;
+
WARN_ON(!llist_empty(&worker->work_list));
vhost_worker_stop(worker);
+ idr_remove(&dev->worker_idr, worker->id);
kfree(worker);
}
+static void vhost_vq_detach_worker(struct vhost_virtqueue *vq)
+{
+ if (vq->worker)
+ vhost_worker_put(vq->dev, vq->worker);
+ vq->worker = NULL;
+}
+
+static int vhost_workers_idr_iter(int id, void *worker, void *dev)
+{
+ vhost_worker_put(dev, worker);
+ return 0;
+}
+
+static void vhost_workers_free(struct vhost_dev *dev)
+{
+ int i;
+
+ if (!dev->use_worker)
+ return;
+
+ for (i = 0; i < dev->nvqs; i++)
+ vhost_vq_detach_worker(dev->vqs[i]);
+
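+	/* Put each worker's creation reference; this stops and frees them. */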
+ idr_for_each(&dev->worker_idr, vhost_workers_idr_iter, dev);
+}
+
static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
{
struct vhost_worker *worker;
struct task_struct *task;
+ int id;
worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
if (!worker)
return NULL;
- dev->worker = worker;
worker->kcov_handle = kcov_common_handle();
init_llist_head(&worker->work_list);
+ refcount_set(&worker->refcount, 1);
/*
* vhost used to use the kthread API which ignores all signals by
@@ -613,14 +641,88 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
worker->task = task;
kernel_worker_start(task, "vhost-%d", current->pid);
+
+ /* idr accesses are done under the vhost_dev mutex */
+ id = idr_alloc(&dev->worker_idr, worker, 0, INT_MAX, GFP_KERNEL);
+ if (id < 0)
+ goto stop_worker;
+ worker->id = id;
+
return worker;
+stop_worker:
+ vhost_worker_stop(worker);
free_worker:
kfree(worker);
- dev->worker = NULL;
return NULL;
}
+/* Caller must have device mutex */
+static int vhost_vq_attach_worker(struct vhost_virtqueue *vq,
+ struct vhost_vring_worker *info)
+{
+ struct vhost_dev *dev = vq->dev;
+ struct vhost_worker *worker;
+
+ if (!dev->use_worker)
+ return -EINVAL;
+
+ /* We don't support setting a worker on an active vq */
+ if (vq->private_data)
+ return -EBUSY;
+
+ worker = idr_find(&dev->worker_idr, info->worker_id);
+ if (!worker)
+ return -ENODEV;
+
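+	/*
+	 * Take the new worker's reference before detaching the old one, so
+	 * re-attaching the same worker cannot drop its refcount to zero.
+	 */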
+ refcount_inc(&worker->refcount);
+
+ vhost_vq_detach_worker(vq);
+ vq->worker = worker;
+ return 0;
+}
+
+/* Caller must have device mutex */
+static int vhost_new_worker(struct vhost_dev *dev,
+ struct vhost_worker_state *info)
+{
+ struct vhost_worker *worker;
+
+ if (!dev->use_worker)
+ return -EINVAL;
+
+ worker = vhost_worker_create(dev);
+ if (!worker)
+ return -ENOMEM;
+
+ info->worker_id = worker->id;
+ return 0;
+}
+
+/* Caller must have device mutex */
+static int vhost_free_worker(struct vhost_dev *dev,
+ struct vhost_worker_state *info)
+{
+ struct vhost_worker *worker;
+
+ if (!dev->use_worker)
+ return -EINVAL;
+
+ worker = idr_find(&dev->worker_idr, info->worker_id);
+ if (!worker)
+ return -ENODEV;
+
+ /*
+ * We can free the worker if there are no attached vqs and we only
+ * have the refcount from the initial creation.
+ */
+ if (refcount_read(&worker->refcount) != 1)
+ return -EBUSY;
+
+ vhost_worker_put(dev, worker);
+ return 0;
+}
+
/* Caller should have device mutex */
long vhost_dev_set_owner(struct vhost_dev *dev)
{
@@ -640,8 +742,10 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
if (!worker)
goto err_worker;
- for (i = 0; i < dev->nvqs; i++)
+ for (i = 0; i < dev->nvqs; i++) {
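+		/* Each vq takes its own reference on the default worker. */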
+ refcount_inc(&worker->refcount);
dev->vqs[i]->worker = worker;
+ }
}
err = vhost_dev_alloc_iovecs(dev);
@@ -650,7 +754,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
return 0;
err_iovecs:
- vhost_worker_free(dev);
+ vhost_workers_free(dev);
err_worker:
vhost_detach_mm(dev);
err_mm:
@@ -742,7 +846,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
dev->iotlb = NULL;
vhost_clear_msg(dev);
wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
- vhost_worker_free(dev);
+ vhost_workers_free(dev);
vhost_detach_mm(dev);
}
EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -1612,6 +1716,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
struct eventfd_ctx *ctx = NULL;
u32 __user *idxp = argp;
struct vhost_virtqueue *vq;
+ struct vhost_vring_worker w;
struct vhost_vring_state s;
struct vhost_vring_file f;
u32 idx;
@@ -1719,7 +1824,16 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
if (copy_to_user(argp, &s, sizeof(s)))
r = -EFAULT;
break;
- default:
+ case VHOST_ATTACH_VRING_WORKER:
+ if (copy_from_user(&w, argp, sizeof(w))) {
+ r = -EFAULT;
+ break;
+ }
+ r = vhost_vq_attach_worker(vq, &w);
+ if (!r && copy_to_user(argp, &w, sizeof(w)))
+ r = -EFAULT;
+ break;
+	default:
r = -ENOIOCTLCMD;
}
@@ -1772,6 +1886,7 @@ EXPORT_SYMBOL_GPL(vhost_init_device_iotlb);
/* Caller must have device mutex */
long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
{
+ struct vhost_worker_state w;
struct eventfd_ctx *ctx;
u64 p;
long r;
@@ -1832,6 +1947,18 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
if (ctx)
eventfd_ctx_put(ctx);
break;
+ case VHOST_NEW_WORKER:
+ r = vhost_new_worker(d, &w);
+ if (!r && copy_to_user(argp, &w, sizeof(w)))
+ r = -EFAULT;
+ break;
+ case VHOST_FREE_WORKER:
+ if (copy_from_user(&w, argp, sizeof(w))) {
+ r = -EFAULT;
+ break;
+ }
+ r = vhost_free_worker(d, &w);
+ break;
default:
r = -ENOIOCTLCMD;
break;
@@ -35,6 +35,8 @@ struct vhost_worker {
struct llist_head work_list;
u64 kcov_handle;
unsigned long flags;
+ refcount_t refcount;
+ int id;
};
/* Poll a file (eventfd or socket) */
@@ -160,7 +162,6 @@ struct vhost_dev {
struct vhost_virtqueue **vqs;
int nvqs;
struct eventfd_ctx *log_ctx;
- struct vhost_worker *worker;
struct vhost_iotlb *umem;
struct vhost_iotlb *iotlb;
spinlock_t iotlb_lock;
@@ -170,6 +171,7 @@ struct vhost_dev {
int iov_limit;
int weight;
int byte_weight;
+ struct idr worker_idr;
bool use_worker;
int (*msg_handler)(struct vhost_dev *dev,
struct vhost_iotlb_msg *msg);
@@ -45,6 +45,23 @@
#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
/* Specify an eventfd file descriptor to signal on log write. */
#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+/* By default, a device gets one vhost_worker that its virtqueues share. This
+ * command allows the owner of the device to create an additional vhost_worker
+ * for the device. It can later be bound to one or more of its virtqueues
+ * using the VHOST_ATTACH_VRING_WORKER command.
+ *
+ * This must be called after VHOST_SET_OWNER and the caller must be the owner
+ * of the device. The new thread will inherit the caller's cgroups and
+ * namespaces, and will share the caller's memory space. The new thread will
+ * also be counted against the caller's RLIMIT_NPROC value.
+ */
+#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state)
+/* Free a worker created with VHOST_NEW_WORKER if it is not attached to any
+ * virtqueue. If userspace is not able to call this for workers it has
+ * created, the kernel will free all the device's workers when the device is
+ * closed and the last reference to the device has been released.
+ */
+#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state)
/* Ring setup. */
/* Set number of descriptors in ring. This parameter can not
@@ -70,6 +87,11 @@
#define VHOST_VRING_BIG_ENDIAN 1
#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's
+ * virtqueues. This must be done before the virtqueue is active.
+ */
+#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15, \
+				       struct vhost_vring_worker)
/* The following ioctls use eventfd file descriptors to signal and poll
* for events. */
@@ -47,6 +47,21 @@ struct vhost_vring_addr {
__u64 log_guest_addr;
};
+struct vhost_worker_state {
+ /*
+ * For VHOST_NEW_WORKER the kernel will return the new vhost_worker id.
+ * For VHOST_FREE_WORKER this must be set to the id of the vhost_worker
+ * to free.
+ */
+ int worker_id;
+};
+
+struct vhost_vring_worker {
+ unsigned int index;
+ /* The id of the vhost_worker returned from VHOST_NEW_WORKER */
+ int worker_id;
+};
+
/* no alignment requirement */
struct vhost_iotlb_msg {
__u64 iova;
This patch allows userspace to create workers and bind them to vqs. You can
have N workers per dev and also share N workers with M vqs.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/vhost/vhost.c            | 149 ++++++++++++++++++++++++++++---
 drivers/vhost/vhost.h            |   4 +-
 include/uapi/linux/vhost.h       |  22 +++++
 include/uapi/linux/vhost_types.h |  15 ++++
 4 files changed, 178 insertions(+), 12 deletions(-)
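
For review convenience, a rough userspace sketch of the intended flow. This
is not part of the patch; the fd setup, vq index, and error handling are
illustrative assumptions only:

	/* Create a second worker and bind it to vq 1. Assumes dev_fd is a
	 * vhost device fd on which VHOST_SET_OWNER has already been called.
	 */
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/vhost.h>

	static int setup_extra_worker(int dev_fd)
	{
		struct vhost_worker_state state = {};
		struct vhost_vring_worker vring_worker = {};

		/* Ask the kernel to spawn a new vhost_worker; the new
		 * worker's id is returned in state.worker_id.
		 */
		if (ioctl(dev_fd, VHOST_NEW_WORKER, &state) < 0) {
			perror("VHOST_NEW_WORKER");
			return -1;
		}

		/* Bind vq 1 to the new worker before the vq is activated. */
		vring_worker.index = 1;
		vring_worker.worker_id = state.worker_id;
		if (ioctl(dev_fd, VHOST_ATTACH_VRING_WORKER, &vring_worker) < 0) {
			perror("VHOST_ATTACH_VRING_WORKER");
			/* No vq was attached, so the worker still has only
			 * its creation reference and can be freed.
			 */
			ioctl(dev_fd, VHOST_FREE_WORKER, &state);
			return -1;
		}
		return 0;
	}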