===================================================================
@@ -0,0 +1,310 @@
+/*
+ * virtio-block server in host kernel.
+ * Inspired by vhost-net and shamelessly ripped code from it :)
+ */
+
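+/*
+ * Request flow: a guest kick wakes the vhost poll work; handle_blk()
+ * pulls descriptors off the virtqueue and hands each request to a work
+ * item on a private workqueue, which does the file I/O with the guest's
+ * mm switched in via use_mm()/unuse_mm().
+ */
+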
+#include <linux/compat.h>
+#include <linux/eventfd.h>
+#include <linux/vhost.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_blk.h>
+#include <linux/mmu_context.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
+#include <linux/rcupdate.h>
+#include <linux/file.h>
+
+#include "vhost.h"
+
+#define VHOST_BLK_VQ_MAX 1
+
+#if 0
+#define myprintk(fmt, ...) printk(pr_fmt(fmt), ##__VA_ARGS__)
+#else
+#define myprintk(fmt, ...)
+#endif
+
+struct vhost_blk {
+ struct vhost_dev dev;
+ struct vhost_virtqueue vqs[VHOST_BLK_VQ_MAX];
+ struct vhost_poll poll[VHOST_BLK_VQ_MAX];
+};
+
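+/*
+ * One in-flight request. iov points at the data vectors inside vq->iov;
+ * the guest-visible status byte lives at iov[nvecs]. head is returned
+ * to the used ring when the I/O completes.
+ */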
+struct vhost_blk_io {
+ struct work_struct work;
+ struct vhost_blk *blk;
+ struct file *file;
+ int head;
+ uint32_t type;
+ uint64_t sector;
+ struct iovec *iov;
+ int nvecs;
+};
+
+static struct workqueue_struct *vblk_workqueue;
+
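+/*
+ * Runs in workqueue context: do the file I/O on the guest's behalf,
+ * store the status byte, then push the descriptor to the used ring
+ * and signal the guest.
+ */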
+static void handle_io_work(struct work_struct *work)
+{
+ struct vhost_blk_io *vbio;
+ struct vhost_virtqueue *vq;
+ struct vhost_blk *blk;
+ int i, ret = 0;
+ loff_t pos;
+ uint8_t status = 0;
+
+ vbio = container_of(work, struct vhost_blk_io, work);
+ blk = vbio->blk;
+ vq = &blk->dev.vqs[0];
+ /* virtio-blk sector numbers are in 512-byte units: shift by 9, not 8 */
+ pos = vbio->sector << 9;
+
+ use_mm(blk->dev.mm);
+
+ if (vbio->type & VIRTIO_BLK_T_FLUSH) {
+ ret = vfs_fsync(vbio->file, vbio->file->f_path.dentry, 1);
+ } else if (vbio->type & VIRTIO_BLK_T_OUT) {
+ ret = vfs_writev(vbio->file, vbio->iov, vbio->nvecs, &pos);
+ } else {
+ ret = vfs_readv(vbio->file, vbio->iov, vbio->nvecs, &pos);
+ }
+
+ status = (ret < 0) ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK;
+ /* copy_to_user() returns bytes left uncopied, never a negative errno */
+ if (copy_to_user(vbio->iov[vbio->nvecs].iov_base, &status, sizeof status)) {
+ printk(KERN_ERR "copy to user failed\n");
+ mutex_lock(&vq->mutex);
+ vhost_discard_vq_desc(vq);
+ mutex_unlock(&vq->mutex);
+ unuse_mm(blk->dev.mm);
+ kfree(vbio);
+ return;
+ }
+ mutex_lock(&vq->mutex);
+ vhost_add_used_and_signal(&blk->dev, vq, vbio->head, ret);
+ mutex_unlock(&vq->mutex);
+ unuse_mm(blk->dev.mm);
+ kfree(vbio);
+}
+
+/* round-robin CPU cursor for queue_work_on(); updates race, but the
+ * worst case is two requests landing on the same CPU */
+static int cpu = 0;
+static int handoff_io(struct vhost_blk *blk, int head,
+ uint32_t type, uint64_t sector,
+ struct iovec *iov, int nvecs)
+{
+ struct vhost_virtqueue *vq = &blk->dev.vqs[0];
+ struct vhost_blk_io *vbio;
+
+ vbio = kmalloc(sizeof(struct vhost_blk_io), GFP_KERNEL);
+ if (!vbio)
+ return -ENOMEM;
+
+ INIT_WORK(&vbio->work, handle_io_work);
+ vbio->blk = blk;
+ vbio->file = vq->private_data;
+ vbio->head = head;
+ vbio->type = type;
+ vbio->sector = sector;
+ vbio->iov = iov;
+ vbio->nvecs = nvecs;
+
+ cpu = cpumask_next(cpu, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ cpu = cpumask_first(cpu_online_mask);
+ queue_work_on(cpu, vblk_workqueue, &vbio->work);
+
+ return 0;
+}
+
+
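+/*
+ * Kick handler: drain the avail ring, handing each request off to the
+ * workqueue; completion is signalled later from handle_io_work().
+ */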
+static void handle_blk(struct vhost_blk *blk)
+{
+ struct vhost_virtqueue *vq = &blk->dev.vqs[0];
+ unsigned head, out, in;
+ struct virtio_blk_outhdr hdr;
+ int r, nvecs;
+
+ use_mm(blk->dev.mm);
+ mutex_lock(&vq->mutex);
+
+ vhost_disable_notify(vq);
+
+ for (;;) {
+ head = vhost_get_vq_desc(&blk->dev, vq, vq->iov,
+ ARRAY_SIZE(vq->iov),
+ &out, &in, NULL, NULL);
+ if (head == vq->num) {
+ if (unlikely(vhost_enable_notify(vq))) {
+ vhost_disable_notify(vq);
+ continue;
+ }
+ break;
+ }
+
+ BUG_ON(vq->iov[0].iov_len != sizeof(hdr));
+
+ /* copy_from_user() returns bytes left uncopied, never a negative errno */
+ if (copy_from_user(&hdr, vq->iov[0].iov_base, sizeof hdr)) {
+ printk(KERN_ERR "copy from user failed\n");
+ vhost_discard_vq_desc(vq);
+ break;
+ }
+
+ nvecs = out - 1;
+ if (hdr.type == VIRTIO_BLK_T_IN)
+ nvecs = in - 1;
+
+ BUG_ON(vq->iov[nvecs+1].iov_len != 1);
+ r = handoff_io(blk, head, hdr.type, hdr.sector, &vq->iov[1], nvecs);
+ if (r < 0) {
+ vhost_discard_vq_desc(vq);
+ break;
+ }
+ }
+ mutex_unlock(&vq->mutex);
+ unuse_mm(blk->dev.mm);
+}
+
+static void vhost_blk_flush(struct vhost_blk *n)
+{
+ vhost_poll_flush(n->poll);
+ vhost_poll_flush(&n->dev.vqs[0].poll);
+}
+
+static void handle_blk_kick(struct work_struct *work)
+{
+ struct vhost_virtqueue *vq;
+ struct vhost_blk *blk;
+ vq = container_of(work, struct vhost_virtqueue, poll.work);
+ blk = container_of(vq->dev, struct vhost_blk, dev);
+ handle_blk(blk);
+}
+
+static void handle_rq_blk(struct work_struct *work)
+{
+ struct vhost_blk *blk;
+ blk = container_of(work, struct vhost_blk, poll[0].work);
+ handle_blk(blk);
+}
+
+static int vhost_blk_open(struct inode *inode, struct file *f)
+{
+ struct vhost_blk *n = kmalloc(sizeof *n, GFP_KERNEL);
+ int r;
+ if (!n)
+ return -ENOMEM;
+ n->vqs[0].handle_kick = handle_blk_kick;
+ r = vhost_dev_init(&n->dev, n->vqs, VHOST_BLK_VQ_MAX);
+ if (r < 0) {
+ kfree(n);
+ return r;
+ }
+
+ vhost_poll_init(n->poll, handle_rq_blk, POLLOUT|POLLIN);
+ f->private_data = n;
+ return 0;
+}
+
+static int vhost_blk_release(struct inode *inode, struct file *f)
+{
+ struct vhost_blk *n = f->private_data;
+
+ /* wait for outstanding requests before tearing anything down */
+ vhost_blk_flush(n);
+ flush_workqueue(vblk_workqueue);
+ /* the backend may never have been set */
+ if (n->vqs[0].private_data)
+ fput(n->vqs[0].private_data);
+ vhost_dev_cleanup(&n->dev);
+ kfree(n);
+ return 0;
+}
+
+static long vhost_blk_set_backend(struct vhost_blk *n, unsigned index, int fd)
+{
+ struct file *file, *oldfile;
+ struct vhost_virtqueue *vq;
+
+ if (index >= VHOST_BLK_VQ_MAX)
+ return -ENOBUFS;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ vq = n->vqs + index;
+ mutex_lock(&vq->mutex);
+ oldfile = vq->private_data;
+ rcu_assign_pointer(vq->private_data, file);
+ mutex_unlock(&vq->mutex);
+ /* wait for in-flight I/O against the old backend, then drop it */
+ if (oldfile) {
+ vhost_blk_flush(n);
+ flush_workqueue(vblk_workqueue);
+ fput(oldfile);
+ }
+ return 0;
+}
+
+
+static long vhost_blk_ioctl(struct file *f, unsigned int ioctl,
+ unsigned long arg)
+{
+ struct vhost_blk *n = f->private_data;
+ void __user *argp = (void __user *)arg;
+ struct vhost_vring_file backend;
+ int r;
+
+ switch (ioctl) {
+ /* no blk-specific ioctl yet, so reuse the vhost-net backend ioctl */
+ case VHOST_NET_SET_BACKEND:
+ if (copy_from_user(&backend, argp, sizeof backend))
+ return -EFAULT;
+ return vhost_blk_set_backend(n, backend.index, backend.fd);
+ default:
+ mutex_lock(&n->dev.mutex);
+ r = vhost_dev_ioctl(&n->dev, ioctl, arg);
+ vhost_blk_flush(n);
+ mutex_unlock(&n->dev.mutex);
+ return r;
+ }
+}
+
+static const struct file_operations vhost_blk_fops = {
+ .owner = THIS_MODULE,
+ .release = vhost_blk_release,
+ .open = vhost_blk_open,
+ .unlocked_ioctl = vhost_blk_ioctl,
+};
+
+static struct miscdevice vhost_blk_misc = {
+ .minor = 234,
+ .name = "vhost-blk",
+ .fops = &vhost_blk_fops,
+};
+
+int vhost_blk_init(void)
+{
+ int r = vhost_init();
+ if (r)
+ goto err_init;
+
+ vblk_workqueue = create_workqueue("vblk");
+ if (!vblk_workqueue) {
+ r = -ENOMEM;
+ goto err_vblk;
+ }
+
+ r = misc_register(&vhost_blk_misc);
+ if (r)
+ goto err_reg;
+ return 0;
+err_reg:
+ destroy_workqueue(vblk_workqueue);
+err_vblk:
+ vhost_cleanup();
+err_init:
+ return r;
+}
+module_init(vhost_blk_init);
+
+void vhost_blk_exit(void)
+{
+ misc_deregister(&vhost_blk_misc);
+ destroy_workqueue(vblk_workqueue);
+ vhost_cleanup();
+}
+module_exit(vhost_blk_exit);
+
+MODULE_VERSION("0.0.1");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Host kernel accelerator for virtio blk");
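
For reference, a userspace VMM would attach a disk image to this device
roughly as below. This is a minimal sketch, not part of the patch: the
image path is hypothetical, the usual VHOST_SET_MEM_TABLE and
VHOST_SET_VRING_* setup is elided, and it assumes the VHOST_NET_SET_BACKEND
reuse above plus a /dev/vhost-blk node created for misc minor 234.

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <linux/vhost.h>

	int main(void)
	{
		int vhost = open("/dev/vhost-blk", O_RDWR);
		int img = open("disk.img", O_RDWR);	/* hypothetical image */
		struct vhost_vring_file backend = { .index = 0, .fd = img };

		if (vhost < 0 || img < 0)
			return 1;
		/* take ownership of the device with the caller's mm */
		if (ioctl(vhost, VHOST_SET_OWNER, NULL))
			return 1;
		/* ... VHOST_SET_MEM_TABLE and VHOST_SET_VRING_* setup here ... */
		if (ioctl(vhost, VHOST_NET_SET_BACKEND, &backend))
			return 1;
		printf("backend attached\n");
		return 0;
	}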