@@ -178,6 +178,8 @@ static int img_name_parser(const struct option *opt, const char *arg, int unset)
disk_image[image_count].readonly = true;
else if (strncmp(sep + 1, "direct", 6) == 0)
disk_image[image_count].direct = true;
+ else if (strncmp(sep + 1, "vhost", 5) == 0)
+ disk_image[image_count].use_vhost = true;
*sep = 0;
cur = sep + 1;
}
@@ -149,6 +149,8 @@ struct disk_image **disk_image__open_all(struct disk_image_params *params, int c
err = disks[i];
goto error;
}
+ if (params[i].use_vhost)
+ disks[i]->use_vhost = true;
}
return disks;
@@ -41,6 +41,7 @@ struct disk_image_operations {
struct disk_image_params {
const char *filename;
+ bool use_vhost;
bool readonly;
bool direct;
};
@@ -57,6 +58,7 @@ struct disk_image {
#ifdef CONFIG_HAS_AIO
io_context_t ctx;
#endif
+ bool use_vhost;
};
struct disk_image *disk_image__open(const char *filename, bool readonly, bool direct);
@@ -12,6 +12,7 @@
#include "kvm/virtio-pci.h"
#include "kvm/virtio.h"
+#include <linux/vhost.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_blk.h>
#include <linux/kernel.h>
@@ -19,6 +20,8 @@
#include <linux/types.h>
#include <pthread.h>
+/* TODO: We can remove this after VHOST_BLK_SET_BACKEND goes in linux/vhost.h */
+#define VHOST_BLK_SET_BACKEND _IOW(VHOST_VIRTIO, 0x40, struct vhost_vring_file)
#define VIRTIO_BLK_MAX_DEV 4
/*
@@ -50,6 +53,8 @@ struct blk_dev {
struct virt_queue vqs[NUM_VIRT_QUEUES];
struct blk_dev_req reqs[VIRTIO_BLK_QUEUE_SIZE];
+ int vhost_fd;
+
pthread_t io_thread;
int io_efd;
@@ -166,9 +171,12 @@ static void set_guest_features(struct kvm *kvm, void *dev, u32 features)
static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
{
+ struct vhost_vring_state state = { .index = vq };
+ struct vhost_vring_addr addr;
struct blk_dev *bdev = dev;
struct virt_queue *queue;
void *p;
+ int r;
compat__remove_message(compat_id);
@@ -178,9 +186,83 @@ static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
vring_init(&queue->vring, VIRTIO_BLK_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN);
+ if (bdev->vhost_fd == 0)
+ return 0;
+
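+	/* Propagate the ring size, starting index and vring addresses to the vhost-blk backend. */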
+ state.num = queue->vring.num;
+ r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_NUM, &state);
+ if (r < 0)
+ die_perror("VHOST_SET_VRING_NUM failed");
+ state.num = 0;
+ r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_BASE, &state);
+ if (r < 0)
+ die_perror("VHOST_SET_VRING_BASE failed");
+
+ addr = (struct vhost_vring_addr) {
+ .index = vq,
+ .desc_user_addr = (u64)(unsigned long)queue->vring.desc,
+ .avail_user_addr = (u64)(unsigned long)queue->vring.avail,
+ .used_user_addr = (u64)(unsigned long)queue->vring.used,
+ };
+
+ r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
+ if (r < 0)
+ die_perror("VHOST_SET_VRING_ADDR failed");
+
return 0;
}
+static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
+{
+ struct vhost_vring_file file;
+ struct blk_dev *bdev = dev;
+ struct kvm_irqfd irq;
+ int r;
+
+ if (bdev->vhost_fd == 0)
+ return;
+
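+	/* Create an eventfd, bind it to the guest's GSI via KVM_IRQFD and hand it to vhost as the call fd, so completion interrupts are injected directly from the kernel. */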
+ irq = (struct kvm_irqfd) {
+ .gsi = gsi,
+ .fd = eventfd(0, 0),
+ };
+ file = (struct vhost_vring_file) {
+ .index = vq,
+ .fd = irq.fd,
+ };
+
+ r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
+ if (r < 0)
+ die_perror("KVM_IRQFD failed");
+
+ r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_CALL, &file);
+ if (r < 0)
+ die_perror("VHOST_SET_VRING_CALL failed");
+
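+	/* Attach the disk image file descriptor as the backend of this vhost-blk queue. */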
+ file.fd = bdev->disk->fd;
+ r = ioctl(bdev->vhost_fd, VHOST_BLK_SET_BACKEND, &file);
+ if (r != 0)
+ die("VHOST_BLK_SET_BACKEND failed %d", errno);
+
+}
+
+static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
+{
+ struct blk_dev *bdev = dev;
+ struct vhost_vring_file file = {
+ .index = vq,
+ .fd = efd,
+ };
+ int r;
+
+ if (bdev->vhost_fd == 0)
+ return;
+
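+	/* Pass the queue's kick eventfd to vhost so guest notifications are handled in the kernel instead of the userspace I/O thread. */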
+ r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_KICK, &file);
+ if (r < 0)
+ die_perror("VHOST_SET_VRING_KICK failed");
+}
+
static void *virtio_blk_thread(void *dev)
{
struct blk_dev *bdev = dev;
@@ -230,12 +312,56 @@ static struct virtio_ops blk_dev_virtio_ops = (struct virtio_ops) {
.get_host_features = get_host_features,
.set_guest_features = set_guest_features,
.init_vq = init_vq,
- .notify_vq = notify_vq,
.get_pfn_vq = get_pfn_vq,
.get_size_vq = get_size_vq,
.set_size_vq = set_size_vq,
+ .notify_vq = notify_vq,
+ .notify_vq_gsi = notify_vq_gsi,
+ .notify_vq_eventfd = notify_vq_eventfd,
};
+static void virtio_blk_vhost_init(struct kvm *kvm, struct blk_dev *bdev)
+{
+ u64 features;
+ struct vhost_memory *mem;
+ int r;
+
+ bdev->vhost_fd = open("/dev/vhost-blk", O_RDWR);
+ if (bdev->vhost_fd < 0)
+ die_perror("Failed openning vhost-blk device");
+
+ mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region));
+ if (mem == NULL)
+ die("Failed allocating memory for vhost memory map");
+
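+	/* Describe all of guest RAM as a single region so vhost-blk can translate guest physical addresses to userspace addresses. */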
+ mem->nregions = 1;
+ mem->regions[0] = (struct vhost_memory_region) {
+ .guest_phys_addr = 0,
+ .memory_size = kvm->ram_size,
+ .userspace_addr = (unsigned long)kvm->ram_start,
+ };
+
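+	/* Take ownership of the vhost device, acknowledge its supported features and install the guest memory map. */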
+ r = ioctl(bdev->vhost_fd, VHOST_SET_OWNER);
+ if (r != 0)
+ die_perror("VHOST_SET_OWNER failed");
+
+ r = ioctl(bdev->vhost_fd, VHOST_GET_FEATURES, &features);
+ if (r != 0)
+ die_perror("VHOST_GET_FEATURES failed");
+
+ r = ioctl(bdev->vhost_fd, VHOST_SET_FEATURES, &features);
+ if (r != 0)
+ die_perror("VHOST_SET_FEATURES failed");
+ r = ioctl(bdev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
+ if (r != 0)
+ die_perror("VHOST_SET_MEM_TABLE failed");
+
+ bdev->vdev.use_vhost = true;
+
+ free(mem);
+}
+
+
static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk)
{
struct blk_dev *bdev;
@@ -271,7 +397,11 @@ static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk)
disk_image__set_callback(bdev->disk, virtio_blk_complete);
- pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev);
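+	/* With vhost, request handling happens in the kernel; otherwise spawn the userspace I/O thread. */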
+ if (disk->use_vhost)
+ virtio_blk_vhost_init(kvm, bdev);
+ else
+ pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev);
+
if (compat_id == -1)
compat_id = virtio_compat_add_message("virtio-blk", "CONFIG_VIRTIO_BLK");
vhost-blk is an in-kernel virtio-blk device accelerator. It is similar
to vhost-net: virtio-blk request submission and completion are handled
on the host kernel side.

How to use:
-----------------------------
Load the vhost-blk.ko module on the host side and specify the vhost
flag when attaching the disk image:

  $ lkvm run -d disk.img,vhost

Performance evaluation:
-----------------------------
The comparison is between kvm tool with the userspace implementation
and kvm tool with vhost-blk.

1) Fio with libaio ioengine on a Fusion IO device
   With the bio-based IO path, for sequential read/write and random
   read/write:
   IOPS boost          : 8.4%, 15.3%, 10.4%, 14.6%
   Latency improvement : 8.5%, 15.4%, 10.4%, 15.1%

2) Fio with vsync ioengine on a Fusion IO device
   With the bio-based IO path, for sequential read/write and random
   read/write:
   IOPS boost          : 10.5%, 4.8%, 5.2%, 5.6%
   Latency improvement : 11.4%, 5.0%, 5.2%, 5.8%

Signed-off-by: Asias He <asias.hejun@gmail.com>
---
 tools/kvm/builtin-run.c            |   2 +
 tools/kvm/disk/core.c              |   2 +
 tools/kvm/include/kvm/disk-image.h |   2 +
 tools/kvm/virtio/blk.c             | 134 +++++++++++++++++++++++++++++++++++-
 4 files changed, 138 insertions(+), 2 deletions(-)