From patchwork Thu Jul 12 15:46:00 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Asias He X-Patchwork-Id: 1189671 Return-Path: X-Original-To: patchwork-kvm@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork1.kernel.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by patchwork1.kernel.org (Postfix) with ESMTP id C69FC3FDAE for ; Thu, 12 Jul 2012 15:44:56 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S934070Ab2GLPox (ORCPT ); Thu, 12 Jul 2012 11:44:53 -0400 Received: from mail-yw0-f46.google.com ([209.85.213.46]:48896 "EHLO mail-yw0-f46.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S934057Ab2GLPov (ORCPT ); Thu, 12 Jul 2012 11:44:51 -0400 Received: by yhmm54 with SMTP id m54so2843583yhm.19 for ; Thu, 12 Jul 2012 08:44:51 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=from:to:cc:subject:date:message-id:x-mailer; bh=ynKV2VnbfA4ir6i2FGEGoG6SUrngCAF7YzQpQ4idpkE=; b=sS4r4cjD5U4Mw18KCvfB2PlCTNl8vqWZOUZblYcxyHjnYw2LeRGuF25L4njA7jbHue KNuecMFO82b/lg/tmqgaPni7UEHKOg++lcAuQVr7TrldOiK0IK9rrBoTdoBFJLPNF020 v8L97paDPxp6U4SYxoJV0pu6Gql31BnA4aevWIaiPBdVvg2o6Q6UzkZffYxf7eZQEla2 WKogH/7+TygdiE/+3HyoDuBFDqp4FzhbZ9BE1QH6Dr6A0F+yL7xdM4FbI9shJNSH0I52 tpMc2T2Ljfd0iAgJEheu5/y6Bw6esfm4j30GDLl5F3M08V0Bqf0goQbUPNdCI+Q6WNqC LkLw== Received: by 10.66.78.42 with SMTP id y10mr91115514paw.31.1342107890622; Thu, 12 Jul 2012 08:44:50 -0700 (PDT) Received: from hj.localdomain.com ([58.194.229.69]) by mx.google.com with ESMTPS id ka5sm4071012pbb.37.2012.07.12.08.44.47 (version=TLSv1/SSLv3 cipher=OTHER); Thu, 12 Jul 2012 08:44:50 -0700 (PDT) From: Asias He To: Pekka Enberg Cc: Sasha Levin , Ingo Molnar , Cyrill Gorcunov , kvm@vger.kernel.org, Asias He Subject: [PATCH 1/1] kvm tools: Add vhost-blk support Date: Thu, 12 Jul 2012 23:46:00 +0800 Message-Id: 
<1342107960-28519-1-git-send-email-asias.hejun@gmail.com> X-Mailer: git-send-email 1.7.10.4 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org vhost-blk is an in-kernel virtio-blk device accelerator. vhost-blk is similar to vhost-net. It handles virtio-blk's request and completion on the host kernel side. How to use: ----------------------------- Load the vhost-blk.ko module in host side and specify the vhost flag. $ lkvm run -d disk.img,vhost Performance evaluation: ----------------------------- The comparison is between kvm tool with userspace implementation and kvm tool with vhost-blk. 1) Fio with libaio ioengine on Fusion IO device With bio-based IO path, sequential read/write, random read/write IOPS boost : 8.4%, 15.3%, 10.4%, 14.6% Latency improvement: 8.5%, 15.4%, 10.4%, 15.1% 2) Fio with vsync ioengine on Fusion IO device With bio-based IO path, sequential read/write, random read/write IOPS boost : 10.5%, 4.8%, 5.2%, 5.6% Latency improvement: 11.4%, 5.0%, 5.2%, 5.8% Signed-off-by: Asias He --- tools/kvm/builtin-run.c | 2 + tools/kvm/disk/core.c | 2 + tools/kvm/include/kvm/disk-image.h | 2 + tools/kvm/virtio/blk.c | 134 +++++++++++++++++++++++++++++++++++- 4 files changed, 138 insertions(+), 2 deletions(-) diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c index 8e1627e..0e213bf 100644 --- a/tools/kvm/builtin-run.c +++ b/tools/kvm/builtin-run.c @@ -178,6 +178,8 @@ static int img_name_parser(const struct option *opt, const char *arg, int unset) disk_image[image_count].readonly = true; else if (strncmp(sep + 1, "direct", 6) == 0) disk_image[image_count].direct = true; + else if (strncmp(sep + 1, "vhost", 5) == 0) + disk_image[image_count].use_vhost = true; *sep = 0; cur = sep + 1; } diff --git a/tools/kvm/disk/core.c b/tools/kvm/disk/core.c index 621c940..8aa5091 100644 --- a/tools/kvm/disk/core.c +++ b/tools/kvm/disk/core.c @@ -149,6 +149,8 @@ struct disk_image **disk_image__open_all(struct 
disk_image_params *params, int c err = disks[i]; goto error; } + if (params[i].use_vhost) + disks[i]->use_vhost = true; } return disks; diff --git a/tools/kvm/include/kvm/disk-image.h b/tools/kvm/include/kvm/disk-image.h index 7ae17f8..0a86515 100644 --- a/tools/kvm/include/kvm/disk-image.h +++ b/tools/kvm/include/kvm/disk-image.h @@ -41,6 +41,7 @@ struct disk_image_operations { struct disk_image_params { const char *filename; + bool use_vhost; bool readonly; bool direct; }; @@ -57,6 +58,7 @@ struct disk_image { #ifdef CONFIG_HAS_AIO io_context_t ctx; #endif + bool use_vhost; }; struct disk_image *disk_image__open(const char *filename, bool readonly, bool direct); diff --git a/tools/kvm/virtio/blk.c b/tools/kvm/virtio/blk.c index beebd24..c1e2e18 100644 --- a/tools/kvm/virtio/blk.c +++ b/tools/kvm/virtio/blk.c @@ -12,6 +12,7 @@ #include "kvm/virtio-pci.h" #include "kvm/virtio.h" +#include #include #include #include @@ -19,6 +20,8 @@ #include #include +/* TODO: We can remove this after VHOST_BLK_SET_BACKEND goes in linux/vhost.h */ +#define VHOST_BLK_SET_BACKEND _IOW(VHOST_VIRTIO, 0x40, struct vhost_vring_file) #define VIRTIO_BLK_MAX_DEV 4 /* @@ -50,6 +53,8 @@ struct blk_dev { struct virt_queue vqs[NUM_VIRT_QUEUES]; struct blk_dev_req reqs[VIRTIO_BLK_QUEUE_SIZE]; + int vhost_fd; + pthread_t io_thread; int io_efd; @@ -166,9 +171,12 @@ static void set_guest_features(struct kvm *kvm, void *dev, u32 features) static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn) { + struct vhost_vring_state state = { .index = vq }; + struct vhost_vring_addr addr; struct blk_dev *bdev = dev; struct virt_queue *queue; void *p; + int r; compat__remove_message(compat_id); @@ -178,9 +186,83 @@ static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn) vring_init(&queue->vring, VIRTIO_BLK_QUEUE_SIZE, p, VIRTIO_PCI_VRING_ALIGN); + if (bdev->vhost_fd == 0) + return 0; + + state.num = queue->vring.num; + r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_NUM, &state); + if (r < 0) + 
die_perror("VHOST_SET_VRING_NUM failed"); + state.num = 0; + r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_BASE, &state); + if (r < 0) + die_perror("VHOST_SET_VRING_BASE failed"); + + addr = (struct vhost_vring_addr) { + .index = vq, + .desc_user_addr = (u64)(unsigned long)queue->vring.desc, + .avail_user_addr = (u64)(unsigned long)queue->vring.avail, + .used_user_addr = (u64)(unsigned long)queue->vring.used, + }; + + r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_ADDR, &addr); + if (r < 0) + die_perror("VHOST_SET_VRING_ADDR failed"); + return 0; } +static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi) +{ + struct vhost_vring_file file; + struct blk_dev *bdev = dev; + struct kvm_irqfd irq; + int r; + + if (bdev->vhost_fd == 0) + return; + + irq = (struct kvm_irqfd) { + .gsi = gsi, + .fd = eventfd(0, 0), + }; + file = (struct vhost_vring_file) { + .index = vq, + .fd = irq.fd, + }; + + r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq); + if (r < 0) + die_perror("KVM_IRQFD failed"); + + r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_CALL, &file); + if (r < 0) + die_perror("VHOST_SET_VRING_CALL failed"); + + file.fd = bdev->disk->fd; + r = ioctl(bdev->vhost_fd, VHOST_BLK_SET_BACKEND, &file); + if (r != 0) + die("VHOST_BLK_SET_BACKEND failed %d", errno); + +} + +static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd) +{ + struct blk_dev *bdev = dev; + struct vhost_vring_file file = { + .index = vq, + .fd = efd, + }; + int r; + + if (bdev->vhost_fd == 0) + return; + + r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_KICK, &file); + if (r < 0) + die_perror("VHOST_SET_VRING_KICK failed"); +} + static void *virtio_blk_thread(void *dev) { struct blk_dev *bdev = dev; @@ -230,12 +312,56 @@ static struct virtio_ops blk_dev_virtio_ops = (struct virtio_ops) { .get_host_features = get_host_features, .set_guest_features = set_guest_features, .init_vq = init_vq, - .notify_vq = notify_vq, .get_pfn_vq = get_pfn_vq, .get_size_vq = get_size_vq, .set_size_vq = set_size_vq, + 
.notify_vq = notify_vq, + .notify_vq_gsi = notify_vq_gsi, + .notify_vq_eventfd = notify_vq_eventfd, }; +static void virtio_blk_vhost_init(struct kvm *kvm, struct blk_dev *bdev) +{ + u64 features; + struct vhost_memory *mem; + int r; + + bdev->vhost_fd = open("/dev/vhost-blk", O_RDWR); + if (bdev->vhost_fd < 0) + die_perror("Failed openning vhost-blk device"); + + mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region)); + if (mem == NULL) + die("Failed allocating memory for vhost memory map"); + + mem->nregions = 1; + mem->regions[0] = (struct vhost_memory_region) { + .guest_phys_addr = 0, + .memory_size = kvm->ram_size, + .userspace_addr = (unsigned long)kvm->ram_start, + }; + + r = ioctl(bdev->vhost_fd, VHOST_SET_OWNER); + if (r != 0) + die_perror("VHOST_SET_OWNER failed"); + + r = ioctl(bdev->vhost_fd, VHOST_GET_FEATURES, &features); + if (r != 0) + die_perror("VHOST_GET_FEATURES failed"); + + r = ioctl(bdev->vhost_fd, VHOST_SET_FEATURES, &features); + if (r != 0) + die_perror("VHOST_SET_FEATURES failed"); + r = ioctl(bdev->vhost_fd, VHOST_SET_MEM_TABLE, mem); + if (r != 0) + die_perror("VHOST_SET_MEM_TABLE failed"); + + bdev->vdev.use_vhost = true; + + free(mem); +} + + static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk) { struct blk_dev *bdev; @@ -271,7 +397,11 @@ static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk) disk_image__set_callback(bdev->disk, virtio_blk_complete); - pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev); + if (disk->use_vhost) + virtio_blk_vhost_init(kvm, bdev); + else + pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev); + if (compat_id == -1) compat_id = virtio_compat_add_message("virtio-blk", "CONFIG_VIRTIO_BLK");