From patchwork Thu Jun 11 14:20:11 2009 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Glauber Costa X-Patchwork-Id: 29565 Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n5BEKOd1024700 for ; Thu, 11 Jun 2009 14:20:24 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752030AbZFKOUS (ORCPT ); Thu, 11 Jun 2009 10:20:18 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1751426AbZFKOUS (ORCPT ); Thu, 11 Jun 2009 10:20:18 -0400 Received: from mx2.redhat.com ([66.187.237.31]:47338 "EHLO mx2.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751165AbZFKOUL (ORCPT ); Thu, 11 Jun 2009 10:20:11 -0400 Received: from int-mx2.corp.redhat.com (int-mx2.corp.redhat.com [172.16.27.26]) by mx2.redhat.com (8.13.8/8.13.8) with ESMTP id n5BEKEQm031324 for ; Thu, 11 Jun 2009 10:20:14 -0400 Received: from ns3.rdu.redhat.com (ns3.rdu.redhat.com [10.11.255.199]) by int-mx2.corp.redhat.com (8.13.1/8.13.1) with ESMTP id n5BEKCfE031105; Thu, 11 Jun 2009 10:20:12 -0400 Received: from localhost.localdomain (virtlab1.virt.bos.redhat.com [10.16.72.21]) by ns3.rdu.redhat.com (8.13.8/8.13.8) with ESMTP id n5BEKB8g024856; Thu, 11 Jun 2009 10:20:11 -0400 From: Glauber Costa To: kvm@vger.kernel.org Cc: avi@redhat.com Subject: [PATCH] move libkvm-all.c code to qemu-kvm.c Date: Thu, 11 Jun 2009 10:20:11 -0400 Message-Id: <1244730011-10544-1-git-send-email-glommer@redhat.com> X-Scanned-By: MIMEDefang 2.58 on 172.16.27.26 Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org Ultimately, goal is to put it in kvm-all.c, so we can start sharing things. This is put here first to allow for preparation. It is almost a cut and paste. Only needed adaptation goes with kvm_has_sync_mmu(), which had a conflicting definition. 
Signed-off-by: Glauber Costa --- Makefile.target | 2 +- libkvm-all.c | 1541 ------------------------------------------------------- libkvm-all.h | 2 - qemu-kvm.c | 1515 +++++++++++++++++++++++++++++++++++++++++++++++++++++- qemu-kvm.h | 4 +- 5 files changed, 1510 insertions(+), 1554 deletions(-) delete mode 100644 libkvm-all.c diff --git a/Makefile.target b/Makefile.target index da18f48..dac95aa 100644 --- a/Makefile.target +++ b/Makefile.target @@ -162,7 +162,7 @@ CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc endif ifeq ($(USE_KVM), 1) -LIBOBJS+=qemu-kvm.o libkvm.o libkvm-all.o +LIBOBJS+=qemu-kvm.o libkvm.o endif ifdef CONFIG_SOFTFLOAT LIBOBJS+=fpu/softfloat.o diff --git a/libkvm-all.c b/libkvm-all.c deleted file mode 100644 index 45679fb..0000000 --- a/libkvm-all.c +++ /dev/null @@ -1,1541 +0,0 @@ -/* - * Kernel-based Virtual Machine control library - * - * This library provides an API to control the kvm hardware virtualization - * module. - * - * Copyright (C) 2006 Qumranet - * - * Authors: - * - * Avi Kivity - * Yaniv Kamay - * - * This work is licensed under the GNU LGPL license, version 2. - */ - -#ifndef __user -#define __user /* temporary, until installed via make headers_install */ -#endif - -#include - -#define EXPECTED_KVM_API_VERSION 12 - -#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION -#error libkvm: userspace and kernel version mismatch -#endif - -#include "sysemu.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "libkvm-all.h" - -#include "libkvm.h" - -//#define DEBUG_MEMREG -#ifdef DEBUG_MEMREG -#define DPRINTF(fmt, args...) \ - do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0) -#else -#define DPRINTF(fmt, args...) 
do {} while (0) -#endif - -#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) - -int kvm_abi = EXPECTED_KVM_API_VERSION; -int kvm_page_size; - -static inline void set_gsi(kvm_context_t kvm, unsigned int gsi) -{ - uint32_t *bitmap = kvm->used_gsi_bitmap; - - if (gsi < kvm->max_gsi) - bitmap[gsi / 32] |= 1U << (gsi % 32); - else - DPRINTF("Invalid GSI %d\n"); -} - -static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi) -{ - uint32_t *bitmap = kvm->used_gsi_bitmap; - - if (gsi < kvm->max_gsi) - bitmap[gsi / 32] &= ~(1U << (gsi % 32)); - else - DPRINTF("Invalid GSI %d\n"); -} - -struct slot_info { - unsigned long phys_addr; - unsigned long len; - unsigned long userspace_addr; - unsigned flags; - int logging_count; -}; - -struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS]; - -static void init_slots(void) -{ - int i; - - for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) - slots[i].len = 0; -} - -static int get_free_slot(kvm_context_t kvm) -{ - int i; - int tss_ext; - -#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__) - tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); -#else - tss_ext = 0; -#endif - - /* - * on older kernels where the set tss ioctl is not supprted we must save - * slot 0 to hold the extended memory, as the vmx will use the last 3 - * pages of this slot. 
- */ - if (tss_ext > 0) - i = 0; - else - i = 1; - - for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i) - if (!slots[i].len) - return i; - return -1; -} - -static void register_slot(int slot, unsigned long phys_addr, unsigned long len, - unsigned long userspace_addr, unsigned flags) -{ - slots[slot].phys_addr = phys_addr; - slots[slot].len = len; - slots[slot].userspace_addr = userspace_addr; - slots[slot].flags = flags; -} - -static void free_slot(int slot) -{ - slots[slot].len = 0; - slots[slot].logging_count = 0; -} - -static int get_slot(unsigned long phys_addr) -{ - int i; - - for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) { - if (slots[i].len && slots[i].phys_addr <= phys_addr && - (slots[i].phys_addr + slots[i].len-1) >= phys_addr) - return i; - } - return -1; -} - -/* Returns -1 if this slot is not totally contained on any other, - * and the number of the slot otherwise */ -static int get_container_slot(uint64_t phys_addr, unsigned long size) -{ - int i; - - for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) - if (slots[i].len && slots[i].phys_addr <= phys_addr && - (slots[i].phys_addr + slots[i].len) >= phys_addr + size) - return i; - return -1; -} - -int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size) -{ - int slot = get_container_slot(phys_addr, size); - if (slot == -1) - return 0; - return 1; -} - -/* - * dirty pages logging control - */ -static int kvm_dirty_pages_log_change(kvm_context_t kvm, - unsigned long phys_addr, - unsigned flags, - unsigned mask) -{ - int r = -1; - int slot = get_slot(phys_addr); - - if (slot == -1) { - fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__); - return 1; - } - - flags = (slots[slot].flags & ~mask) | flags; - if (flags == slots[slot].flags) - return 0; - slots[slot].flags = flags; - - { - struct kvm_userspace_memory_region mem = { - .slot = slot, - .memory_size = slots[slot].len, - .guest_phys_addr = slots[slot].phys_addr, - .userspace_addr = slots[slot].userspace_addr, - .flags 
= slots[slot].flags, - }; - - - DPRINTF("slot %d start %llx len %llx flags %x\n", - mem.slot, - mem.guest_phys_addr, - mem.memory_size, - mem.flags); - r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem); - if (r == -1) - fprintf(stderr, "%s: %m\n", __FUNCTION__); - } - return r; -} - -static int kvm_dirty_pages_log_change_all(kvm_context_t kvm, - int (*change)(kvm_context_t kvm, - uint64_t start, - uint64_t len)) -{ - int i, r; - - for (i=r=0; idirty_pages_log_all) - return 0; - kvm->dirty_pages_log_all = 1; - return kvm_dirty_pages_log_change_all(kvm, - kvm_dirty_pages_log_enable_slot); -} - -/** - * Enable dirty page logging only for memory regions that were created with - * dirty logging enabled (disable for all other memory regions). - */ -int kvm_dirty_pages_log_reset(kvm_context_t kvm) -{ - if (!kvm->dirty_pages_log_all) - return 0; - kvm->dirty_pages_log_all = 0; - return kvm_dirty_pages_log_change_all(kvm, - kvm_dirty_pages_log_disable_slot); -} - - -kvm_context_t kvm_init(struct kvm_callbacks *callbacks, - void *opaque) -{ - int fd; - kvm_context_t kvm; - int r, gsi_count; - - fd = open("/dev/kvm", O_RDWR); - if (fd == -1) { - perror("open /dev/kvm"); - return NULL; - } - r = ioctl(fd, KVM_GET_API_VERSION, 0); - if (r == -1) { - fprintf(stderr, "kvm kernel version too old: " - "KVM_GET_API_VERSION ioctl not supported\n"); - goto out_close; - } - if (r < EXPECTED_KVM_API_VERSION) { - fprintf(stderr, "kvm kernel version too old: " - "We expect API version %d or newer, but got " - "version %d\n", - EXPECTED_KVM_API_VERSION, r); - goto out_close; - } - if (r > EXPECTED_KVM_API_VERSION) { - fprintf(stderr, "kvm userspace version too old\n"); - goto out_close; - } - kvm_abi = r; - kvm_page_size = getpagesize(); - kvm = malloc(sizeof(*kvm)); - if (kvm == NULL) - goto out_close; - memset(kvm, 0, sizeof(*kvm)); - kvm->fd = fd; - kvm->vm_fd = -1; - kvm->callbacks = callbacks; - kvm->opaque = opaque; - kvm->dirty_pages_log_all = 0; - kvm->no_irqchip_creation = 
0; - kvm->no_pit_creation = 0; - - gsi_count = kvm_get_gsi_count(kvm); - if (gsi_count > 0) { - int gsi_bits, i; - - /* Round up so we can search ints using ffs */ - gsi_bits = ALIGN(gsi_count, 32); - kvm->used_gsi_bitmap = malloc(gsi_bits / 8); - if (!kvm->used_gsi_bitmap) - goto out_close; - memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8); - kvm->max_gsi = gsi_bits; - - /* Mark any over-allocated bits as already in use */ - for (i = gsi_count; i < gsi_bits; i++) - set_gsi(kvm, i); - } - - return kvm; - out_close: - close(fd); - return NULL; -} - -void kvm_finalize(kvm_context_t kvm) -{ - /* FIXME - if (kvm->vcpu_fd[0] != -1) - close(kvm->vcpu_fd[0]); - if (kvm->vm_fd != -1) - close(kvm->vm_fd); - */ - close(kvm->fd); - free(kvm); -} - -void kvm_disable_irqchip_creation(kvm_context_t kvm) -{ - kvm->no_irqchip_creation = 1; -} - -void kvm_disable_pit_creation(kvm_context_t kvm) -{ - kvm->no_pit_creation = 1; -} - -kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id) -{ - long mmap_size; - int r; - kvm_vcpu_context_t vcpu_ctx = malloc(sizeof(struct kvm_vcpu_context)); - - if (!vcpu_ctx) { - errno = ENOMEM; - return NULL; - } - - vcpu_ctx->kvm = kvm; - vcpu_ctx->id = id; - - r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, id); - if (r == -1) { - fprintf(stderr, "kvm_create_vcpu: %m\n"); - goto err; - } - vcpu_ctx->fd = r; - mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0); - if (mmap_size == -1) { - fprintf(stderr, "get vcpu mmap size: %m\n"); - goto err_fd; - } - vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, - vcpu_ctx->fd, 0); - if (vcpu_ctx->run == MAP_FAILED) { - fprintf(stderr, "mmap vcpu area: %m\n"); - goto err_fd; - } - return vcpu_ctx; -err_fd: - close(vcpu_ctx->fd); -err: - free(vcpu_ctx); - return NULL; -} - -int kvm_create_vm(kvm_context_t kvm) -{ - int fd = kvm->fd; - -#ifdef KVM_CAP_IRQ_ROUTING - kvm->irq_routes = malloc(sizeof(*kvm->irq_routes)); - if (!kvm->irq_routes) - return -ENOMEM; - memset(kvm->irq_routes, 0, 
sizeof(*kvm->irq_routes)); - kvm->nr_allocated_irq_routes = 0; -#endif - - fd = ioctl(fd, KVM_CREATE_VM, 0); - if (fd == -1) { - fprintf(stderr, "kvm_create_vm: %m\n"); - return -1; - } - kvm->vm_fd = fd; - return 0; -} - -static int kvm_create_default_phys_mem(kvm_context_t kvm, - unsigned long phys_mem_bytes, - void **vm_mem) -{ -#ifdef KVM_CAP_USER_MEMORY - int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY); - if (r > 0) - return 0; - fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n"); -#else -#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported -#endif - return -1; -} - -int kvm_check_extension(kvm_context_t kvm, int ext) -{ - int ret; - - ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext); - if (ret > 0) - return ret; - return 0; -} - -void kvm_create_irqchip(kvm_context_t kvm) -{ - int r; - - kvm->irqchip_in_kernel = 0; -#ifdef KVM_CAP_IRQCHIP - if (!kvm->no_irqchip_creation) { - r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP); - if (r > 0) { /* kernel irqchip supported */ - r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); - if (r >= 0) { - kvm->irqchip_inject_ioctl = KVM_IRQ_LINE; -#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS) - r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, - KVM_CAP_IRQ_INJECT_STATUS); - if (r > 0) - kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS; -#endif - kvm->irqchip_in_kernel = 1; - } - else - fprintf(stderr, "Create kernel PIC irqchip failed\n"); - } - } -#endif -} - -int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem) -{ - int r; - - r = kvm_create_vm(kvm); - if (r < 0) - return r; - r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem); - if (r < 0) - return r; - init_slots(); - r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem); - if (r < 0) - return r; - kvm_create_irqchip(kvm); - - return 0; -} - - -void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start, - unsigned long len, int log, int writable) -{ 
- int r; - int prot = PROT_READ; - void *ptr; - struct kvm_userspace_memory_region memory = { - .memory_size = len, - .guest_phys_addr = phys_start, - .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0, - }; - - if (writable) - prot |= PROT_WRITE; - -#if !defined(__s390__) - ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); -#else - ptr = mmap(LIBKVM_S390_ORIGIN, len, prot | PROT_EXEC, - MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0); -#endif - if (ptr == MAP_FAILED) { - fprintf(stderr, "%s: %s", __func__, strerror(errno)); - return 0; - } - - memset(ptr, 0, len); - - memory.userspace_addr = (unsigned long)ptr; - memory.slot = get_free_slot(kvm); - DPRINTF("slot %d start %llx len %llx flags %x\n", - memory.slot, - memory.guest_phys_addr, - memory.memory_size, - memory.flags); - r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); - if (r == -1) { - fprintf(stderr, "%s: %s", __func__, strerror(errno)); - return 0; - } - register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size, - memory.userspace_addr, memory.flags); - - return ptr; -} - -int kvm_register_phys_mem(kvm_context_t kvm, - unsigned long phys_start, void *userspace_addr, - unsigned long len, int log) -{ - - struct kvm_userspace_memory_region memory = { - .memory_size = len, - .guest_phys_addr = phys_start, - .userspace_addr = (unsigned long)(intptr_t)userspace_addr, - .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0, - }; - int r; - - memory.slot = get_free_slot(kvm); - DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n", - memory.guest_phys_addr, memory.memory_size, - memory.userspace_addr, memory.slot, memory.flags); - r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); - if (r == -1) { - fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno)); - return -1; - } - register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size, - memory.userspace_addr, memory.flags); - return 0; -} - - -/* destroy/free a whole slot. 
- * phys_start, len and slot are the params passed to kvm_create_phys_mem() - */ -void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start, - unsigned long len) -{ - int slot; - int r; - struct kvm_userspace_memory_region memory = { - .memory_size = 0, - .guest_phys_addr = phys_start, - .userspace_addr = 0, - .flags = 0, - }; - - slot = get_slot(phys_start); - - if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) { - fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n", - __FUNCTION__, slot); - return; - } - if (phys_start != slots[slot].phys_addr) { - fprintf(stderr, - "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n", - __FUNCTION__, phys_start, slots[slot].phys_addr); - phys_start = slots[slot].phys_addr; - } - - memory.slot = slot; - DPRINTF("slot %d start %llx len %llx flags %x\n", - memory.slot, - memory.guest_phys_addr, - memory.memory_size, - memory.flags); - r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); - if (r == -1) { - fprintf(stderr, "destroy_userspace_phys_mem: %s", - strerror(errno)); - return; - } - - free_slot(memory.slot); -} - -void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size) -{ - - int slot = get_container_slot(phys_addr, size); - - if (slot != -1) { - DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size); - kvm_destroy_phys_mem(kvm, phys_addr, size); - return; - } -} - -static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf) -{ - int r; - struct kvm_dirty_log log = { - .slot = slot, - }; - - log.dirty_bitmap = buf; - - r = ioctl(kvm->vm_fd, ioctl_num, &log); - if (r == -1) - return -errno; - return 0; -} - -int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf) -{ - int slot; - - slot = get_slot(phys_addr); - return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf); -} - -int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr, - unsigned long len, void *buf, void *opaque, - int 
(*cb)(unsigned long start, unsigned long len, - void*bitmap, void *opaque)) -{ - int i; - int r; - unsigned long end_addr = phys_addr + len; - - for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) { - if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr) - && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) { - r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf); - if (r) - return r; - r = cb(slots[i].phys_addr, slots[i].len, buf, opaque); - if (r) - return r; - } - } - return 0; -} - -#ifdef KVM_CAP_IRQCHIP - -int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status) -{ - struct kvm_irq_level event; - int r; - - if (!kvm->irqchip_in_kernel) - return 0; - event.level = level; - event.irq = irq; - r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event); - if (r == -1) - perror("kvm_set_irq_level"); - - if (status) { -#ifdef KVM_CAP_IRQ_INJECT_STATUS - *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? - 1 : event.status; -#else - *status = 1; -#endif - } - - return 1; -} - -int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) -{ - int r; - - if (!kvm->irqchip_in_kernel) - return 0; - r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip); - if (r == -1) { - r = -errno; - perror("kvm_get_irqchip\n"); - } - return r; -} - -int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) -{ - int r; - - if (!kvm->irqchip_in_kernel) - return 0; - r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip); - if (r == -1) { - r = -errno; - perror("kvm_set_irqchip\n"); - } - return r; -} - -#endif - -static int handle_io(kvm_vcpu_context_t vcpu) -{ - struct kvm_run *run = vcpu->run; - kvm_context_t kvm = vcpu->kvm; - uint16_t addr = run->io.port; - int r; - int i; - void *p = (void *)run + run->io.data_offset; - - for (i = 0; i < run->io.count; ++i) { - switch (run->io.direction) { - case KVM_EXIT_IO_IN: - switch (run->io.size) { - case 1: - r = kvm->callbacks->inb(kvm->opaque, addr, p); - break; - case 2: - r = kvm->callbacks->inw(kvm->opaque, addr, 
p); - break; - case 4: - r = kvm->callbacks->inl(kvm->opaque, addr, p); - break; - default: - fprintf(stderr, "bad I/O size %d\n", run->io.size); - return -EMSGSIZE; - } - break; - case KVM_EXIT_IO_OUT: - switch (run->io.size) { - case 1: - r = kvm->callbacks->outb(kvm->opaque, addr, - *(uint8_t *)p); - break; - case 2: - r = kvm->callbacks->outw(kvm->opaque, addr, - *(uint16_t *)p); - break; - case 4: - r = kvm->callbacks->outl(kvm->opaque, addr, - *(uint32_t *)p); - break; - default: - fprintf(stderr, "bad I/O size %d\n", run->io.size); - return -EMSGSIZE; - } - break; - default: - fprintf(stderr, "bad I/O direction %d\n", run->io.direction); - return -EPROTO; - } - - p += run->io.size; - } - - return 0; -} - -int handle_debug(kvm_vcpu_context_t vcpu, void *env) -{ -#ifdef KVM_CAP_SET_GUEST_DEBUG - struct kvm_run *run = vcpu->run; - kvm_context_t kvm = vcpu->kvm; - - return kvm->callbacks->debug(kvm->opaque, env, &run->debug.arch); -#else - return 0; -#endif -} - -int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs) -{ - return ioctl(vcpu->fd, KVM_GET_REGS, regs); -} - -int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs) -{ - return ioctl(vcpu->fd, KVM_SET_REGS, regs); -} - -int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu) -{ - return ioctl(vcpu->fd, KVM_GET_FPU, fpu); -} - -int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu) -{ - return ioctl(vcpu->fd, KVM_SET_FPU, fpu); -} - -int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs) -{ - return ioctl(vcpu->fd, KVM_GET_SREGS, sregs); -} - -int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs) -{ - return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); -} - -#ifdef KVM_CAP_MP_STATE -int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state) -{ - int r; - - r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE); - if (r > 0) - return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state); - return -ENOSYS; -} - -int 
kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state) -{ - int r; - - r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE); - if (r > 0) - return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state); - return -ENOSYS; -} -#endif - -static int handle_mmio(kvm_vcpu_context_t vcpu) -{ - unsigned long addr = vcpu->run->mmio.phys_addr; - kvm_context_t kvm = vcpu->kvm; - struct kvm_run *kvm_run = vcpu->run; - void *data = kvm_run->mmio.data; - - /* hack: Red Hat 7.1 generates these weird accesses. */ - if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3) - return 0; - - if (kvm_run->mmio.is_write) - return kvm->callbacks->mmio_write(kvm->opaque, addr, data, - kvm_run->mmio.len); - else - return kvm->callbacks->mmio_read(kvm->opaque, addr, data, - kvm_run->mmio.len); -} - -int handle_io_window(kvm_context_t kvm) -{ - return kvm->callbacks->io_window(kvm->opaque); -} - -int handle_halt(kvm_vcpu_context_t vcpu) -{ - return vcpu->kvm->callbacks->halt(vcpu->kvm->opaque, vcpu); -} - -int handle_shutdown(kvm_context_t kvm, void *env) -{ - return kvm->callbacks->shutdown(kvm->opaque, env); -} - -int try_push_interrupts(kvm_context_t kvm) -{ - return kvm->callbacks->try_push_interrupts(kvm->opaque); -} - -static inline void push_nmi(kvm_context_t kvm) -{ -#ifdef KVM_CAP_USER_NMI - kvm->callbacks->push_nmi(kvm->opaque); -#endif /* KVM_CAP_USER_NMI */ -} - -void post_kvm_run(kvm_context_t kvm, void *env) -{ - kvm->callbacks->post_kvm_run(kvm->opaque, env); -} - -int pre_kvm_run(kvm_context_t kvm, void *env) -{ - return kvm->callbacks->pre_kvm_run(kvm->opaque, env); -} - -int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu) -{ - return vcpu->run->if_flag; -} - -int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu) -{ - return vcpu->run->ready_for_interrupt_injection; -} - -int kvm_run(kvm_vcpu_context_t vcpu, void *env) -{ - int r; - int fd = vcpu->fd; - struct kvm_run *run = vcpu->run; - kvm_context_t kvm = vcpu->kvm; - -again: - 
push_nmi(kvm); -#if !defined(__s390__) - if (!kvm->irqchip_in_kernel) - run->request_interrupt_window = try_push_interrupts(kvm); -#endif - r = pre_kvm_run(kvm, env); - if (r) - return r; - r = ioctl(fd, KVM_RUN, 0); - - if (r == -1 && errno != EINTR && errno != EAGAIN) { - r = -errno; - post_kvm_run(kvm, env); - fprintf(stderr, "kvm_run: %s\n", strerror(-r)); - return r; - } - - post_kvm_run(kvm, env); - -#if defined(KVM_CAP_COALESCED_MMIO) - if (kvm->coalesced_mmio) { - struct kvm_coalesced_mmio_ring *ring = (void *)run + - kvm->coalesced_mmio * PAGE_SIZE; - while (ring->first != ring->last) { - kvm->callbacks->mmio_write(kvm->opaque, - ring->coalesced_mmio[ring->first].phys_addr, - &ring->coalesced_mmio[ring->first].data[0], - ring->coalesced_mmio[ring->first].len); - smp_wmb(); - ring->first = (ring->first + 1) % - KVM_COALESCED_MMIO_MAX; - } - } -#endif - -#if !defined(__s390__) - if (r == -1) { - r = handle_io_window(kvm); - goto more; - } -#endif - if (1) { - switch (run->exit_reason) { - case KVM_EXIT_UNKNOWN: - r = kvm->callbacks->unhandled(kvm, vcpu, - run->hw.hardware_exit_reason); - break; - case KVM_EXIT_FAIL_ENTRY: - r = kvm->callbacks->unhandled(kvm, vcpu, - run->fail_entry.hardware_entry_failure_reason); - break; - case KVM_EXIT_EXCEPTION: - fprintf(stderr, "exception %d (%x)\n", - run->ex.exception, - run->ex.error_code); - kvm_show_regs(vcpu); - kvm_show_code(vcpu); - abort(); - break; - case KVM_EXIT_IO: - r = handle_io(vcpu); - break; - case KVM_EXIT_DEBUG: - r = handle_debug(vcpu, env); - break; - case KVM_EXIT_MMIO: - r = handle_mmio(vcpu); - break; - case KVM_EXIT_HLT: - r = handle_halt(vcpu); - break; - case KVM_EXIT_IRQ_WINDOW_OPEN: - break; - case KVM_EXIT_SHUTDOWN: - r = handle_shutdown(kvm, env); - break; -#if defined(__s390__) - case KVM_EXIT_S390_SIEIC: - r = kvm->callbacks->s390_handle_intercept(kvm, vcpu, - run); - break; - case KVM_EXIT_S390_RESET: - r = kvm->callbacks->s390_handle_reset(kvm, vcpu, run); - break; -#endif - default: 
- if (kvm_arch_run(vcpu)) { - fprintf(stderr, "unhandled vm exit: 0x%x\n", - run->exit_reason); - kvm_show_regs(vcpu); - abort(); - } - break; - } - } -more: - if (!r) - goto again; - return r; -} - -int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq) -{ - struct kvm_interrupt intr; - - intr.irq = irq; - return ioctl(vcpu->fd, KVM_INTERRUPT, &intr); -} - -#ifdef KVM_CAP_SET_GUEST_DEBUG -int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg) -{ - return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg); -} -#endif - -int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset) -{ - struct kvm_signal_mask *sigmask; - int r; - - if (!sigset) { - r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL); - if (r == -1) - r = -errno; - return r; - } - sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset)); - if (!sigmask) - return -ENOMEM; - - sigmask->len = 8; - memcpy(sigmask->sigset, sigset, sizeof(*sigset)); - r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask); - if (r == -1) - r = -errno; - free(sigmask); - return r; -} - -int kvm_irqchip_in_kernel(kvm_context_t kvm) -{ - return kvm->irqchip_in_kernel; -} - -int kvm_pit_in_kernel(kvm_context_t kvm) -{ - return kvm->pit_in_kernel; -} - -int kvm_has_sync_mmu(kvm_context_t kvm) -{ - int r = 0; -#ifdef KVM_CAP_SYNC_MMU - r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU); -#endif - return r; -} - -int kvm_inject_nmi(kvm_vcpu_context_t vcpu) -{ -#ifdef KVM_CAP_USER_NMI - return ioctl(vcpu->fd, KVM_NMI); -#else - return -ENOSYS; -#endif -} - -int kvm_init_coalesced_mmio(kvm_context_t kvm) -{ - int r = 0; - kvm->coalesced_mmio = 0; -#ifdef KVM_CAP_COALESCED_MMIO - r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); - if (r > 0) { - kvm->coalesced_mmio = r; - return 0; - } -#endif - return r; -} - -int kvm_register_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size) -{ -#ifdef KVM_CAP_COALESCED_MMIO - struct kvm_coalesced_mmio_zone zone; - int r; - - if 
(kvm->coalesced_mmio) { - - zone.addr = addr; - zone.size = size; - - r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone); - if (r == -1) { - perror("kvm_register_coalesced_mmio_zone"); - return -errno; - } - return 0; - } -#endif - return -ENOSYS; -} - -int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size) -{ -#ifdef KVM_CAP_COALESCED_MMIO - struct kvm_coalesced_mmio_zone zone; - int r; - - if (kvm->coalesced_mmio) { - - zone.addr = addr; - zone.size = size; - - r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone); - if (r == -1) { - perror("kvm_unregister_coalesced_mmio_zone"); - return -errno; - } - DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size); - return 0; - } -#endif - return -ENOSYS; -} - -#ifdef KVM_CAP_DEVICE_ASSIGNMENT -int kvm_assign_pci_device(kvm_context_t kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int ret; - - ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev); - if (ret < 0) - return -errno; - - return ret; -} - -static int kvm_old_assign_irq(kvm_context_t kvm, - struct kvm_assigned_irq *assigned_irq) -{ - int ret; - - ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq); - if (ret < 0) - return -errno; - - return ret; -} - -#ifdef KVM_CAP_ASSIGN_DEV_IRQ -int kvm_assign_irq(kvm_context_t kvm, - struct kvm_assigned_irq *assigned_irq) -{ - int ret; - - ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ); - if (ret > 0) { - ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq); - if (ret < 0) - return -errno; - return ret; - } - - return kvm_old_assign_irq(kvm, assigned_irq); -} - -int kvm_deassign_irq(kvm_context_t kvm, - struct kvm_assigned_irq *assigned_irq) -{ - int ret; - - ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq); - if (ret < 0) - return -errno; - - return ret; -} -#else -int kvm_assign_irq(kvm_context_t kvm, - struct kvm_assigned_irq *assigned_irq) -{ - return kvm_old_assign_irq(kvm, assigned_irq); -} -#endif -#endif - 
-#ifdef KVM_CAP_DEVICE_DEASSIGNMENT -int kvm_deassign_pci_device(kvm_context_t kvm, - struct kvm_assigned_pci_dev *assigned_dev) -{ - int ret; - - ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev); - if (ret < 0) - return -errno; - - return ret; -} -#endif - -int kvm_destroy_memory_region_works(kvm_context_t kvm) -{ - int ret = 0; - -#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS - ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, - KVM_CAP_DESTROY_MEMORY_REGION_WORKS); - if (ret <= 0) - ret = 0; -#endif - return ret; -} - -int kvm_reinject_control(kvm_context_t kvm, int pit_reinject) -{ -#ifdef KVM_CAP_REINJECT_CONTROL - int r; - struct kvm_reinject_control control; - - control.pit_reinject = pit_reinject; - - r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL); - if (r > 0) { - r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control); - if (r == -1) - return -errno; - return r; - } -#endif - return -ENOSYS; -} - -int kvm_has_gsi_routing(kvm_context_t kvm) -{ - int r = 0; - -#ifdef KVM_CAP_IRQ_ROUTING - r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING); -#endif - return r; -} - -int kvm_get_gsi_count(kvm_context_t kvm) -{ -#ifdef KVM_CAP_IRQ_ROUTING - return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING); -#else - return -EINVAL; -#endif -} - -int kvm_clear_gsi_routes(kvm_context_t kvm) -{ -#ifdef KVM_CAP_IRQ_ROUTING - kvm->irq_routes->nr = 0; - return 0; -#else - return -EINVAL; -#endif -} - -int kvm_add_routing_entry(kvm_context_t kvm, - struct kvm_irq_routing_entry* entry) -{ -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing *z; - struct kvm_irq_routing_entry *new; - int n, size; - - if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) { - n = kvm->nr_allocated_irq_routes * 2; - if (n < 64) - n = 64; - size = sizeof(struct kvm_irq_routing); - size += n * sizeof(*new); - z = realloc(kvm->irq_routes, size); - if (!z) - return -ENOMEM; - kvm->nr_allocated_irq_routes = n; - kvm->irq_routes = z; - } - n = kvm->irq_routes->nr++; - new = 
&kvm->irq_routes->entries[n]; - memset(new, 0, sizeof(*new)); - new->gsi = entry->gsi; - new->type = entry->type; - new->flags = entry->flags; - new->u = entry->u; - - set_gsi(kvm, entry->gsi); - - return 0; -#else - return -ENOSYS; -#endif -} - -int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin) -{ -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing_entry e; - - e.gsi = gsi; - e.type = KVM_IRQ_ROUTING_IRQCHIP; - e.flags = 0; - e.u.irqchip.irqchip = irqchip; - e.u.irqchip.pin = pin; - return kvm_add_routing_entry(kvm, &e); -#else - return -ENOSYS; -#endif -} - -int kvm_del_routing_entry(kvm_context_t kvm, - struct kvm_irq_routing_entry* entry) -{ -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing_entry *e, *p; - int i, gsi, found = 0; - - gsi = entry->gsi; - - for (i = 0; i < kvm->irq_routes->nr; ++i) { - e = &kvm->irq_routes->entries[i]; - if (e->type == entry->type - && e->gsi == gsi) { - switch (e->type) - { - case KVM_IRQ_ROUTING_IRQCHIP: { - if (e->u.irqchip.irqchip == - entry->u.irqchip.irqchip - && e->u.irqchip.pin == - entry->u.irqchip.pin) { - p = &kvm->irq_routes-> - entries[--kvm->irq_routes->nr]; - *e = *p; - found = 1; - } - break; - } - case KVM_IRQ_ROUTING_MSI: { - if (e->u.msi.address_lo == - entry->u.msi.address_lo - && e->u.msi.address_hi == - entry->u.msi.address_hi - && e->u.msi.data == entry->u.msi.data) { - p = &kvm->irq_routes-> - entries[--kvm->irq_routes->nr]; - *e = *p; - found = 1; - } - break; - } - default: - break; - } - if (found) { - /* If there are no other users of this GSI - * mark it available in the bitmap */ - for (i = 0; i < kvm->irq_routes->nr; i++) { - e = &kvm->irq_routes->entries[i]; - if (e->gsi == gsi) - break; - } - if (i == kvm->irq_routes->nr) - clear_gsi(kvm, gsi); - - return 0; - } - } - } - return -ESRCH; -#else - return -ENOSYS; -#endif -} - -int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin) -{ -#ifdef KVM_CAP_IRQ_ROUTING - struct kvm_irq_routing_entry e; - - e.gsi = 
gsi; - e.type = KVM_IRQ_ROUTING_IRQCHIP; - e.flags = 0; - e.u.irqchip.irqchip = irqchip; - e.u.irqchip.pin = pin; - return kvm_del_routing_entry(kvm, &e); -#else - return -ENOSYS; -#endif -} - -int kvm_commit_irq_routes(kvm_context_t kvm) -{ -#ifdef KVM_CAP_IRQ_ROUTING - int r; - - kvm->irq_routes->flags = 0; - r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes); - if (r == -1) - r = -errno; - return r; -#else - return -ENOSYS; -#endif -} - -int kvm_get_irq_route_gsi(kvm_context_t kvm) -{ - int i, bit; - uint32_t *buf = kvm->used_gsi_bitmap; - - /* Return the lowest unused GSI in the bitmap */ - for (i = 0; i < kvm->max_gsi / 32; i++) { - bit = ffs(~buf[i]); - if (!bit) - continue; - - return bit - 1 + i * 32; - } - - return -ENOSPC; -} - -#ifdef KVM_CAP_DEVICE_MSIX -int kvm_assign_set_msix_nr(kvm_context_t kvm, - struct kvm_assigned_msix_nr *msix_nr) -{ - int ret; - - ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_NR, msix_nr); - if (ret < 0) - return -errno; - - return ret; -} - -int kvm_assign_set_msix_entry(kvm_context_t kvm, - struct kvm_assigned_msix_entry *entry) -{ - int ret; - - ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, entry); - if (ret < 0) - return -errno; - - return ret; -} -#endif - -#if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd) - -#include - -static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags) -{ - int r; - struct kvm_irqfd data = { - .fd = fd, - .gsi = gsi, - .flags = flags, - }; - - r = ioctl(kvm->vm_fd, KVM_IRQFD, &data); - if (r == -1) - r = -errno; - return r; -} - -int kvm_irqfd(kvm_context_t kvm, int gsi, int flags) -{ - int r; - int fd; - - if (!kvm_check_extension(kvm, KVM_CAP_IRQFD)) - return -ENOENT; - - fd = eventfd(0, 0); - if (fd < 0) - return -errno; - - r = _kvm_irqfd(kvm, fd, gsi, 0); - if (r < 0) { - close(fd); - return -errno; - } - - return fd; -} - -#else /* KVM_CAP_IRQFD */ - -int kvm_irqfd(kvm_context_t kvm, int gsi, int flags) -{ - return -ENOSYS; -} - -#endif /* KVM_CAP_IRQFD */ diff 
--git a/libkvm-all.h b/libkvm-all.h index d647ef1..4f7b9a3 100644 --- a/libkvm-all.h +++ b/libkvm-all.h @@ -622,8 +622,6 @@ int kvm_dirty_pages_log_reset(kvm_context_t kvm); */ int kvm_irqchip_in_kernel(kvm_context_t kvm); -int kvm_has_sync_mmu(kvm_context_t kvm); - #ifdef KVM_CAP_IRQCHIP /*! * \brief Dump in kernel IRQCHIP contents diff --git a/qemu-kvm.c b/qemu-kvm.c index 2aeb17c..09cf203 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -20,14 +20,23 @@ #include "qemu-kvm.h" #include "libkvm-all.h" +#include "libkvm.h" + #include #include #include #include +#include #define false 0 #define true 1 +#define EXPECTED_KVM_API_VERSION 12 + +#if EXPECTED_KVM_API_VERSION != KVM_API_VERSION +#error libkvm: userspace and kernel version mismatch +#endif + int kvm_allowed = 1; int kvm_irqchip = 1; int kvm_pit = 1; @@ -57,6 +66,1505 @@ static uint64_t phys_ram_size; /* The list of ioperm_data */ static LIST_HEAD(, ioperm_data) ioperm_head; +//#define DEBUG_MEMREG +#ifdef DEBUG_MEMREG +#define DPRINTF(fmt, args...) \ + do { fprintf(stderr, "%s:%d " fmt , __func__, __LINE__, ##args); } while (0) +#else +#define DPRINTF(fmt, args...) 
do {} while (0) +#endif + +#define ALIGN(x, y) (((x)+(y)-1) & ~((y)-1)) + +int kvm_abi = EXPECTED_KVM_API_VERSION; +int kvm_page_size; + +/* Set the bit for gsi in kvm->used_gsi_bitmap; a gsi at or above kvm->max_gsi is only reported through DPRINTF. */ +static inline void set_gsi(kvm_context_t kvm, unsigned int gsi) +{ + uint32_t *bitmap = kvm->used_gsi_bitmap; + + if (gsi < kvm->max_gsi) + bitmap[gsi / 32] |= 1U << (gsi % 32); + else + DPRINTF("Invalid GSI %u\n", gsi); +} + +/* Clear the bit for gsi in kvm->used_gsi_bitmap; a gsi at or above kvm->max_gsi is only reported through DPRINTF. */ +static inline void clear_gsi(kvm_context_t kvm, unsigned int gsi) +{ + uint32_t *bitmap = kvm->used_gsi_bitmap; + + if (gsi < kvm->max_gsi) + bitmap[gsi / 32] &= ~(1U << (gsi % 32)); + else + DPRINTF("Invalid GSI %u\n", gsi); +} + +/* Userspace record of one memory slot; len == 0 marks the slot as free. */ +struct slot_info { + unsigned long phys_addr; + unsigned long len; + unsigned long userspace_addr; + unsigned flags; + int logging_count; +}; + +struct slot_info slots[KVM_MAX_NUM_MEM_REGIONS]; + +/* Mark every entry of slots[] as free by zeroing its length. */ +static void init_slots(void) +{ + int i; + + for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) + slots[i].len = 0; +} + +static int get_free_slot(kvm_context_t kvm) +{ + int i; + int tss_ext; + +#if defined(KVM_CAP_SET_TSS_ADDR) && !defined(__s390__) + tss_ext = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); +#else + tss_ext = 0; +#endif + + /* + * on older kernels where the set tss ioctl is not supported we must save + * slot 0 to hold the extended memory, as the vmx will use the last 3 + * pages of this slot. 
+ */ + if (tss_ext > 0) + i = 0; + else + i = 1; + + for (; i < KVM_MAX_NUM_MEM_REGIONS; ++i) + if (!slots[i].len) + return i; + return -1; +} + +static void register_slot(int slot, unsigned long phys_addr, unsigned long len, + unsigned long userspace_addr, unsigned flags) +{ + slots[slot].phys_addr = phys_addr; + slots[slot].len = len; + slots[slot].userspace_addr = userspace_addr; + slots[slot].flags = flags; +} + +static void free_slot(int slot) +{ + slots[slot].len = 0; + slots[slot].logging_count = 0; +} + +static int get_slot(unsigned long phys_addr) +{ + int i; + + for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) { + if (slots[i].len && slots[i].phys_addr <= phys_addr && + (slots[i].phys_addr + slots[i].len-1) >= phys_addr) + return i; + } + return -1; +} + +/* Returns -1 if this slot is not totally contained on any other, + * and the number of the slot otherwise */ +static int get_container_slot(uint64_t phys_addr, unsigned long size) +{ + int i; + + for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS ; ++i) + if (slots[i].len && slots[i].phys_addr <= phys_addr && + (slots[i].phys_addr + slots[i].len) >= phys_addr + size) + return i; + return -1; +} + +int kvm_is_containing_region(kvm_context_t kvm, unsigned long phys_addr, unsigned long size) +{ + int slot = get_container_slot(phys_addr, size); + if (slot == -1) + return 0; + return 1; +} + +/* + * dirty pages logging control + */ +static int kvm_dirty_pages_log_change(kvm_context_t kvm, + unsigned long phys_addr, + unsigned flags, + unsigned mask) +{ + int r = -1; + int slot = get_slot(phys_addr); + + if (slot == -1) { + fprintf(stderr, "BUG: %s: invalid parameters\n", __FUNCTION__); + return 1; + } + + flags = (slots[slot].flags & ~mask) | flags; + if (flags == slots[slot].flags) + return 0; + slots[slot].flags = flags; + + { + struct kvm_userspace_memory_region mem = { + .slot = slot, + .memory_size = slots[slot].len, + .guest_phys_addr = slots[slot].phys_addr, + .userspace_addr = slots[slot].userspace_addr, + .flags 
= slots[slot].flags, + }; + + + DPRINTF("slot %d start %llx len %llx flags %x\n", + mem.slot, + mem.guest_phys_addr, + mem.memory_size, + mem.flags); + r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem); + if (r == -1) + fprintf(stderr, "%s: %m\n", __FUNCTION__); + } + return r; +} + +static int kvm_dirty_pages_log_change_all(kvm_context_t kvm, + int (*change)(kvm_context_t kvm, + uint64_t start, + uint64_t len)) +{ + int i, r; + + for (i=r=0; idirty_pages_log_all) + return 0; + kvm->dirty_pages_log_all = 1; + return kvm_dirty_pages_log_change_all(kvm, + kvm_dirty_pages_log_enable_slot); +} + +/** + * Enable dirty page logging only for memory regions that were created with + * dirty logging enabled (disable for all other memory regions). + */ +int kvm_dirty_pages_log_reset(kvm_context_t kvm) +{ + if (!kvm->dirty_pages_log_all) + return 0; + kvm->dirty_pages_log_all = 0; + return kvm_dirty_pages_log_change_all(kvm, + kvm_dirty_pages_log_disable_slot); +} + + +kvm_context_t kvm_init(struct kvm_callbacks *callbacks, + void *opaque) +{ + int fd; + kvm_context_t kvm; + int r, gsi_count; + + fd = open("/dev/kvm", O_RDWR); + if (fd == -1) { + perror("open /dev/kvm"); + return NULL; + } + r = ioctl(fd, KVM_GET_API_VERSION, 0); + if (r == -1) { + fprintf(stderr, "kvm kernel version too old: " + "KVM_GET_API_VERSION ioctl not supported\n"); + goto out_close; + } + if (r < EXPECTED_KVM_API_VERSION) { + fprintf(stderr, "kvm kernel version too old: " + "We expect API version %d or newer, but got " + "version %d\n", + EXPECTED_KVM_API_VERSION, r); + goto out_close; + } + if (r > EXPECTED_KVM_API_VERSION) { + fprintf(stderr, "kvm userspace version too old\n"); + goto out_close; + } + kvm_abi = r; + kvm_page_size = getpagesize(); + kvm = malloc(sizeof(*kvm)); + if (kvm == NULL) + goto out_close; + memset(kvm, 0, sizeof(*kvm)); + kvm->fd = fd; + kvm->vm_fd = -1; + kvm->callbacks = callbacks; + kvm->opaque = opaque; + kvm->dirty_pages_log_all = 0; + kvm->no_irqchip_creation = 
0; + kvm->no_pit_creation = 0; + + gsi_count = kvm_get_gsi_count(kvm); + if (gsi_count > 0) { + int gsi_bits, i; + + /* Round up so we can search ints using ffs */ + gsi_bits = ALIGN(gsi_count, 32); + kvm->used_gsi_bitmap = malloc(gsi_bits / 8); + if (!kvm->used_gsi_bitmap) + goto out_close; + memset(kvm->used_gsi_bitmap, 0, gsi_bits / 8); + kvm->max_gsi = gsi_bits; + + /* Mark any over-allocated bits as already in use */ + for (i = gsi_count; i < gsi_bits; i++) + set_gsi(kvm, i); + } + + return kvm; + out_close: + close(fd); + return NULL; +} + +void kvm_finalize(kvm_context_t kvm) +{ + /* FIXME + if (kvm->vcpu_fd[0] != -1) + close(kvm->vcpu_fd[0]); + if (kvm->vm_fd != -1) + close(kvm->vm_fd); + */ + close(kvm->fd); + free(kvm); +} + +void kvm_disable_irqchip_creation(kvm_context_t kvm) +{ + kvm->no_irqchip_creation = 1; +} + +void kvm_disable_pit_creation(kvm_context_t kvm) +{ + kvm->no_pit_creation = 1; +} + +kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id) +{ + long mmap_size; + int r; + kvm_vcpu_context_t vcpu_ctx = malloc(sizeof(struct kvm_vcpu_context)); + + if (!vcpu_ctx) { + errno = ENOMEM; + return NULL; + } + + vcpu_ctx->kvm = kvm; + vcpu_ctx->id = id; + + r = ioctl(kvm->vm_fd, KVM_CREATE_VCPU, id); + if (r == -1) { + fprintf(stderr, "kvm_create_vcpu: %m\n"); + goto err; + } + vcpu_ctx->fd = r; + mmap_size = ioctl(kvm->fd, KVM_GET_VCPU_MMAP_SIZE, 0); + if (mmap_size == -1) { + fprintf(stderr, "get vcpu mmap size: %m\n"); + goto err_fd; + } + vcpu_ctx->run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, + vcpu_ctx->fd, 0); + if (vcpu_ctx->run == MAP_FAILED) { + fprintf(stderr, "mmap vcpu area: %m\n"); + goto err_fd; + } + return vcpu_ctx; +err_fd: + close(vcpu_ctx->fd); +err: + free(vcpu_ctx); + return NULL; +} + +int kvm_create_vm(kvm_context_t kvm) +{ + int fd = kvm->fd; + +#ifdef KVM_CAP_IRQ_ROUTING + kvm->irq_routes = malloc(sizeof(*kvm->irq_routes)); + if (!kvm->irq_routes) + return -ENOMEM; + memset(kvm->irq_routes, 0, 
sizeof(*kvm->irq_routes)); + kvm->nr_allocated_irq_routes = 0; +#endif + + fd = ioctl(fd, KVM_CREATE_VM, 0); + if (fd == -1) { + fprintf(stderr, "kvm_create_vm: %m\n"); + return -1; + } + kvm->vm_fd = fd; + return 0; +} + +static int kvm_create_default_phys_mem(kvm_context_t kvm, + unsigned long phys_mem_bytes, + void **vm_mem) +{ +#ifdef KVM_CAP_USER_MEMORY + int r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY); + if (r > 0) + return 0; + fprintf(stderr, "Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported\n"); +#else +#error Hypervisor too old: KVM_CAP_USER_MEMORY extension not supported +#endif + return -1; +} + +int kvm_check_extension(kvm_context_t kvm, int ext) +{ + int ret; + + ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, ext); + if (ret > 0) + return ret; + return 0; +} + +void kvm_create_irqchip(kvm_context_t kvm) +{ + int r; + + kvm->irqchip_in_kernel = 0; +#ifdef KVM_CAP_IRQCHIP + if (!kvm->no_irqchip_creation) { + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP); + if (r > 0) { /* kernel irqchip supported */ + r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); + if (r >= 0) { + kvm->irqchip_inject_ioctl = KVM_IRQ_LINE; +#if defined(KVM_CAP_IRQ_INJECT_STATUS) && defined(KVM_IRQ_LINE_STATUS) + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, + KVM_CAP_IRQ_INJECT_STATUS); + if (r > 0) + kvm->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS; +#endif + kvm->irqchip_in_kernel = 1; + } + else + fprintf(stderr, "Create kernel PIC irqchip failed\n"); + } + } +#endif +} + +int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem) +{ + int r; + + r = kvm_create_vm(kvm); + if (r < 0) + return r; + r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem); + if (r < 0) + return r; + init_slots(); + r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem); + if (r < 0) + return r; + kvm_create_irqchip(kvm); + + return 0; +} + + +void *kvm_create_phys_mem(kvm_context_t kvm, unsigned long phys_start, + unsigned long len, int log, int writable) +{ 
+ int r; + int prot = PROT_READ; + void *ptr; + struct kvm_userspace_memory_region memory = { + .memory_size = len, + .guest_phys_addr = phys_start, + .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0, + }; + + if (writable) + prot |= PROT_WRITE; + +#if !defined(__s390__) + ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0); +#else + ptr = mmap(LIBKVM_S390_ORIGIN, len, prot | PROT_EXEC, + MAP_FIXED | MAP_SHARED | MAP_ANONYMOUS, -1, 0); +#endif + if (ptr == MAP_FAILED) { + fprintf(stderr, "%s: %s", __func__, strerror(errno)); + return 0; + } + + memset(ptr, 0, len); + + memory.userspace_addr = (unsigned long)ptr; + memory.slot = get_free_slot(kvm); + DPRINTF("slot %d start %llx len %llx flags %x\n", + memory.slot, + memory.guest_phys_addr, + memory.memory_size, + memory.flags); + r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); + if (r == -1) { + fprintf(stderr, "%s: %s", __func__, strerror(errno)); + /* the region was never handed to the kernel or recorded in slots[], so release the mapping instead of leaking it */ + munmap(ptr, len); + return 0; + } + register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size, + memory.userspace_addr, memory.flags); + + return ptr; +} + +/* Register caller-provided memory at userspace_addr as guest physical memory starting at phys_start, using the slot returned by get_free_slot(); returns 0 on success and -1 if the KVM_SET_USER_MEMORY_REGION ioctl fails. */ +int kvm_register_phys_mem(kvm_context_t kvm, + unsigned long phys_start, void *userspace_addr, + unsigned long len, int log) +{ + + struct kvm_userspace_memory_region memory = { + .memory_size = len, + .guest_phys_addr = phys_start, + .userspace_addr = (unsigned long)(intptr_t)userspace_addr, + .flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0, + }; + int r; + + memory.slot = get_free_slot(kvm); + DPRINTF("memory: gpa: %llx, size: %llx, uaddr: %llx, slot: %x, flags: %lx\n", + memory.guest_phys_addr, memory.memory_size, + memory.userspace_addr, memory.slot, memory.flags); + r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); + if (r == -1) { + fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno)); + return -1; + } + register_slot(memory.slot, memory.guest_phys_addr, memory.memory_size, + memory.userspace_addr, memory.flags); + return 0; +} + + +/* destroy/free a whole slot. 
+ * phys_start, len and slot are the params passed to kvm_create_phys_mem() + */ +void kvm_destroy_phys_mem(kvm_context_t kvm, unsigned long phys_start, + unsigned long len) +{ + int slot; + int r; + struct kvm_userspace_memory_region memory = { + .memory_size = 0, + .guest_phys_addr = phys_start, + .userspace_addr = 0, + .flags = 0, + }; + + slot = get_slot(phys_start); + + if ((slot >= KVM_MAX_NUM_MEM_REGIONS) || (slot == -1)) { + fprintf(stderr, "BUG: %s: invalid parameters (slot=%d)\n", + __FUNCTION__, slot); + return; + } + if (phys_start != slots[slot].phys_addr) { + fprintf(stderr, + "WARNING: %s: phys_start is 0x%lx expecting 0x%lx\n", + __FUNCTION__, phys_start, slots[slot].phys_addr); + phys_start = slots[slot].phys_addr; + } + + memory.slot = slot; + DPRINTF("slot %d start %llx len %llx flags %x\n", + memory.slot, + memory.guest_phys_addr, + memory.memory_size, + memory.flags); + r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory); + if (r == -1) { + fprintf(stderr, "destroy_userspace_phys_mem: %s", + strerror(errno)); + return; + } + + free_slot(memory.slot); +} + +void kvm_unregister_memory_area(kvm_context_t kvm, uint64_t phys_addr, unsigned long size) +{ + + int slot = get_container_slot(phys_addr, size); + + if (slot != -1) { + DPRINTF("Unregistering memory region %llx (%lx)\n", phys_addr, size); + kvm_destroy_phys_mem(kvm, phys_addr, size); + return; + } +} + +static int kvm_get_map(kvm_context_t kvm, int ioctl_num, int slot, void *buf) +{ + int r; + struct kvm_dirty_log log = { + .slot = slot, + }; + + log.dirty_bitmap = buf; + + r = ioctl(kvm->vm_fd, ioctl_num, &log); + if (r == -1) + return -errno; + return 0; +} + +int kvm_get_dirty_pages(kvm_context_t kvm, unsigned long phys_addr, void *buf) +{ + int slot; + + slot = get_slot(phys_addr); + return kvm_get_map(kvm, KVM_GET_DIRTY_LOG, slot, buf); +} + +int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr, + unsigned long len, void *buf, void *opaque, + int 
(*cb)(unsigned long start, unsigned long len, + void*bitmap, void *opaque)) +{ + int i; + int r; + unsigned long end_addr = phys_addr + len; + + for (i = 0; i < KVM_MAX_NUM_MEM_REGIONS; ++i) { + if ((slots[i].len && (uint64_t)slots[i].phys_addr >= phys_addr) + && ((uint64_t)slots[i].phys_addr + slots[i].len <= end_addr)) { + r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf); + if (r) + return r; + r = cb(slots[i].phys_addr, slots[i].len, buf, opaque); + if (r) + return r; + } + } + return 0; +} + +#ifdef KVM_CAP_IRQCHIP + +int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status) +{ + struct kvm_irq_level event; + int r; + + if (!kvm->irqchip_in_kernel) + return 0; + event.level = level; + event.irq = irq; + r = ioctl(kvm->vm_fd, kvm->irqchip_inject_ioctl, &event); + if (r == -1) + perror("kvm_set_irq_level"); + + if (status) { +#ifdef KVM_CAP_IRQ_INJECT_STATUS + *status = (kvm->irqchip_inject_ioctl == KVM_IRQ_LINE) ? + 1 : event.status; +#else + *status = 1; +#endif + } + + return 1; +} + +int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) +{ + int r; + + if (!kvm->irqchip_in_kernel) + return 0; + r = ioctl(kvm->vm_fd, KVM_GET_IRQCHIP, chip); + if (r == -1) { + r = -errno; + perror("kvm_get_irqchip\n"); + } + return r; +} + +int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) +{ + int r; + + if (!kvm->irqchip_in_kernel) + return 0; + r = ioctl(kvm->vm_fd, KVM_SET_IRQCHIP, chip); + if (r == -1) { + r = -errno; + perror("kvm_set_irqchip\n"); + } + return r; +} + +#endif + +static int handle_io(kvm_vcpu_context_t vcpu) +{ + struct kvm_run *run = vcpu->run; + kvm_context_t kvm = vcpu->kvm; + uint16_t addr = run->io.port; + int r; + int i; + void *p = (void *)run + run->io.data_offset; + + for (i = 0; i < run->io.count; ++i) { + switch (run->io.direction) { + case KVM_EXIT_IO_IN: + switch (run->io.size) { + case 1: + r = kvm->callbacks->inb(kvm->opaque, addr, p); + break; + case 2: + r = kvm->callbacks->inw(kvm->opaque, addr, 
p); + break; + case 4: + r = kvm->callbacks->inl(kvm->opaque, addr, p); + break; + default: + fprintf(stderr, "bad I/O size %d\n", run->io.size); + return -EMSGSIZE; + } + break; + case KVM_EXIT_IO_OUT: + switch (run->io.size) { + case 1: + r = kvm->callbacks->outb(kvm->opaque, addr, + *(uint8_t *)p); + break; + case 2: + r = kvm->callbacks->outw(kvm->opaque, addr, + *(uint16_t *)p); + break; + case 4: + r = kvm->callbacks->outl(kvm->opaque, addr, + *(uint32_t *)p); + break; + default: + fprintf(stderr, "bad I/O size %d\n", run->io.size); + return -EMSGSIZE; + } + break; + default: + fprintf(stderr, "bad I/O direction %d\n", run->io.direction); + return -EPROTO; + } + + p += run->io.size; + } + + return 0; +} + +int handle_debug(kvm_vcpu_context_t vcpu, void *env) +{ +#ifdef KVM_CAP_SET_GUEST_DEBUG + struct kvm_run *run = vcpu->run; + kvm_context_t kvm = vcpu->kvm; + + return kvm->callbacks->debug(kvm->opaque, env, &run->debug.arch); +#else + return 0; +#endif +} + +int kvm_get_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs) +{ + return ioctl(vcpu->fd, KVM_GET_REGS, regs); +} + +int kvm_set_regs(kvm_vcpu_context_t vcpu, struct kvm_regs *regs) +{ + return ioctl(vcpu->fd, KVM_SET_REGS, regs); +} + +int kvm_get_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu) +{ + return ioctl(vcpu->fd, KVM_GET_FPU, fpu); +} + +int kvm_set_fpu(kvm_vcpu_context_t vcpu, struct kvm_fpu *fpu) +{ + return ioctl(vcpu->fd, KVM_SET_FPU, fpu); +} + +int kvm_get_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs) +{ + return ioctl(vcpu->fd, KVM_GET_SREGS, sregs); +} + +int kvm_set_sregs(kvm_vcpu_context_t vcpu, struct kvm_sregs *sregs) +{ + return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); +} + +#ifdef KVM_CAP_MP_STATE +int kvm_get_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state) +{ + int r; + + r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE); + if (r > 0) + return ioctl(vcpu->fd, KVM_GET_MP_STATE, mp_state); + return -ENOSYS; +} + +int 
kvm_set_mpstate(kvm_vcpu_context_t vcpu, struct kvm_mp_state *mp_state) +{ + int r; + + r = ioctl(vcpu->kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE); + if (r > 0) + return ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state); + return -ENOSYS; +} +#endif + +static int handle_mmio(kvm_vcpu_context_t vcpu) +{ + unsigned long addr = vcpu->run->mmio.phys_addr; + kvm_context_t kvm = vcpu->kvm; + struct kvm_run *kvm_run = vcpu->run; + void *data = kvm_run->mmio.data; + + /* hack: Red Hat 7.1 generates these weird accesses. */ + if ((addr > 0xa0000-4 && addr <= 0xa0000) && kvm_run->mmio.len == 3) + return 0; + + if (kvm_run->mmio.is_write) + return kvm->callbacks->mmio_write(kvm->opaque, addr, data, + kvm_run->mmio.len); + else + return kvm->callbacks->mmio_read(kvm->opaque, addr, data, + kvm_run->mmio.len); +} + +int handle_io_window(kvm_context_t kvm) +{ + return kvm->callbacks->io_window(kvm->opaque); +} + +int handle_halt(kvm_vcpu_context_t vcpu) +{ + return vcpu->kvm->callbacks->halt(vcpu->kvm->opaque, vcpu); +} + +int handle_shutdown(kvm_context_t kvm, void *env) +{ + return kvm->callbacks->shutdown(kvm->opaque, env); +} + +int try_push_interrupts(kvm_context_t kvm) +{ + return kvm->callbacks->try_push_interrupts(kvm->opaque); +} + +static inline void push_nmi(kvm_context_t kvm) +{ +#ifdef KVM_CAP_USER_NMI + kvm->callbacks->push_nmi(kvm->opaque); +#endif /* KVM_CAP_USER_NMI */ +} + +void post_kvm_run(kvm_context_t kvm, void *env) +{ + kvm->callbacks->post_kvm_run(kvm->opaque, env); +} + +int pre_kvm_run(kvm_context_t kvm, void *env) +{ + return kvm->callbacks->pre_kvm_run(kvm->opaque, env); +} + +int kvm_get_interrupt_flag(kvm_vcpu_context_t vcpu) +{ + return vcpu->run->if_flag; +} + +int kvm_is_ready_for_interrupt_injection(kvm_vcpu_context_t vcpu) +{ + return vcpu->run->ready_for_interrupt_injection; +} + +int kvm_run(kvm_vcpu_context_t vcpu, void *env) +{ + int r; + int fd = vcpu->fd; + struct kvm_run *run = vcpu->run; + kvm_context_t kvm = vcpu->kvm; + +again: + 
push_nmi(kvm); +#if !defined(__s390__) + if (!kvm->irqchip_in_kernel) + run->request_interrupt_window = try_push_interrupts(kvm); +#endif + r = pre_kvm_run(kvm, env); + if (r) + return r; + r = ioctl(fd, KVM_RUN, 0); + + if (r == -1 && errno != EINTR && errno != EAGAIN) { + r = -errno; + post_kvm_run(kvm, env); + fprintf(stderr, "kvm_run: %s\n", strerror(-r)); + return r; + } + + post_kvm_run(kvm, env); + +#if defined(KVM_CAP_COALESCED_MMIO) + if (kvm->coalesced_mmio) { + struct kvm_coalesced_mmio_ring *ring = (void *)run + + kvm->coalesced_mmio * PAGE_SIZE; + while (ring->first != ring->last) { + kvm->callbacks->mmio_write(kvm->opaque, + ring->coalesced_mmio[ring->first].phys_addr, + &ring->coalesced_mmio[ring->first].data[0], + ring->coalesced_mmio[ring->first].len); + smp_wmb(); + ring->first = (ring->first + 1) % + KVM_COALESCED_MMIO_MAX; + } + } +#endif + +#if !defined(__s390__) + if (r == -1) { + r = handle_io_window(kvm); + goto more; + } +#endif + if (1) { + switch (run->exit_reason) { + case KVM_EXIT_UNKNOWN: + r = kvm->callbacks->unhandled(kvm, vcpu, + run->hw.hardware_exit_reason); + break; + case KVM_EXIT_FAIL_ENTRY: + r = kvm->callbacks->unhandled(kvm, vcpu, + run->fail_entry.hardware_entry_failure_reason); + break; + case KVM_EXIT_EXCEPTION: + fprintf(stderr, "exception %d (%x)\n", + run->ex.exception, + run->ex.error_code); + kvm_show_regs(vcpu); + kvm_show_code(vcpu); + abort(); + break; + case KVM_EXIT_IO: + r = handle_io(vcpu); + break; + case KVM_EXIT_DEBUG: + r = handle_debug(vcpu, env); + break; + case KVM_EXIT_MMIO: + r = handle_mmio(vcpu); + break; + case KVM_EXIT_HLT: + r = handle_halt(vcpu); + break; + case KVM_EXIT_IRQ_WINDOW_OPEN: + break; + case KVM_EXIT_SHUTDOWN: + r = handle_shutdown(kvm, env); + break; +#if defined(__s390__) + case KVM_EXIT_S390_SIEIC: + r = kvm->callbacks->s390_handle_intercept(kvm, vcpu, + run); + break; + case KVM_EXIT_S390_RESET: + r = kvm->callbacks->s390_handle_reset(kvm, vcpu, run); + break; +#endif + default: 
+ if (kvm_arch_run(vcpu)) { + fprintf(stderr, "unhandled vm exit: 0x%x\n", + run->exit_reason); + kvm_show_regs(vcpu); + abort(); + } + break; + } + } +more: + if (!r) + goto again; + return r; +} + +int kvm_inject_irq(kvm_vcpu_context_t vcpu, unsigned irq) +{ + struct kvm_interrupt intr; + + intr.irq = irq; + return ioctl(vcpu->fd, KVM_INTERRUPT, &intr); +} + +#ifdef KVM_CAP_SET_GUEST_DEBUG +int kvm_set_guest_debug(kvm_vcpu_context_t vcpu, struct kvm_guest_debug *dbg) +{ + return ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, dbg); +} +#endif + +int kvm_set_signal_mask(kvm_vcpu_context_t vcpu, const sigset_t *sigset) +{ + struct kvm_signal_mask *sigmask; + int r; + + if (!sigset) { + r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, NULL); + if (r == -1) + r = -errno; + return r; + } + sigmask = malloc(sizeof(*sigmask) + sizeof(*sigset)); + if (!sigmask) + return -ENOMEM; + + sigmask->len = 8; + memcpy(sigmask->sigset, sigset, sizeof(*sigset)); + r = ioctl(vcpu->fd, KVM_SET_SIGNAL_MASK, sigmask); + if (r == -1) + r = -errno; + free(sigmask); + return r; +} + +int kvm_irqchip_in_kernel(kvm_context_t kvm) +{ + return kvm->irqchip_in_kernel; +} + +int kvm_pit_in_kernel(kvm_context_t kvm) +{ + return kvm->pit_in_kernel; +} + +int kvm_has_sync_mmu(void) +{ + int r = 0; +#ifdef KVM_CAP_SYNC_MMU + r = ioctl(kvm_context->fd, KVM_CHECK_EXTENSION, KVM_CAP_SYNC_MMU); +#endif + return r; +} + +int kvm_inject_nmi(kvm_vcpu_context_t vcpu) +{ +#ifdef KVM_CAP_USER_NMI + return ioctl(vcpu->fd, KVM_NMI); +#else + return -ENOSYS; +#endif +} + +int kvm_init_coalesced_mmio(kvm_context_t kvm) +{ + int r = 0; + kvm->coalesced_mmio = 0; +#ifdef KVM_CAP_COALESCED_MMIO + r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); + if (r > 0) { + kvm->coalesced_mmio = r; + return 0; + } +#endif + return r; +} + +int kvm_register_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size) +{ +#ifdef KVM_CAP_COALESCED_MMIO + struct kvm_coalesced_mmio_zone zone; + int r; + + if (kvm->coalesced_mmio) { 
+ + zone.addr = addr; + zone.size = size; + + r = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone); + if (r == -1) { + perror("kvm_register_coalesced_mmio_zone"); + return -errno; + } + return 0; + } +#endif + return -ENOSYS; +} + +int kvm_unregister_coalesced_mmio(kvm_context_t kvm, uint64_t addr, uint32_t size) +{ +#ifdef KVM_CAP_COALESCED_MMIO + struct kvm_coalesced_mmio_zone zone; + int r; + + if (kvm->coalesced_mmio) { + + zone.addr = addr; + zone.size = size; + + r = ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone); + if (r == -1) { + perror("kvm_unregister_coalesced_mmio_zone"); + return -errno; + } + DPRINTF("Unregistered coalesced mmio region for %llx (%lx)\n", addr, size); + return 0; + } +#endif + return -ENOSYS; +} + +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +int kvm_assign_pci_device(kvm_context_t kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int ret; + + ret = ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev); + if (ret < 0) + return -errno; + + return ret; +} + +static int kvm_old_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int ret; + + ret = ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq); + if (ret < 0) + return -errno; + + return ret; +} + +#ifdef KVM_CAP_ASSIGN_DEV_IRQ +int kvm_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int ret; + + ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_ASSIGN_DEV_IRQ); + if (ret > 0) { + ret = ioctl(kvm->vm_fd, KVM_ASSIGN_DEV_IRQ, assigned_irq); + if (ret < 0) + return -errno; + return ret; + } + + return kvm_old_assign_irq(kvm, assigned_irq); +} + +int kvm_deassign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int ret; + + ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_DEV_IRQ, assigned_irq); + if (ret < 0) + return -errno; + + return ret; +} +#else +int kvm_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq) +{ + return kvm_old_assign_irq(kvm, assigned_irq); +} +#endif +#endif + +#ifdef 
KVM_CAP_DEVICE_DEASSIGNMENT
+/*
+ * Issue KVM_DEASSIGN_PCI_DEVICE on the VM fd for assigned_dev.
+ * Returns the ioctl result, or -errno when the ioctl fails.
+ */
+int kvm_deassign_pci_device(kvm_context_t kvm,
+                            struct kvm_assigned_pci_dev *assigned_dev)
+{
+    int ret;
+
+    ret = ioctl(kvm->vm_fd, KVM_DEASSIGN_PCI_DEVICE, assigned_dev);
+    if (ret < 0)
+        return -errno;
+
+    return ret;
+}
+#endif
+
+/*
+ * Query KVM_CAP_DESTROY_MEMORY_REGION_WORKS through KVM_CHECK_EXTENSION.
+ * Returns the positive ioctl result when the capability is reported, and 0
+ * when the ioctl fails, reports 0, or the capability is not defined at
+ * build time.
+ */
+int kvm_destroy_memory_region_works(kvm_context_t kvm)
+{
+    int ret = 0;
+
+#ifdef KVM_CAP_DESTROY_MEMORY_REGION_WORKS
+    ret = ioctl(kvm->fd, KVM_CHECK_EXTENSION,
+                KVM_CAP_DESTROY_MEMORY_REGION_WORKS);
+    if (ret <= 0)
+        ret = 0;
+#endif
+    return ret;
+}
+
+/*
+ * Pass pit_reinject to the kernel with KVM_REINJECT_CONTROL.
+ * Returns the ioctl result, -errno when that ioctl fails, or -ENOSYS when
+ * KVM_CAP_REINJECT_CONTROL is not defined at build time or not reported by
+ * KVM_CHECK_EXTENSION.
+ */
+int kvm_reinject_control(kvm_context_t kvm, int pit_reinject)
+{
+#ifdef KVM_CAP_REINJECT_CONTROL
+    int r;
+    struct kvm_reinject_control control;
+
+    control.pit_reinject = pit_reinject;
+
+    r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_REINJECT_CONTROL);
+    if (r > 0) {
+        r = ioctl(kvm->vm_fd, KVM_REINJECT_CONTROL, &control);
+        if (r == -1)
+            return -errno;
+        return r;
+    }
+#endif
+    return -ENOSYS;
+}
+
+/*
+ * Returns the result of kvm_check_extension() for KVM_CAP_IRQ_ROUTING, or 0
+ * when that capability is not defined at build time.
+ */
+int kvm_has_gsi_routing(kvm_context_t kvm)
+{
+    int r = 0;
+
+#ifdef KVM_CAP_IRQ_ROUTING
+    r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
+#endif
+    return r;
+}
+
+/*
+ * Returns kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING), or -EINVAL when the
+ * capability is not defined at build time.
+ * NOTE(review): presumably the capability value is the number of GSIs the
+ * kernel supports -- confirm against the KVM API documentation.
+ */
+int kvm_get_gsi_count(kvm_context_t kvm)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+    return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
+#else
+    return -EINVAL;
+#endif
+}
+
+/*
+ * Empty the in-memory routing table by resetting its entry count. Only the
+ * count is touched here; nothing is sent to the kernel.
+ * Returns 0, or -EINVAL when KVM_CAP_IRQ_ROUTING is not defined.
+ */
+int kvm_clear_gsi_routes(kvm_context_t kvm)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+    kvm->irq_routes->nr = 0;
+    return 0;
+#else
+    return -EINVAL;
+#endif
+}
+
+/*
+ * Append a copy of *entry to kvm->irq_routes and call set_gsi() for its GSI.
+ * When the table is full it is grown with realloc() to twice its allocated
+ * size (at least 64 entries).
+ * Returns 0 on success, -ENOMEM when realloc() fails (the existing table is
+ * left untouched), or -ENOSYS when KVM_CAP_IRQ_ROUTING is not defined.
+ */
+int kvm_add_routing_entry(kvm_context_t kvm,
+                          struct kvm_irq_routing_entry* entry)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+    struct kvm_irq_routing *z;
+    struct kvm_irq_routing_entry *new;
+    int n, size;
+
+    if (kvm->irq_routes->nr == kvm->nr_allocated_irq_routes) {
+        n = kvm->nr_allocated_irq_routes * 2;
+        if (n < 64)
+            n = 64;
+        size = sizeof(struct kvm_irq_routing);
+        size += n * sizeof(*new);
+        z = realloc(kvm->irq_routes, size);
+        if (!z)
+            return -ENOMEM;
+        kvm->nr_allocated_irq_routes = n;
+        kvm->irq_routes = z;
+    }
+    n = kvm->irq_routes->nr++;
+    new = &kvm->irq_routes->entries[n];
+    memset(new, 0, sizeof(*new));
+    new->gsi = entry->gsi;
+    new->type = entry->type;
+    new->flags = entry->flags;
+    new->u = entry->u;
+
+    set_gsi(kvm, entry->gsi);
+
+    return 0;
+#else
+    return -ENOSYS;
+#endif
+}
+
+/*
+ * Build a KVM_IRQ_ROUTING_IRQCHIP entry for (gsi, irqchip, pin) and add it
+ * with kvm_add_routing_entry(). Returns that call's result, or -ENOSYS when
+ * KVM_CAP_IRQ_ROUTING is not defined.
+ */
+int kvm_add_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+    struct kvm_irq_routing_entry e;
+
+    e.gsi = gsi;
+    e.type = KVM_IRQ_ROUTING_IRQCHIP;
+    e.flags = 0;
+    e.u.irqchip.irqchip = irqchip;
+    e.u.irqchip.pin = pin;
+    return kvm_add_routing_entry(kvm, &e);
+#else
+    return -ENOSYS;
+#endif
+}
+
+/*
+ * Remove the first table entry that matches *entry in type, gsi and the
+ * type-specific fields (irqchip + pin, or MSI address_lo/address_hi/data).
+ * The matching slot is overwritten with the last entry and the count is
+ * decremented, so entry order is not preserved. If no remaining entry uses
+ * the GSI, clear_gsi() is called for it.
+ * Returns 0 when an entry was removed, -ESRCH when nothing matched, or
+ * -ENOSYS when KVM_CAP_IRQ_ROUTING is not defined.
+ */
+int kvm_del_routing_entry(kvm_context_t kvm,
+                          struct kvm_irq_routing_entry* entry)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+    struct kvm_irq_routing_entry *e, *p;
+    int i, gsi, found = 0;
+
+    gsi = entry->gsi;
+
+    for (i = 0; i < kvm->irq_routes->nr; ++i) {
+        e = &kvm->irq_routes->entries[i];
+        if (e->type == entry->type
+            && e->gsi == gsi) {
+            switch (e->type)
+            {
+            case KVM_IRQ_ROUTING_IRQCHIP: {
+                if (e->u.irqchip.irqchip ==
+                    entry->u.irqchip.irqchip
+                    && e->u.irqchip.pin ==
+                    entry->u.irqchip.pin) {
+                    p = &kvm->irq_routes->
+                        entries[--kvm->irq_routes->nr];
+                    *e = *p;
+                    found = 1;
+                }
+                break;
+            }
+            case KVM_IRQ_ROUTING_MSI: {
+                if (e->u.msi.address_lo ==
+                    entry->u.msi.address_lo
+                    && e->u.msi.address_hi ==
+                    entry->u.msi.address_hi
+                    && e->u.msi.data == entry->u.msi.data) {
+                    p = &kvm->irq_routes->
+                        entries[--kvm->irq_routes->nr];
+                    *e = *p;
+                    found = 1;
+                }
+                break;
+            }
+            default:
+                break;
+            }
+            if (found) {
+                /* If there are no other users of this GSI
+                 * mark it available in the bitmap */
+                /* Reusing i and e is safe: this branch always returns. */
+                for (i = 0; i < kvm->irq_routes->nr; i++) {
+                    e = &kvm->irq_routes->entries[i];
+                    if (e->gsi == gsi)
+                        break;
+                }
+                if (i == kvm->irq_routes->nr)
+                    clear_gsi(kvm, gsi);
+
+                return 0;
+            }
+        }
+    }
+    return -ESRCH;
+#else
+    return -ENOSYS;
+#endif
+}
+
+/*
+ * Build a KVM_IRQ_ROUTING_IRQCHIP entry for (gsi, irqchip, pin) and remove
+ * the matching route with kvm_del_routing_entry(). Returns that call's
+ * result, or -ENOSYS when KVM_CAP_IRQ_ROUTING is not defined.
+ */
+int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+    struct kvm_irq_routing_entry e;
+
+    e.gsi = gsi;
+    e.type = KVM_IRQ_ROUTING_IRQCHIP;
+    e.flags = 0;
+    e.u.irqchip.irqchip = irqchip;
+    e.u.irqchip.pin = pin;
+    return kvm_del_routing_entry(kvm, &e);
+#else
+    return -ENOSYS;
+#endif
+}
+
+/*
+ * Hand the in-memory routing table to the kernel with KVM_SET_GSI_ROUTING
+ * (the table's flags field is forced to 0 first).
+ * Returns the ioctl result, -errno when the ioctl fails, or -ENOSYS when
+ * KVM_CAP_IRQ_ROUTING is not defined.
+ */
+int kvm_commit_irq_routes(kvm_context_t kvm)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+    int r;
+
+    kvm->irq_routes->flags = 0;
+    r = ioctl(kvm->vm_fd, KVM_SET_GSI_ROUTING, kvm->irq_routes);
+    if (r == -1)
+        r = -errno;
+    return r;
+#else
+    return -ENOSYS;
+#endif
+}
+
+/*
+ * Scan kvm->used_gsi_bitmap (max_gsi / 32 words of 32 bits) and return the
+ * index of the lowest clear bit, or -ENOSPC when every scanned bit is set.
+ */
+int kvm_get_irq_route_gsi(kvm_context_t kvm)
+{
+    int i, bit;
+    uint32_t *buf = kvm->used_gsi_bitmap;
+
+    /* Return the lowest unused GSI in the bitmap */
+    for (i = 0; i < kvm->max_gsi / 32; i++) {
+        bit = ffs(~buf[i]);
+        if (!bit)
+            continue;
+
+        return bit - 1 + i * 32;
+    }
+
+    return -ENOSPC;
+}
+
+#ifdef KVM_CAP_DEVICE_MSIX
+/*
+ * Issue KVM_ASSIGN_SET_MSIX_NR on the VM fd for msix_nr.
+ * Returns the ioctl result, or -errno when the ioctl fails.
+ */
+int kvm_assign_set_msix_nr(kvm_context_t kvm,
+                           struct kvm_assigned_msix_nr *msix_nr)
+{
+    int ret;
+
+    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_NR, msix_nr);
+    if (ret < 0)
+        return -errno;
+
+    return ret;
+}
+
+/*
+ * Issue KVM_ASSIGN_SET_MSIX_ENTRY on the VM fd for entry.
+ * Returns the ioctl result, or -errno when the ioctl fails.
+ */
+int kvm_assign_set_msix_entry(kvm_context_t kvm,
+                              struct kvm_assigned_msix_entry *entry)
+{
+    int ret;
+
+    ret = ioctl(kvm->vm_fd, KVM_ASSIGN_SET_MSIX_ENTRY, entry);
+    if (ret < 0)
+        return -errno;
+
+    return ret;
+}
+#endif
+
+#if defined(KVM_CAP_IRQFD) && defined(CONFIG_eventfd)
+
+#include <sys/eventfd.h>
+
+/*
+ * Issue KVM_IRQFD on the VM fd with the given fd, gsi and flags.
+ * Returns the ioctl result, or -errno when the ioctl fails.
+ */
+static int _kvm_irqfd(kvm_context_t kvm, int fd, int gsi, int flags)
+{
+    int r;
+    struct kvm_irqfd data = {
+        .fd = fd,
+        .gsi = gsi,
+        .flags = flags,
+    };
+
+    r = ioctl(kvm->vm_fd, KVM_IRQFD, &data);
+    if (r == -1)
+        r = -errno;
+    return r;
+}
+
+/*
+ * Create an eventfd and register it for gsi through KVM_IRQFD.
+ * Returns the new file descriptor on success; -ENOENT when KVM_CAP_IRQFD is
+ * not reported by kvm_check_extension(); -errno when eventfd() fails; or the
+ * negative error from _kvm_irqfd(), in which case the eventfd is closed.
+ */
+int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
+{
+    int r;
+    int fd;
+
+    if (!kvm_check_extension(kvm, KVM_CAP_IRQFD))
+        return -ENOENT;
+
+    fd = eventfd(0, 0);
+    if (fd < 0)
+        return -errno;
+
+    /* NOTE(review): the flags argument is not forwarded (0 is passed);
+     * confirm whether that is intentional. */
+    r = _kvm_irqfd(kvm, fd, gsi, 0);
+    if (r < 0) {
+        /* r already carries -errno from the ioctl; close() may overwrite
+         * errno, so do not re-read it afterwards. */
+        close(fd);
+        return r;
+    }
+
+    return fd;
+}
+
+#else /* KVM_CAP_IRQFD */
+
+/* Built without KVM_CAP_IRQFD or CONFIG_eventfd: always returns -ENOSYS. */
+int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
+{
+    return -ENOSYS;
+}
+
+#endif /* KVM_CAP_IRQFD */
 static inline unsigned long kvm_get_thread_id(void)
 {
     return syscall(SYS_gettid);
@@ -1427,13 +2935,6 @@ int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len)
     return 0;
 }
 
-/* hack: both libkvm and upstream qemu define kvm_has_sync_mmu(), differently */
-#undef kvm_has_sync_mmu
-int qemu_kvm_has_sync_mmu(void)
-{
-    return kvm_has_sync_mmu(kvm_context);
-}
-
 void qemu_kvm_cpu_stop(CPUState *env)
 {
     if (kvm_enabled())
diff --git a/qemu-kvm.h b/qemu-kvm.h
index fa40542..126b8f3 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -160,16 +160,15 @@ struct ioperm_data {
     LIST_ENTRY(ioperm_data) entries;
 };
 
-int qemu_kvm_has_sync_mmu(void);
 void qemu_kvm_cpu_stop(CPUState *env);
 int kvm_arch_halt(void *opaque, kvm_vcpu_context_t vcpu);
 int handle_tpr_access(void *opaque, kvm_vcpu_context_t vcpu,
                       uint64_t rip, int is_write);
+int kvm_has_sync_mmu(void);
 
 #define kvm_enabled() (kvm_allowed)
 #define qemu_kvm_irqchip_in_kernel() kvm_irqchip_in_kernel(kvm_context)
 #define qemu_kvm_pit_in_kernel() kvm_pit_in_kernel(kvm_context)
-#define kvm_has_sync_mmu() qemu_kvm_has_sync_mmu()
 void kvm_init_vcpu(CPUState *env);
 void kvm_load_tsc(CPUState *env);
 #else
@@ -177,7 +176,6 @@ void kvm_load_tsc(CPUState *env);
 #define kvm_nested 0
 #define qemu_kvm_irqchip_in_kernel() (0)
 #define qemu_kvm_pit_in_kernel() (0)
-#define kvm_has_sync_mmu() (0)
 #define kvm_load_registers(env) do {} while(0)
 #define kvm_save_registers(env) do {} while(0)
 #define qemu_kvm_cpu_stop(env) do {} while(0)