From patchwork Fri Feb 18 08:55:11 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Sheng Yang X-Patchwork-Id: 573681 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p1I8rFtL017789 for ; Fri, 18 Feb 2011 08:53:15 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754117Ab1BRIxO (ORCPT ); Fri, 18 Feb 2011 03:53:14 -0500 Received: from mga14.intel.com ([143.182.124.37]:48111 "EHLO mga14.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932089Ab1BRIxM (ORCPT ); Fri, 18 Feb 2011 03:53:12 -0500 Received: from azsmga001.ch.intel.com ([10.2.17.19]) by azsmga102.ch.intel.com with ESMTP; 18 Feb 2011 00:53:12 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.62,186,1297065600"; d="scan'208";a="390659686" Received: from syang10-desktop.sh.intel.com (HELO syang10-desktop) ([10.239.13.17]) by azsmga001.ch.intel.com with ESMTP; 18 Feb 2011 00:53:10 -0800 Received: from yasker by syang10-desktop with local (Exim 4.72) (envelope-from ) id 1PqM7N-0003Mt-BN; Fri, 18 Feb 2011 16:55:13 +0800 From: Sheng Yang To: Avi Kivity , Marcelo Tosatti Cc: kvm@vger.kernel.org, "Michael S. 
Tsirkin" , Sheng Yang Subject: [PATCH 4/4] qemu-kvm: MSI-X MMIO support for assigned device Date: Fri, 18 Feb 2011 16:55:11 +0800 Message-Id: <1298019312-12912-5-git-send-email-sheng@linux.intel.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1298019312-12912-1-git-send-email-sheng@linux.intel.com> References: <1298019312-12912-1-git-send-email-sheng@linux.intel.com> Sender: kvm-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: kvm@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); Fri, 18 Feb 2011 08:53:15 +0000 (UTC)

What this patch does, as far as the diff itself shows:

* hw/device-assignment.c
  - calc_assigned_dev_id() is moved up so that it can be used earlier in
    the file.
  - assigned_dev_iomem_map(): when KVM_CAP_MSIX_MMIO is reported, the guest
    address of the MSI-X table page is recorded and the page is registered
    with kvm_register_msix_mmio(). A registration failure is only logged.
  - The routing update that used to live in msix_mmio_writel() is split out
    into assigned_dev_update_routing(), which takes the entry index from a
    struct kvm_msix_routing_data instead of deriving it from the MMIO
    address.
  - assigned_dev_update_routing_handler() is the per-device callback: it
    accepts an update only when the type is KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV
    and dev_id matches the device, and returns -EINVAL otherwise.
  - msix_mmio_writel() stores the value into the table page only when the
    capability is absent, then builds a kvm_msix_routing_data for entry
    (offset / 16) and calls assigned_dev_update_routing().
  - assigned_dev_register_msix_mmio() / assigned_dev_unregister_msix_mmio()
    add and remove the callback.

* qemu-kvm.c / qemu-kvm.h
  - A list of (callback, opaque) entries with kvm_add_routing_updater() and
    kvm_del_routing_updater().
  - kvm_run() handles KVM_EXIT_MSIX_ROUTING_UPDATE by calling
    kvm_update_msix_routing(), which copies the exit data and offers it to
    each registered callback until one returns 0; otherwise it logs the
    update as unhandled.

---
Review notes (changes relative to the version as posted):

 - assigned_dev_iomem_map(): dropped "msix_mmio.flags = 0", which is
   redundant after the memset(); added braces to "if (ret)".
 - assigned_dev_register_msix_mmio(): the failure path used perror(), but
   kvm_add_routing_updater() does not set errno, so the suffix would be
   unrelated; use fprintf() instead. The failure path now also clears
   msix_entry_used and msix_table_page after releasing them, the same way
   assigned_dev_unregister_msix_mmio() does, so a later unregister cannot
   release them a second time.
 - kvm_update_msix_routing(): iterate with QLIST_FOREACH() rather than
   reaching into lh_first/le_next; print the 64-bit flags through an
   unsigned long long cast and %llx instead of %lx; braces added.
 - Spacing: sizeof(*e), QLIST_ENTRY(...).
 - Hunk headers were recomputed where the number of added lines changed.
   The "index" lines still carry the blob ids of the original posting.
 - NOTE(review): indentation and blank context lines were reconstructed
   from a whitespace-collapsed copy; verify against the tree before
   applying (or apply with whitespace-insensitive matching).
 - NOTE(review): max_msix_entries_nr and get_msix_entries_max_nr() are used
   here but not introduced by this diff; presumably they come from an
   earlier patch in the series -- confirm.

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 5c162c4..09e3b99 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -71,6 +71,11 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
 static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
                                                     uint32_t address, int len);
 
+static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
+{
+    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
+}
+
 static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
                                        uint32_t addr, int len, uint32_t *val)
 {
@@ -274,6 +279,10 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
     AssignedDevRegion *region = &r_dev->v_addrs[region_num];
     PCIRegion *real_region = &r_dev->real_device.regions[region_num];
     int ret = 0;
+#ifdef KVM_CAP_MSIX_MMIO
+    int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO);
+    struct kvm_msix_mmio_user msix_mmio;
+#endif
 
     DEBUG("e_phys=%08" FMT_PCIBUS " r_virt=%p type=%d len=%08" FMT_PCIBUS " region_num=%d \n",
           e_phys, region->u.r_virtbase, type, e_size, region_num);
@@ -292,6 +301,23 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
 
             cpu_register_physical_memory(e_phys + offset,
                     TARGET_PAGE_SIZE, r_dev->mmio_index);
+#ifdef KVM_CAP_MSIX_MMIO
+            if (cap_mask) {
+                r_dev->guest_msix_table_addr = e_phys + offset;
+                memset(&msix_mmio, 0, sizeof msix_mmio);
+                msix_mmio.dev_id = calc_assigned_dev_id(r_dev->h_segnr,
+                        r_dev->h_busnr, r_dev->h_devfn);
+                msix_mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
+                        KVM_MSIX_MMIO_TYPE_BASE_TABLE;
+                msix_mmio.base_addr = e_phys + offset;
+                msix_mmio.base_va = (unsigned long)r_dev->msix_table_page;
+                msix_mmio.max_entries_nr = r_dev->max_msix_entries_nr;
+                ret = kvm_register_msix_mmio(kvm_context, &msix_mmio);
+                if (ret) {
+                    fprintf(stderr, "fail to register in-kernel msix_mmio!\n");
+                }
+            }
+#endif
         }
     }
 
@@ -854,11 +880,6 @@ static void free_assigned_device(AssignedDevice *dev)
     }
 }
 
-static uint32_t calc_assigned_dev_id(uint16_t seg, uint8_t bus, uint8_t devfn)
-{
-    return (uint32_t)seg << 16 | (uint32_t)bus << 8 | (uint32_t)devfn;
-}
-
 static void assign_failed_examine(AssignedDevice *dev)
 {
     char name[PATH_MAX], dir[PATH_MAX], driver[PATH_MAX] = {}, *ns;
@@ -1268,6 +1289,9 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
     return r;
 }
 
+static int assigned_dev_update_routing_handler(void *opaque,
+        struct kvm_msix_routing_data *data);
+
 static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
 {
     struct kvm_assigned_irq assigned_irq_data;
@@ -1494,7 +1518,9 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
         msix_table_entry = pci_get_long(pci_dev->config + pos + PCI_MSIX_TABLE);
         bar_nr = msix_table_entry & PCI_MSIX_BIR;
         msix_table_entry &= ~PCI_MSIX_BIR;
-        dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
+        dev->msix_table_addr = pci_region[bar_nr].base_addr +
+                               msix_table_entry;
+        dev->max_msix_entries_nr = get_msix_entries_max_nr(dev);
     }
 #endif
 
@@ -1678,11 +1704,10 @@ static uint32_t msix_mmio_readw(void *opaque, target_phys_addr_t addr)
             (8 * (addr & 3))) & 0xffff;
 }
 
-static void msix_mmio_writel(void *opaque,
-                             target_phys_addr_t addr, uint32_t val)
+static void assigned_dev_update_routing(void *opaque,
+        struct kvm_msix_routing_data *data)
 {
     AssignedDevice *adev = opaque;
-    unsigned int offset = addr & 0xfff;
     void *page = adev->msix_table_page;
     int ctrl_word, index;
     struct kvm_irq_routing_entry new_entry = {};
@@ -1691,11 +1716,7 @@ static void msix_mmio_writel(void *opaque,
     struct PCIDevice *pci_dev = &adev->dev;
     uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
 
-    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
-          addr, val);
-    memcpy((void *)((char *)page + offset), &val, 4);
-
-    index = offset / 16;
+    index = data->entry_idx;
 
     /* Check if mask bit is being accessed */
     memcpy(&msg_addr, (char *)page + index * 16, 4);
@@ -1770,6 +1791,49 @@ static void msix_mmio_writel(void *opaque,
     adev->entry[entry_idx].u.msi.data = msg_data;
 }
 
+static int assigned_dev_update_routing_handler(void *opaque,
+        struct kvm_msix_routing_data *data)
+{
+    AssignedDevice *adev = opaque;
+
+    if (data->type == KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV &&
+            data->dev_id == calc_assigned_dev_id(adev->h_segnr,
+                adev->h_busnr, adev->h_devfn)) {
+        assigned_dev_update_routing(opaque, data);
+        return 0;
+    }
+    return -EINVAL;
+}
+
+static void msix_mmio_writel(void *opaque,
+                             target_phys_addr_t addr, uint32_t val)
+{
+    AssignedDevice *adev = opaque;
+    void *page = adev->msix_table_page;
+    unsigned int offset = addr & 0xfff;
+    struct kvm_msix_routing_data data;
+#ifdef KVM_CAP_MSIX_MMIO
+    int cap_mask = kvm_check_extension(kvm_state, KVM_CAP_MSIX_MMIO);
+#else
+    int cap_mask = 0;
+#endif
+
+    DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
+          addr, val);
+    if (!cap_mask) {
+        memcpy((void *)((char *)page + offset), &val, 4);
+    } else {
+        fprintf(stderr, "msix_mmio_writel: shouldn't be here with KVM_CAP_MSIX_MMIO!\n");
+    }
+
+    data.dev_id = calc_assigned_dev_id(adev->h_segnr, adev->h_busnr,
+                adev->h_devfn);
+    data.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV;
+    data.entry_idx = offset / 16;
+    data.flags = 0;
+    assigned_dev_update_routing(opaque, &data);
+}
+
 static void msix_mmio_writew(void *opaque,
                              target_phys_addr_t addr, uint32_t val)
 {
@@ -1811,7 +1875,19 @@ static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
                         DEVICE_NATIVE_ENDIAN);
     dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
                                 sizeof *dev->dev.msix_entry_used);
+    dev->routing_updater_entry =
+        kvm_add_routing_updater(assigned_dev_update_routing_handler, dev);
+    if (!dev->routing_updater_entry) {
+        fprintf(stderr, "kvm_add_routing_updater failed\n");
+        goto out;
+    }
     return 0;
+out:
+    free(dev->dev.msix_entry_used);
+    dev->dev.msix_entry_used = NULL;
+    munmap(dev->msix_table_page, 0x1000);
+    dev->msix_table_page = NULL;
+    return -EFAULT;
 }
 
 static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
@@ -1827,6 +1903,10 @@ static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev)
                 strerror(errno));
     }
     dev->msix_table_page = NULL;
+    if (dev->routing_updater_entry) {
+        kvm_del_routing_updater(dev->routing_updater_entry);
+        dev->routing_updater_entry = NULL;
+    }
     free(dev->dev.msix_entry_used);
     dev->dev.msix_entry_used = NULL;
 }
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index d92606e..1716738 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -32,6 +32,7 @@
 #include "qemu-common.h"
 #include "qemu-queue.h"
 #include "pci.h"
+#include "qemu-kvm.h"
 
 /* From include/linux/pci.h in the kernel sources */
 #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
@@ -108,10 +109,12 @@ typedef struct AssignedDevice {
     struct kvm_irq_routing_entry *entry;
     void *msix_table_page;
     target_phys_addr_t msix_table_addr;
+    target_phys_addr_t guest_msix_table_addr;
     int mmio_index;
     int need_emulate_cmd;
     char *configfd_name;
     int32_t bootindex;
+    KVMRoutingUpdateEntry *routing_updater_entry;
     QLIST_ENTRY(AssignedDevice) next;
 } AssignedDevice;
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index d282c95..e4d100f 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -511,6 +511,47 @@ static int handle_mmio(CPUState *env)
     return 0;
 }
 
+static QLIST_HEAD(kvm_routing_update_entry_head, kvm_routing_update_entry) kvm_routing_update_entry_head;
+
+KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque)
+{
+    KVMRoutingUpdateEntry *e;
+
+    e = qemu_mallocz(sizeof(*e));
+
+    e->cb = cb;
+    e->opaque = opaque;
+    QLIST_INSERT_HEAD(&kvm_routing_update_entry_head, e, entries);
+    return e;
+}
+
+void kvm_del_routing_updater(KVMRoutingUpdateEntry *e)
+{
+    QLIST_REMOVE(e, entries);
+    qemu_free(e);
+}
+
+#ifdef KVM_CAP_MSIX_MMIO
+static void kvm_update_msix_routing(CPUState *env)
+{
+    struct kvm_msix_routing_data data;
+    KVMRoutingUpdateEntry *e;
+
+    data.dev_id = env->kvm_run->msix_routing.dev_id;
+    data.type = env->kvm_run->msix_routing.type;
+    data.entry_idx = env->kvm_run->msix_routing.entry_idx;
+    data.flags = env->kvm_run->msix_routing.flags;
+    QLIST_FOREACH(e, &kvm_routing_update_entry_head, entries) {
+        if (e->cb(e->opaque, &data) == 0) {
+            return;
+        }
+    }
+    fprintf(stderr, "unhandled MSI-X routing update: dev 0x%x, type %d, "
+            "entry 0x%x, flags 0x%llx\n", data.dev_id, data.type,
+            data.entry_idx, (unsigned long long)data.flags);
+}
+#endif
+
 int handle_io_window(kvm_context_t kvm)
 {
     return 1;
@@ -647,6 +688,12 @@ int kvm_run(CPUState *env)
             kvm_handle_internal_error(env, run);
             r = 1;
             break;
+#ifdef KVM_CAP_MSIX_MMIO
+        case KVM_EXIT_MSIX_ROUTING_UPDATE:
+            kvm_update_msix_routing(env);
+            r = 1;
+            break;
+#endif
         default:
             if (kvm_arch_run(env)) {
                 fprintf(stderr, "unhandled vm exit: 0x%x\n", run->exit_reason);
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 48ff52d..11a62c7 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -773,6 +773,25 @@ int kvm_tpr_enable_vapic(CPUState *env);
 unsigned long kvm_get_thread_id(void);
 int kvm_cpu_is_stopped(CPUState *env);
 
+struct kvm_msix_routing_data {
+    uint32_t dev_id;
+    uint16_t type;
+    uint16_t entry_idx;
+    uint64_t flags;
+};
+
+typedef struct kvm_routing_update_entry KVMRoutingUpdateEntry;
+typedef int KVMRoutingUpdateHandler(void *opaque,
+        struct kvm_msix_routing_data *data);
+
+struct kvm_routing_update_entry {
+    KVMRoutingUpdateHandler *cb;
+    void *opaque;
+    QLIST_ENTRY(kvm_routing_update_entry) entries;
+};
+
+KVMRoutingUpdateEntry *kvm_add_routing_updater(KVMRoutingUpdateHandler *cb, void *opaque);
+void kvm_del_routing_updater(KVMRoutingUpdateEntry *e);
 #endif
 
 #endif