From patchwork Fri Nov 5 07:16:23 2010
X-Patchwork-Submitter: Sheng Yang
X-Patchwork-Id: 303822
From: Sheng Yang
To: Avi Kivity, Marcelo Tosatti, "Michael S. Tsirkin"
Cc: kvm@vger.kernel.org, Sheng Yang
Subject: [PATCH 5/5] KVM: assigned dev: MSI-X mask support
Date: Fri, 5 Nov 2010 15:16:23 +0800
Message-Id: <1288941383-11060-1-git-send-email-sheng@linux.intel.com>
X-Mailer: git-send-email 1.7.1
In-Reply-To: <201011051220.26836.sheng@linux.intel.com>
References: <201011051220.26836.sheng@linux.intel.com>

diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt
index b336266..76f800b 100644
--- a/Documentation/kvm/api.txt
+++ b/Documentation/kvm/api.txt
@@ -1085,6 +1085,54 @@ of 4 instructions that make up a hypercall.
 If any additional field gets added to this structure later on, a bit for
 that additional piece of information will be set in the flags bitmap.
 
+4.47 KVM_ASSIGN_REG_MSIX_MMIO
+
+Capability: KVM_CAP_DEVICE_MSIX_MASK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_msix_mmio (in)
+Returns: 0 on success, !0 on error
+
+struct kvm_assigned_msix_mmio {
+        /* Assigned device's ID */
+        __u32 assigned_dev_id;
+        /* Must be 0 */
+        __u32 flags;
+        /* MSI-X table MMIO address */
+        __u64 base_addr;
+        /* Maximum entries contained in the table, <= KVM_MAX_MSIX_PER_DEV */
+        __u32 max_entries_nr;
+        /* Must be 0, reserved for future use */
+        __u32 reserved;
+};
+
+This ioctl enables in-kernel MSI-X table emulation, so that the MSI-X
+mask bit is handled in the kernel.
+
+4.48 KVM_ASSIGN_GET_MSIX_ENTRY
+
+Capability: KVM_CAP_DEVICE_MSIX_MASK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_msix_entry (in and out)
+Returns: 0 on success, !0 on error
+
+struct kvm_assigned_msix_entry {
+        /* Assigned device's ID */
+        __u32 assigned_dev_id;
+        /* Ignored */
+        __u32 gsi;
+        /* The index of entry in the MSI-X table */
+        __u16 entry;
+        /* Querying flags and returning status */
+        __u16 flags;
+        /* Must be 0 */
+        __u16 padding[2];
+};
+
+This ioctl allows userspace to query the status of one specific MSI-X
+entry. Currently only the mask bit status can be queried.
+
 5. The kvm_run structure
 
 Application code obtains a pointer to the kvm_run structure by
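For illustration, a minimal userspace sketch of how a VMM might drive the
new registration ioctl. Error handling is abbreviated, and vm_fd, dev_id,
table_gpa and nr_entries are hypothetical values the VMM would derive from
its device model and the device's MSI-X capability:

        /* Sketch only: register in-kernel MSI-X mask handling for an
         * already-assigned device.  dev_id is the same ID used with
         * KVM_ASSIGN_PCI_DEVICE; table_gpa is the guest-physical address
         * at which the MSI-X table is exposed to the guest. */
        struct kvm_assigned_msix_mmio mmio = {
                .assigned_dev_id = dev_id,
                .flags           = 0,           /* must be 0 */
                .base_addr       = table_gpa,
                .max_entries_nr  = nr_entries,  /* <= KVM_MAX_MSIX_PER_DEV */
                .reserved        = 0,           /* must be 0 */
        };

        if (ioctl(vm_fd, KVM_ASSIGN_REG_MSIX_MMIO, &mmio) < 0)
                perror("KVM_ASSIGN_REG_MSIX_MMIO");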
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f3f86b2..8fd5121 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1926,6 +1926,7 @@ int kvm_dev_ioctl_check_extension(long ext)
         case KVM_CAP_DEBUGREGS:
         case KVM_CAP_X86_ROBUST_SINGLESTEP:
         case KVM_CAP_XSAVE:
+        case KVM_CAP_DEVICE_MSIX_MASK:
                 r = 1;
                 break;
         case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 919ae53..bfe5707 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -540,6 +540,9 @@ struct kvm_ppc_pvinfo {
 #endif
 #define KVM_CAP_PPC_GET_PVINFO 57
 #define KVM_CAP_PPC_IRQ_LEVEL 58
+#ifdef __KVM_HAVE_MSIX
+#define KVM_CAP_DEVICE_MSIX_MASK 59
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -671,6 +674,10 @@ struct kvm_clock_data {
 #define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct kvm_xen_hvm_config)
 #define KVM_SET_CLOCK             _IOW(KVMIO,  0x7b, struct kvm_clock_data)
 #define KVM_GET_CLOCK             _IOR(KVMIO,  0x7c, struct kvm_clock_data)
+#define KVM_ASSIGN_GET_MSIX_ENTRY _IOWR(KVMIO, 0x7d, \
+                                        struct kvm_assigned_msix_entry)
+#define KVM_ASSIGN_REG_MSIX_MMIO  _IOW(KVMIO,  0x7e, \
+                                        struct kvm_assigned_msix_mmio)
 /* Available with KVM_CAP_PIT_STATE2 */
 #define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
 #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
@@ -787,11 +794,24 @@ struct kvm_assigned_msix_nr {
 };
 
 #define KVM_MAX_MSIX_PER_DEV            256
+
+#define KVM_MSIX_FLAG_MASK              (1 << 0)
+#define KVM_MSIX_FLAG_QUERY_MASK        (1 << 15)
+
 struct kvm_assigned_msix_entry {
         __u32 assigned_dev_id;
         __u32 gsi;
         __u16 entry; /* The index of entry in the MSI-X table */
-        __u16 padding[3];
+        __u16 flags;
+        __u16 padding[2];
+};
+
+struct kvm_assigned_msix_mmio {
+        __u32 assigned_dev_id;
+        __u32 flags;
+        __u64 base_addr;
+        __u32 max_entries_nr;
+        __u32 reserved;
 };
 
 #endif /* __LINUX_KVM_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e2ecbac..f58aaca 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -464,6 +464,10 @@ struct kvm_assigned_dev_kernel {
         struct pci_dev *dev;
         struct kvm *kvm;
         spinlock_t assigned_dev_lock;
+        DECLARE_BITMAP(msix_mask_bitmap, KVM_MAX_MSIX_PER_DEV);
+        gpa_t msix_mmio_base;
+        struct kvm_io_device msix_mmio_dev;
+        int msix_max_entries_nr;
 };
 
 struct kvm_irq_mask_notifier {
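For reference, the MMIO emulation added below relies on the MSI-X table
layout fixed by the PCI specification: 16 bytes per entry
(PCI_MSIX_ENTRY_SIZE), with the per-vector mask bit in bit 0
(PCI_MSIX_ENTRY_CTRL_MASKBIT) of the Vector Control dword at entry offset
12 (PCI_MSIX_ENTRY_VECTOR_CTRL). As a sketch (the kernel code indexes with
the offset macros directly rather than a struct like this):

        /* One MSI-X table entry, per the PCI spec */
        struct msix_table_entry {
                u32 msg_addr_lo;   /* offset  0: Message Address (low)  */
                u32 msg_addr_hi;   /* offset  4: Message Address (high) */
                u32 msg_data;      /* offset  8: Message Data           */
                u32 vector_ctrl;   /* offset 12: bit 0 is the mask bit  */
        };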
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 7c98928..e3b5530 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -232,6 +232,14 @@ static void kvm_free_assigned_device(struct kvm *kvm,
 {
         kvm_free_assigned_irq(kvm, assigned_dev);
 
+#ifdef __KVM_HAVE_MSIX
+        if (assigned_dev->msix_mmio_base) {
+                mutex_lock(&kvm->slots_lock);
+                kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+                                &assigned_dev->msix_mmio_dev);
+                mutex_unlock(&kvm->slots_lock);
+        }
+#endif
         pci_reset_function(assigned_dev->dev);
 
         pci_release_regions(assigned_dev->dev);
@@ -504,7 +512,7 @@ out:
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
                                       struct kvm_assigned_pci_dev *assigned_dev)
 {
-        int r = 0, idx;
+        int r = 0, idx, i;
         struct kvm_assigned_dev_kernel *match;
         struct pci_dev *dev;
 
@@ -563,6 +571,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
         list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+        /* After reset, all entries of the MSI-X table are masked */
+        for (i = 0; i < KVM_MAX_MSIX_PER_DEV; i++)
+                set_bit(i, match->msix_mask_bitmap);
+
         if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
                 if (!kvm->arch.iommu_domain) {
                         r = kvm_iommu_map_guest(kvm);
@@ -666,6 +678,43 @@ msix_nr_out:
         return r;
 }
 
+static void update_msix_mask(struct kvm_assigned_dev_kernel *adev,
+                             int idx, bool new_mask_flag)
+{
+        int irq;
+        bool old_mask_flag, need_flush = false;
+
+        spin_lock_irq(&adev->assigned_dev_lock);
+
+        if (!adev->dev->msix_enabled ||
+            !(adev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX))
+                goto out;
+
+        old_mask_flag = test_bit(adev->guest_msix_entries[idx].entry,
+                                 adev->msix_mask_bitmap);
+        if (old_mask_flag == new_mask_flag)
+                goto out;
+
+        irq = adev->host_msix_entries[idx].vector;
+        BUG_ON(irq == 0);
+
+        if (new_mask_flag) {
+                set_bit(adev->guest_msix_entries[idx].entry,
+                        adev->msix_mask_bitmap);
+                disable_irq_nosync(irq);
+                need_flush = true;
+        } else {
+                clear_bit(adev->guest_msix_entries[idx].entry,
+                          adev->msix_mask_bitmap);
+                enable_irq(irq);
+        }
+out:
+        spin_unlock_irq(&adev->assigned_dev_lock);
+
+        if (need_flush)
+                flush_work(&adev->interrupt_work);
+}
+
 static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
                                        struct kvm_assigned_msix_entry *entry)
 {
@@ -700,6 +749,211 @@ msix_entry_out:
         return r;
 }
 
+static int kvm_vm_ioctl_get_msix_entry(struct kvm *kvm,
+                                       struct kvm_assigned_msix_entry *entry)
+{
+        int r = 0;
+        struct kvm_assigned_dev_kernel *adev;
+
+        mutex_lock(&kvm->lock);
+
+        adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+                                     entry->assigned_dev_id);
+
+        if (!adev) {
+                r = -EINVAL;
+                goto out;
+        }
+
+        if (entry->entry >= adev->msix_max_entries_nr) {
+                r = -ENOSPC;
+                goto out;
+        }
+
+        if (entry->flags & KVM_MSIX_FLAG_QUERY_MASK) {
+                if (test_bit(entry->entry, adev->msix_mask_bitmap))
+                        entry->flags |= KVM_MSIX_FLAG_MASK;
+                else
+                        entry->flags &= ~KVM_MSIX_FLAG_MASK;
+        }
+
+out:
+        mutex_unlock(&kvm->lock);
+
+        return r;
+}
+
+static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev,
+                               gpa_t addr, int len)
+{
+        gpa_t start, end;
+
+        BUG_ON(adev->msix_mmio_base == 0);
+        start = adev->msix_mmio_base;
+        end = adev->msix_mmio_base + PCI_MSIX_ENTRY_SIZE *
+                adev->msix_max_entries_nr;
+        if (addr >= start && addr + len <= end)
+                return true;
+
+        return false;
+}
+
+static int msix_get_enabled_idx(struct kvm_assigned_dev_kernel *adev,
+                                gpa_t addr, int len)
+{
+        int i, index = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+
+        for (i = 0; i < adev->entries_nr; i++)
+                if (adev->guest_msix_entries[i].entry == index)
+                        return i;
+
+        return -EINVAL;
+}
+
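Both MMIO handlers below decode a guest access with the same arithmetic as
msix_get_enabled_idx(): entry index = (addr - base) / PCI_MSIX_ENTRY_SIZE,
offset within the entry = addr % PCI_MSIX_ENTRY_SIZE (the modulo form
presumes a 16-byte-aligned table base). A worked example with a
hypothetical base of 0xf0000000:

        /* A 4-byte access at 0xf000001c decodes as: */
        int idx    = (0xf000001c - 0xf0000000) / PCI_MSIX_ENTRY_SIZE; /* = 1 */
        int offset = 0xf000001c % PCI_MSIX_ENTRY_SIZE; /* = 12, the Vector
                                                          Control dword */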
" + "idx %d, addr 0x%llx, len %d\n", idx, addr, len); + r = -EOPNOTSUPP; + goto out; + } + entry[0] = e.msi.address_lo; + entry[1] = e.msi.address_hi; + entry[2] = e.msi.data; + entry[3] = test_bit(adev->guest_msix_entries[idx].entry, + adev->msix_mask_bitmap); + memcpy(val, &entry[addr % PCI_MSIX_ENTRY_SIZE / sizeof *entry], len); + +out: + mutex_unlock(&adev->kvm->lock); + return r; +} + +static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, + const void *val) +{ + struct kvm_assigned_dev_kernel *adev = + container_of(this, struct kvm_assigned_dev_kernel, + msix_mmio_dev); + int idx, r = 0; + unsigned long new_val = *(unsigned long *)val; + + mutex_lock(&adev->kvm->lock); + if (!msix_mmio_in_range(adev, addr, len)) { + r = -EOPNOTSUPP; + goto out; + } + if ((addr & 0x3) || len != 4) + goto out; + + idx = msix_get_enabled_idx(adev, addr, len); + if (idx < 0) { + idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE; + if (((addr % PCI_MSIX_ENTRY_SIZE) == + PCI_MSIX_ENTRY_VECTOR_CTRL)) { + if (new_val & ~PCI_MSIX_ENTRY_CTRL_MASKBIT) + goto out; + if (new_val & PCI_MSIX_ENTRY_CTRL_MASKBIT) + set_bit(idx, adev->msix_mask_bitmap); + else + clear_bit(idx, adev->msix_mask_bitmap); + } else + /* Userspace would handle other MMIO writing */ + r = -EOPNOTSUPP; + goto out; + } + if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) { + r = -EOPNOTSUPP; + goto out; + } + if (new_val & ~PCI_MSIX_ENTRY_CTRL_MASKBIT) + goto out; + update_msix_mask(adev, idx, !!(new_val & PCI_MSIX_ENTRY_CTRL_MASKBIT)); +out: + mutex_unlock(&adev->kvm->lock); + + return r; +} + +static const struct kvm_io_device_ops msix_mmio_ops = { + .read = msix_mmio_read, + .write = msix_mmio_write, +}; + +static int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm, + struct kvm_assigned_msix_mmio *msix_mmio) +{ + int r = 0; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + msix_mmio->assigned_dev_id); + if (!adev) { + r = -EINVAL; + goto out; + } + if (msix_mmio->base_addr == 0) { + r = -EINVAL; + goto out; + } + if (msix_mmio->max_entries_nr == 0 || + msix_mmio->max_entries_nr > KVM_MAX_MSIX_PER_DEV) { + r = -EINVAL; + goto out; + } + + mutex_lock(&kvm->slots_lock); + if (adev->msix_mmio_base == 0) { + kvm_iodevice_init(&adev->msix_mmio_dev, &msix_mmio_ops); + r = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, + &adev->msix_mmio_dev); + if (r) + goto out2; + } + + adev->msix_mmio_base = msix_mmio->base_addr; + adev->msix_max_entries_nr = msix_mmio->max_entries_nr; +out2: + mutex_unlock(&kvm->slots_lock); +out: + mutex_unlock(&kvm->lock); + + return r; +} #endif long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, @@ -812,6 +1066,36 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, goto out; break; } + case KVM_ASSIGN_GET_MSIX_ENTRY: { + struct kvm_assigned_msix_entry entry; + r = -EFAULT; + if (copy_from_user(&entry, argp, sizeof entry)) + goto out; + r = kvm_vm_ioctl_get_msix_entry(kvm, &entry); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &entry, sizeof entry)) + goto out; + r = 0; + break; + } + case KVM_ASSIGN_REG_MSIX_MMIO: { + struct kvm_assigned_msix_mmio msix_mmio; + + r = -EFAULT; + if (copy_from_user(&msix_mmio, argp, sizeof(msix_mmio))) + goto out; + + r = -EINVAL; + if (msix_mmio.flags != 0 || msix_mmio.reserved != 0) + goto out; + + r = kvm_vm_ioctl_register_msix_mmio(kvm, &msix_mmio); + if (r) + goto out; + break; + } #endif } out: