From patchwork Mon Nov 15 09:27:45 2010
X-Patchwork-Submitter: Sheng Yang
X-Patchwork-Id: 324862
From: Sheng Yang
To: Avi Kivity
Cc: Marcelo Tosatti, "Michael S. Tsirkin", kvm@vger.kernel.org, Sheng Yang
Subject: [PATCH 6/6 v5 updated] KVM: assigned dev: MSI-X mask support
Date: Mon, 15 Nov 2010 17:27:45 +0800
Message-Id: <1289813265-3495-1-git-send-email-sheng@linux.intel.com>
X-Mailer: git-send-email 1.7.1
In-Reply-To: <1289812532-3227-7-git-send-email-sheng@linux.intel.com>
References: <1289812532-3227-7-git-send-email-sheng@linux.intel.com>
X-Mailing-List: kvm@vger.kernel.org

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc29223..37602e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1966,6 +1966,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_X86_ROBUST_SINGLESTEP:
 	case KVM_CAP_XSAVE:
 	case KVM_CAP_ASYNC_PF:
+	case KVM_CAP_MSIX_MASK:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index ea2dc1a..b3e5ffe 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -541,6 +541,9 @@ struct kvm_ppc_pvinfo {
 #define KVM_CAP_PPC_GET_PVINFO 57
 #define KVM_CAP_PPC_IRQ_LEVEL 58
 #define KVM_CAP_ASYNC_PF 59
+#ifdef __KVM_HAVE_MSIX
+#define KVM_CAP_MSIX_MASK 60
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -672,6 +675,9 @@ struct kvm_clock_data {
 #define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct kvm_xen_hvm_config)
 #define KVM_SET_CLOCK             _IOW(KVMIO,  0x7b, struct kvm_clock_data)
 #define KVM_GET_CLOCK             _IOR(KVMIO,  0x7c, struct kvm_clock_data)
+/* Available with KVM_CAP_MSIX_MASK */
+#define KVM_GET_MSIX_ENTRY        _IOWR(KVMIO, 0x7d, struct kvm_msix_entry)
+#define KVM_UPDATE_MSIX_MMIO      _IOW(KVMIO,  0x7e, struct kvm_msix_mmio)
 /* Available with KVM_CAP_PIT_STATE2 */
 #define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
 #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
@@ -795,4 +801,30 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+#define KVM_MSIX_TYPE_ASSIGNED_DEV	1
+
+#define KVM_MSIX_FLAG_MASKBIT		(1 << 0)
+#define KVM_MSIX_FLAG_QUERY_MASKBIT	(1 << 0)
+
+struct kvm_msix_entry {
+	__u32 id;
+	__u32 type;
+	__u32 entry;	/* The index of entry in the MSI-X table */
+	__u32 flags;
+	__u32 query_flags;
+	__u32 reserved[5];
+};
+
+#define KVM_MSIX_MMIO_FLAG_REGISTER	(1 << 0)
+#define KVM_MSIX_MMIO_FLAG_UNREGISTER	(1 << 1)
+
+struct kvm_msix_mmio {
+	__u32 id;
+	__u32 type;
+	__u64 base_addr;
+	__u32 max_entries_nr;
+	__u32 flags;
+	__u32 reserved[6];
+};
+
 #endif /* __LINUX_KVM_H */
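The two ioctls defined above are issued on a VM fd. Before the diff continues with the in-kernel plumbing, here is a minimal userspace sketch of the registration path, assuming this patch is applied so that <linux/kvm.h> carries the new definitions; vm_fd, dev_id, table_gpa and nr_entries are hypothetical placeholders, not values from the patch:

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int register_msix_mmio(int vm_fd, __u32 dev_id, __u64 table_gpa,
                              __u32 nr_entries)
{
        struct kvm_msix_mmio mmio = {
                .id             = dev_id,       /* assigned device id */
                .type           = KVM_MSIX_TYPE_ASSIGNED_DEV,
                .base_addr      = table_gpa,    /* GPA of the device's MSI-X table */
                .max_entries_nr = nr_entries,   /* 1..KVM_MAX_MSIX_PER_DEV */
                .flags          = KVM_MSIX_MMIO_FLAG_REGISTER,
        };

        /* The designated initializer leaves reserved[] zeroed; the ioctl
         * dispatch code at the end of the patch rejects any set reserved
         * bit. */
        return ioctl(vm_fd, KVM_UPDATE_MSIX_MMIO, &mmio);
}

Passing KVM_MSIX_MMIO_FLAG_UNREGISTER instead tears the region down again; the two flags are mutually exclusive.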
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f09db87..57a437a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -501,6 +501,7 @@ struct kvm_guest_msix_entry {
 };
 
 #define KVM_ASSIGNED_ENABLED_IOMMU		(1 << 0)
+#define KVM_ASSIGNED_ENABLED_MSIX_MMIO		(1 << 1)
 struct kvm_assigned_dev_kernel {
 	struct kvm_irq_ack_notifier ack_notifier;
 	struct work_struct interrupt_work;
@@ -521,6 +522,10 @@ struct kvm_assigned_dev_kernel {
 	struct pci_dev *dev;
 	struct kvm *kvm;
 	spinlock_t assigned_dev_lock;
+	DECLARE_BITMAP(msix_mask_bitmap, KVM_MAX_MSIX_PER_DEV);
+	gpa_t msix_mmio_base;
+	struct kvm_io_device msix_mmio_dev;
+	int msix_max_entries_nr;
 };
 
 struct kvm_irq_mask_notifier {
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 5c6b96d..a96a74d 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -226,12 +226,27 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
 	kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
 }
 
+static void unregister_msix_mmio(struct kvm *kvm,
+				 struct kvm_assigned_dev_kernel *adev)
+{
+	if (adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO) {
+		mutex_lock(&kvm->slots_lock);
+		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+					  &adev->msix_mmio_dev);
+		mutex_unlock(&kvm->slots_lock);
+		adev->flags &= ~KVM_ASSIGNED_ENABLED_MSIX_MMIO;
+	}
+}
+
 static void kvm_free_assigned_device(struct kvm *kvm,
 				     struct kvm_assigned_dev_kernel *assigned_dev)
 {
 	kvm_free_assigned_irq(kvm, assigned_dev);
 
+#ifdef __KVM_HAVE_MSIX
+	unregister_msix_mmio(kvm, assigned_dev);
+#endif
+
 	pci_reset_function(assigned_dev->dev);
 	pci_release_regions(assigned_dev->dev);
@@ -504,7 +519,7 @@ out:
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 				      struct kvm_assigned_pci_dev *assigned_dev)
 {
-	int r = 0, idx;
+	int r = 0, idx, i;
 	struct kvm_assigned_dev_kernel *match;
 	struct pci_dev *dev;
 
@@ -564,6 +579,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
 
 	list_add(&match->list, &kvm->arch.assigned_dev_head);
 
+	/* The state after reset of MSI-X table is all masked */
+	for (i = 0; i < KVM_MAX_MSIX_PER_DEV; i++)
+		set_bit(i, match->msix_mask_bitmap);
+
 	if (assigned_dev->flags & KVM_ASSIGNED_ENABLED_IOMMU) {
 		if (!kvm->arch.iommu_domain) {
 			r = kvm_iommu_map_guest(kvm);
@@ -667,6 +686,43 @@ msix_nr_out:
 	return r;
 }
 
+static void update_msix_mask(struct kvm_assigned_dev_kernel *adev,
+			     int idx, bool new_mask_flag)
+{
+	int irq;
+	bool old_mask_flag, need_flush = false;
+
+	spin_lock_irq(&adev->assigned_dev_lock);
+
+	if (!adev->dev->msix_enabled ||
+	    !(adev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX))
+		goto out;
+
+	old_mask_flag = test_bit(adev->guest_msix_entries[idx].entry,
+				 adev->msix_mask_bitmap);
+	if (old_mask_flag == new_mask_flag)
+		goto out;
+
+	irq = adev->host_msix_entries[idx].vector;
+	BUG_ON(irq == 0);
+
+	if (new_mask_flag) {
+		set_bit(adev->guest_msix_entries[idx].entry,
+			adev->msix_mask_bitmap);
+		disable_irq_nosync(irq);
+		need_flush = true;
+	} else {
+		clear_bit(adev->guest_msix_entries[idx].entry,
+			  adev->msix_mask_bitmap);
+		enable_irq(irq);
+	}
+out:
+	spin_unlock_irq(&adev->assigned_dev_lock);
+
+	if (need_flush)
+		flush_work(&adev->interrupt_work);
+}
+
 static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
 				       struct kvm_assigned_msix_entry *entry)
 {
@@ -701,6 +757,240 @@ msix_entry_out:
 	return r;
 }
 
+static int kvm_vm_ioctl_get_msix_entry(struct kvm *kvm,
+				       struct kvm_msix_entry *entry)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	if (entry->type != KVM_MSIX_TYPE_ASSIGNED_DEV)
+		return -EINVAL;
+
+	if (!entry->query_flags)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				     entry->id);
+
+	if (!adev) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (entry->entry >= adev->msix_max_entries_nr) {
+		r = -ENOSPC;
+		goto out;
+	}
+
+	if (entry->query_flags & KVM_MSIX_FLAG_QUERY_MASKBIT) {
+		if (test_bit(entry->entry, adev->msix_mask_bitmap))
+			entry->flags |= KVM_MSIX_FLAG_MASKBIT;
+		else
+			entry->flags &= ~KVM_MSIX_FLAG_MASKBIT;
+	}
+
+out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
+
+static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev,
+			       gpa_t addr, int len)
+{
+	gpa_t start, end;
+
+	BUG_ON(!(adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO));
+	start = adev->msix_mmio_base;
+	end = adev->msix_mmio_base + PCI_MSIX_ENTRY_SIZE *
+		adev->msix_max_entries_nr;
+	if (addr >= start && addr + len <= end)
+		return true;
+
+	return false;
+}
+
+static int msix_get_enabled_idx(struct kvm_assigned_dev_kernel *adev,
+				gpa_t addr, int len)
+{
+	int i, index = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+
+	for (i = 0; i < adev->entries_nr; i++)
+		if (adev->guest_msix_entries[i].entry == index)
+			return i;
+
+	return -EINVAL;
+}
+
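To make the address arithmetic in msix_mmio_in_range() and msix_get_enabled_idx() concrete before the MMIO handlers that use it: an MSI-X table entry is 16 bytes (PCI_MSIX_ENTRY_SIZE), with message address low/high at offsets 0/4, message data at offset 8, and the vector control word, whose bit 0 is the per-vector mask bit, at offset 12 (PCI_MSIX_ENTRY_VECTOR_CTRL). A standalone sketch of the decode, with illustrative values that are not taken from the patch:

#include <stdio.h>

/* Illustrative only: mirrors the decode in msix_mmio_read()/write().
 * A 16-byte entry size means a 4-byte aligned access splits into a
 * table index and a field offset within that entry. */
#define ENTRY_SIZE  16
#define VECTOR_CTRL 12

int main(void)
{
        unsigned long base = 0xfebf0000ul;  /* assumed MSI-X table base GPA */
        unsigned long addr = base + 0x1c;   /* a 4-byte guest access lands here */
        unsigned long off  = addr - base;

        /* Prints: entry 1, offset 12 (vector control) */
        printf("entry %lu, offset %lu%s\n", off / ENTRY_SIZE, off % ENTRY_SIZE,
               (off % ENTRY_SIZE) == VECTOR_CTRL ? " (vector control)" : "");
        return 0;
}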
+static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+			  void *val)
+{
+	struct kvm_assigned_dev_kernel *adev =
+			container_of(this, struct kvm_assigned_dev_kernel,
+				     msix_mmio_dev);
+	int idx, r = 0;
+	u32 entry[4];
+	struct kvm_kernel_irq_routing_entry e;
+
+	/* TODO: Get big-endian machine work */
+	mutex_lock(&adev->kvm->lock);
+	if (!msix_mmio_in_range(adev, addr, len)) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	if ((addr & 0x3) || len != 4) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+
+	idx = msix_get_enabled_idx(adev, addr, len);
+	if (idx < 0) {
+		idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+		if ((addr % PCI_MSIX_ENTRY_SIZE) ==
+		    PCI_MSIX_ENTRY_VECTOR_CTRL)
+			*(unsigned long *)val =
+				test_bit(idx, adev->msix_mask_bitmap) ?
+				PCI_MSIX_ENTRY_CTRL_MASKBIT : 0;
+		else
+			r = -EOPNOTSUPP;
+		goto out;
+	}
+
+	r = kvm_get_irq_routing_entry(adev->kvm,
+			adev->guest_msix_entries[idx].vector, &e);
+	if (r || e.type != KVM_IRQ_ROUTING_MSI) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	entry[0] = e.msi.address_lo;
+	entry[1] = e.msi.address_hi;
+	entry[2] = e.msi.data;
+	entry[3] = test_bit(adev->guest_msix_entries[idx].entry,
+			    adev->msix_mask_bitmap);
+	memcpy(val, &entry[addr % PCI_MSIX_ENTRY_SIZE / sizeof *entry], len);
+
+out:
+	mutex_unlock(&adev->kvm->lock);
+	return r;
+}
+
+static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+			   const void *val)
+{
+	struct kvm_assigned_dev_kernel *adev =
+			container_of(this, struct kvm_assigned_dev_kernel,
+				     msix_mmio_dev);
+	int idx, r = 0;
+	unsigned long new_val;
+
+	/* TODO: Get big-endian machine work */
+	mutex_lock(&adev->kvm->lock);
+	if (!msix_mmio_in_range(adev, addr, len)) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	if ((addr & 0x3) || len != 4) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+
+	new_val = *(unsigned long *)val;
+	idx = msix_get_enabled_idx(adev, addr, len);
+	if (idx < 0) {
+		idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+		if (((addr % PCI_MSIX_ENTRY_SIZE) ==
+		     PCI_MSIX_ENTRY_VECTOR_CTRL)) {
+			if (new_val & ~PCI_MSIX_ENTRY_CTRL_MASKBIT)
+				goto out;
+			if (new_val & PCI_MSIX_ENTRY_CTRL_MASKBIT)
+				set_bit(idx, adev->msix_mask_bitmap);
+			else
+				clear_bit(idx, adev->msix_mask_bitmap);
+			/* It's possible that we need re-enable MSI-X, so go
+			 * back to userspace */
+		}
+		/* Userspace would handle other MMIO writing */
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) {
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	if (new_val & ~PCI_MSIX_ENTRY_CTRL_MASKBIT)
+		goto out;
+	update_msix_mask(adev, idx, !!(new_val & PCI_MSIX_ENTRY_CTRL_MASKBIT));
+out:
+	mutex_unlock(&adev->kvm->lock);
+
+	return r;
+}
+
+static const struct kvm_io_device_ops msix_mmio_ops = {
+	.read	= msix_mmio_read,
+	.write	= msix_mmio_write,
+};
+
+static int kvm_vm_ioctl_update_msix_mmio(struct kvm *kvm,
+				struct kvm_msix_mmio *msix_mmio)
+{
+	int r = 0;
+	struct kvm_assigned_dev_kernel *adev;
+
+	if (msix_mmio->type != KVM_MSIX_TYPE_ASSIGNED_DEV)
+		return -EINVAL;
+
+	if (!msix_mmio->flags)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+	adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+				     msix_mmio->id);
+	if (!adev) {
+		r = -EINVAL;
+		goto out;
+	}
+	if (msix_mmio->base_addr == 0) {
+		r = -EINVAL;
+		goto out;
+	}
+	if (msix_mmio->max_entries_nr == 0 ||
+	    msix_mmio->max_entries_nr > KVM_MAX_MSIX_PER_DEV) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if ((msix_mmio->flags & KVM_MSIX_MMIO_FLAG_REGISTER) &&
+	    (msix_mmio->flags & KVM_MSIX_MMIO_FLAG_UNREGISTER)) {
+		r = -EINVAL;
+		goto out;
+	}
+
+	if (msix_mmio->flags & KVM_MSIX_MMIO_FLAG_REGISTER) {
+		if (!(adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO)) {
+			mutex_lock(&kvm->slots_lock);
+			kvm_iodevice_init(&adev->msix_mmio_dev,
+					  &msix_mmio_ops);
+			r = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+					&adev->msix_mmio_dev);
+			if (!r)
+				adev->flags |= KVM_ASSIGNED_ENABLED_MSIX_MMIO;
+			mutex_unlock(&kvm->slots_lock);
+		}
+		if (!r) {
+			adev->msix_mmio_base = msix_mmio->base_addr;
+			adev->msix_max_entries_nr = msix_mmio->max_entries_nr;
+		}
+	} else if (msix_mmio->flags & KVM_MSIX_MMIO_FLAG_UNREGISTER)
+		unregister_msix_mmio(kvm, adev);
+out:
+	mutex_unlock(&kvm->lock);
+
+	return r;
+}
 #endif
 
 long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
@@ -813,6 +1103,37 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 			goto out;
 		break;
 	}
+	case KVM_GET_MSIX_ENTRY: {
+		struct kvm_msix_entry entry;
+		r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof entry))
+			goto out;
+		r = kvm_vm_ioctl_get_msix_entry(kvm, &entry);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &entry, sizeof entry))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_UPDATE_MSIX_MMIO: {
+		struct kvm_msix_mmio msix_mmio;
+
+		r = -EFAULT;
+		if (copy_from_user(&msix_mmio, argp, sizeof(msix_mmio)))
+			goto out;
+
+		/* find_first_bit() takes its size in bits; reject the call
+		 * if any reserved bit is set. */
+		r = -EINVAL;
+		if (find_first_bit((unsigned long *)msix_mmio.reserved,
+				   sizeof(msix_mmio.reserved) * BITS_PER_BYTE)
+		    < sizeof(msix_mmio.reserved) * BITS_PER_BYTE)
+			goto out;
+
+		r = kvm_vm_ioctl_update_msix_mmio(kvm, &msix_mmio);
+		if (r)
+			goto out;
+		break;
+	}
 #endif
 	}
 out:
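Finally, a hypothetical sketch of the query side from userspace, after confirming KVM_CAP_MSIX_MASK with KVM_CHECK_EXTENSION on the /dev/kvm fd. It assumes the patched <linux/kvm.h>; vm_fd, dev_id and index are caller-supplied placeholders:

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Returns 1 if the guest-visible mask bit of the given MSI-X table
 * entry is set, 0 if clear, -1 on error. */
static int msix_entry_masked(int vm_fd, __u32 dev_id, __u32 index)
{
        struct kvm_msix_entry entry = {
                .id          = dev_id,
                .type        = KVM_MSIX_TYPE_ASSIGNED_DEV,
                .entry       = index,
                .query_flags = KVM_MSIX_FLAG_QUERY_MASKBIT,
        };

        if (ioctl(vm_fd, KVM_GET_MSIX_ENTRY, &entry) < 0)
                return -1;
        return !!(entry.flags & KVM_MSIX_FLAG_MASKBIT);
}

Userspace would consult this when the guest toggles MSI-X enable, since mask-bit writes trapped in the kernel are otherwise invisible to it.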