@@ -1966,6 +1966,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_X86_ROBUST_SINGLESTEP:
case KVM_CAP_XSAVE:
case KVM_CAP_ASYNC_PF:
+ case KVM_CAP_MSIX_MASK:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -541,6 +541,9 @@ struct kvm_ppc_pvinfo {
#define KVM_CAP_PPC_GET_PVINFO 57
#define KVM_CAP_PPC_IRQ_LEVEL 58
#define KVM_CAP_ASYNC_PF 59
+#ifdef __KVM_HAVE_MSIX
+#define KVM_CAP_MSIX_MASK 60
+#endif
#ifdef KVM_CAP_IRQ_ROUTING
@@ -672,6 +675,9 @@ struct kvm_clock_data {
#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
+/* Available with KVM_CAP_MSIX_MASK */
+#define KVM_GET_MSIX_ENTRY _IOWR(KVMIO, 0x7d, struct kvm_msix_entry)
+#define KVM_UPDATE_MSIX_MMIO _IOW(KVMIO, 0x7e, struct kvm_msix_mmio)
/* Available with KVM_CAP_PIT_STATE2 */
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
@@ -795,4 +801,30 @@ struct kvm_assigned_msix_entry {
__u16 padding[3];
};
+#define KVM_MSIX_TYPE_ASSIGNED_DEV 1
+
+#define KVM_MSIX_FLAG_MASKBIT		(1 << 0)	/* reported in flags */
+#define KVM_MSIX_FLAG_QUERY_MASKBIT	(1 << 0)	/* requested via query_flags */
+
+struct kvm_msix_entry {
+ __u32 id;
+ __u32 type;
+	__u32 entry; /* index of the entry in the MSI-X table */
+ __u32 flags;
+ __u32 query_flags;
+ __u32 reserved[5];
+};
+
+#define KVM_MSIX_MMIO_FLAG_REGISTER (1 << 0)
+#define KVM_MSIX_MMIO_FLAG_UNREGISTER (1 << 1)
+
+struct kvm_msix_mmio {
+ __u32 id;
+ __u32 type;
+ __u64 base_addr;
+ __u32 max_entries_nr;
+ __u32 flags;
+ __u32 reserved[6];
+};
+
#endif /* __LINUX_KVM_H */
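For reference, a minimal userspace sketch of the new interface (hypothetical fd and id values, error handling elided; the caller is assumed to have confirmed KVM_CAP_MSIX_MASK via KVM_CHECK_EXTENSION first). It registers the in-kernel MSI-X table MMIO for an assigned device, then queries the mask bit of entry 0:

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* vm_fd, dev_id, table_gpa and nr_entries are assumed to come from
 * the caller's device-assignment setup. */
static int setup_msix_mmio(int vm_fd, __u32 dev_id,
			   __u64 table_gpa, __u32 nr_entries)
{
	struct kvm_msix_mmio mmio;
	struct kvm_msix_entry entry;

	memset(&mmio, 0, sizeof(mmio));	/* reserved[] must be zero */
	mmio.id = dev_id;
	mmio.type = KVM_MSIX_TYPE_ASSIGNED_DEV;
	mmio.base_addr = table_gpa;
	mmio.max_entries_nr = nr_entries;
	mmio.flags = KVM_MSIX_MMIO_FLAG_REGISTER;
	if (ioctl(vm_fd, KVM_UPDATE_MSIX_MMIO, &mmio) < 0)
		return -1;

	memset(&entry, 0, sizeof(entry));
	entry.id = dev_id;
	entry.type = KVM_MSIX_TYPE_ASSIGNED_DEV;
	entry.entry = 0;
	entry.query_flags = KVM_MSIX_FLAG_QUERY_MASKBIT;
	if (ioctl(vm_fd, KVM_GET_MSIX_ENTRY, &entry) < 0)
		return -1;

	/* 1 if entry 0 is masked, 0 if unmasked */
	return !!(entry.flags & KVM_MSIX_FLAG_MASKBIT);
}
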
@@ -501,6 +501,7 @@ struct kvm_guest_msix_entry {
};
#define KVM_ASSIGNED_ENABLED_IOMMU (1 << 0)
+#define KVM_ASSIGNED_ENABLED_MSIX_MMIO (1 << 1)
struct kvm_assigned_dev_kernel {
struct kvm_irq_ack_notifier ack_notifier;
struct work_struct interrupt_work;
@@ -521,6 +522,10 @@ struct kvm_assigned_dev_kernel {
struct pci_dev *dev;
struct kvm *kvm;
spinlock_t assigned_dev_lock;
+	/* guest view of the per-vector mask bits */
+	DECLARE_BITMAP(msix_mask_bitmap, KVM_MAX_MSIX_PER_DEV);
+	/* guest physical base of the emulated MSI-X table */
+	gpa_t msix_mmio_base;
+ struct kvm_io_device msix_mmio_dev;
+ int msix_max_entries_nr;
};
struct kvm_irq_mask_notifier {
@@ -226,12 +226,27 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
}
+static void unregister_msix_mmio(struct kvm *kvm,
+ struct kvm_assigned_dev_kernel *adev)
+{
+ if (adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO) {
+ mutex_lock(&kvm->slots_lock);
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+ &adev->msix_mmio_dev);
+ mutex_unlock(&kvm->slots_lock);
+ adev->flags &= ~KVM_ASSIGNED_ENABLED_MSIX_MMIO;
+ }
+}
+
static void kvm_free_assigned_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel
*assigned_dev)
{
kvm_free_assigned_irq(kvm, assigned_dev);
+#ifdef __KVM_HAVE_MSIX
+ unregister_msix_mmio(kvm, assigned_dev);
+#endif
pci_reset_function(assigned_dev->dev);
pci_release_regions(assigned_dev->dev);
@@ -504,7 +519,7 @@ out:
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
- int r = 0, idx;
+ int r = 0, idx, i;
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
@@ -564,6 +579,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
list_add(&match->list, &kvm->arch.assigned_dev_head);
+	/* After reset, all entries of the MSI-X table are masked */
+ for (i = 0; i < KVM_MAX_MSIX_PER_DEV; i++)
+ set_bit(i, match->msix_mask_bitmap);
+
if (assigned_dev->flags & KVM_ASSIGNED_ENABLED_IOMMU) {
if (!kvm->arch.iommu_domain) {
r = kvm_iommu_map_guest(kvm);
@@ -667,6 +686,43 @@ msix_nr_out:
return r;
}
+static void update_msix_mask(struct kvm_assigned_dev_kernel *adev,
+ int idx, bool new_mask_flag)
+{
+ int irq;
+ bool old_mask_flag, need_flush = false;
+
+ spin_lock_irq(&adev->assigned_dev_lock);
+
+ if (!adev->dev->msix_enabled ||
+ !(adev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX))
+ goto out;
+
+ old_mask_flag = test_bit(adev->guest_msix_entries[idx].entry,
+ adev->msix_mask_bitmap);
+ if (old_mask_flag == new_mask_flag)
+ goto out;
+
+ irq = adev->host_msix_entries[idx].vector;
+ BUG_ON(irq == 0);
+
+ if (new_mask_flag) {
+ set_bit(adev->guest_msix_entries[idx].entry,
+ adev->msix_mask_bitmap);
+ disable_irq_nosync(irq);
+ need_flush = true;
+ } else {
+ clear_bit(adev->guest_msix_entries[idx].entry,
+ adev->msix_mask_bitmap);
+ enable_irq(irq);
+ }
+out:
+ spin_unlock_irq(&adev->assigned_dev_lock);
+
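+	/* Masking disabled the host IRQ above; wait for any interrupt
+	 * work already in flight so the vector is truly quiescent. */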
+ if (need_flush)
+ flush_work(&adev->interrupt_work);
+}
+
static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
struct kvm_assigned_msix_entry *entry)
{
@@ -701,6 +757,240 @@ msix_entry_out:
return r;
}
+
+static int kvm_vm_ioctl_get_msix_entry(struct kvm *kvm,
+ struct kvm_msix_entry *entry)
+{
+ int r = 0;
+ struct kvm_assigned_dev_kernel *adev;
+
+ if (entry->type != KVM_MSIX_TYPE_ASSIGNED_DEV)
+ return -EINVAL;
+
+ if (!entry->query_flags)
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+
+ adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+ entry->id);
+
+ if (!adev) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (entry->entry >= adev->msix_max_entries_nr) {
+ r = -ENOSPC;
+ goto out;
+ }
+
+ if (entry->query_flags & KVM_MSIX_FLAG_QUERY_MASKBIT) {
+ if (test_bit(entry->entry, adev->msix_mask_bitmap))
+ entry->flags |= KVM_MSIX_FLAG_MASKBIT;
+ else
+ entry->flags &= ~KVM_MSIX_FLAG_MASKBIT;
+ }
+
+out:
+ mutex_unlock(&kvm->lock);
+
+ return r;
+}
+
+static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev,
+ gpa_t addr, int len)
+{
+ gpa_t start, end;
+
+ BUG_ON(!(adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO));
+ start = adev->msix_mmio_base;
+ end = adev->msix_mmio_base + PCI_MSIX_ENTRY_SIZE *
+ adev->msix_max_entries_nr;
+ if (addr >= start && addr + len <= end)
+ return true;
+
+ return false;
+}
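+
+/*
+ * Worked example with hypothetical values: for msix_mmio_base
+ * 0xf0000000 and msix_max_entries_nr 8, the window spans
+ * 8 * PCI_MSIX_ENTRY_SIZE = 128 bytes, i.e. [0xf0000000, 0xf0000080);
+ * a 4-byte access at 0xf000007c is in range, one at 0xf0000080 is not.
+ */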
+
+static int msix_get_enabled_idx(struct kvm_assigned_dev_kernel *adev,
+ gpa_t addr, int len)
+{
+ int i, index = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+
+ for (i = 0; i < adev->entries_nr; i++)
+ if (adev->guest_msix_entries[i].entry == index)
+ return i;
+
+ return -EINVAL;
+}
+
+static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+ void *val)
+{
+ struct kvm_assigned_dev_kernel *adev =
+ container_of(this, struct kvm_assigned_dev_kernel,
+ msix_mmio_dev);
+ int idx, r = 0;
+ u32 entry[4];
+ struct kvm_kernel_irq_routing_entry e;
+
+	/* TODO: make this work on big-endian hosts */
+ mutex_lock(&adev->kvm->lock);
+ if (!msix_mmio_in_range(adev, addr, len)) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+	/* only naturally-aligned dword accesses are emulated */
+	if ((addr & 0x3) || len != 4) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+
+ idx = msix_get_enabled_idx(adev, addr, len);
+ if (idx < 0) {
+ idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+ if ((addr % PCI_MSIX_ENTRY_SIZE) ==
+ PCI_MSIX_ENTRY_VECTOR_CTRL)
+			/* len == 4 is guaranteed above; a long-sized
+			 * store would overrun val on 64-bit hosts */
+			*(u32 *)val =
+				test_bit(idx, adev->msix_mask_bitmap) ?
+				PCI_MSIX_ENTRY_CTRL_MASKBIT : 0;
+ else
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+
+ r = kvm_get_irq_routing_entry(adev->kvm,
+ adev->guest_msix_entries[idx].vector, &e);
+ if (r || e.type != KVM_IRQ_ROUTING_MSI) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ entry[0] = e.msi.address_lo;
+ entry[1] = e.msi.address_hi;
+ entry[2] = e.msi.data;
+ entry[3] = test_bit(adev->guest_msix_entries[idx].entry,
+ adev->msix_mask_bitmap);
+	memcpy(val, &entry[(addr % PCI_MSIX_ENTRY_SIZE) / sizeof *entry], len);
+
+out:
+ mutex_unlock(&adev->kvm->lock);
+ return r;
+}
+
+static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+ const void *val)
+{
+ struct kvm_assigned_dev_kernel *adev =
+ container_of(this, struct kvm_assigned_dev_kernel,
+ msix_mmio_dev);
+ int idx, r = 0;
+	u32 new_val;
+
+	/* TODO: make this work on big-endian hosts */
+ mutex_lock(&adev->kvm->lock);
+ if (!msix_mmio_in_range(adev, addr, len)) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ if ((addr & 0x3) || len != 4) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+
+	new_val = *(u32 *)val;	/* len == 4 is checked above */
+ idx = msix_get_enabled_idx(adev, addr, len);
+ if (idx < 0) {
+ idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+		if ((addr % PCI_MSIX_ENTRY_SIZE) ==
+		    PCI_MSIX_ENTRY_VECTOR_CTRL) {
+			if (new_val & ~PCI_MSIX_ENTRY_CTRL_MASKBIT)
+				goto out;
+			if (new_val & PCI_MSIX_ENTRY_CTRL_MASKBIT)
+				set_bit(idx, adev->msix_mask_bitmap);
+			else
+				clear_bit(idx, adev->msix_mask_bitmap);
+			/* userspace may still need to re-enable MSI-X,
+			 * so fall through to it */
+ }
+		/* Let userspace handle all other MMIO writes */
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ if (new_val & ~PCI_MSIX_ENTRY_CTRL_MASKBIT)
+ goto out;
+ update_msix_mask(adev, idx, !!(new_val & PCI_MSIX_ENTRY_CTRL_MASKBIT));
+out:
+ mutex_unlock(&adev->kvm->lock);
+
+ return r;
+}
+
+static const struct kvm_io_device_ops msix_mmio_ops = {
+ .read = msix_mmio_read,
+ .write = msix_mmio_write,
+};
+
+static int kvm_vm_ioctl_update_msix_mmio(struct kvm *kvm,
+ struct kvm_msix_mmio *msix_mmio)
+{
+ int r = 0;
+ struct kvm_assigned_dev_kernel *adev;
+
+ if (msix_mmio->type != KVM_MSIX_TYPE_ASSIGNED_DEV)
+ return -EINVAL;
+
+ if (!msix_mmio->flags)
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+ adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+ msix_mmio->id);
+ if (!adev) {
+ r = -EINVAL;
+ goto out;
+ }
+ if (msix_mmio->base_addr == 0) {
+ r = -EINVAL;
+ goto out;
+ }
+ if (msix_mmio->max_entries_nr == 0 ||
+ msix_mmio->max_entries_nr > KVM_MAX_MSIX_PER_DEV) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if ((msix_mmio->flags & KVM_MSIX_MMIO_FLAG_REGISTER) &&
+ (msix_mmio->flags & KVM_MSIX_MMIO_FLAG_UNREGISTER)) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (msix_mmio->flags & KVM_MSIX_MMIO_FLAG_REGISTER) {
+ if (!(adev->flags & KVM_ASSIGNED_ENABLED_MSIX_MMIO)) {
+ mutex_lock(&kvm->slots_lock);
+ kvm_iodevice_init(&adev->msix_mmio_dev,
+ &msix_mmio_ops);
+ r = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+ &adev->msix_mmio_dev);
+ if (!r)
+ adev->flags |= KVM_ASSIGNED_ENABLED_MSIX_MMIO;
+ mutex_unlock(&kvm->slots_lock);
+ }
+ if (!r) {
+ adev->msix_mmio_base = msix_mmio->base_addr;
+ adev->msix_max_entries_nr = msix_mmio->max_entries_nr;
+ }
+	} else if (msix_mmio->flags & KVM_MSIX_MMIO_FLAG_UNREGISTER) {
+		unregister_msix_mmio(kvm, adev);
+	}
+out:
+ mutex_unlock(&kvm->lock);
+
+ return r;
+}
#endif
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
@@ -813,6 +1103,37 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
goto out;
break;
}
+ case KVM_GET_MSIX_ENTRY: {
+ struct kvm_msix_entry entry;
+ r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof(entry)))
+ goto out;
+ r = kvm_vm_ioctl_get_msix_entry(kvm, &entry);
+ if (r)
+ goto out;
+ r = -EFAULT;
+		if (copy_to_user(argp, &entry, sizeof(entry)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_UPDATE_MSIX_MMIO: {
+ struct kvm_msix_mmio msix_mmio;
+
+ r = -EFAULT;
+ if (copy_from_user(&msix_mmio, argp, sizeof(msix_mmio)))
+ goto out;
+
+ r = -EINVAL;
+		/* all reserved fields must be zero; note that
+		 * find_first_bit() takes a size in bits, not bytes */
+		if (find_first_bit((unsigned long *)msix_mmio.reserved,
+		    sizeof(msix_mmio.reserved) * BITS_PER_BYTE) <
+		    sizeof(msix_mmio.reserved) * BITS_PER_BYTE)
+ goto out;
+
+ r = kvm_vm_ioctl_update_msix_mmio(kvm, &msix_mmio);
+ if (r)
+ goto out;
+ break;
+ }
#endif
}
out:
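
Accesses the kernel declines with -EOPNOTSUPP (anything other than an aligned dword access to a vector-control mask bit, plus mask-bit writes that may require MSI-X to be re-enabled) fall through to userspace as an ordinary KVM_EXIT_MMIO exit. A hedged sketch of the matching run-loop fallback; in_msix_table(), emulate_msix_table_access() and emulate_other_mmio() are hypothetical VMM helpers:

#include <linux/kvm.h>

/* Called for run->exit_reason == KVM_EXIT_MMIO after KVM_RUN returns. */
static void handle_mmio_exit(struct kvm_run *run)
{
	if (in_msix_table(run->mmio.phys_addr))
		/* keep the userspace copy of the table authoritative */
		emulate_msix_table_access(run->mmio.phys_addr,
					  run->mmio.data, run->mmio.len,
					  run->mmio.is_write);
	else
		emulate_other_mmio(run);
}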