@@ -1085,6 +1085,52 @@ of 4 instructions that make up a hypercall.
If any additional field gets added to this structure later on, a bit for that
additional piece of information will be set in the flags bitmap.
+4.47 KVM_ASSIGN_REG_MSIX_MMIO
+
+Capability: KVM_CAP_DEVICE_MSIX_MASK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_msix_mmio (in)
+Returns: 0 on success, !0 on error
+
+struct kvm_assigned_msix_mmio {
+ /* Assigned device's ID */
+ __u32 assigned_dev_id;
+ /* Must be 0 */
+ __u32 flags;
+ /* MSI-X table MMIO address */
+ __u64 base_addr;
+ /* Must be 0, reserved for future use */
+ __u64 reserved;
+};
+
+This ioctl enables in-kernel MSI-X emulation for the assigned device,
+so that guest updates of the MSI-X mask bit are handled in the kernel.
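+
+As an illustration, userspace might register the table region as below
+(a minimal sketch; vm_fd, dev_id and table_gpa are hypothetical names
+for an open VM file descriptor, the ID of an already assigned device,
+and the guest physical address of its MSI-X table):
+
+	struct kvm_assigned_msix_mmio mmio = {
+		.assigned_dev_id = dev_id,	/* hypothetical */
+		.base_addr = table_gpa,		/* hypothetical */
+		/* flags and reserved must be 0; zeroed by the initializer */
+	};
+
+	if (ioctl(vm_fd, KVM_ASSIGN_REG_MSIX_MMIO, &mmio) < 0)
+		perror("KVM_ASSIGN_REG_MSIX_MMIO");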
+
+4.48 KVM_ASSIGN_GET_MSIX_ENTRY
+
+Capability: KVM_CAP_DEVICE_MSIX_MASK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_msix_entry (in and out)
+Returns: 0 on success, !0 on error
+
+struct kvm_assigned_msix_entry {
+ /* Assigned device's ID */
+ __u32 assigned_dev_id;
+ /* Ignored */
+ __u32 gsi;
+ /* The index of entry in the MSI-X table */
+ __u16 entry;
+ /* Querying flags and returning status */
+ __u16 flags;
+ /* Must be 0 */
+ __u16 padding[2];
+};
+
+This ioctl allows userspace to query the status of one specific MSI-X
+entry. Currently only mask bit status querying is supported.
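+
+For example, to query the mask bit of table entry 0 (a minimal sketch;
+vm_fd and dev_id are hypothetical names as in the previous example):
+
+	struct kvm_assigned_msix_entry query = {
+		.assigned_dev_id = dev_id,	/* hypothetical */
+		.entry = 0,
+		.flags = KVM_MSIX_FLAG_QUERY_MASK,
+	};
+
+	if (ioctl(vm_fd, KVM_ASSIGN_GET_MSIX_ENTRY, &query) == 0 &&
+	    (query.flags & KVM_MSIX_FLAG_MASK))
+		printf("entry 0 is masked\n");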
+
5. The kvm_run structure
Application code obtains a pointer to the kvm_run structure by
@@ -1926,6 +1926,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_DEBUGREGS:
case KVM_CAP_X86_ROBUST_SINGLESTEP:
case KVM_CAP_XSAVE:
+ case KVM_CAP_DEVICE_MSIX_MASK:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -540,6 +540,9 @@ struct kvm_ppc_pvinfo {
#endif
#define KVM_CAP_PPC_GET_PVINFO 57
#define KVM_CAP_PPC_IRQ_LEVEL 58
+#ifdef __KVM_HAVE_MSIX
+#define KVM_CAP_DEVICE_MSIX_MASK 59
+#endif
#ifdef KVM_CAP_IRQ_ROUTING
@@ -671,6 +674,10 @@ struct kvm_clock_data {
#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
+#define KVM_ASSIGN_GET_MSIX_ENTRY _IOWR(KVMIO, 0x7d, \
+ struct kvm_assigned_msix_entry)
+#define KVM_ASSIGN_REG_MSIX_MMIO _IOW(KVMIO, 0x7e, \
+ struct kvm_assigned_msix_mmio)
/* Available with KVM_CAP_PIT_STATE2 */
#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
@@ -787,11 +794,23 @@ struct kvm_assigned_msix_nr {
};
#define KVM_MAX_MSIX_PER_DEV 256
+
+/* Returned in flags by KVM_ASSIGN_GET_MSIX_ENTRY: the entry is masked */
+#define KVM_MSIX_FLAG_MASK	(1 << 0)
+/* Set in flags by userspace to query the mask bit status of an entry */
+#define KVM_MSIX_FLAG_QUERY_MASK	(1 << 15)
+
struct kvm_assigned_msix_entry {
__u32 assigned_dev_id;
__u32 gsi;
__u16 entry; /* The index of entry in the MSI-X table */
- __u16 padding[3];
+ __u16 flags;
+ __u16 padding[2];
+};
+
+struct kvm_assigned_msix_mmio {
+ __u32 assigned_dev_id;
+ __u32 flags;
+ __u64 base_addr;
+ __u64 reserved;
};
#endif /* __LINUX_KVM_H */
@@ -464,6 +464,9 @@ struct kvm_assigned_dev_kernel {
struct pci_dev *dev;
struct kvm *kvm;
spinlock_t assigned_dev_lock;
+	/* Guest-visible MSI-X mask bits, indexed by table entry number */
+	DECLARE_BITMAP(msix_mask_bitmap, KVM_MAX_MSIX_PER_DEV);
+	/* Guest physical address of the MSI-X table; 0 if not registered */
+	gpa_t msix_mmio_base;
+	struct kvm_io_device msix_mmio_dev;
};
struct kvm_irq_mask_notifier {
@@ -232,6 +232,14 @@ static void kvm_free_assigned_device(struct kvm *kvm,
{
kvm_free_assigned_irq(kvm, assigned_dev);
+#ifdef __KVM_HAVE_MSIX
+ if (assigned_dev->msix_mmio_base) {
+ mutex_lock(&kvm->slots_lock);
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+ &assigned_dev->msix_mmio_dev);
+ mutex_unlock(&kvm->slots_lock);
+ }
+#endif
pci_reset_function(assigned_dev->dev);
pci_release_regions(assigned_dev->dev);
@@ -504,7 +512,7 @@ out:
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
- int r = 0, idx;
+ int r = 0, idx, i;
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
@@ -563,6 +571,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
list_add(&match->list, &kvm->arch.assigned_dev_head);
+	/* After device reset every MSI-X table entry starts out masked */
+ for (i = 0; i < KVM_MAX_MSIX_PER_DEV; i++)
+ set_bit(i, match->msix_mask_bitmap);
+
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
if (!kvm->arch.iommu_domain) {
r = kvm_iommu_map_guest(kvm);
@@ -666,6 +678,43 @@ msix_nr_out:
return r;
}
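+/*
+ * Propagate a guest mask bit change of an enabled entry to the host:
+ * masking disables the host IRQ line (and flushes any pending
+ * interrupt injection work), unmasking re-enables it.  idx indexes the
+ * guest_msix_entries[]/host_msix_entries[] arrays, not the table.
+ */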
+static void update_msix_mask(struct kvm_assigned_dev_kernel *adev,
+ int idx, bool new_mask_flag)
+{
+ int irq;
+ bool old_mask_flag, need_flush = false;
+
+ spin_lock_irq(&adev->assigned_dev_lock);
+
+ if (!adev->dev->msix_enabled ||
+ !(adev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX))
+ goto out;
+
+ old_mask_flag = test_bit(adev->guest_msix_entries[idx].entry,
+ adev->msix_mask_bitmap);
+ if (old_mask_flag == new_mask_flag)
+ goto out;
+
+ irq = adev->host_msix_entries[idx].vector;
+ BUG_ON(irq == 0);
+
+ if (new_mask_flag) {
+ set_bit(adev->guest_msix_entries[idx].entry,
+ adev->msix_mask_bitmap);
+ disable_irq_nosync(irq);
+ need_flush = true;
+ } else {
+ clear_bit(adev->guest_msix_entries[idx].entry,
+ adev->msix_mask_bitmap);
+ enable_irq(irq);
+ }
+out:
+ spin_unlock_irq(&adev->assigned_dev_lock);
+
+ if (need_flush)
+ flush_work(&adev->interrupt_work);
+}
+
static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
struct kvm_assigned_msix_entry *entry)
{
@@ -700,6 +749,210 @@ msix_entry_out:
return r;
}
+
+static int kvm_vm_ioctl_get_msix_entry(struct kvm *kvm,
+ struct kvm_assigned_msix_entry *entry)
+{
+ int r = 0;
+ struct kvm_assigned_dev_kernel *adev;
+
+ mutex_lock(&kvm->lock);
+
+ adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+ entry->assigned_dev_id);
+
+ if (!adev) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (entry->entry >= KVM_MAX_MSIX_PER_DEV) {
+		r = -EINVAL;
+ goto out;
+ }
+
+ if (entry->flags & KVM_MSIX_FLAG_QUERY_MASK) {
+ if (test_bit(entry->entry, adev->msix_mask_bitmap))
+ entry->flags |= KVM_MSIX_FLAG_MASK;
+ else
+ entry->flags &= ~KVM_MSIX_FLAG_MASK;
+ }
+
+out:
+ mutex_unlock(&kvm->lock);
+
+ return r;
+}
+
+static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev,
+ gpa_t addr, int len)
+{
+ gpa_t start, end;
+
+ BUG_ON(adev->msix_mmio_base == 0);
+ start = adev->msix_mmio_base;
+ end = adev->msix_mmio_base + PCI_MSIX_ENTRY_SIZE * KVM_MAX_MSIX_PER_DEV;
+ if (addr >= start && addr + len <= end)
+ return true;
+
+ return false;
+}
+
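+/*
+ * Map a table MMIO address to an index into the guest_msix_entries[]
+ * and host_msix_entries[] arrays; returns -EINVAL if the address does
+ * not fall inside an entry that has a vector assigned.
+ */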
+static int msix_get_enabled_idx(struct kvm_assigned_dev_kernel *adev,
+ gpa_t addr, int len)
+{
+ int i;
+ gpa_t start, end;
+
+ for (i = 0; i < adev->entries_nr; i++) {
+ start = adev->msix_mmio_base +
+ adev->guest_msix_entries[i].entry * PCI_MSIX_ENTRY_SIZE;
+ end = start + PCI_MSIX_ENTRY_SIZE;
+ if (addr >= start && addr + len <= end) {
+ return i;
+ }
+ }
+
+ return -EINVAL;
+}
+
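+/*
+ * Each MSI-X table entry occupies PCI_MSIX_ENTRY_SIZE (16) bytes:
+ * address low/high, message data and vector control dwords.  Reads of
+ * entries with an assigned vector are reconstructed from the MSI irq
+ * routing entry plus the mask bitmap; for other entries only the
+ * vector control dword is emulated.
+ */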
+static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+ void *val)
+{
+ struct kvm_assigned_dev_kernel *adev =
+ container_of(this, struct kvm_assigned_dev_kernel,
+ msix_mmio_dev);
+ int idx, r = 0;
+ u32 entry[4];
+ struct kvm_kernel_irq_routing_entry e;
+
+ mutex_lock(&adev->kvm->lock);
+ if (!msix_mmio_in_range(adev, addr, len)) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+	if ((addr & 0x3) || len != 4) {
+		/* Punt non-dword accesses to userspace */
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+
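+	/* Check whether the access hits an entry with an assigned vector */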
+ idx = msix_get_enabled_idx(adev, addr, len);
+ if (idx < 0) {
+ idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+ if ((addr % PCI_MSIX_ENTRY_SIZE) ==
+ PCI_MSIX_ENTRY_VECTOR_CTRL)
+			*(u32 *)val =
+				test_bit(idx, adev->msix_mask_bitmap) ?
+				PCI_MSIX_ENTRY_CTRL_MASKBIT : 0;
+ else
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+
+ r = kvm_get_irq_routing_entry(adev->kvm,
+ adev->guest_msix_entries[idx].vector, &e);
+ if (r || e.type != KVM_IRQ_ROUTING_MSI) {
+		printk(KERN_WARNING "KVM: Wrong MSI-X routing entry! "
+		       "idx %d, addr 0x%llx, len %d\n", idx,
+		       (unsigned long long)addr, len);
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ entry[0] = e.msi.address_lo;
+ entry[1] = e.msi.address_hi;
+ entry[2] = e.msi.data;
+ entry[3] = test_bit(adev->guest_msix_entries[idx].entry,
+ adev->msix_mask_bitmap);
+	memcpy(val, &entry[(addr % PCI_MSIX_ENTRY_SIZE) / 4], len);
+
+out:
+ mutex_unlock(&adev->kvm->lock);
+ return r;
+}
+
+static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+ const void *val)
+{
+ struct kvm_assigned_dev_kernel *adev =
+ container_of(this, struct kvm_assigned_dev_kernel,
+ msix_mmio_dev);
+	int idx, r = 0;
+	u32 new_val;
+
+ mutex_lock(&adev->kvm->lock);
+ if (!msix_mmio_in_range(adev, addr, len)) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+	if ((addr & 0x3) || len != 4) {
+		/* Punt non-dword accesses to userspace */
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+	new_val = *(u32 *)val;
+
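+	/*
+	 * A write to an entry without an assigned vector only updates the
+	 * mask bitmap; assigned entries also get the host IRQ toggled.
+	 */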
+ idx = msix_get_enabled_idx(adev, addr, len);
+ if (idx < 0) {
+ idx = (addr - adev->msix_mmio_base) / PCI_MSIX_ENTRY_SIZE;
+ if (((addr % PCI_MSIX_ENTRY_SIZE) ==
+ PCI_MSIX_ENTRY_VECTOR_CTRL)) {
+ if (new_val & ~PCI_MSIX_ENTRY_CTRL_MASKBIT)
+ goto out;
+ if (new_val & PCI_MSIX_ENTRY_CTRL_MASKBIT)
+ set_bit(idx, adev->msix_mask_bitmap);
+ else
+ clear_bit(idx, adev->msix_mask_bitmap);
+		} else {
+			/* Userspace handles writes to the other fields */
+			r = -EOPNOTSUPP;
+		}
+		goto out;
+ }
+ if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ if (new_val & ~PCI_MSIX_ENTRY_CTRL_MASKBIT)
+ goto out;
+ update_msix_mask(adev, idx, !!(new_val & PCI_MSIX_ENTRY_CTRL_MASKBIT));
+out:
+ mutex_unlock(&adev->kvm->lock);
+
+ return r;
+}
+
+static const struct kvm_io_device_ops msix_mmio_ops = {
+ .read = msix_mmio_read,
+ .write = msix_mmio_write,
+};
+
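+/*
+ * Register the in-kernel MSI-X table MMIO handler for an assigned
+ * device.  The kvm_io_device is added to the MMIO bus only on the
+ * first call; later calls just update the table base address.
+ */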
+static int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
+ struct kvm_assigned_msix_mmio *msix_mmio)
+{
+ int r = 0;
+ struct kvm_assigned_dev_kernel *adev;
+
+ mutex_lock(&kvm->lock);
+ adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+ msix_mmio->assigned_dev_id);
+ if (!adev) {
+ r = -EINVAL;
+ goto out;
+ }
+ if (msix_mmio->base_addr == 0) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ mutex_lock(&kvm->slots_lock);
+ if (adev->msix_mmio_base == 0) {
+ kvm_iodevice_init(&adev->msix_mmio_dev, &msix_mmio_ops);
+ r = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
+ &adev->msix_mmio_dev);
+ if (r)
+ goto out2;
+ }
+
+ adev->msix_mmio_base = msix_mmio->base_addr;
+out2:
+ mutex_unlock(&kvm->slots_lock);
+out:
+ mutex_unlock(&kvm->lock);
+
+ return r;
+}
#endif
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
@@ -812,6 +1065,36 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
goto out;
break;
}
+ case KVM_ASSIGN_GET_MSIX_ENTRY: {
+ struct kvm_assigned_msix_entry entry;
+ r = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof(entry)))
+ goto out;
+ r = kvm_vm_ioctl_get_msix_entry(kvm, &entry);
+ if (r)
+ goto out;
+ r = -EFAULT;
+		if (copy_to_user(argp, &entry, sizeof(entry)))
+ goto out;
+ r = 0;
+ break;
+ }
+ case KVM_ASSIGN_REG_MSIX_MMIO: {
+ struct kvm_assigned_msix_mmio msix_mmio;
+
+ r = -EFAULT;
+ if (copy_from_user(&msix_mmio, argp, sizeof(msix_mmio)))
+ goto out;
+
+ r = -EINVAL;
+ if (msix_mmio.flags != 0 || msix_mmio.reserved != 0)
+ goto out;
+
+ r = kvm_vm_ioctl_register_msix_mmio(kvm, &msix_mmio);
+ if (r)
+ goto out;
+ break;
+ }
#endif
}
out: