diff mbox

qemu-kvm: enable msi with irqchip

Message ID 20090702180517.GA27897@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michael S. Tsirkin July 2, 2009, 6:05 p.m. UTC
Support msi-x with irqchip in kernel: allocate entries
when they are used, and update when they are unmasked.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---

This gets qemu-kvm.git on par with qemu.git with respect to MSI support.
Note: it probably makes sense to rewrite msi support in assigned devices
on top of this infrastructure. There's an old untested patch of mine for
this floating around but I don't think I'll have the time for this
anytime soon - any takers?

 hw/apic.c    |    4 +--
 hw/msix.c    |   90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 hw/pci.h     |    3 ++
 libkvm-all.h |   14 +++++++++
 qemu-kvm.c   |   52 +++++++++++++++++++++++++++++++++
 5 files changed, 157 insertions(+), 6 deletions(-)
diff mbox

Patch

diff --git a/hw/apic.c b/hw/apic.c
index 778a853..cdb5972 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -1088,9 +1088,7 @@  int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
-    if (!kvm_enabled() || !qemu_kvm_irqchip_in_kernel()) {
-        msix_supported = 1;
-    }
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
diff --git a/hw/msix.c b/hw/msix.c
index 98c62a5..64bced6 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -14,6 +14,7 @@ 
 #include "hw.h"
 #include "msix.h"
 #include "pci.h"
+#include "qemu-kvm.h"
 
 /* Declaration from linux/pci_regs.h */
 #define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
@@ -109,6 +110,16 @@  static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
 static void msix_free_irq_entries(PCIDevice *dev)
 {
     int vector;
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        int changed = 0;
+        for (vector = 0; vector < dev->msix_entries_nr; ++vector)
+            if (dev->msix_entry_used[vector]) {
+                kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]);
+                changed = 1;
+            }
+        if (changed)
+            kvm_commit_irq_routes(kvm_context);
+    }
 
     for (vector = 0; vector < dev->msix_entries_nr; ++vector)
         dev->msix_entry_used[vector] = 0;
@@ -181,7 +192,36 @@  static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
     PCIDevice *dev = opaque;
     unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
     int vector = offset / MSIX_ENTRY_SIZE;
+    int was_masked = msix_is_masked(dev, vector);
     memcpy(dev->msix_table_page + offset, &val, 4);
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel() &&
+        was_masked && !msix_is_masked(dev, vector) &&
+        dev->msix_entry_used[vector]) {
+        struct kvm_irq_routing_entry e, *entry;
+        uint8_t *table;
+        table = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+        entry = dev->msix_irq_entries + vector;
+        e.gsi = entry->gsi;
+        e.type = entry->type;
+        e.flags = entry->flags;
+        e.u.msi.address_lo = pci_get_long(table + MSIX_MSG_ADDR);
+        e.u.msi.address_hi = pci_get_long(table + MSIX_MSG_UPPER_ADDR);
+        e.u.msi.data = pci_get_long(table + MSIX_MSG_DATA);
+        if (memcmp(&entry->u.msi, &e.u.msi, sizeof entry->u.msi)) {
+            int r;
+            r = kvm_update_routing_entry(kvm_context, entry, &e);
+            if (r) {
+                perror("msix_vector_use: kvm_update_routing_entry failed: ");
+                exit(1);
+            }
+            memcpy(&entry->u.msi, &e.u.msi, sizeof entry->u.msi);
+            r = kvm_commit_irq_routes(kvm_context);
+            if (r) {
+                perror("msix_vector_use: kvm_commit_irq_routes failed: ");
+                exit(1);
+            }
+        }
+    }
     if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
         msix_clr_pending(dev, vector);
         msix_notify(dev, vector);
@@ -234,6 +274,10 @@  int msix_init(struct PCIDevice *dev, unsigned short nentries,
     if (nentries > MSIX_MAX_ENTRIES)
         return -EINVAL;
 
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel())
+        dev->msix_irq_entries = qemu_malloc(nentries *
+                                            sizeof *dev->msix_irq_entries);
+
     dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
                                         sizeof *dev->msix_entry_used);
 
@@ -278,6 +322,8 @@  int msix_uninit(PCIDevice *dev)
     dev->msix_table_page = NULL;
     qemu_free(dev->msix_entry_used);
     dev->msix_entry_used = NULL;
+    qemu_free(dev->msix_irq_entries);
+    dev->msix_irq_entries = NULL;
     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
     return 0;
 }
@@ -339,6 +385,10 @@  void msix_notify(PCIDevice *dev, unsigned vector)
         msix_set_pending(dev, vector);
         return;
     }
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_set_irq(dev->msix_irq_entries[vector].gsi, 1, NULL);
+        return;
+    }
 
     address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
     address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
@@ -368,13 +418,47 @@  int msix_vector_use(PCIDevice *dev, unsigned vector)
 {
     if (vector >= dev->msix_entries_nr)
         return -EINVAL;
-    dev->msix_entry_used[vector]++;
+    if (dev->msix_entry_used[vector]++)
+        return 0;
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+        struct kvm_irq_routing_entry *entry = dev->msix_irq_entries + vector;
+        int r;
+
+        r = kvm_get_irq_route_gsi(kvm_context);
+        if (r < 0)
+            return r;
+
+        entry->gsi = r;
+        entry->type = KVM_IRQ_ROUTING_MSI;
+        entry->flags = 0;
+        entry->u.msi.address_lo = pci_get_long(table_entry + MSIX_MSG_ADDR);
+        entry->u.msi.address_hi = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+        entry->u.msi.data = pci_get_long(table_entry + MSIX_MSG_DATA);
+        r = kvm_add_routing_entry(kvm_context, entry);
+        if (r < 0) {
+            perror("msix_vector_use: kvm_add_routing_entry failed: ");
+            return r;
+        }
+
+        r = kvm_commit_irq_routes(kvm_context);
+        if (r < 0) {
+            perror("msix_vector_use: kvm_add_routing_entry failed: ");
+            return r;
+        }
+    }
     return 0;
 }
 
 /* Mark vector as unused. */
 void msix_vector_unuse(PCIDevice *dev, unsigned vector)
 {
-    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
-        --dev->msix_entry_used[vector];
+    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector]) {
+        if (--dev->msix_entry_used[vector])
+            return;
+        if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+            kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]);
+            kvm_commit_irq_routes(kvm_context);
+        }
+    }
 }
diff --git a/hw/pci.h b/hw/pci.h
index 328b846..baaa96b 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -5,6 +5,8 @@ 
 
 #include "qdev.h"
 
+struct kvm_irq_routing_entry;
+
 /* PCI includes legacy ISA access.  */
 #include "isa.h"
 
@@ -232,6 +234,7 @@  struct PCIDevice {
     unsigned *msix_entry_used;
     /* Region including the MSI-X table */
     uint32_t msix_bar_size;
+    struct kvm_irq_routing_entry *msix_irq_entries;
 
     /* Device capability configuration space */
     struct {
diff --git a/libkvm-all.h b/libkvm-all.h
index ecd3065..4a98bcb 100644
--- a/libkvm-all.h
+++ b/libkvm-all.h
@@ -898,6 +898,20 @@  int kvm_del_routing_entry(kvm_context_t kvm,
 		          struct kvm_irq_routing_entry* entry);
 
 /*!
+ * \brief Updates a routing in the temporary irq routing table
+ *
+ * Update a routing in the temporary irq routing table
+ * with a new value. entry type and GSI can not be changed.
+ * Nothing is committed to the running VM.
+ *
+ * \param kvm Pointer to the current kvm_context
+ */
+int kvm_update_routing_entry(kvm_context_t kvm,
+                             struct kvm_irq_routing_entry* entry,
+                             struct kvm_irq_routing_entry* newentry
+);
+
+/*!
  * \brief Commit the temporary irq routing table
  *
  * Commit the temporary irq routing table to the running VM.
diff --git a/qemu-kvm.c b/qemu-kvm.c
index c5cd038..669b534 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1448,6 +1448,58 @@  int kvm_del_routing_entry(kvm_context_t kvm,
 #endif
 }
 
+int kvm_update_routing_entry(kvm_context_t kvm,
+	                  struct kvm_irq_routing_entry* entry,
+                          struct kvm_irq_routing_entry* newentry)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+	struct kvm_irq_routing_entry *e;
+	int i, gsi, found = 0;
+
+        if (entry->gsi != newentry->gsi ||
+            entry->type != newentry->type)
+            return -EINVAL;
+
+	gsi = entry->gsi;
+
+	for (i = 0; i < kvm->irq_routes->nr; ++i) {
+		e = &kvm->irq_routes->entries[i];
+		if (e->type != entry->type || e->gsi != gsi)
+                    continue;
+
+		switch (e->type)
+		{
+		case KVM_IRQ_ROUTING_IRQCHIP: {
+			if (e->u.irqchip.irqchip ==
+			    entry->u.irqchip.irqchip
+			    && e->u.irqchip.pin ==
+			    entry->u.irqchip.pin)
+			    found = 1;
+			break;
+		}
+		case KVM_IRQ_ROUTING_MSI: {
+			if (e->u.msi.address_lo ==
+			    entry->u.msi.address_lo
+			    && e->u.msi.address_hi ==
+			    entry->u.msi.address_hi
+			    && e->u.msi.data == entry->u.msi.data)
+			    found = 1;
+			break;
+		}
+		default:
+			break;
+		}
+		if (found) {
+			memcpy(e, entry, sizeof *e);
+			return 0;
+		}
+	}
+	return -ESRCH;
+#else
+	return -ENOSYS;
+#endif
+}
+
 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
 {
 #ifdef KVM_CAP_IRQ_ROUTING