diff mbox

[PATCHv4] qemu-kvm: enable msi with irqchip

Message ID 20090705191248.GA3573@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michael S. Tsirkin July 5, 2009, 7:12 p.m. UTC
Support msi-x with irqchip in kernel: allocate entries
when they are used, and update when they are unmasked.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
Changes since v3:
  style comments from Avi
Changes since v2:
  move all kvm-specific code into kvm_msi_XXX helpers
Changes since v1:
  add braces as per CODING_STYLE

 hw/apic.c    |    4 +-
 hw/msix.c    |  125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 hw/pci.h     |    3 +
 libkvm-all.h |   14 ++++++
 qemu-kvm.c   |   44 ++++++++++++++++++++
 5 files changed, 185 insertions(+), 5 deletions(-)

Comments

Avi Kivity July 7, 2009, 11:36 a.m. UTC | #1
On 07/05/2009 10:12 PM, Michael S. Tsirkin wrote:
> Support msi-x with irqchip in kernel: allocate entries
> when they are used, and update when they are unmasked.
>    

Applied, thanks.  I changed fprintf(r) to fprintf(strerror(-r)).
Jan Kiszka July 7, 2009, 6:30 p.m. UTC | #2
Avi Kivity wrote:
> On 07/05/2009 10:12 PM, Michael S. Tsirkin wrote:
>> Support msi-x with irqchip in kernel: allocate entries
>> when they are used, and update when they are unmasked.
>>    
> 
> Applied, thanks.  I changed fprintf(r) to fprintf(strerror(-r)).
> 

Breaks --disable-kvm build. Michael, will you fix this?

Jan
diff mbox

Patch

diff --git a/hw/apic.c b/hw/apic.c
index 778a853..cdb5972 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -1088,9 +1088,7 @@  int apic_init(CPUState *env)
     s->cpu_env = env;
 
     apic_reset(s);
-    if (!kvm_enabled() || !qemu_kvm_irqchip_in_kernel()) {
-        msix_supported = 1;
-    }
+    msix_supported = 1;
 
     /* XXX: mapping more APICs at the same memory location */
     if (apic_io_memory == 0) {
diff --git a/hw/msix.c b/hw/msix.c
index 4224d8f..6c9ad0f 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -14,6 +14,7 @@ 
 #include "hw.h"
 #include "msix.h"
 #include "pci.h"
+#include "qemu-kvm.h"
 
 /* Declaration from linux/pci_regs.h */
 #define  PCI_CAP_ID_MSIX 0x11 /* MSI-X */
@@ -62,6 +63,101 @@ 
 /* Flag for interrupt controller to declare MSI-X support */
 int msix_supported;
 
+/* KVM specific MSIX helpers */
+static void kvm_msix_free(PCIDevice *dev)
+{
+    int vector, changed = 0;
+    for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
+        if (dev->msix_entry_used[vector]) {
+            kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]);
+            changed = 1;
+        }
+    }
+    if (changed) {
+        kvm_commit_irq_routes(kvm_context);
+    }
+}
+
+static void kvm_msix_routing_entry(PCIDevice *dev, unsigned vector,
+                                   struct kvm_irq_routing_entry *entry)
+{
+    uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+    entry->type = KVM_IRQ_ROUTING_MSI;
+    entry->flags = 0;
+    entry->u.msi.address_lo = pci_get_long(table_entry + MSIX_MSG_ADDR);
+    entry->u.msi.address_hi = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+    entry->u.msi.data = pci_get_long(table_entry + MSIX_MSG_DATA);
+}
+
+static void kvm_msix_update(PCIDevice *dev, int vector,
+                            int was_masked, int is_masked)
+{
+    struct kvm_irq_routing_entry e = {}, *entry;
+    int mask_cleared = was_masked && !is_masked;
+    /* It is only legal to change an entry when it is masked. Therefore, it is
+     * enough to update the routing in kernel when mask is being cleared. */
+    if (!mask_cleared) {
+        return;
+    }
+    if (!dev->msix_entry_used[vector]) {
+        return;
+    }
+    entry = dev->msix_irq_entries + vector;
+    e.gsi = entry->gsi;
+    kvm_msix_routing_entry(dev, vector, &e);
+    if (memcmp(&entry->u.msi, &e.u.msi, sizeof entry->u.msi)) {
+        int r;
+        r = kvm_update_routing_entry(kvm_context, entry, &e);
+        if (r) {
+            fprintf(stderr, "%s: kvm_update_routing_entry failed: %d\n", __func__,
+		    r);
+            exit(1);
+        }
+        memcpy(&entry->u.msi, &e.u.msi, sizeof entry->u.msi);
+        r = kvm_commit_irq_routes(kvm_context);
+        if (r) {
+            fprintf(stderr, "%s: kvm_commit_irq_routes failed: %d\n", __func__,
+		    r);
+            exit(1);
+        }
+    }
+}
+
+static int kvm_msix_add(PCIDevice *dev, unsigned vector)
+{
+    struct kvm_irq_routing_entry *entry = dev->msix_irq_entries + vector;
+    int r;
+
+    r = kvm_get_irq_route_gsi(kvm_context);
+    if (r < 0) {
+        fprintf(stderr, "%s: kvm_get_irq_route_gsi failed: %d\n", __func__, r);
+        return r;
+    }
+    entry->gsi = r;
+    kvm_msix_routing_entry(dev, vector, entry);
+    r = kvm_add_routing_entry(kvm_context, entry);
+    if (r < 0) {
+        fprintf(stderr, "%s: kvm_add_routing_entry failed: %d\n", __func__, r);
+        return r;
+    }
+
+    r = kvm_commit_irq_routes(kvm_context);
+    if (r < 0) {
+        fprintf(stderr, "%s: kvm_commit_irq_routes failed: %d\n", __func__, r);
+        return r;
+    }
+    return 0;
+}
+
+static void kvm_msix_del(PCIDevice *dev, unsigned vector)
+{
+    if (dev->msix_entry_used[vector]) {
+        return;
+    }
+    kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]);
+    kvm_commit_irq_routes(kvm_context);
+}
+
 /* Add MSI-X capability to the config space for the device. */
 /* Given a bar and its size, add MSI-X table on top of it
  * and fill MSI-X capability in the config space.
@@ -109,6 +205,9 @@  static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
 static void msix_free_irq_entries(PCIDevice *dev)
 {
     int vector;
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_msix_free(dev);
+    }
 
     for (vector = 0; vector < dev->msix_entries_nr; ++vector)
         dev->msix_entry_used[vector] = 0;
@@ -181,7 +280,11 @@  static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
     PCIDevice *dev = opaque;
     unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
     int vector = offset / MSIX_ENTRY_SIZE;
+    int was_masked = msix_is_masked(dev, vector);
     memcpy(dev->msix_table_page + offset, &val, 4);
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
+    }
     if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
         msix_clr_pending(dev, vector);
         msix_notify(dev, vector);
@@ -234,6 +337,10 @@  int msix_init(struct PCIDevice *dev, unsigned short nentries,
     if (nentries > MSIX_MAX_ENTRIES)
         return -EINVAL;
 
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        dev->msix_irq_entries = qemu_malloc(nentries *
+                                            sizeof *dev->msix_irq_entries);
+    }
     dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
                                         sizeof *dev->msix_entry_used);
 
@@ -278,6 +385,8 @@  int msix_uninit(PCIDevice *dev)
     dev->msix_table_page = NULL;
     qemu_free(dev->msix_entry_used);
     dev->msix_entry_used = NULL;
+    qemu_free(dev->msix_irq_entries);
+    dev->msix_irq_entries = NULL;
     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
     return 0;
 }
@@ -340,6 +449,10 @@  void msix_notify(PCIDevice *dev, unsigned vector)
         msix_set_pending(dev, vector);
         return;
     }
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        kvm_set_irq(dev->msix_irq_entries[vector].gsi, 1, NULL);
+        return;
+    }
 
     address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
     address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
@@ -369,13 +482,21 @@  int msix_vector_use(PCIDevice *dev, unsigned vector)
 {
     if (vector >= dev->msix_entries_nr)
         return -EINVAL;
-    dev->msix_entry_used[vector]++;
+    if (dev->msix_entry_used[vector]++)
+        return 0;
+    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+        return kvm_msix_add(dev, vector);
+    }
     return 0;
 }
 
 /* Mark vector as unused. */
 void msix_vector_unuse(PCIDevice *dev, unsigned vector)
 {
-    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+    if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector]) {
         --dev->msix_entry_used[vector];
+        if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+            kvm_msix_del(dev, vector);
+        }
+    }
 }
diff --git a/hw/pci.h b/hw/pci.h
index 7ae9c93..66235e4 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -5,6 +5,8 @@ 
 
 #include "qdev.h"
 
+struct kvm_irq_routing_entry;
+
 /* PCI includes legacy ISA access.  */
 #include "isa.h"
 
@@ -232,6 +234,7 @@  struct PCIDevice {
     unsigned *msix_entry_used;
     /* Region including the MSI-X table */
     uint32_t msix_bar_size;
+    struct kvm_irq_routing_entry *msix_irq_entries;
 
     /* Device capability configuration space */
     struct {
diff --git a/libkvm-all.h b/libkvm-all.h
index ecd3065..4a98bcb 100644
--- a/libkvm-all.h
+++ b/libkvm-all.h
@@ -898,6 +898,20 @@  int kvm_del_routing_entry(kvm_context_t kvm,
 		          struct kvm_irq_routing_entry* entry);
 
 /*!
+ * \brief Updates a routing in the temporary irq routing table
+ *
+ * Update a routing in the temporary irq routing table
+ * with a new value. entry type and GSI can not be changed.
+ * Nothing is committed to the running VM.
+ *
+ * \param kvm Pointer to the current kvm_context
+ */
+int kvm_update_routing_entry(kvm_context_t kvm,
+                             struct kvm_irq_routing_entry* entry,
+                             struct kvm_irq_routing_entry* newentry
+);
+
+/*!
  * \brief Commit the temporary irq routing table
  *
  * Commit the temporary irq routing table to the running VM.
diff --git a/qemu-kvm.c b/qemu-kvm.c
index c5cd038..8d226be 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1448,6 +1448,50 @@  int kvm_del_routing_entry(kvm_context_t kvm,
 #endif
 }
 
+int kvm_update_routing_entry(kvm_context_t kvm,
+	                  struct kvm_irq_routing_entry* entry,
+                          struct kvm_irq_routing_entry* newentry)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+    struct kvm_irq_routing_entry *e;
+    int i;
+
+    if (entry->gsi != newentry->gsi ||
+        entry->type != newentry->type) {
+        return -EINVAL;
+    }
+
+    for (i = 0; i < kvm->irq_routes->nr; ++i) {
+        e = &kvm->irq_routes->entries[i];
+        if (e->type != entry->type || e->gsi != entry->gsi) {
+            continue;
+        }
+        switch (e->type) {
+        case KVM_IRQ_ROUTING_IRQCHIP:
+            if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
+                e->u.irqchip.pin == entry->u.irqchip.pin) {
+                memcpy(&e->u.irqchip, &entry->u.irqchip, sizeof e->u.irqchip);
+                return 0;
+            }
+            break;
+        case KVM_IRQ_ROUTING_MSI:
+            if (e->u.msi.address_lo == entry->u.msi.address_lo &&
+                e->u.msi.address_hi == entry->u.msi.address_hi &&
+                e->u.msi.data == entry->u.msi.data) {
+                memcpy(&e->u.msi, &entry->u.msi, sizeof e->u.msi);
+                return 0;
+            }
+            break;
+        default:
+            break;
+        }
+    }
+    return -ESRCH;
+#else
+    return -ENOSYS;
+#endif
+}
+
 int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
 {
 #ifdef KVM_CAP_IRQ_ROUTING