@@ -1088,9 +1088,7 @@ int apic_init(CPUState *env)
s->cpu_env = env;
apic_reset(s);
- if (!kvm_enabled() || !qemu_kvm_irqchip_in_kernel()) {
- msix_supported = 1;
- }
+ msix_supported = 1;
/* XXX: mapping more APICs at the same memory location */
if (apic_io_memory == 0) {
@@ -14,6 +14,7 @@
#include "hw.h"
#include "msix.h"
#include "pci.h"
+#include "qemu-kvm.h"
/* Declaration from linux/pci_regs.h */
#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */
@@ -62,6 +63,101 @@
/* Flag for interrupt controller to declare MSI-X support */
int msix_supported;
+/* KVM specific MSIX helpers */
+static void kvm_msix_free(PCIDevice *dev)
+{
+ int vector, changed = 0;
+ for (vector = 0; vector < dev->msix_entries_nr; ++vector) {
+ if (dev->msix_entry_used[vector]) {
+ kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]);
+ changed = 1;
+ }
+ }
+ if (changed) {
+ kvm_commit_irq_routes(kvm_context);
+ }
+}
+
+static void kvm_msix_routing_entry(PCIDevice *dev, unsigned vector,
+ struct kvm_irq_routing_entry *entry)
+{
+ uint8_t *table_entry = dev->msix_table_page + vector * MSIX_ENTRY_SIZE;
+ entry->type = KVM_IRQ_ROUTING_MSI;
+ entry->flags = 0;
+ entry->u.msi.address_lo = pci_get_long(table_entry + MSIX_MSG_ADDR);
+ entry->u.msi.address_hi = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
+ entry->u.msi.data = pci_get_long(table_entry + MSIX_MSG_DATA);
+}
+
+static void kvm_msix_update(PCIDevice *dev, int vector,
+ int was_masked, int is_masked)
+{
+ struct kvm_irq_routing_entry e = {}, *entry;
+ int mask_cleared = was_masked && !is_masked;
+ /* It is only legal to change an entry when it is masked. Therefore, it is
+ * enough to update the routing in kernel when mask is being cleared. */
+ if (!mask_cleared) {
+ return;
+ }
+ if (!dev->msix_entry_used[vector]) {
+ return;
+ }
+ entry = dev->msix_irq_entries + vector;
+ e.gsi = entry->gsi;
+ kvm_msix_routing_entry(dev, vector, &e);
+ if (memcmp(&entry->u.msi, &e.u.msi, sizeof entry->u.msi)) {
+ int r;
+ r = kvm_update_routing_entry(kvm_context, entry, &e);
+ if (r) {
+ fprintf(stderr, "%s: kvm_update_routing_entry failed: %d\n", __func__,
+ r);
+ exit(1);
+ }
+ memcpy(&entry->u.msi, &e.u.msi, sizeof entry->u.msi);
+ r = kvm_commit_irq_routes(kvm_context);
+ if (r) {
+ fprintf(stderr, "%s: kvm_commit_irq_routes failed: %d\n", __func__,
+ r);
+ exit(1);
+ }
+ }
+}
+
+static int kvm_msix_add(PCIDevice *dev, unsigned vector)
+{
+ struct kvm_irq_routing_entry *entry = dev->msix_irq_entries + vector;
+ int r;
+
+ r = kvm_get_irq_route_gsi(kvm_context);
+ if (r < 0) {
+ fprintf(stderr, "%s: kvm_get_irq_route_gsi failed: %d\n", __func__, r);
+ return r;
+ }
+ entry->gsi = r;
+ kvm_msix_routing_entry(dev, vector, entry);
+ r = kvm_add_routing_entry(kvm_context, entry);
+ if (r < 0) {
+ fprintf(stderr, "%s: kvm_add_routing_entry failed: %d\n", __func__, r);
+ return r;
+ }
+
+ r = kvm_commit_irq_routes(kvm_context);
+ if (r < 0) {
+ fprintf(stderr, "%s: kvm_commit_irq_routes failed: %d\n", __func__, r);
+ return r;
+ }
+ return 0;
+}
+
+static void kvm_msix_del(PCIDevice *dev, unsigned vector)
+{
+ if (dev->msix_entry_used[vector]) {
+ return;
+ }
+ kvm_del_routing_entry(kvm_context, &dev->msix_irq_entries[vector]);
+ kvm_commit_irq_routes(kvm_context);
+}
+
/* Add MSI-X capability to the config space for the device. */
/* Given a bar and its size, add MSI-X table on top of it
* and fill MSI-X capability in the config space.
@@ -109,6 +205,9 @@ static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
static void msix_free_irq_entries(PCIDevice *dev)
{
int vector;
+ if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+ kvm_msix_free(dev);
+ }
for (vector = 0; vector < dev->msix_entries_nr; ++vector)
dev->msix_entry_used[vector] = 0;
@@ -181,7 +280,11 @@ static void msix_mmio_writel(void *opaque, target_phys_addr_t addr,
PCIDevice *dev = opaque;
unsigned int offset = addr & (MSIX_PAGE_SIZE - 1);
int vector = offset / MSIX_ENTRY_SIZE;
+ int was_masked = msix_is_masked(dev, vector);
memcpy(dev->msix_table_page + offset, &val, 4);
+ if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+ kvm_msix_update(dev, vector, was_masked, msix_is_masked(dev, vector));
+ }
if (!msix_is_masked(dev, vector) && msix_is_pending(dev, vector)) {
msix_clr_pending(dev, vector);
msix_notify(dev, vector);
@@ -234,6 +337,10 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
if (nentries > MSIX_MAX_ENTRIES)
return -EINVAL;
+ if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+ dev->msix_irq_entries = qemu_malloc(nentries *
+ sizeof *dev->msix_irq_entries);
+ }
dev->msix_entry_used = qemu_mallocz(MSIX_MAX_ENTRIES *
sizeof *dev->msix_entry_used);
@@ -278,6 +385,8 @@ int msix_uninit(PCIDevice *dev)
dev->msix_table_page = NULL;
qemu_free(dev->msix_entry_used);
dev->msix_entry_used = NULL;
+ qemu_free(dev->msix_irq_entries);
+ dev->msix_irq_entries = NULL;
dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
return 0;
}
@@ -340,6 +449,10 @@ void msix_notify(PCIDevice *dev, unsigned vector)
msix_set_pending(dev, vector);
return;
}
+ if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+ kvm_set_irq(dev->msix_irq_entries[vector].gsi, 1, NULL);
+ return;
+ }
address = pci_get_long(table_entry + MSIX_MSG_UPPER_ADDR);
address = (address << 32) | pci_get_long(table_entry + MSIX_MSG_ADDR);
@@ -369,13 +482,21 @@ int msix_vector_use(PCIDevice *dev, unsigned vector)
{
if (vector >= dev->msix_entries_nr)
return -EINVAL;
- dev->msix_entry_used[vector]++;
+ if (dev->msix_entry_used[vector]++)
+ return 0;
+ if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+ return kvm_msix_add(dev, vector);
+ }
return 0;
}
/* Mark vector as unused. */
void msix_vector_unuse(PCIDevice *dev, unsigned vector)
{
- if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector])
+ if (vector < dev->msix_entries_nr && dev->msix_entry_used[vector]) {
--dev->msix_entry_used[vector];
+ if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
+ kvm_msix_del(dev, vector);
+ }
+ }
}
@@ -5,6 +5,8 @@
#include "qdev.h"
+struct kvm_irq_routing_entry;
+
/* PCI includes legacy ISA access. */
#include "isa.h"
@@ -232,6 +234,7 @@ struct PCIDevice {
unsigned *msix_entry_used;
/* Region including the MSI-X table */
uint32_t msix_bar_size;
+ struct kvm_irq_routing_entry *msix_irq_entries;
/* Device capability configuration space */
struct {
@@ -898,6 +898,20 @@ int kvm_del_routing_entry(kvm_context_t kvm,
struct kvm_irq_routing_entry* entry);
/*!
+ * \brief Updates a routing in the temporary irq routing table
+ *
+ * Update a routing in the temporary irq routing table
+ * with a new value. entry type and GSI can not be changed.
+ * Nothing is committed to the running VM.
+ *
+ * \param kvm Pointer to the current kvm_context
+ */
+int kvm_update_routing_entry(kvm_context_t kvm,
+ struct kvm_irq_routing_entry* entry,
+ struct kvm_irq_routing_entry* newentry
+);
+
+/*!
* \brief Commit the temporary irq routing table
*
* Commit the temporary irq routing table to the running VM.
@@ -1448,6 +1448,50 @@ int kvm_del_routing_entry(kvm_context_t kvm,
#endif
}
+int kvm_update_routing_entry(kvm_context_t kvm,
+ struct kvm_irq_routing_entry* entry,
+ struct kvm_irq_routing_entry* newentry)
+{
+#ifdef KVM_CAP_IRQ_ROUTING
+ struct kvm_irq_routing_entry *e;
+ int i;
+
+ if (entry->gsi != newentry->gsi ||
+ entry->type != newentry->type) {
+ return -EINVAL;
+ }
+
+ for (i = 0; i < kvm->irq_routes->nr; ++i) {
+ e = &kvm->irq_routes->entries[i];
+ if (e->type != entry->type || e->gsi != entry->gsi) {
+ continue;
+ }
+ switch (e->type) {
+ case KVM_IRQ_ROUTING_IRQCHIP:
+ if (e->u.irqchip.irqchip == entry->u.irqchip.irqchip &&
+ e->u.irqchip.pin == entry->u.irqchip.pin) {
+ memcpy(&e->u.irqchip, &entry->u.irqchip, sizeof e->u.irqchip);
+ return 0;
+ }
+ break;
+ case KVM_IRQ_ROUTING_MSI:
+ if (e->u.msi.address_lo == entry->u.msi.address_lo &&
+ e->u.msi.address_hi == entry->u.msi.address_hi &&
+ e->u.msi.data == entry->u.msi.data) {
+ memcpy(&e->u.msi, &entry->u.msi, sizeof e->u.msi);
+ return 0;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ return -ESRCH;
+#else
+ return -ENOSYS;
+#endif
+}
+
int kvm_del_irq_route(kvm_context_t kvm, int gsi, int irqchip, int pin)
{
#ifdef KVM_CAP_IRQ_ROUTING
Support msi-x with irqchip in kernel: allocate entries when they are used, and update when they are unmasked. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> --- Changes since v3: style comments from Avi Changes since v2: move all kvm-specific code into kvm_msi_XXX helpers Changes since v1: add braces as per CODING_STYLE hw/apic.c | 4 +- hw/msix.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- hw/pci.h | 3 + libkvm-all.h | 14 ++++++ qemu-kvm.c | 44 ++++++++++++++++++++ 5 files changed, 185 insertions(+), 5 deletions(-)