diff mbox series

[v4,12/14] vfio-user: handle device interrupts

Message ID 219410a610c55ac7f8db806aa8a4bc2e124e9d5c.1639549843.git.jag.raman@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio-user server in QEMU | expand

Commit Message

Jag Raman Dec. 15, 2021, 3:35 p.m. UTC
Forward remote device's interrupts to the guest

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
 include/hw/pci/pci.h      |  6 ++++
 include/hw/remote/iohub.h |  1 +
 hw/pci/msi.c              | 13 ++++++-
 hw/pci/msix.c             | 12 ++++++-
 hw/remote/iohub.c         |  7 ++++
 hw/remote/vfio-user-obj.c | 74 +++++++++++++++++++++++++++++++++++++++
 hw/remote/trace-events    |  1 +
 7 files changed, 112 insertions(+), 2 deletions(-)

Comments

Stefan Hajnoczi Dec. 16, 2021, 3:56 p.m. UTC | #1
On Wed, Dec 15, 2021 at 10:35:36AM -0500, Jagannathan Raman wrote:
> @@ -62,6 +66,9 @@ void remote_iohub_set_irq(void *opaque, int pirq, int level)
>      QEMU_LOCK_GUARD(&iohub->irq_level_lock[pirq]);
>  
>      if (level) {
> +        if (iohub->intx_notify) {
> +            iohub->intx_notify(pirq, 0);
> +        }
>          if (++iohub->irq_level[pirq] == 1) {
>              event_notifier_set(&iohub->irqfds[pirq]);
>          }

Does it make sense to use iohub.c in vfio-user since these irqfds are
unused?

Instead of adding iohub->intx_notify(), can vfio-user register its own
set_irq() callback for the PCI bus?

> diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
> index ae375e69b9..2b28d465d5 100644
> --- a/hw/remote/vfio-user-obj.c
> +++ b/hw/remote/vfio-user-obj.c
> @@ -50,6 +50,9 @@
>  #include "hw/pci/pci.h"
>  #include "qemu/timer.h"
>  #include "hw/remote/iommu.h"
> +#include "hw/pci/msi.h"
> +#include "hw/pci/msix.h"
> +#include "hw/remote/iohub.h"
>  
>  #define TYPE_VFU_OBJECT "x-vfio-user-server"
>  OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
> @@ -81,6 +84,8 @@ struct VfuObject {
>      int vfu_poll_fd;
>  };
>  
> +static GHashTable *vfu_object_dev_table;
> +
>  static void vfu_object_init_ctx(VfuObject *o, Error **errp);
>  
>  static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
> @@ -347,6 +352,54 @@ static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
>      }
>  }
>  
> +static void vfu_object_intx_notify(int pci_devfn, unsigned vector)
> +{
> +    vfu_ctx_t *vfu_ctx = g_hash_table_lookup(vfu_object_dev_table,
> +                                             (void *)(uint64_t)pci_devfn);

I'm not sure the hash table is necessary. The function arguments
currently don't contain the information we need, but that's easy to fix
because these functions are added by this patch. Add an opaque pointer
argument to ->intx_notify, ->msix_notify, and ->msi_notify in order to
pass along the VfuObject we need.

> +
> +    if (vfu_ctx) {
> +        vfu_irq_trigger(vfu_ctx, vector);
> +    }
> +}
> +
> +static void vfu_object_msi_notify(PCIDevice *pci_dev, unsigned vector)
> +{
> +    vfu_object_intx_notify(pci_dev->devfn, vector);
> +}
> +
> +static int vfu_object_setup_irqs(vfu_ctx_t *vfu_ctx, PCIDevice *pci_dev)
> +{
> +    RemoteMachineState *machine = REMOTE_MACHINE(current_machine);
> +    RemoteIOHubState *iohub = &machine->iohub;
> +    int ret;
> +
> +    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    iohub->intx_notify = vfu_object_intx_notify;
> +
> +    ret = 0;
> +    if (msix_nr_vectors_allocated(pci_dev)) {
> +        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
> +                                       msix_nr_vectors_allocated(pci_dev));
> +
> +        pci_dev->msix_notify = vfu_object_msi_notify;
> +    } else if (msi_nr_vectors_allocated(pci_dev)) {
> +        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
> +                                       msi_nr_vectors_allocated(pci_dev));
> +
> +        pci_dev->msi_notify = vfu_object_msi_notify;
> +    }
> +
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +
>  /*
>   * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
>   * properties. It also depends on devices instantiated in QEMU. These
> @@ -437,6 +490,13 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
>  
>      vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
>  
> +    ret = vfu_object_setup_irqs(o->vfu_ctx, o->pci_dev);
> +    if (ret < 0) {
> +        error_setg(errp, "vfu: Failed to setup interrupts for %s",
> +                   o->device);
> +        goto fail;
> +    }
> +
>      ret = vfu_realize_ctx(o->vfu_ctx);
>      if (ret < 0) {
>          error_setg(errp, "vfu: Failed to realize device %s- %s",
> @@ -450,6 +510,9 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
>          goto fail;
>      }
>  
> +    g_hash_table_insert(vfu_object_dev_table,
> +                        (void *)(uint64_t)o->pci_dev->devfn, o->vfu_ctx);

vfu_object_dev_table seems like a misnomer since the values stored are
actually vfu_ctx_t pointers, not VfuObjects. How about vfu_devfn_to_vfu_ctx_table?

> +
>      qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);
>  
>      return;
> @@ -504,9 +567,18 @@ static void vfu_object_finalize(Object *obj)
>          remote_iommu_free(o->pci_dev);
>      }
>  
> +    if (o->pci_dev &&
> +            g_hash_table_lookup(vfu_object_dev_table,
> +                                (void *)(uint64_t)o->pci_dev->devfn)) {

The g_hash_table_lookup() call is unnecessary since g_hash_table_remove()
is a no-op if the key does not exist.
diff mbox series

Patch

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index f2fc2d5375..ffc030d9ca 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -128,6 +128,8 @@  typedef uint32_t PCIConfigReadFunc(PCIDevice *pci_dev,
 typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
                                 pcibus_t addr, pcibus_t size, int type);
 typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
+typedef void PCIMSINotify(PCIDevice *pci_dev, unsigned vector);
+typedef void PCIMSIxNotify(PCIDevice *pci_dev, unsigned vector);
 
 typedef struct PCIIORegion {
     pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
@@ -321,6 +323,10 @@  struct PCIDevice {
     /* Space to store MSIX table & pending bit array */
     uint8_t *msix_table;
     uint8_t *msix_pba;
+
+    PCIMSINotify *msi_notify;
+    PCIMSIxNotify *msix_notify;
+
     /* MemoryRegion container for msix exclusive BAR setup */
     MemoryRegion msix_exclusive_bar;
     /* Memory Regions for MSIX table and pending bit entries. */
diff --git a/include/hw/remote/iohub.h b/include/hw/remote/iohub.h
index 0bf98e0d78..70d98b38d0 100644
--- a/include/hw/remote/iohub.h
+++ b/include/hw/remote/iohub.h
@@ -30,6 +30,7 @@  typedef struct RemoteIOHubState {
     unsigned int irq_level[REMOTE_IOHUB_NB_PIRQS];
     ResampleToken token[REMOTE_IOHUB_NB_PIRQS];
     QemuMutex irq_level_lock[REMOTE_IOHUB_NB_PIRQS];
+    void (*intx_notify)(int pirq, unsigned vector);
 } RemoteIOHubState;
 
 int remote_iohub_map_irq(PCIDevice *pci_dev, int intx);
diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index 47d2b0f33c..93f5e400cc 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -51,6 +51,8 @@ 
  */
 bool msi_nonbroken;
 
+static void pci_msi_notify(PCIDevice *dev, unsigned int vector);
+
 /* If we get rid of cap allocator, we won't need this. */
 static inline uint8_t msi_cap_sizeof(uint16_t flags)
 {
@@ -225,6 +227,8 @@  int msi_init(struct PCIDevice *dev, uint8_t offset,
     dev->msi_cap = config_offset;
     dev->cap_present |= QEMU_PCI_CAP_MSI;
 
+    dev->msi_notify = pci_msi_notify;
+
     pci_set_word(dev->config + msi_flags_off(dev), flags);
     pci_set_word(dev->wmask + msi_flags_off(dev),
                  PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
@@ -307,7 +311,7 @@  bool msi_is_masked(const PCIDevice *dev, unsigned int vector)
     return mask & (1U << vector);
 }
 
-void msi_notify(PCIDevice *dev, unsigned int vector)
+static void pci_msi_notify(PCIDevice *dev, unsigned int vector)
 {
     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
@@ -332,6 +336,13 @@  void msi_notify(PCIDevice *dev, unsigned int vector)
     msi_send_message(dev, msg);
 }
 
+void msi_notify(PCIDevice *dev, unsigned int vector)
+{
+    if (dev->msi_notify) {
+        dev->msi_notify(dev, vector);
+    }
+}
+
 void msi_send_message(PCIDevice *dev, MSIMessage msg)
 {
     MemTxAttrs attrs = {};
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index ae9331cd0b..1c71e67f53 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -31,6 +31,8 @@ 
 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
 #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
 
+static void pci_msix_notify(PCIDevice *dev, unsigned vector);
+
 MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
 {
     uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
@@ -334,6 +336,7 @@  int msix_init(struct PCIDevice *dev, unsigned short nentries,
     dev->msix_table = g_malloc0(table_size);
     dev->msix_pba = g_malloc0(pba_size);
     dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
+    dev->msix_notify = pci_msix_notify;
 
     msix_mask_all(dev, nentries);
 
@@ -485,7 +488,7 @@  int msix_enabled(PCIDevice *dev)
 }
 
 /* Send an MSI-X message */
-void msix_notify(PCIDevice *dev, unsigned vector)
+static void pci_msix_notify(PCIDevice *dev, unsigned vector)
 {
     MSIMessage msg;
 
@@ -503,6 +506,13 @@  void msix_notify(PCIDevice *dev, unsigned vector)
     msi_send_message(dev, msg);
 }
 
+void msix_notify(PCIDevice *dev, unsigned vector)
+{
+    if (dev->msix_notify) {
+        dev->msix_notify(dev, vector);
+    }
+}
+
 void msix_reset(PCIDevice *dev)
 {
     if (!msix_present(dev)) {
diff --git a/hw/remote/iohub.c b/hw/remote/iohub.c
index 547d597f0f..d28d9f3ce2 100644
--- a/hw/remote/iohub.c
+++ b/hw/remote/iohub.c
@@ -17,7 +17,9 @@ 
 #include "qemu/thread.h"
 #include "hw/remote/machine.h"
 #include "hw/remote/iohub.h"
+#include "hw/pci/msi.h"
 #include "qemu/main-loop.h"
+#include "trace.h"
 
 void remote_iohub_init(RemoteIOHubState *iohub)
 {
@@ -32,6 +34,8 @@  void remote_iohub_init(RemoteIOHubState *iohub)
         event_notifier_init_fd(&iohub->irqfds[pirq], -1);
         event_notifier_init_fd(&iohub->resamplefds[pirq], -1);
     }
+
+    msi_nonbroken = true;
 }
 
 void remote_iohub_finalize(RemoteIOHubState *iohub)
@@ -62,6 +66,9 @@  void remote_iohub_set_irq(void *opaque, int pirq, int level)
     QEMU_LOCK_GUARD(&iohub->irq_level_lock[pirq]);
 
     if (level) {
+        if (iohub->intx_notify) {
+            iohub->intx_notify(pirq, 0);
+        }
         if (++iohub->irq_level[pirq] == 1) {
             event_notifier_set(&iohub->irqfds[pirq]);
         }
diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c
index ae375e69b9..2b28d465d5 100644
--- a/hw/remote/vfio-user-obj.c
+++ b/hw/remote/vfio-user-obj.c
@@ -50,6 +50,9 @@ 
 #include "hw/pci/pci.h"
 #include "qemu/timer.h"
 #include "hw/remote/iommu.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/remote/iohub.h"
 
 #define TYPE_VFU_OBJECT "x-vfio-user-server"
 OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
@@ -81,6 +84,8 @@  struct VfuObject {
     int vfu_poll_fd;
 };
 
+static GHashTable *vfu_object_dev_table;
+
 static void vfu_object_init_ctx(VfuObject *o, Error **errp);
 
 static void vfu_object_set_socket(Object *obj, Visitor *v, const char *name,
@@ -347,6 +352,54 @@  static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
     }
 }
 
+static void vfu_object_intx_notify(int pci_devfn, unsigned vector)
+{
+    vfu_ctx_t *vfu_ctx = g_hash_table_lookup(vfu_object_dev_table,
+                                             (void *)(uint64_t)pci_devfn);
+
+    if (vfu_ctx) {
+        vfu_irq_trigger(vfu_ctx, vector);
+    }
+}
+
+static void vfu_object_msi_notify(PCIDevice *pci_dev, unsigned vector)
+{
+    vfu_object_intx_notify(pci_dev->devfn, vector);
+}
+
+static int vfu_object_setup_irqs(vfu_ctx_t *vfu_ctx, PCIDevice *pci_dev)
+{
+    RemoteMachineState *machine = REMOTE_MACHINE(current_machine);
+    RemoteIOHubState *iohub = &machine->iohub;
+    int ret;
+
+    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
+    if (ret < 0) {
+        return ret;
+    }
+
+    iohub->intx_notify = vfu_object_intx_notify;
+
+    ret = 0;
+    if (msix_nr_vectors_allocated(pci_dev)) {
+        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
+                                       msix_nr_vectors_allocated(pci_dev));
+
+        pci_dev->msix_notify = vfu_object_msi_notify;
+    } else if (msi_nr_vectors_allocated(pci_dev)) {
+        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
+                                       msi_nr_vectors_allocated(pci_dev));
+
+        pci_dev->msi_notify = vfu_object_msi_notify;
+    }
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    return 0;
+}
+
 /*
  * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
  * properties. It also depends on devices instantiated in QEMU. These
@@ -437,6 +490,13 @@  static void vfu_object_init_ctx(VfuObject *o, Error **errp)
 
     vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
 
+    ret = vfu_object_setup_irqs(o->vfu_ctx, o->pci_dev);
+    if (ret < 0) {
+        error_setg(errp, "vfu: Failed to setup interrupts for %s",
+                   o->device);
+        goto fail;
+    }
+
     ret = vfu_realize_ctx(o->vfu_ctx);
     if (ret < 0) {
         error_setg(errp, "vfu: Failed to realize device %s- %s",
@@ -450,6 +510,9 @@  static void vfu_object_init_ctx(VfuObject *o, Error **errp)
         goto fail;
     }
 
+    g_hash_table_insert(vfu_object_dev_table,
+                        (void *)(uint64_t)o->pci_dev->devfn, o->vfu_ctx);
+
     qemu_set_fd_handler(o->vfu_poll_fd, vfu_object_attach_ctx, NULL, o);
 
     return;
@@ -504,9 +567,18 @@  static void vfu_object_finalize(Object *obj)
         remote_iommu_free(o->pci_dev);
     }
 
+    if (o->pci_dev &&
+            g_hash_table_lookup(vfu_object_dev_table,
+                                (void *)(uint64_t)o->pci_dev->devfn)) {
+        g_hash_table_remove(vfu_object_dev_table,
+                            (void *)(uint64_t)o->pci_dev->devfn);
+    }
+
     o->pci_dev = NULL;
 
     if (!k->nr_devs && !k->daemon) {
+        g_hash_table_destroy(vfu_object_dev_table);
+        vfu_object_dev_table = NULL;
         qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
     }
 
@@ -525,6 +597,8 @@  static void vfu_object_class_init(ObjectClass *klass, void *data)
     /* Later determine how to detect a daemon */
     k->daemon = false;
 
+    vfu_object_dev_table = g_hash_table_new_full(NULL, NULL, NULL, NULL);
+
     object_class_property_add(klass, "socket", "SocketAddress", NULL,
                               vfu_object_set_socket, NULL, NULL);
     object_class_property_set_description(klass, "socket",
diff --git a/hw/remote/trace-events b/hw/remote/trace-events
index 847d50d88f..c167b3c7a5 100644
--- a/hw/remote/trace-events
+++ b/hw/remote/trace-events
@@ -12,3 +12,4 @@  vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64""
 vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64""
 vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64""
 vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64""
+vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d"