diff mbox series

[17/26] vfio-user: pci_user_realize PCI setup

Message ID 20250108115032.1677686-18-john.levon@nutanix.com (mailing list archive)
State New
Headers show
Series vfio-user client | expand

Commit Message

John Levon Jan. 8, 2025, 11:50 a.m. UTC
From: Jagannathan Raman <jag.raman@oracle.com>

PCI BARs read from remote device
PCI config reads/writes sent to remote server

Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
 hw/vfio/pci.c      | 243 ++++++++++++++++++++++++++-------------------
 hw/vfio/pci.h      |  10 ++
 hw/vfio/user-pci.c |  42 ++++++++
 3 files changed, 191 insertions(+), 104 deletions(-)
diff mbox series

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 90cf29325f..cd7bff2b4c 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1728,7 +1728,7 @@  static bool vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
     return true;
 }
 
-static void vfio_teardown_msi(VFIOPCIDevice *vdev)
+void vfio_teardown_msi(VFIOPCIDevice *vdev)
 {
     msi_uninit(&vdev->pdev);
 
@@ -1829,7 +1829,7 @@  static void vfio_bars_register(VFIOPCIDevice *vdev)
     }
 }
 
-static void vfio_bars_exit(VFIOPCIDevice *vdev)
+void vfio_bars_exit(VFIOPCIDevice *vdev)
 {
     int i;
 
@@ -1849,7 +1849,7 @@  static void vfio_bars_exit(VFIOPCIDevice *vdev)
     }
 }
 
-static void vfio_bars_finalize(VFIOPCIDevice *vdev)
+void vfio_bars_finalize(VFIOPCIDevice *vdev)
 {
     int i;
 
@@ -2417,7 +2417,7 @@  static void vfio_add_ext_cap(VFIOPCIDevice *vdev)
     return;
 }
 
-static bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
+bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp)
 {
     PCIDevice *pdev = &vdev->pdev;
 
@@ -2766,7 +2766,7 @@  bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp)
     return true;
 }
 
-static bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
+bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp)
 {
     VFIODevice *vbasedev = &vdev->vbasedev;
     struct vfio_region_info *reg_info = NULL;
@@ -2884,7 +2884,7 @@  static void vfio_err_notifier_handler(void *opaque)
  * and continue after disabling error recovery support for the
  * device.
  */
-static void vfio_register_err_notifier(VFIOPCIDevice *vdev)
+void vfio_register_err_notifier(VFIOPCIDevice *vdev)
 {
     Error *err = NULL;
     int32_t fd;
@@ -2943,7 +2943,7 @@  static void vfio_req_notifier_handler(void *opaque)
     }
 }
 
-static void vfio_register_req_notifier(VFIOPCIDevice *vdev)
+void vfio_register_req_notifier(VFIOPCIDevice *vdev)
 {
     struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info),
                                       .index = VFIO_PCI_REQ_IRQ_INDEX };
@@ -2998,79 +2998,10 @@  static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
     vdev->req_enabled = false;
 }
 
-static void vfio_realize(PCIDevice *pdev, Error **errp)
+bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp)
 {
-    ERRP_GUARD();
-    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    PCIDevice *pdev = &vdev->pdev;
     VFIODevice *vbasedev = &vdev->vbasedev;
-    int i, ret;
-    char uuid[UUID_STR_LEN];
-    g_autofree char *name = NULL;
-
-    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
-        if (!(~vdev->host.domain || ~vdev->host.bus ||
-              ~vdev->host.slot || ~vdev->host.function)) {
-            error_setg(errp, "No provided host device");
-            error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
-#ifdef CONFIG_IOMMUFD
-                              "or -device vfio-pci,fd=DEVICE_FD "
-#endif
-                              "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
-            return;
-        }
-        vbasedev->sysfsdev =
-            g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x",
-                            vdev->host.domain, vdev->host.bus,
-                            vdev->host.slot, vdev->host.function);
-    }
-
-    if (!vfio_device_get_name(vbasedev, errp)) {
-        return;
-    }
-
-    /*
-     * Mediated devices *might* operate compatibly with discarding of RAM, but
-     * we cannot know for certain, it depends on whether the mdev vendor driver
-     * stays in sync with the active working set of the guest driver.  Prevent
-     * the x-balloon-allowed option unless this is minimally an mdev device.
-     */
-    vbasedev->mdev = vfio_device_is_mdev(vbasedev);
-
-    trace_vfio_mdev(vbasedev->name, vbasedev->mdev);
-
-    if (vbasedev->ram_block_discard_allowed && !vbasedev->mdev) {
-        error_setg(errp, "x-balloon-allowed only potentially compatible "
-                   "with mdev devices");
-        goto error;
-    }
-
-    if (!qemu_uuid_is_null(&vdev->vf_token)) {
-        qemu_uuid_unparse(&vdev->vf_token, uuid);
-        name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid);
-    } else {
-        name = g_strdup(vbasedev->name);
-    }
-
-    vbasedev->use_regfds = false;
-
-    if (!vfio_attach_device(name, vbasedev,
-                            pci_device_iommu_address_space(pdev), errp)) {
-        goto error;
-    }
-
-    if (!vfio_populate_device(vdev, errp)) {
-        goto error;
-    }
-
-    /* Get a copy of config space */
-    ret = pread(vbasedev->fd, vdev->pdev.config,
-                MIN(pci_config_size(&vdev->pdev), vdev->config_size),
-                vdev->config_offset);
-    if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
-        ret = ret < 0 ? -errno : -EFAULT;
-        error_setg_errno(errp, -ret, "failed to read device config space");
-        goto error;
-    }
 
     /* vfio emulates a lot for us, but some bits need extra love */
     vdev->emulated_config_bits = g_malloc0(vdev->config_size);
@@ -3088,10 +3019,10 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     if (vdev->vendor_id != PCI_ANY_ID) {
         if (vdev->vendor_id >= 0xffff) {
             error_setg(errp, "invalid PCI vendor ID provided");
-            goto error;
+            return false;
         }
         vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0);
-        trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id);
+        trace_vfio_pci_emulated_vendor_id(vdev->vbasedev.name, vdev->vendor_id);
     } else {
         vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
     }
@@ -3099,7 +3030,7 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     if (vdev->device_id != PCI_ANY_ID) {
         if (vdev->device_id > 0xffff) {
             error_setg(errp, "invalid PCI device ID provided");
-            goto error;
+            return false;
         }
         vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0);
         trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id);
@@ -3110,7 +3041,7 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     if (vdev->sub_vendor_id != PCI_ANY_ID) {
         if (vdev->sub_vendor_id > 0xffff) {
             error_setg(errp, "invalid PCI subsystem vendor ID provided");
-            goto error;
+            return false;
         }
         vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID,
                                vdev->sub_vendor_id, ~0);
@@ -3121,7 +3052,7 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     if (vdev->sub_device_id != PCI_ANY_ID) {
         if (vdev->sub_device_id > 0xffff) {
             error_setg(errp, "invalid PCI subsystem device ID provided");
-            goto error;
+            return false;
         }
         vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0);
         trace_vfio_pci_emulated_sub_device_id(vbasedev->name,
@@ -3152,11 +3083,129 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
     vfio_bars_prepare(vdev);
 
     if (!vfio_msix_early_setup(vdev, errp)) {
-        goto error;
+        return false;
     }
 
     vfio_bars_register(vdev);
 
+    return true;
+}
+
+bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp)
+{
+    PCIDevice *pdev = &vdev->pdev;
+
+    /* QEMU emulates all of MSI & MSIX */
+    if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
+        memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
+               MSIX_CAP_LENGTH);
+    }
+
+    if (pdev->cap_present & QEMU_PCI_CAP_MSI) {
+        memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff,
+               vdev->msi_cap_size);
+    }
+
+    if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
+        vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
+                                                  vfio_intx_mmap_enable, vdev);
+        pci_device_set_intx_routing_notifier(&vdev->pdev,
+                                             vfio_intx_routing_notifier);
+        vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
+        kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
+        if (!vfio_intx_enable(vdev, errp)) {
+            pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+            kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier);
+            return false;
+        }
+    }
+    return true;
+}
+
+static void vfio_realize(PCIDevice *pdev, Error **errp)
+{
+    ERRP_GUARD();
+    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    int i, ret;
+    char uuid[UUID_STR_LEN];
+    g_autofree char *name = NULL;
+
+    if (vbasedev->fd < 0 && !vbasedev->sysfsdev) {
+        if (!(~vdev->host.domain || ~vdev->host.bus ||
+              ~vdev->host.slot || ~vdev->host.function)) {
+            error_setg(errp, "No provided host device");
+            error_append_hint(errp, "Use -device vfio-pci,host=DDDD:BB:DD.F "
+#ifdef CONFIG_IOMMUFD
+                              "or -device vfio-pci,fd=DEVICE_FD "
+#endif
+                              "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
+            return;
+        }
+        vbasedev->sysfsdev =
+            g_strdup_printf("/sys/bus/pci/devices/%04x:%02x:%02x.%01x",
+                            vdev->host.domain, vdev->host.bus,
+                            vdev->host.slot, vdev->host.function);
+    }
+
+    if (!vfio_device_get_name(vbasedev, errp)) {
+        return;
+    }
+
+    /*
+     * Mediated devices *might* operate compatibly with discarding of RAM, but
+     * we cannot know for certain, it depends on whether the mdev vendor driver
+     * stays in sync with the active working set of the guest driver.  Prevent
+     * the x-balloon-allowed option unless this is minimally an mdev device.
+     */
+    vbasedev->mdev = vfio_device_is_mdev(vbasedev);
+
+    trace_vfio_mdev(vbasedev->name, vbasedev->mdev);
+
+    if (vbasedev->ram_block_discard_allowed && !vbasedev->mdev) {
+        error_setg(errp, "x-balloon-allowed only potentially compatible "
+                   "with mdev devices");
+        goto error;
+    }
+
+    if (!qemu_uuid_is_null(&vdev->vf_token)) {
+        qemu_uuid_unparse(&vdev->vf_token, uuid);
+        name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid);
+    } else {
+        name = g_strdup(vbasedev->name);
+    }
+
+    vbasedev->use_regfds = false;
+
+    if (!vfio_attach_device(name, vbasedev,
+                            pci_device_iommu_address_space(pdev), errp)) {
+        goto error;
+    }
+
+    if (!vfio_populate_device(vdev, errp)) {
+        goto error;
+    }
+
+    /* Get a copy of config space */
+    ret = pread(vbasedev->fd, vdev->pdev.config,
+                MIN(pci_config_size(&vdev->pdev), vdev->config_size),
+                vdev->config_offset);
+    if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
+        ret = ret < 0 ? -errno : -EFAULT;
+        error_setg_errno(errp, -ret, "failed to read device config space");
+        goto error;
+    }
+
+    if (!vfio_pci_config_setup(vdev, errp)) {
+        goto error;
+    }
+
+    /*
+     * vfio_pci_config_setup will have registered the device's BARs
+     * and setup any MSIX BARs, so errors after it succeeds must
+     * use out_teardown
+     */
+
     if (!vbasedev->mdev &&
         !pci_device_set_iommu_device(pdev, vbasedev->hiod, errp)) {
         error_prepend(errp, "Failed to set iommu_device: ");
@@ -3200,28 +3249,14 @@  static void vfio_realize(PCIDevice *pdev, Error **errp)
         }
     }
 
-    /* QEMU emulates all of MSI & MSIX */
-    if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
-        memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
-               MSIX_CAP_LENGTH);
-    }
-
-    if (pdev->cap_present & QEMU_PCI_CAP_MSI) {
-        memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff,
-               vdev->msi_cap_size);
+    if (!vfio_interrupt_setup(vdev, errp)) {
+        goto out_teardown;
     }
 
-    if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
-        vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
-                                                  vfio_intx_mmap_enable, vdev);
-        pci_device_set_intx_routing_notifier(&vdev->pdev,
-                                             vfio_intx_routing_notifier);
-        vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
-        kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
-        if (!vfio_intx_enable(vdev, errp)) {
-            goto out_deregister;
-        }
-    }
+    /*
+     * vfio_interrupt_setup will have setup INTx's KVM routing
+     * so errors after it succeeds must use out_deregister
+     */
 
     if (vdev->display != ON_OFF_AUTO_OFF) {
         if (!vfio_display_probe(vdev, errp)) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index c0f030f4db..5fe6eb282c 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -218,6 +218,16 @@  Object *vfio_pci_get_object(VFIODevice *vbasedev);
 int vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f, Error **errp);
 int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f);
 void vfio_pci_put_device(VFIOPCIDevice *vdev);
+bool vfio_populate_device(VFIOPCIDevice *vdev, Error **errp);
+void vfio_teardown_msi(VFIOPCIDevice *vdev);
+void vfio_bars_exit(VFIOPCIDevice *vdev);
+void vfio_bars_finalize(VFIOPCIDevice *vdev);
+bool vfio_add_capabilities(VFIOPCIDevice *vdev, Error **errp);
+void vfio_put_device(VFIOPCIDevice *vdev);
+void vfio_register_err_notifier(VFIOPCIDevice *vdev);
+void vfio_register_req_notifier(VFIOPCIDevice *vdev);
+bool vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp);
+bool vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp);
 void vfio_instance_init(Object *obj);
 
 uint64_t vfio_vga_read(void *opaque, hwaddr addr, unsigned size);
diff --git a/hw/vfio/user-pci.c b/hw/vfio/user-pci.c
index aa5146db0a..5758e1e234 100644
--- a/hw/vfio/user-pci.c
+++ b/hw/vfio/user-pci.c
@@ -78,6 +78,7 @@  static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
     AddressSpace *as;
     SocketAddress addr;
     VFIOUserProxy *proxy;
+    int ret;
 
     /*
      * TODO: make option parser understand SocketAddress
@@ -130,8 +131,45 @@  static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
         goto error;
     }
 
+    if (!vfio_populate_device(vdev, errp)) {
+        goto error;
+    }
+
+    /* Get a copy of config space */
+    ret = vbasedev->io->region_read(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, 0,
+                               MIN(pci_config_size(pdev), vdev->config_size),
+                               pdev->config);
+    if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
+        error_setg_errno(errp, -ret, "failed to read device config space");
+        goto error;
+    }
+
+    if (!vfio_pci_config_setup(vdev, errp)) {
+        goto error;
+    }
+
+    /*
+     * vfio_pci_config_setup will have registered the device's BARs
+     * and setup any MSIX BARs, so errors after it succeeds must
+     * use out_teardown
+     */
+
+    if (!vfio_add_capabilities(vdev, errp)) {
+        goto out_teardown;
+    }
+
+    if (!vfio_interrupt_setup(vdev, errp)) {
+        goto out_teardown;
+    }
+
+    vfio_register_err_notifier(vdev);
+    vfio_register_req_notifier(vdev);
+
     return;
 
+out_teardown:
+    vfio_teardown_msi(vdev);
+    vfio_bars_exit(vdev);
 error:
     error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
 }
@@ -167,6 +205,10 @@  static void vfio_user_instance_finalize(Object *obj)
     VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
     VFIODevice *vbasedev = &vdev->vbasedev;
 
+    vfio_bars_finalize(vdev);
+    g_free(vdev->emulated_config_bits);
+    g_free(vdev->rom);
+
     vfio_pci_put_device(vdev);
 
     if (vbasedev->proxy != NULL) {