Message ID | 9b5702602c3bc0c79df893b269276c74b057026b.1667542066.git.john.g.johnson@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | vfio-user client | expand |
On Tue, Nov 08, 2022 at 03:13:35PM -0800, John Johnson wrote: > PCI BARs read from remote device > PCI config reads/writes sent to remote server Reviewed-by: John Levon <john.levon@nutanix.com> regards john
On 11/9/22 00:13, John Johnson wrote: > PCI BARs read from remote device > PCI config reads/writes sent to remote server > > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com> Reviewed-by: Cédric Le Goater <clg@redhat.com> Thanks, C. > --- > hw/vfio/pci.c | 277 ++++++++++++++++++++++++++++++++++++---------------------- > 1 file changed, 174 insertions(+), 103 deletions(-) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 027f9d5..7abe44e 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -2874,6 +2874,133 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) > vdev->req_enabled = false; > } > > +static void vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) > +{ > + PCIDevice *pdev = &vdev->pdev; > + VFIODevice *vbasedev = &vdev->vbasedev; > + Error *err = NULL; > + > + /* vfio emulates a lot for us, but some bits need extra love */ > + vdev->emulated_config_bits = g_malloc0(vdev->config_size); > + > + /* QEMU can choose to expose the ROM or not */ > + memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4); > + /* QEMU can also add or extend BARs */ > + memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4); > + > + /* > + * The PCI spec reserves vendor ID 0xffff as an invalid value. The > + * device ID is managed by the vendor and need only be a 16-bit value. > + * Allow any 16-bit value for subsystem so they can be hidden or changed. > + */ > + if (vdev->vendor_id != PCI_ANY_ID) { > + if (vdev->vendor_id >= 0xffff) { > + error_setg(errp, "invalid PCI vendor ID provided"); > + return; > + } > + vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); > + trace_vfio_pci_emulated_vendor_id(vdev->vbasedev.name, vdev->vendor_id); > + } else { > + vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); > + } > + > + if (vdev->device_id != PCI_ANY_ID) { > + if (vdev->device_id > 0xffff) { > + error_setg(errp, "invalid PCI device ID provided"); > + return; > + } > + vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); > + trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id); > + } else { > + vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); > + } > + > + if (vdev->sub_vendor_id != PCI_ANY_ID) { > + if (vdev->sub_vendor_id > 0xffff) { > + error_setg(errp, "invalid PCI subsystem vendor ID provided"); > + return; > + } > + vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID, > + vdev->sub_vendor_id, ~0); > + trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name, > + vdev->sub_vendor_id); > + } > + > + if (vdev->sub_device_id != PCI_ANY_ID) { > + if (vdev->sub_device_id > 0xffff) { > + error_setg(errp, "invalid PCI subsystem device ID provided"); > + return; > + } > + vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0); > + trace_vfio_pci_emulated_sub_device_id(vbasedev->name, > + vdev->sub_device_id); > + } > + > + /* QEMU can change multi-function devices to single function, or reverse */ > + vdev->emulated_config_bits[PCI_HEADER_TYPE] = > + PCI_HEADER_TYPE_MULTI_FUNCTION; > + > + /* Restore or clear multifunction, this is always controlled by QEMU */ > + if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { > + vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; > + } else { > + vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION; > + } > + > + /* > + * Clear host resource mapping info. If we choose not to register a > + * BAR, such as might be the case with the option ROM, we can get > + * confusing, unwritable, residual addresses from the host here. > + */ > + memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24); > + memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4); > + > + vfio_pci_size_rom(vdev); > + > + vfio_bars_prepare(vdev); > + > + vfio_msix_early_setup(vdev, &err); > + if (err) { > + error_propagate(errp, err); > + return; > + } > + > + vfio_bars_register(vdev); > +} > + > +static int vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) > +{ > + PCIDevice *pdev = &vdev->pdev; > + int ret; > + > + /* QEMU emulates all of MSI & MSIX */ > + if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { > + memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, > + MSIX_CAP_LENGTH); > + } > + > + if (pdev->cap_present & QEMU_PCI_CAP_MSI) { > + memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff, > + vdev->msi_cap_size); > + } > + > + if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { > + vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, > + vfio_intx_mmap_enable, vdev); > + pci_device_set_intx_routing_notifier(&vdev->pdev, > + vfio_intx_routing_notifier); > + vdev->irqchip_change_notifier.notify = vfio_irqchip_change; > + kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); > + ret = vfio_intx_enable(vdev, errp); > + if (ret) { > + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); > + kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); > + return ret; > + } > + } > + return 0; > +} > + > static void vfio_realize(PCIDevice *pdev, Error **errp) > { > VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); > @@ -2990,92 +3117,16 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > goto error; > } > > - /* vfio emulates a lot for us, but some bits need extra love */ > - vdev->emulated_config_bits = g_malloc0(vdev->config_size); > - > - /* QEMU can choose to expose the ROM or not */ > - memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4); > - /* QEMU can also add or extend BARs */ > - memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4); > - > - /* > - * The PCI spec reserves vendor ID 0xffff as an invalid value. The > - * device ID is managed by the vendor and need only be a 16-bit value. > - * Allow any 16-bit value for subsystem so they can be hidden or changed. > - */ > - if (vdev->vendor_id != PCI_ANY_ID) { > - if (vdev->vendor_id >= 0xffff) { > - error_setg(errp, "invalid PCI vendor ID provided"); > - goto error; > - } > - vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); > - trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id); > - } else { > - vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); > - } > - > - if (vdev->device_id != PCI_ANY_ID) { > - if (vdev->device_id > 0xffff) { > - error_setg(errp, "invalid PCI device ID provided"); > - goto error; > - } > - vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); > - trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id); > - } else { > - vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); > - } > - > - if (vdev->sub_vendor_id != PCI_ANY_ID) { > - if (vdev->sub_vendor_id > 0xffff) { > - error_setg(errp, "invalid PCI subsystem vendor ID provided"); > - goto error; > - } > - vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID, > - vdev->sub_vendor_id, ~0); > - trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name, > - vdev->sub_vendor_id); > - } > - > - if (vdev->sub_device_id != PCI_ANY_ID) { > - if (vdev->sub_device_id > 0xffff) { > - error_setg(errp, "invalid PCI subsystem device ID provided"); > - goto error; > - } > - vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0); > - trace_vfio_pci_emulated_sub_device_id(vbasedev->name, > - vdev->sub_device_id); > - } > - > - /* QEMU can change multi-function devices to single function, or reverse */ > - vdev->emulated_config_bits[PCI_HEADER_TYPE] = > - PCI_HEADER_TYPE_MULTI_FUNCTION; > - > - /* Restore or clear multifunction, this is always controlled by QEMU */ > - if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { > - vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; > - } else { > - vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION; > - } > - > - /* > - * Clear host resource mapping info. If we choose not to register a > - * BAR, such as might be the case with the option ROM, we can get > - * confusing, unwritable, residual addresses from the host here. > - */ > - memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24); > - memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4); > - > - vfio_pci_size_rom(vdev); > - > - vfio_bars_prepare(vdev); > - > - vfio_msix_early_setup(vdev, &err); > + vfio_pci_config_setup(vdev, &err); > if (err) { > - error_propagate(errp, err); > goto error; > } > > - vfio_bars_register(vdev); > + /* > + * vfio_pci_config_setup will have registered the device's BARs > + * and setup any MSIX BARs, so errors after it succeeds must > + * use out_teardown > + */ > > ret = vfio_add_capabilities(vdev, errp); > if (ret) { > @@ -3116,29 +3167,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > } > } > > - /* QEMU emulates all of MSI & MSIX */ > - if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { > - memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, > - MSIX_CAP_LENGTH); > - } > - > - if (pdev->cap_present & QEMU_PCI_CAP_MSI) { > - memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff, > - vdev->msi_cap_size); > + ret = vfio_interrupt_setup(vdev, errp); > + if (ret) { > + goto out_teardown; > } > > - if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { > - vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, > - vfio_intx_mmap_enable, vdev); > - pci_device_set_intx_routing_notifier(&vdev->pdev, > - vfio_intx_routing_notifier); > - vdev->irqchip_change_notifier.notify = vfio_irqchip_change; > - kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); > - ret = vfio_intx_enable(vdev, errp); > - if (ret) { > - goto out_deregister; > - } > - } > + /* > + * vfio_interrupt_setup will have setup INTx's KVM routing > + * so errors after it succeeds must use out_deregister > + */ > > if (vdev->display != ON_OFF_AUTO_OFF) { > ret = vfio_display_probe(vdev, errp); > @@ -3525,8 +3562,42 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) > goto error; > } > > + /* Get a copy of config space */ > + ret = VDEV_REGION_READ(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, 0, > + MIN(pci_config_size(pdev), vdev->config_size), > + pdev->config); > + if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { > + error_setg_errno(errp, -ret, "failed to read device config space"); > + goto error; > + } > + > + vfio_pci_config_setup(vdev, &err); > + if (err) { > + error_propagate(errp, err); > + goto error; > + } > + > + /* > + * vfio_pci_config_setup will have registered the device's BARs > + * and setup any MSIX BARs, so errors after it succeeds must > + * use out_teardown > + */ > + > + ret = vfio_add_capabilities(vdev, errp); > + if (ret) { > + goto out_teardown; > + } > + > + ret = vfio_interrupt_setup(vdev, errp); > + if (ret) { > + goto out_teardown; > + } > + > return; > > +out_teardown: > + vfio_teardown_msi(vdev); > + vfio_bars_exit(vdev); > error: > error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); > }
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 027f9d5..7abe44e 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2874,6 +2874,133 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev) vdev->req_enabled = false; } +static void vfio_pci_config_setup(VFIOPCIDevice *vdev, Error **errp) +{ + PCIDevice *pdev = &vdev->pdev; + VFIODevice *vbasedev = &vdev->vbasedev; + Error *err = NULL; + + /* vfio emulates a lot for us, but some bits need extra love */ + vdev->emulated_config_bits = g_malloc0(vdev->config_size); + + /* QEMU can choose to expose the ROM or not */ + memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4); + /* QEMU can also add or extend BARs */ + memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4); + + /* + * The PCI spec reserves vendor ID 0xffff as an invalid value. The + * device ID is managed by the vendor and need only be a 16-bit value. + * Allow any 16-bit value for subsystem so they can be hidden or changed. + */ + if (vdev->vendor_id != PCI_ANY_ID) { + if (vdev->vendor_id >= 0xffff) { + error_setg(errp, "invalid PCI vendor ID provided"); + return; + } + vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); + trace_vfio_pci_emulated_vendor_id(vdev->vbasedev.name, vdev->vendor_id); + } else { + vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); + } + + if (vdev->device_id != PCI_ANY_ID) { + if (vdev->device_id > 0xffff) { + error_setg(errp, "invalid PCI device ID provided"); + return; + } + vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); + trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id); + } else { + vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); + } + + if (vdev->sub_vendor_id != PCI_ANY_ID) { + if (vdev->sub_vendor_id > 0xffff) { + error_setg(errp, "invalid PCI subsystem vendor ID provided"); + return; + } + vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID, + vdev->sub_vendor_id, ~0); + trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name, + vdev->sub_vendor_id); + } + + if (vdev->sub_device_id != PCI_ANY_ID) { + if (vdev->sub_device_id > 0xffff) { + error_setg(errp, "invalid PCI subsystem device ID provided"); + return; + } + vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0); + trace_vfio_pci_emulated_sub_device_id(vbasedev->name, + vdev->sub_device_id); + } + + /* QEMU can change multi-function devices to single function, or reverse */ + vdev->emulated_config_bits[PCI_HEADER_TYPE] = + PCI_HEADER_TYPE_MULTI_FUNCTION; + + /* Restore or clear multifunction, this is always controlled by QEMU */ + if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { + vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; + } else { + vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION; + } + + /* + * Clear host resource mapping info. If we choose not to register a + * BAR, such as might be the case with the option ROM, we can get + * confusing, unwritable, residual addresses from the host here. + */ + memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24); + memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4); + + vfio_pci_size_rom(vdev); + + vfio_bars_prepare(vdev); + + vfio_msix_early_setup(vdev, &err); + if (err) { + error_propagate(errp, err); + return; + } + + vfio_bars_register(vdev); +} + +static int vfio_interrupt_setup(VFIOPCIDevice *vdev, Error **errp) +{ + PCIDevice *pdev = &vdev->pdev; + int ret; + + /* QEMU emulates all of MSI & MSIX */ + if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { + memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, + MSIX_CAP_LENGTH); + } + + if (pdev->cap_present & QEMU_PCI_CAP_MSI) { + memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff, + vdev->msi_cap_size); + } + + if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { + vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, + vfio_intx_mmap_enable, vdev); + pci_device_set_intx_routing_notifier(&vdev->pdev, + vfio_intx_routing_notifier); + vdev->irqchip_change_notifier.notify = vfio_irqchip_change; + kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); + ret = vfio_intx_enable(vdev, errp); + if (ret) { + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); + return ret; + } + } + return 0; +} + static void vfio_realize(PCIDevice *pdev, Error **errp) { VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev); @@ -2990,92 +3117,16 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) goto error; } - /* vfio emulates a lot for us, but some bits need extra love */ - vdev->emulated_config_bits = g_malloc0(vdev->config_size); - - /* QEMU can choose to expose the ROM or not */ - memset(vdev->emulated_config_bits + PCI_ROM_ADDRESS, 0xff, 4); - /* QEMU can also add or extend BARs */ - memset(vdev->emulated_config_bits + PCI_BASE_ADDRESS_0, 0xff, 6 * 4); - - /* - * The PCI spec reserves vendor ID 0xffff as an invalid value. The - * device ID is managed by the vendor and need only be a 16-bit value. - * Allow any 16-bit value for subsystem so they can be hidden or changed. - */ - if (vdev->vendor_id != PCI_ANY_ID) { - if (vdev->vendor_id >= 0xffff) { - error_setg(errp, "invalid PCI vendor ID provided"); - goto error; - } - vfio_add_emulated_word(vdev, PCI_VENDOR_ID, vdev->vendor_id, ~0); - trace_vfio_pci_emulated_vendor_id(vbasedev->name, vdev->vendor_id); - } else { - vdev->vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID); - } - - if (vdev->device_id != PCI_ANY_ID) { - if (vdev->device_id > 0xffff) { - error_setg(errp, "invalid PCI device ID provided"); - goto error; - } - vfio_add_emulated_word(vdev, PCI_DEVICE_ID, vdev->device_id, ~0); - trace_vfio_pci_emulated_device_id(vbasedev->name, vdev->device_id); - } else { - vdev->device_id = pci_get_word(pdev->config + PCI_DEVICE_ID); - } - - if (vdev->sub_vendor_id != PCI_ANY_ID) { - if (vdev->sub_vendor_id > 0xffff) { - error_setg(errp, "invalid PCI subsystem vendor ID provided"); - goto error; - } - vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_VENDOR_ID, - vdev->sub_vendor_id, ~0); - trace_vfio_pci_emulated_sub_vendor_id(vbasedev->name, - vdev->sub_vendor_id); - } - - if (vdev->sub_device_id != PCI_ANY_ID) { - if (vdev->sub_device_id > 0xffff) { - error_setg(errp, "invalid PCI subsystem device ID provided"); - goto error; - } - vfio_add_emulated_word(vdev, PCI_SUBSYSTEM_ID, vdev->sub_device_id, ~0); - trace_vfio_pci_emulated_sub_device_id(vbasedev->name, - vdev->sub_device_id); - } - - /* QEMU can change multi-function devices to single function, or reverse */ - vdev->emulated_config_bits[PCI_HEADER_TYPE] = - PCI_HEADER_TYPE_MULTI_FUNCTION; - - /* Restore or clear multifunction, this is always controlled by QEMU */ - if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { - vdev->pdev.config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION; - } else { - vdev->pdev.config[PCI_HEADER_TYPE] &= ~PCI_HEADER_TYPE_MULTI_FUNCTION; - } - - /* - * Clear host resource mapping info. If we choose not to register a - * BAR, such as might be the case with the option ROM, we can get - * confusing, unwritable, residual addresses from the host here. - */ - memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24); - memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4); - - vfio_pci_size_rom(vdev); - - vfio_bars_prepare(vdev); - - vfio_msix_early_setup(vdev, &err); + vfio_pci_config_setup(vdev, &err); if (err) { - error_propagate(errp, err); goto error; } - vfio_bars_register(vdev); + /* + * vfio_pci_config_setup will have registered the device's BARs + * and setup any MSIX BARs, so errors after it succeeds must + * use out_teardown + */ ret = vfio_add_capabilities(vdev, errp); if (ret) { @@ -3116,29 +3167,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) } } - /* QEMU emulates all of MSI & MSIX */ - if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { - memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, - MSIX_CAP_LENGTH); - } - - if (pdev->cap_present & QEMU_PCI_CAP_MSI) { - memset(vdev->emulated_config_bits + pdev->msi_cap, 0xff, - vdev->msi_cap_size); + ret = vfio_interrupt_setup(vdev, errp); + if (ret) { + goto out_teardown; } - if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) { - vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, - vfio_intx_mmap_enable, vdev); - pci_device_set_intx_routing_notifier(&vdev->pdev, - vfio_intx_routing_notifier); - vdev->irqchip_change_notifier.notify = vfio_irqchip_change; - kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier); - ret = vfio_intx_enable(vdev, errp); - if (ret) { - goto out_deregister; - } - } + /* + * vfio_interrupt_setup will have setup INTx's KVM routing + * so errors after it succeeds must use out_deregister + */ if (vdev->display != ON_OFF_AUTO_OFF) { ret = vfio_display_probe(vdev, errp); @@ -3525,8 +3562,42 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp) goto error; } + /* Get a copy of config space */ + ret = VDEV_REGION_READ(vbasedev, VFIO_PCI_CONFIG_REGION_INDEX, 0, + MIN(pci_config_size(pdev), vdev->config_size), + pdev->config); + if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) { + error_setg_errno(errp, -ret, "failed to read device config space"); + goto error; + } + + vfio_pci_config_setup(vdev, &err); + if (err) { + error_propagate(errp, err); + goto error; + } + + /* + * vfio_pci_config_setup will have registered the device's BARs + * and setup any MSIX BARs, so errors after it succeeds must + * use out_teardown + */ + + ret = vfio_add_capabilities(vdev, errp); + if (ret) { + goto out_teardown; + } + + ret = vfio_interrupt_setup(vdev, errp); + if (ret) { + goto out_teardown; + } + return; +out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); error: error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name); }