Message ID | 20250124191245.12464-6-tomitamoeko@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | vfio/igd: remove incorrect IO BAR4 quirk | expand |
On Sat, 25 Jan 2025 03:12:45 +0800 Tomita Moeko <tomitamoeko@gmail.com> wrote: > Both enable opregion option (x-igd-opregion) and legacy mode require > setting up OpRegion copy for IGD devices. Move x-igd-opregion handler > in vfio_realize() to vfio_probe_igd_config_quirk() to elimate duplicate > code. Finally we moved all the IGD-related code into igd.c. > > Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> > --- > hw/vfio/igd.c | 143 +++++++++++++++++++++++++++++++++---------- > hw/vfio/pci-quirks.c | 50 --------------- > hw/vfio/pci.c | 25 -------- > hw/vfio/pci.h | 4 -- > 4 files changed, 110 insertions(+), 112 deletions(-) > > diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c > index 6e06dd774a..015beacf5f 100644 > --- a/hw/vfio/igd.c > +++ b/hw/vfio/igd.c > @@ -106,6 +106,7 @@ static int igd_gen(VFIOPCIDevice *vdev) > return -1; > } > > +#define IGD_ASLS 0xfc /* ASL Storage Register */ > #define IGD_GMCH 0x50 /* Graphics Control Register */ > #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ > #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ > @@ -138,6 +139,55 @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) > return 0; > } > > +/* > + * The OpRegion includes the Video BIOS Table, which seems important for > + * telling the driver what sort of outputs it has. Without this, the device > + * may work in the guest, but we may not get output. This also requires BIOS > + * support to reserve and populate a section of guest memory sufficient for > + * the table and to write the base address of that memory to the ASLS register > + * of the IGD device. > + */ > +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, > + struct vfio_region_info *info, > + Error **errp) > +{ > + int ret; > + > + vdev->igd_opregion = g_malloc0(info->size); > + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, > + info->size, info->offset); > + if (ret != info->size) { > + error_setg(errp, "failed to read IGD OpRegion"); > + g_free(vdev->igd_opregion); > + vdev->igd_opregion = NULL; > + return false; > + } > + > + /* > + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to > + * allocate 32bit reserved memory for, copy these contents into, and write > + * the reserved memory base address to the device ASLS register at 0xFC. > + * Alignment of this reserved region seems flexible, but using a 4k page > + * alignment seems to work well. This interface assumes a single IGD > + * device, which may be at VM address 00:02.0 in legacy mode or another > + * address in UPT mode. > + * > + * NB, there may be future use cases discovered where the VM should have > + * direct interaction with the host OpRegion, in which case the write to > + * the ASLS register would trigger MemoryRegion setup to enable that. > + */ > + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", > + vdev->igd_opregion, info->size); > + > + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); > + > + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); > + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); > + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); > + > + return true; > +} > + > /* > * The rather short list of registers that we copy from the host devices. > * The LPC/ISA bridge values are definitely needed to support the vBIOS, the > @@ -339,29 +389,83 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) > QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); > } > > +static bool vfio_igd_try_enable_opregion(VFIOPCIDevice *vdev, Error **errp) > +{ > + g_autofree struct vfio_region_info *opregion = NULL; > + int ret; > + > + /* > + * Hotplugging is not supprted for both opregion access and legacy mode. > + * For legacy mode, we also need to mark the ROM failed. > + */ The explanation was a little better in the removed comment. > + if (vdev->pdev.qdev.hotplugged) { > + vdev->rom_read_failed = true; > + error_setg(errp, > + "IGD OpRegion is not supported on hotplugged device"); As was the error log. > + return false; > + } > + > + ret = vfio_get_dev_region_info(&vdev->vbasedev, > + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, > + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); > + if (ret) { > + error_setg_errno(errp, -ret, > + "device does not supports IGD OpRegion feature"); > + return false; > + } > + > + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { > + return false; > + } > + > + return true; > +} > + > bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, > - Error **errp G_GNUC_UNUSED) > + Error **errp) > { > g_autofree struct vfio_region_info *rom = NULL; > - g_autofree struct vfio_region_info *opregion = NULL; > g_autofree struct vfio_region_info *host = NULL; > g_autofree struct vfio_region_info *lpc = NULL; > + PCIBus *bus; > PCIDevice *lpc_bridge; > int ret, gen; > + bool legacy_mode, enable_opregion; > uint64_t gms_size; > uint64_t *bdsm_size; > uint32_t gmch; > Error *err = NULL; > > + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || > + !vfio_is_vga(vdev)) { > + return true; > + } > + > /* > * This must be an Intel VGA device at address 00:02.0 for us to even > * consider enabling legacy mode. The vBIOS has dependencies on the > * PCI bus address. > */ > - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || > - !vfio_is_vga(vdev) || > - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), > - 0, PCI_DEVFN(0x2, 0))) { > + bus = pci_device_root_bus(&vdev->pdev); > + legacy_mode = (&vdev->pdev == pci_find_device(bus, 0, PCI_DEVFN(0x2, 0))); > + enable_opregion = (vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION); > + > + if (!enable_opregion && !legacy_mode) { > + return true; > + } > + > + if (!vfio_igd_try_enable_opregion(vdev, &err)) { > + if (enable_opregion) { > + error_propagate(errp, err); > + return false; > + } else if (legacy_mode) { > + error_append_hint(&err, "IGD legacy mode disabled\n"); > + error_report_err(err); > + return true; > + } > + } > + > + if (!legacy_mode) { > return true; > } > > @@ -404,30 +508,10 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, > return true; > } > > - /* > - * Ignore the hotplug corner case, mark the ROM failed, we can't > - * create the devices we need for legacy mode in the hotplug scenario. > - */ > - if (vdev->pdev.qdev.hotplugged) { > - error_report("IGD device %s hotplugged, ROM disabled, " > - "legacy mode disabled", vdev->vbasedev.name); > - vdev->rom_read_failed = true; > - return true; > - } > - > /* > * Check whether we have all the vfio device specific regions to > * support legacy mode (added in Linux v4.6). If not, bail. > */ And we're disassociating opregion setup from this useful comment. What are we actually accomplishing here? What specific code duplication is eliminated? Why is it important to move all this code to igd.c? It's really difficult to untangle this refactor, I think it could be done more iteratively, if it's really even beneficial. Thanks, Alex > - ret = vfio_get_dev_region_info(&vdev->vbasedev, > - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, > - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); > - if (ret) { > - error_report("IGD device %s does not support OpRegion access," > - "legacy mode disabled", vdev->vbasedev.name); > - return true; > - } > - > ret = vfio_get_dev_region_info(&vdev->vbasedev, > VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, > VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); > @@ -476,13 +560,6 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, > return true; > } > > - /* Setup OpRegion access */ > - if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) { > - error_append_hint(&err, "IGD legacy mode disabled\n"); > - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); > - return true; > - } > - > /* > * Allow user to override dsm size using x-igd-gms option, in multiples of > * 32MiB. This option should only be used when the desired size cannot be > diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c > index b8379cb512..f2b37910f0 100644 > --- a/hw/vfio/pci-quirks.c > +++ b/hw/vfio/pci-quirks.c > @@ -1114,56 +1114,6 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) > trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); > } > > -#define IGD_ASLS 0xfc /* ASL Storage Register */ > - > -/* > - * The OpRegion includes the Video BIOS Table, which seems important for > - * telling the driver what sort of outputs it has. Without this, the device > - * may work in the guest, but we may not get output. This also requires BIOS > - * support to reserve and populate a section of guest memory sufficient for > - * the table and to write the base address of that memory to the ASLS register > - * of the IGD device. > - */ > -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, > - struct vfio_region_info *info, Error **errp) > -{ > - int ret; > - > - vdev->igd_opregion = g_malloc0(info->size); > - ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, > - info->size, info->offset); > - if (ret != info->size) { > - error_setg(errp, "failed to read IGD OpRegion"); > - g_free(vdev->igd_opregion); > - vdev->igd_opregion = NULL; > - return false; > - } > - > - /* > - * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to > - * allocate 32bit reserved memory for, copy these contents into, and write > - * the reserved memory base address to the device ASLS register at 0xFC. > - * Alignment of this reserved region seems flexible, but using a 4k page > - * alignment seems to work well. This interface assumes a single IGD > - * device, which may be at VM address 00:02.0 in legacy mode or another > - * address in UPT mode. > - * > - * NB, there may be future use cases discovered where the VM should have > - * direct interaction with the host OpRegion, in which case the write to > - * the ASLS register would trigger MemoryRegion setup to enable that. > - */ > - fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", > - vdev->igd_opregion, info->size); > - > - trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); > - > - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); > - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); > - pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); > - > - return true; > -} > - > /* > * Common quirk probe entry points. > */ > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index e624ae56c4..1b90c78c5a 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -3136,31 +3136,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) > vfio_bar_quirk_setup(vdev, i); > } > > - if (!vdev->igd_opregion && > - vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) { > - g_autofree struct vfio_region_info *opregion = NULL; > - > - if (vdev->pdev.qdev.hotplugged) { > - error_setg(errp, > - "cannot support IGD OpRegion feature on hotplugged " > - "device"); > - goto out_unset_idev; > - } > - > - ret = vfio_get_dev_region_info(vbasedev, > - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, > - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); > - if (ret) { > - error_setg_errno(errp, -ret, > - "does not support requested IGD OpRegion feature"); > - goto out_unset_idev; > - } > - > - if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { > - goto out_unset_idev; > - } > - } > - > /* QEMU emulates all of MSI & MSIX */ > if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { > memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h > index 5c64de0270..b9e920a6b4 100644 > --- a/hw/vfio/pci.h > +++ b/hw/vfio/pci.h > @@ -229,10 +229,6 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, > > bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); > > -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, > - struct vfio_region_info *info, > - Error **errp); > - > void vfio_display_reset(VFIOPCIDevice *vdev); > bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); > void vfio_display_finalize(VFIOPCIDevice *vdev);
On 1/25/25 05:13, Alex Williamson wrote: > On Sat, 25 Jan 2025 03:12:45 +0800 > Tomita Moeko <tomitamoeko@gmail.com> wrote: > >> Both enable opregion option (x-igd-opregion) and legacy mode require >> setting up OpRegion copy for IGD devices. Move x-igd-opregion handler >> in vfio_realize() to vfio_probe_igd_config_quirk() to elimate duplicate >> code. Finally we moved all the IGD-related code into igd.c. >> >> Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> >> --- >> hw/vfio/igd.c | 143 +++++++++++++++++++++++++++++++++---------- >> hw/vfio/pci-quirks.c | 50 --------------- >> hw/vfio/pci.c | 25 -------- >> hw/vfio/pci.h | 4 -- >> 4 files changed, 110 insertions(+), 112 deletions(-) >> >> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c >> index 6e06dd774a..015beacf5f 100644 >> --- a/hw/vfio/igd.c >> +++ b/hw/vfio/igd.c >> @@ -106,6 +106,7 @@ static int igd_gen(VFIOPCIDevice *vdev) >> return -1; >> } >> >> +#define IGD_ASLS 0xfc /* ASL Storage Register */ >> #define IGD_GMCH 0x50 /* Graphics Control Register */ >> #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ >> #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ >> @@ -138,6 +139,55 @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) >> return 0; >> } >> >> +/* >> + * The OpRegion includes the Video BIOS Table, which seems important for >> + * telling the driver what sort of outputs it has. Without this, the device >> + * may work in the guest, but we may not get output. This also requires BIOS >> + * support to reserve and populate a section of guest memory sufficient for >> + * the table and to write the base address of that memory to the ASLS register >> + * of the IGD device. >> + */ >> +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, >> + struct vfio_region_info *info, >> + Error **errp) >> +{ >> + int ret; >> + >> + vdev->igd_opregion = g_malloc0(info->size); >> + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, >> + info->size, info->offset); >> + if (ret != info->size) { >> + error_setg(errp, "failed to read IGD OpRegion"); >> + g_free(vdev->igd_opregion); >> + vdev->igd_opregion = NULL; >> + return false; >> + } >> + >> + /* >> + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to >> + * allocate 32bit reserved memory for, copy these contents into, and write >> + * the reserved memory base address to the device ASLS register at 0xFC. >> + * Alignment of this reserved region seems flexible, but using a 4k page >> + * alignment seems to work well. This interface assumes a single IGD >> + * device, which may be at VM address 00:02.0 in legacy mode or another >> + * address in UPT mode. >> + * >> + * NB, there may be future use cases discovered where the VM should have >> + * direct interaction with the host OpRegion, in which case the write to >> + * the ASLS register would trigger MemoryRegion setup to enable that. >> + */ >> + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", >> + vdev->igd_opregion, info->size); >> + >> + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); >> + >> + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); >> + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); >> + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); >> + >> + return true; >> +} >> + >> /* >> * The rather short list of registers that we copy from the host devices. >> * The LPC/ISA bridge values are definitely needed to support the vBIOS, the >> @@ -339,29 +389,83 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) >> QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); >> } >> >> +static bool vfio_igd_try_enable_opregion(VFIOPCIDevice *vdev, Error **errp) >> +{ >> + g_autofree struct vfio_region_info *opregion = NULL; >> + int ret; >> + >> + /* >> + * Hotplugging is not supprted for both opregion access and legacy mode. >> + * For legacy mode, we also need to mark the ROM failed. >> + */ > > The explanation was a little better in the removed comment. > >> + if (vdev->pdev.qdev.hotplugged) { >> + vdev->rom_read_failed = true; >> + error_setg(errp, >> + "IGD OpRegion is not supported on hotplugged device"); > > As was the error log. > >> + return false; >> + } >> + >> + ret = vfio_get_dev_region_info(&vdev->vbasedev, >> + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >> + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); >> + if (ret) { >> + error_setg_errno(errp, -ret, >> + "device does not supports IGD OpRegion feature"); >> + return false; >> + } >> + >> + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { >> + return false; >> + } >> + >> + return true; >> +} >> + >> bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >> - Error **errp G_GNUC_UNUSED) >> + Error **errp) >> { >> g_autofree struct vfio_region_info *rom = NULL; >> - g_autofree struct vfio_region_info *opregion = NULL; >> g_autofree struct vfio_region_info *host = NULL; >> g_autofree struct vfio_region_info *lpc = NULL; >> + PCIBus *bus; >> PCIDevice *lpc_bridge; >> int ret, gen; >> + bool legacy_mode, enable_opregion; >> uint64_t gms_size; >> uint64_t *bdsm_size; >> uint32_t gmch; >> Error *err = NULL; >> >> + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || >> + !vfio_is_vga(vdev)) { >> + return true; >> + } >> + >> /* >> * This must be an Intel VGA device at address 00:02.0 for us to even >> * consider enabling legacy mode. The vBIOS has dependencies on the >> * PCI bus address. >> */ >> - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || >> - !vfio_is_vga(vdev) || >> - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), >> - 0, PCI_DEVFN(0x2, 0))) { >> + bus = pci_device_root_bus(&vdev->pdev); >> + legacy_mode = (&vdev->pdev == pci_find_device(bus, 0, PCI_DEVFN(0x2, 0))); >> + enable_opregion = (vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION); >> + >> + if (!enable_opregion && !legacy_mode) { >> + return true; >> + } >> + >> + if (!vfio_igd_try_enable_opregion(vdev, &err)) { >> + if (enable_opregion) { >> + error_propagate(errp, err); >> + return false; >> + } else if (legacy_mode) { >> + error_append_hint(&err, "IGD legacy mode disabled\n"); >> + error_report_err(err); >> + return true; >> + } >> + } >> + >> + if (!legacy_mode) { >> return true; >> } >> >> @@ -404,30 +508,10 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >> return true; >> } >> >> - /* >> - * Ignore the hotplug corner case, mark the ROM failed, we can't >> - * create the devices we need for legacy mode in the hotplug scenario. >> - */ >> - if (vdev->pdev.qdev.hotplugged) { >> - error_report("IGD device %s hotplugged, ROM disabled, " >> - "legacy mode disabled", vdev->vbasedev.name); >> - vdev->rom_read_failed = true; >> - return true; >> - } >> - >> /* >> * Check whether we have all the vfio device specific regions to >> * support legacy mode (added in Linux v4.6). If not, bail. >> */ > > And we're disassociating opregion setup from this useful comment. > > What are we actually accomplishing here? What specific code > duplication is eliminated? This patch is designed for moving the opregion quirk in vfio_realize() to igd.c, for better isolation of the igd-specific code. Legacy mode also need to initialize opregion as x-igd-opregion=on option. These code are almost the same, except legacy mode continues on error, while x-igd-opregion fails. I am going to clearify that in the commit message of v3. > Why is it important to move all this code to igd.c? > > It's really difficult to untangle this refactor, I think it could be > done more iteratively, if it's really even beneficial. Thanks, > > Alex Agreed. Actually I'd like to totally remove the "legacy mode" and "UPT mode" concept in future, my proposal is: * Emulate and initialize ASLS and BDSM register unconditionally. These registers holds HPA, keeping the old value to guest is not a good idea * Make the host bridge and LPC bridge ID quirks optional like OpRegion. Recent Linux kernel and Windows driver seems not relying on it. This enables IGD passthrough on Q35 machines, but probably without UEFI GOP or VBIOS support, as it is observed they require specific LPC bridge DID to work. * Remove the requirement of IGD device class being VGA controller, this was previous discussed in my kernel change [1] * Update the document It would time consuming to implement all them, coding is not difficult, but I have to verify my change on diffrent platforms. And they are out of this patchset's scope I think. I personally perfers doing it in a future patchset. [1] https://lore.kernel.org/all/20250123163416.7653-1-tomitamoeko@gmail.com/ Thanks, Moeko >> - ret = vfio_get_dev_region_info(&vdev->vbasedev, >> - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >> - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); >> - if (ret) { >> - error_report("IGD device %s does not support OpRegion access," >> - "legacy mode disabled", vdev->vbasedev.name); >> - return true; >> - } >> - >> ret = vfio_get_dev_region_info(&vdev->vbasedev, >> VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >> VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); >> @@ -476,13 +560,6 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >> return true; >> } >> >> - /* Setup OpRegion access */ >> - if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) { >> - error_append_hint(&err, "IGD legacy mode disabled\n"); >> - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); >> - return true; >> - } >> - >> /* >> * Allow user to override dsm size using x-igd-gms option, in multiples of >> * 32MiB. This option should only be used when the desired size cannot be >> diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c >> index b8379cb512..f2b37910f0 100644 >> --- a/hw/vfio/pci-quirks.c >> +++ b/hw/vfio/pci-quirks.c >> @@ -1114,56 +1114,6 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) >> trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); >> } >> >> -#define IGD_ASLS 0xfc /* ASL Storage Register */ >> - >> -/* >> - * The OpRegion includes the Video BIOS Table, which seems important for >> - * telling the driver what sort of outputs it has. Without this, the device >> - * may work in the guest, but we may not get output. This also requires BIOS >> - * support to reserve and populate a section of guest memory sufficient for >> - * the table and to write the base address of that memory to the ASLS register >> - * of the IGD device. >> - */ >> -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, >> - struct vfio_region_info *info, Error **errp) >> -{ >> - int ret; >> - >> - vdev->igd_opregion = g_malloc0(info->size); >> - ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, >> - info->size, info->offset); >> - if (ret != info->size) { >> - error_setg(errp, "failed to read IGD OpRegion"); >> - g_free(vdev->igd_opregion); >> - vdev->igd_opregion = NULL; >> - return false; >> - } >> - >> - /* >> - * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to >> - * allocate 32bit reserved memory for, copy these contents into, and write >> - * the reserved memory base address to the device ASLS register at 0xFC. >> - * Alignment of this reserved region seems flexible, but using a 4k page >> - * alignment seems to work well. This interface assumes a single IGD >> - * device, which may be at VM address 00:02.0 in legacy mode or another >> - * address in UPT mode. >> - * >> - * NB, there may be future use cases discovered where the VM should have >> - * direct interaction with the host OpRegion, in which case the write to >> - * the ASLS register would trigger MemoryRegion setup to enable that. >> - */ >> - fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", >> - vdev->igd_opregion, info->size); >> - >> - trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); >> - >> - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); >> - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); >> - pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); >> - >> - return true; >> -} >> - >> /* >> * Common quirk probe entry points. >> */ >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index e624ae56c4..1b90c78c5a 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -3136,31 +3136,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >> vfio_bar_quirk_setup(vdev, i); >> } >> >> - if (!vdev->igd_opregion && >> - vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) { >> - g_autofree struct vfio_region_info *opregion = NULL; >> - >> - if (vdev->pdev.qdev.hotplugged) { >> - error_setg(errp, >> - "cannot support IGD OpRegion feature on hotplugged " >> - "device"); >> - goto out_unset_idev; >> - } >> - >> - ret = vfio_get_dev_region_info(vbasedev, >> - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >> - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); >> - if (ret) { >> - error_setg_errno(errp, -ret, >> - "does not support requested IGD OpRegion feature"); >> - goto out_unset_idev; >> - } >> - >> - if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { >> - goto out_unset_idev; >> - } >> - } >> - >> /* QEMU emulates all of MSI & MSIX */ >> if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { >> memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, >> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h >> index 5c64de0270..b9e920a6b4 100644 >> --- a/hw/vfio/pci.h >> +++ b/hw/vfio/pci.h >> @@ -229,10 +229,6 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, >> >> bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); >> >> -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, >> - struct vfio_region_info *info, >> - Error **errp); >> - >> void vfio_display_reset(VFIOPCIDevice *vdev); >> bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); >> void vfio_display_finalize(VFIOPCIDevice *vdev); >
On 1/25/25 15:42, Tomita Moeko wrote: > On 1/25/25 05:13, Alex Williamson wrote: >> On Sat, 25 Jan 2025 03:12:45 +0800 >> Tomita Moeko <tomitamoeko@gmail.com> wrote: >> >>> Both enable opregion option (x-igd-opregion) and legacy mode require >>> setting up OpRegion copy for IGD devices. Move x-igd-opregion handler >>> in vfio_realize() to vfio_probe_igd_config_quirk() to elimate duplicate >>> code. Finally we moved all the IGD-related code into igd.c. >>> >>> Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> >>> --- >>> hw/vfio/igd.c | 143 +++++++++++++++++++++++++++++++++---------- >>> hw/vfio/pci-quirks.c | 50 --------------- >>> hw/vfio/pci.c | 25 -------- >>> hw/vfio/pci.h | 4 -- >>> 4 files changed, 110 insertions(+), 112 deletions(-) >>> >>> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c >>> index 6e06dd774a..015beacf5f 100644 >>> --- a/hw/vfio/igd.c >>> +++ b/hw/vfio/igd.c >>> @@ -106,6 +106,7 @@ static int igd_gen(VFIOPCIDevice *vdev) >>> return -1; >>> } >>> >>> +#define IGD_ASLS 0xfc /* ASL Storage Register */ >>> #define IGD_GMCH 0x50 /* Graphics Control Register */ >>> #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ >>> #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ >>> @@ -138,6 +139,55 @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) >>> return 0; >>> } >>> >>> +/* >>> + * The OpRegion includes the Video BIOS Table, which seems important for >>> + * telling the driver what sort of outputs it has. Without this, the device >>> + * may work in the guest, but we may not get output. This also requires BIOS >>> + * support to reserve and populate a section of guest memory sufficient for >>> + * the table and to write the base address of that memory to the ASLS register >>> + * of the IGD device. >>> + */ >>> +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, >>> + struct vfio_region_info *info, >>> + Error **errp) >>> +{ >>> + int ret; >>> + >>> + vdev->igd_opregion = g_malloc0(info->size); >>> + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, >>> + info->size, info->offset); >>> + if (ret != info->size) { >>> + error_setg(errp, "failed to read IGD OpRegion"); >>> + g_free(vdev->igd_opregion); >>> + vdev->igd_opregion = NULL; >>> + return false; >>> + } >>> + >>> + /* >>> + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to >>> + * allocate 32bit reserved memory for, copy these contents into, and write >>> + * the reserved memory base address to the device ASLS register at 0xFC. >>> + * Alignment of this reserved region seems flexible, but using a 4k page >>> + * alignment seems to work well. This interface assumes a single IGD >>> + * device, which may be at VM address 00:02.0 in legacy mode or another >>> + * address in UPT mode. >>> + * >>> + * NB, there may be future use cases discovered where the VM should have >>> + * direct interaction with the host OpRegion, in which case the write to >>> + * the ASLS register would trigger MemoryRegion setup to enable that. >>> + */ >>> + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", >>> + vdev->igd_opregion, info->size); >>> + >>> + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); >>> + >>> + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); >>> + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); >>> + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); >>> + >>> + return true; >>> +} >>> + >>> /* >>> * The rather short list of registers that we copy from the host devices. >>> * The LPC/ISA bridge values are definitely needed to support the vBIOS, the >>> @@ -339,29 +389,83 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) >>> QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); >>> } >>> >>> +static bool vfio_igd_try_enable_opregion(VFIOPCIDevice *vdev, Error **errp) >>> +{ >>> + g_autofree struct vfio_region_info *opregion = NULL; >>> + int ret; >>> + >>> + /* >>> + * Hotplugging is not supprted for both opregion access and legacy mode. >>> + * For legacy mode, we also need to mark the ROM failed. >>> + */ >> >> The explanation was a little better in the removed comment. >> >>> + if (vdev->pdev.qdev.hotplugged) { >>> + vdev->rom_read_failed = true; >>> + error_setg(errp, >>> + "IGD OpRegion is not supported on hotplugged device"); >> >> As was the error log. >> >>> + return false; >>> + } >>> + >>> + ret = vfio_get_dev_region_info(&vdev->vbasedev, >>> + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >>> + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); >>> + if (ret) { >>> + error_setg_errno(errp, -ret, >>> + "device does not supports IGD OpRegion feature"); >>> + return false; >>> + } >>> + >>> + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { >>> + return false; >>> + } >>> + >>> + return true; >>> +} >>> + >>> bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >>> - Error **errp G_GNUC_UNUSED) >>> + Error **errp) >>> { >>> g_autofree struct vfio_region_info *rom = NULL; >>> - g_autofree struct vfio_region_info *opregion = NULL; >>> g_autofree struct vfio_region_info *host = NULL; >>> g_autofree struct vfio_region_info *lpc = NULL; >>> + PCIBus *bus; >>> PCIDevice *lpc_bridge; >>> int ret, gen; >>> + bool legacy_mode, enable_opregion; >>> uint64_t gms_size; >>> uint64_t *bdsm_size; >>> uint32_t gmch; >>> Error *err = NULL; >>> >>> + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || >>> + !vfio_is_vga(vdev)) { >>> + return true; >>> + } >>> + >>> /* >>> * This must be an Intel VGA device at address 00:02.0 for us to even >>> * consider enabling legacy mode. The vBIOS has dependencies on the >>> * PCI bus address. >>> */ >>> - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || >>> - !vfio_is_vga(vdev) || >>> - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), >>> - 0, PCI_DEVFN(0x2, 0))) { >>> + bus = pci_device_root_bus(&vdev->pdev); >>> + legacy_mode = (&vdev->pdev == pci_find_device(bus, 0, PCI_DEVFN(0x2, 0))); >>> + enable_opregion = (vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION); >>> + >>> + if (!enable_opregion && !legacy_mode) { >>> + return true; >>> + } >>> + >>> + if (!vfio_igd_try_enable_opregion(vdev, &err)) { >>> + if (enable_opregion) { >>> + error_propagate(errp, err); >>> + return false; >>> + } else if (legacy_mode) { >>> + error_append_hint(&err, "IGD legacy mode disabled\n"); >>> + error_report_err(err); >>> + return true; >>> + } >>> + } >>> + >>> + if (!legacy_mode) { >>> return true; >>> } >>> >>> @@ -404,30 +508,10 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >>> return true; >>> } >>> >>> - /* >>> - * Ignore the hotplug corner case, mark the ROM failed, we can't >>> - * create the devices we need for legacy mode in the hotplug scenario. >>> - */ >>> - if (vdev->pdev.qdev.hotplugged) { >>> - error_report("IGD device %s hotplugged, ROM disabled, " >>> - "legacy mode disabled", vdev->vbasedev.name); >>> - vdev->rom_read_failed = true; >>> - return true; >>> - } >>> - >>> /* >>> * Check whether we have all the vfio device specific regions to >>> * support legacy mode (added in Linux v4.6). If not, bail. >>> */ >>> And we're disassociating opregion setup from this useful comment. >> >> What are we actually accomplishing here? What specific code >> duplication is eliminated? > > This patch is designed for moving the opregion quirk in vfio_realize() > to igd.c, for better isolation of the igd-specific code. Legacy mode > also need to initialize opregion as x-igd-opregion=on option. These > code are almost the same, except legacy mode continues on error, while > x-igd-opregion fails. > > I am going to clearify that in the commit message of v3. > >> Why is it important to move all this code to igd.c? x-igd-opreqion quirk is currently located in pci-quirks.c, which is not controlled by CONFIG_VFIO_IGD, moving it to igd.c prevents building that unnecessary code in certain binaries, for example, non x86 builds. >> It's really difficult to untangle this refactor, I think it could be >> done more iteratively, if it's really even beneficial. Thanks, >> >> Alex > > Agreed. Actually I'd like to totally remove the "legacy mode" and "UPT > mode" concept in future, my proposal is: > * Emulate and initialize ASLS and BDSM register unconditionally. These > registers holds HPA, keeping the old value to guest is not a good > idea > * Make the host bridge and LPC bridge ID quirks optional like OpRegion. > Recent Linux kernel and Windows driver seems not relying on it. This > enables IGD passthrough on Q35 machines, but probably without UEFI > GOP or VBIOS support, as it is observed they require specific LPC > bridge DID to work. > * Remove the requirement of IGD device class being VGA controller, this > was previous discussed in my kernel change [1] > * Update the document > > It would time consuming to implement all them, coding is not difficult, > but I have to verify my change on diffrent platforms. And they are out > of this patchset's scope I think. I personally perfers doing it in a > future patchset. > > [1] https://lore.kernel.org/all/20250123163416.7653-1-tomitamoeko@gmail.com/ > > Thanks, > Moeko Please let me know if you have any thoughts or suggestions, in case you missed the previous mail. Best Regards, Moeko >>> - ret = vfio_get_dev_region_info(&vdev->vbasedev, >>> - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >>> - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); >>> - if (ret) { >>> - error_report("IGD device %s does not support OpRegion access," >>> - "legacy mode disabled", vdev->vbasedev.name); >>> - return true; >>> - } >>> - >>> ret = vfio_get_dev_region_info(&vdev->vbasedev, >>> VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >>> VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); >>> @@ -476,13 +560,6 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >>> return true; >>> } >>> >>> - /* Setup OpRegion access */ >>> - if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) { >>> - error_append_hint(&err, "IGD legacy mode disabled\n"); >>> - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); >>> - return true; >>> - } >>> - >>> /* >>> * Allow user to override dsm size using x-igd-gms option, in multiples of >>> * 32MiB. This option should only be used when the desired size cannot be >>> diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c >>> index b8379cb512..f2b37910f0 100644 >>> --- a/hw/vfio/pci-quirks.c >>> +++ b/hw/vfio/pci-quirks.c >>> @@ -1114,56 +1114,6 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) >>> trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); >>> } >>> >>> -#define IGD_ASLS 0xfc /* ASL Storage Register */ >>> - >>> -/* >>> - * The OpRegion includes the Video BIOS Table, which seems important for >>> - * telling the driver what sort of outputs it has. Without this, the device >>> - * may work in the guest, but we may not get output. This also requires BIOS >>> - * support to reserve and populate a section of guest memory sufficient for >>> - * the table and to write the base address of that memory to the ASLS register >>> - * of the IGD device. >>> - */ >>> -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, >>> - struct vfio_region_info *info, Error **errp) >>> -{ >>> - int ret; >>> - >>> - vdev->igd_opregion = g_malloc0(info->size); >>> - ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, >>> - info->size, info->offset); >>> - if (ret != info->size) { >>> - error_setg(errp, "failed to read IGD OpRegion"); >>> - g_free(vdev->igd_opregion); >>> - vdev->igd_opregion = NULL; >>> - return false; >>> - } >>> - >>> - /* >>> - * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to >>> - * allocate 32bit reserved memory for, copy these contents into, and write >>> - * the reserved memory base address to the device ASLS register at 0xFC. >>> - * Alignment of this reserved region seems flexible, but using a 4k page >>> - * alignment seems to work well. This interface assumes a single IGD >>> - * device, which may be at VM address 00:02.0 in legacy mode or another >>> - * address in UPT mode. >>> - * >>> - * NB, there may be future use cases discovered where the VM should have >>> - * direct interaction with the host OpRegion, in which case the write to >>> - * the ASLS register would trigger MemoryRegion setup to enable that. >>> - */ >>> - fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", >>> - vdev->igd_opregion, info->size); >>> - >>> - trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); >>> - >>> - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); >>> - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); >>> - pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); >>> - >>> - return true; >>> -} >>> - >>> /* >>> * Common quirk probe entry points. >>> */ >>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >>> index e624ae56c4..1b90c78c5a 100644 >>> --- a/hw/vfio/pci.c >>> +++ b/hw/vfio/pci.c >>> @@ -3136,31 +3136,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) >>> vfio_bar_quirk_setup(vdev, i); >>> } >>> >>> - if (!vdev->igd_opregion && >>> - vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) { >>> - g_autofree struct vfio_region_info *opregion = NULL; >>> - >>> - if (vdev->pdev.qdev.hotplugged) { >>> - error_setg(errp, >>> - "cannot support IGD OpRegion feature on hotplugged " >>> - "device"); >>> - goto out_unset_idev; >>> - } >>> - >>> - ret = vfio_get_dev_region_info(vbasedev, >>> - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >>> - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); >>> - if (ret) { >>> - error_setg_errno(errp, -ret, >>> - "does not support requested IGD OpRegion feature"); >>> - goto out_unset_idev; >>> - } >>> - >>> - if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { >>> - goto out_unset_idev; >>> - } >>> - } >>> - >>> /* QEMU emulates all of MSI & MSIX */ >>> if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { >>> memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, >>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h >>> index 5c64de0270..b9e920a6b4 100644 >>> --- a/hw/vfio/pci.h >>> +++ b/hw/vfio/pci.h >>> @@ -229,10 +229,6 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, >>> >>> bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); >>> >>> -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, >>> - struct vfio_region_info *info, >>> - Error **errp); >>> - >>> void vfio_display_reset(VFIOPCIDevice *vdev); >>> bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); >>> void vfio_display_finalize(VFIOPCIDevice *vdev); >> >
On Fri, 31 Jan 2025 02:33:03 +0800 Tomita Moeko <tomitamoeko@gmail.com> wrote: > On 1/25/25 15:42, Tomita Moeko wrote: > > On 1/25/25 05:13, Alex Williamson wrote: > >> On Sat, 25 Jan 2025 03:12:45 +0800 > >> Tomita Moeko <tomitamoeko@gmail.com> wrote: > >> > >>> Both enable opregion option (x-igd-opregion) and legacy mode require > >>> setting up OpRegion copy for IGD devices. Move x-igd-opregion handler > >>> in vfio_realize() to vfio_probe_igd_config_quirk() to elimate duplicate > >>> code. Finally we moved all the IGD-related code into igd.c. > >>> > >>> Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> > >>> --- > >>> hw/vfio/igd.c | 143 +++++++++++++++++++++++++++++++++---------- > >>> hw/vfio/pci-quirks.c | 50 --------------- > >>> hw/vfio/pci.c | 25 -------- > >>> hw/vfio/pci.h | 4 -- > >>> 4 files changed, 110 insertions(+), 112 deletions(-) > >>> > >>> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c > >>> index 6e06dd774a..015beacf5f 100644 > >>> --- a/hw/vfio/igd.c > >>> +++ b/hw/vfio/igd.c > >>> @@ -106,6 +106,7 @@ static int igd_gen(VFIOPCIDevice *vdev) > >>> return -1; > >>> } > >>> > >>> +#define IGD_ASLS 0xfc /* ASL Storage Register */ > >>> #define IGD_GMCH 0x50 /* Graphics Control Register */ > >>> #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ > >>> #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ > >>> @@ -138,6 +139,55 @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) > >>> return 0; > >>> } > >>> > >>> +/* > >>> + * The OpRegion includes the Video BIOS Table, which seems important for > >>> + * telling the driver what sort of outputs it has. Without this, the device > >>> + * may work in the guest, but we may not get output. This also requires BIOS > >>> + * support to reserve and populate a section of guest memory sufficient for > >>> + * the table and to write the base address of that memory to the ASLS register > >>> + * of the IGD device. > >>> + */ > >>> +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, > >>> + struct vfio_region_info *info, > >>> + Error **errp) > >>> +{ > >>> + int ret; > >>> + > >>> + vdev->igd_opregion = g_malloc0(info->size); > >>> + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, > >>> + info->size, info->offset); > >>> + if (ret != info->size) { > >>> + error_setg(errp, "failed to read IGD OpRegion"); > >>> + g_free(vdev->igd_opregion); > >>> + vdev->igd_opregion = NULL; > >>> + return false; > >>> + } > >>> + > >>> + /* > >>> + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to > >>> + * allocate 32bit reserved memory for, copy these contents into, and write > >>> + * the reserved memory base address to the device ASLS register at 0xFC. > >>> + * Alignment of this reserved region seems flexible, but using a 4k page > >>> + * alignment seems to work well. This interface assumes a single IGD > >>> + * device, which may be at VM address 00:02.0 in legacy mode or another > >>> + * address in UPT mode. > >>> + * > >>> + * NB, there may be future use cases discovered where the VM should have > >>> + * direct interaction with the host OpRegion, in which case the write to > >>> + * the ASLS register would trigger MemoryRegion setup to enable that. > >>> + */ > >>> + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", > >>> + vdev->igd_opregion, info->size); > >>> + > >>> + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); > >>> + > >>> + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); > >>> + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); > >>> + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); > >>> + > >>> + return true; > >>> +} > >>> + > >>> /* > >>> * The rather short list of registers that we copy from the host devices. > >>> * The LPC/ISA bridge values are definitely needed to support the vBIOS, the > >>> @@ -339,29 +389,83 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) > >>> QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); > >>> } > >>> > >>> +static bool vfio_igd_try_enable_opregion(VFIOPCIDevice *vdev, Error **errp) > >>> +{ > >>> + g_autofree struct vfio_region_info *opregion = NULL; > >>> + int ret; > >>> + > >>> + /* > >>> + * Hotplugging is not supprted for both opregion access and legacy mode. > >>> + * For legacy mode, we also need to mark the ROM failed. > >>> + */ > >> > >> The explanation was a little better in the removed comment. > >> > >>> + if (vdev->pdev.qdev.hotplugged) { > >>> + vdev->rom_read_failed = true; > >>> + error_setg(errp, > >>> + "IGD OpRegion is not supported on hotplugged device"); > >> > >> As was the error log. > >> > >>> + return false; > >>> + } > >>> + > >>> + ret = vfio_get_dev_region_info(&vdev->vbasedev, > >>> + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, > >>> + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); > >>> + if (ret) { > >>> + error_setg_errno(errp, -ret, > >>> + "device does not supports IGD OpRegion feature"); > >>> + return false; > >>> + } > >>> + > >>> + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { > >>> + return false; > >>> + } > >>> + > >>> + return true; > >>> +} > >>> + > >>> bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, > >>> - Error **errp G_GNUC_UNUSED) > >>> + Error **errp) > >>> { > >>> g_autofree struct vfio_region_info *rom = NULL; > >>> - g_autofree struct vfio_region_info *opregion = NULL; > >>> g_autofree struct vfio_region_info *host = NULL; > >>> g_autofree struct vfio_region_info *lpc = NULL; > >>> + PCIBus *bus; > >>> PCIDevice *lpc_bridge; > >>> int ret, gen; > >>> + bool legacy_mode, enable_opregion; > >>> uint64_t gms_size; > >>> uint64_t *bdsm_size; > >>> uint32_t gmch; > >>> Error *err = NULL; > >>> > >>> + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || > >>> + !vfio_is_vga(vdev)) { > >>> + return true; > >>> + } > >>> + > >>> /* > >>> * This must be an Intel VGA device at address 00:02.0 for us to even > >>> * consider enabling legacy mode. The vBIOS has dependencies on the > >>> * PCI bus address. > >>> */ > >>> - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || > >>> - !vfio_is_vga(vdev) || > >>> - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), > >>> - 0, PCI_DEVFN(0x2, 0))) { > >>> + bus = pci_device_root_bus(&vdev->pdev); > >>> + legacy_mode = (&vdev->pdev == pci_find_device(bus, 0, PCI_DEVFN(0x2, 0))); > >>> + enable_opregion = (vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION); > >>> + > >>> + if (!enable_opregion && !legacy_mode) { > >>> + return true; > >>> + } > >>> + > >>> + if (!vfio_igd_try_enable_opregion(vdev, &err)) { > >>> + if (enable_opregion) { > >>> + error_propagate(errp, err); > >>> + return false; > >>> + } else if (legacy_mode) { > >>> + error_append_hint(&err, "IGD legacy mode disabled\n"); > >>> + error_report_err(err); > >>> + return true; > >>> + } > >>> + } > >>> + > >>> + if (!legacy_mode) { > >>> return true; > >>> } > >>> > >>> @@ -404,30 +508,10 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, > >>> return true; > >>> } > >>> > >>> - /* > >>> - * Ignore the hotplug corner case, mark the ROM failed, we can't > >>> - * create the devices we need for legacy mode in the hotplug scenario. > >>> - */ > >>> - if (vdev->pdev.qdev.hotplugged) { > >>> - error_report("IGD device %s hotplugged, ROM disabled, " > >>> - "legacy mode disabled", vdev->vbasedev.name); > >>> - vdev->rom_read_failed = true; > >>> - return true; > >>> - } > >>> - > >>> /* > >>> * Check whether we have all the vfio device specific regions to > >>> * support legacy mode (added in Linux v4.6). If not, bail. > >>> */ > >>> And we're disassociating opregion setup from this useful comment. > >> > >> What are we actually accomplishing here? What specific code > >> duplication is eliminated? > > > > This patch is designed for moving the opregion quirk in vfio_realize() > > to igd.c, for better isolation of the igd-specific code. Legacy mode > > also need to initialize opregion as x-igd-opregion=on option. These > > code are almost the same, except legacy mode continues on error, while > > x-igd-opregion fails. > > > > I am going to clearify that in the commit message of v3. > > > >> Why is it important to move all this code to igd.c? > > x-igd-opreqion quirk is currently located in pci-quirks.c, which is not > controlled by CONFIG_VFIO_IGD, moving it to igd.c prevents building > that unnecessary code in certain binaries, for example, non x86 builds. > > >> It's really difficult to untangle this refactor, I think it could be > >> done more iteratively, if it's really even beneficial. Thanks, > >> > >> Alex > > > > Agreed. Actually I'd like to totally remove the "legacy mode" and "UPT > > mode" concept in future, my proposal is: > > * Emulate and initialize ASLS and BDSM register unconditionally. These > > registers holds HPA, keeping the old value to guest is not a good > > idea > > * Make the host bridge and LPC bridge ID quirks optional like OpRegion. > > Recent Linux kernel and Windows driver seems not relying on it. This > > enables IGD passthrough on Q35 machines, but probably without UEFI > > GOP or VBIOS support, as it is observed they require specific LPC > > bridge DID to work. > > * Remove the requirement of IGD device class being VGA controller, this > > was previous discussed in my kernel change [1] > > * Update the document > > > > It would time consuming to implement all them, coding is not difficult, > > but I have to verify my change on diffrent platforms. And they are out > > of this patchset's scope I think. I personally perfers doing it in a > > future patchset. > > > > [1] https://lore.kernel.org/all/20250123163416.7653-1-tomitamoeko@gmail.com/ > > > > Thanks, > > Moeko > > Please let me know if you have any thoughts or suggestions, in case > you missed the previous mail. TBH, I'm surprised there's so much interest in direct assignment of igd. I'd be curious in your motivation, if you can share it. Regardless, it's nice to see it updated for newer hardware and I don't mind the goal of isolating the code so it can be disabled on other archs, so long as we can do so in small, logical steps that are easy to follow. At this point, the idea of legacy vs UPT might only exist in QEMU. There are going to be some challenges to avoid breaking existing VM command lines if the host and LPC bridge quirks become optional. There are a couple x-igd- options that we're free to break as they've always been experimental, but the implicit LPC bridge and host bridge quirks need to be considered carefully. The fact that "legacy" mode has never previously worked on q35 could mean that we can tie those quirks to a new experimental option that's off by default and only enabled for 440fx machine types. I'm glad you included the documentation update in your list, it's clearly out of date, as is some of my knowledge regarding guest driver requirements. I hope we can make some progress on uefi support as well, as that's essentially a requirement for newer guests. If we can't get the code upstream into edk2, maybe we can at least document steps for others to create images. Thanks, Alex
On 1/30/25 21:41, Alex Williamson wrote: > On Fri, 31 Jan 2025 02:33:03 +0800 > Tomita Moeko <tomitamoeko@gmail.com> wrote: > >> On 1/25/25 15:42, Tomita Moeko wrote: >>> On 1/25/25 05:13, Alex Williamson wrote: >>>> On Sat, 25 Jan 2025 03:12:45 +0800 >>>> Tomita Moeko <tomitamoeko@gmail.com> wrote: >>>> >>>>> Both enable opregion option (x-igd-opregion) and legacy mode require >>>>> setting up OpRegion copy for IGD devices. Move x-igd-opregion handler >>>>> in vfio_realize() to vfio_probe_igd_config_quirk() to elimate duplicate >>>>> code. Finally we moved all the IGD-related code into igd.c. >>>>> >>>>> Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> >>>>> --- >>>>> hw/vfio/igd.c | 143 +++++++++++++++++++++++++++++++++---------- >>>>> hw/vfio/pci-quirks.c | 50 --------------- >>>>> hw/vfio/pci.c | 25 -------- >>>>> hw/vfio/pci.h | 4 -- >>>>> 4 files changed, 110 insertions(+), 112 deletions(-) >>>>> >>>>> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c >>>>> index 6e06dd774a..015beacf5f 100644 >>>>> --- a/hw/vfio/igd.c >>>>> +++ b/hw/vfio/igd.c >>>>> @@ -106,6 +106,7 @@ static int igd_gen(VFIOPCIDevice *vdev) >>>>> return -1; >>>>> } >>>>> >>>>> +#define IGD_ASLS 0xfc /* ASL Storage Register */ >>>>> #define IGD_GMCH 0x50 /* Graphics Control Register */ >>>>> #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ >>>>> #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ >>>>> @@ -138,6 +139,55 @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) >>>>> return 0; >>>>> } >>>>> >>>>> +/* >>>>> + * The OpRegion includes the Video BIOS Table, which seems important for >>>>> + * telling the driver what sort of outputs it has. Without this, the device >>>>> + * may work in the guest, but we may not get output. This also requires BIOS >>>>> + * support to reserve and populate a section of guest memory sufficient for >>>>> + * the table and to write the base address of that memory to the ASLS register >>>>> + * of the IGD device. >>>>> + */ >>>>> +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, >>>>> + struct vfio_region_info *info, >>>>> + Error **errp) >>>>> +{ >>>>> + int ret; >>>>> + >>>>> + vdev->igd_opregion = g_malloc0(info->size); >>>>> + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, >>>>> + info->size, info->offset); >>>>> + if (ret != info->size) { >>>>> + error_setg(errp, "failed to read IGD OpRegion"); >>>>> + g_free(vdev->igd_opregion); >>>>> + vdev->igd_opregion = NULL; >>>>> + return false; >>>>> + } >>>>> + >>>>> + /* >>>>> + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to >>>>> + * allocate 32bit reserved memory for, copy these contents into, and write >>>>> + * the reserved memory base address to the device ASLS register at 0xFC. >>>>> + * Alignment of this reserved region seems flexible, but using a 4k page >>>>> + * alignment seems to work well. This interface assumes a single IGD >>>>> + * device, which may be at VM address 00:02.0 in legacy mode or another >>>>> + * address in UPT mode. >>>>> + * >>>>> + * NB, there may be future use cases discovered where the VM should have >>>>> + * direct interaction with the host OpRegion, in which case the write to >>>>> + * the ASLS register would trigger MemoryRegion setup to enable that. >>>>> + */ >>>>> + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", >>>>> + vdev->igd_opregion, info->size); >>>>> + >>>>> + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); >>>>> + >>>>> + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); >>>>> + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); >>>>> + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); >>>>> + >>>>> + return true; >>>>> +} >>>>> + >>>>> /* >>>>> * The rather short list of registers that we copy from the host devices. >>>>> * The LPC/ISA bridge values are definitely needed to support the vBIOS, the >>>>> @@ -339,29 +389,83 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) >>>>> QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); >>>>> } >>>>> >>>>> +static bool vfio_igd_try_enable_opregion(VFIOPCIDevice *vdev, Error **errp) >>>>> +{ >>>>> + g_autofree struct vfio_region_info *opregion = NULL; >>>>> + int ret; >>>>> + >>>>> + /* >>>>> + * Hotplugging is not supprted for both opregion access and legacy mode. >>>>> + * For legacy mode, we also need to mark the ROM failed. >>>>> + */ >>>> >>>> The explanation was a little better in the removed comment. >>>> >>>>> + if (vdev->pdev.qdev.hotplugged) { >>>>> + vdev->rom_read_failed = true; >>>>> + error_setg(errp, >>>>> + "IGD OpRegion is not supported on hotplugged device"); >>>> >>>> As was the error log. >>>> >>>>> + return false; >>>>> + } >>>>> + >>>>> + ret = vfio_get_dev_region_info(&vdev->vbasedev, >>>>> + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >>>>> + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); >>>>> + if (ret) { >>>>> + error_setg_errno(errp, -ret, >>>>> + "device does not supports IGD OpRegion feature"); >>>>> + return false; >>>>> + } >>>>> + >>>>> + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { >>>>> + return false; >>>>> + } >>>>> + >>>>> + return true; >>>>> +} >>>>> + >>>>> bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >>>>> - Error **errp G_GNUC_UNUSED) >>>>> + Error **errp) >>>>> { >>>>> g_autofree struct vfio_region_info *rom = NULL; >>>>> - g_autofree struct vfio_region_info *opregion = NULL; >>>>> g_autofree struct vfio_region_info *host = NULL; >>>>> g_autofree struct vfio_region_info *lpc = NULL; >>>>> + PCIBus *bus; >>>>> PCIDevice *lpc_bridge; >>>>> int ret, gen; >>>>> + bool legacy_mode, enable_opregion; >>>>> uint64_t gms_size; >>>>> uint64_t *bdsm_size; >>>>> uint32_t gmch; >>>>> Error *err = NULL; >>>>> >>>>> + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || >>>>> + !vfio_is_vga(vdev)) { >>>>> + return true; >>>>> + } >>>>> + >>>>> /* >>>>> * This must be an Intel VGA device at address 00:02.0 for us to even >>>>> * consider enabling legacy mode. The vBIOS has dependencies on the >>>>> * PCI bus address. >>>>> */ >>>>> - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || >>>>> - !vfio_is_vga(vdev) || >>>>> - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), >>>>> - 0, PCI_DEVFN(0x2, 0))) { >>>>> + bus = pci_device_root_bus(&vdev->pdev); >>>>> + legacy_mode = (&vdev->pdev == pci_find_device(bus, 0, PCI_DEVFN(0x2, 0))); >>>>> + enable_opregion = (vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION); >>>>> + >>>>> + if (!enable_opregion && !legacy_mode) { >>>>> + return true; >>>>> + } >>>>> + >>>>> + if (!vfio_igd_try_enable_opregion(vdev, &err)) { >>>>> + if (enable_opregion) { >>>>> + error_propagate(errp, err); >>>>> + return false; >>>>> + } else if (legacy_mode) { >>>>> + error_append_hint(&err, "IGD legacy mode disabled\n"); >>>>> + error_report_err(err); >>>>> + return true; >>>>> + } >>>>> + } >>>>> + >>>>> + if (!legacy_mode) { >>>>> return true; >>>>> } >>>>> >>>>> @@ -404,30 +508,10 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >>>>> return true; >>>>> } >>>>> >>>>> - /* >>>>> - * Ignore the hotplug corner case, mark the ROM failed, we can't >>>>> - * create the devices we need for legacy mode in the hotplug scenario. >>>>> - */ >>>>> - if (vdev->pdev.qdev.hotplugged) { >>>>> - error_report("IGD device %s hotplugged, ROM disabled, " >>>>> - "legacy mode disabled", vdev->vbasedev.name); >>>>> - vdev->rom_read_failed = true; >>>>> - return true; >>>>> - } >>>>> - >>>>> /* >>>>> * Check whether we have all the vfio device specific regions to >>>>> * support legacy mode (added in Linux v4.6). If not, bail. >>>>> */ >>>>> And we're disassociating opregion setup from this useful comment. >>>> >>>> What are we actually accomplishing here? What specific code >>>> duplication is eliminated? >>> >>> This patch is designed for moving the opregion quirk in vfio_realize() >>> to igd.c, for better isolation of the igd-specific code. Legacy mode >>> also need to initialize opregion as x-igd-opregion=on option. These >>> code are almost the same, except legacy mode continues on error, while >>> x-igd-opregion fails. >>> >>> I am going to clearify that in the commit message of v3. >>> >>>> Why is it important to move all this code to igd.c? >> >> x-igd-opreqion quirk is currently located in pci-quirks.c, which is not >> controlled by CONFIG_VFIO_IGD, moving it to igd.c prevents building >> that unnecessary code in certain binaries, for example, non x86 builds. >> >>>> It's really difficult to untangle this refactor, I think it could be >>>> done more iteratively, if it's really even beneficial. Thanks, >>>> >>>> Alex >>> >>> Agreed. Actually I'd like to totally remove the "legacy mode" and "UPT >>> mode" concept in future, my proposal is: >>> * Emulate and initialize ASLS and BDSM register unconditionally. These >>> registers holds HPA, keeping the old value to guest is not a good >>> idea >>> * Make the host bridge and LPC bridge ID quirks optional like OpRegion. >>> Recent Linux kernel and Windows driver seems not relying on it. This >>> enables IGD passthrough on Q35 machines, but probably without UEFI >>> GOP or VBIOS support, as it is observed they require specific LPC >>> bridge DID to work. >>> * Remove the requirement of IGD device class being VGA controller, this >>> was previous discussed in my kernel change [1] >>> * Update the document >>> >>> It would time consuming to implement all them, coding is not difficult, >>> but I have to verify my change on diffrent platforms. And they are out >>> of this patchset's scope I think. I personally perfers doing it in a >>> future patchset. >>> >>> [1] https://lore.kernel.org/all/20250123163416.7653-1-tomitamoeko@gmail.com/ >>> >>> Thanks, >>> Moeko >> >> Please let me know if you have any thoughts or suggestions, in case >> you missed the previous mail. > > TBH, I'm surprised there's so much interest in direct assignment of > igd. I'd be curious in your motivation, if you can share it. > > Regardless, it's nice to see it updated for newer hardware and I don't > mind the goal of isolating the code so it can be disabled on other > archs, so long as we can do so in small, logical steps that are easy to > follow. > > At this point, the idea of legacy vs UPT might only exist in QEMU. > There are going to be some challenges to avoid breaking existing VM > command lines if the host and LPC bridge quirks become optional. There > are a couple x-igd- options that we're free to break as they've always > been experimental, but the implicit LPC bridge and host bridge quirks > need to be considered carefully. The fact that "legacy" mode has never > previously worked on q35 could mean that we can tie those quirks to a > new experimental option that's off by default and only enabled for > 440fx machine types. > > I'm glad you included the documentation update in your list, it's > clearly out of date, as is some of my knowledge regarding guest driver > requirements. Could we please have an update of docs/igd-assign.txt too ? As some point, we should consolidate all VFIO documentation under one section. That's another topic. > I hope we can make some progress on uefi support as well, > as that's essentially a requirement for newer guests. If we can't get > the code upstream into edk2, maybe we can at least document steps for > others to create images. Thanks, > So, I am bit lot here, forgive my ignorance. I am seeing issues (a black screen and nothing else to report) with : 00:02.0 VGA compatible controller: Intel Corporation AlderLake-S GT1 (rev 0c) using uefi, seabios, pc or q35 does not change the result. However, it works fine with a uefi q35 machine using : 00:02.0 VGA compatible controller: Intel Corporation Alder Lake-N [UHD Graphics] How can I dig into the first issue ? Also, if we know that there are platform requirements for IGD assignment to work, we should try to verify that they are met when the machine boots. Thanks, C.
On 1/31/25 04:41, Alex Williamson wrote: > On Fri, 31 Jan 2025 02:33:03 +0800 > Tomita Moeko <tomitamoeko@gmail.com> wrote: > >> On 1/25/25 15:42, Tomita Moeko wrote: >>> On 1/25/25 05:13, Alex Williamson wrote: >>>> On Sat, 25 Jan 2025 03:12:45 +0800 >>>> Tomita Moeko <tomitamoeko@gmail.com> wrote: >>>> >>>>> Both enable opregion option (x-igd-opregion) and legacy mode require >>>>> setting up OpRegion copy for IGD devices. Move x-igd-opregion handler >>>>> in vfio_realize() to vfio_probe_igd_config_quirk() to elimate duplicate >>>>> code. Finally we moved all the IGD-related code into igd.c. >>>>> >>>>> Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> >>>>> --- >>>>> hw/vfio/igd.c | 143 +++++++++++++++++++++++++++++++++---------- >>>>> hw/vfio/pci-quirks.c | 50 --------------- >>>>> hw/vfio/pci.c | 25 -------- >>>>> hw/vfio/pci.h | 4 -- >>>>> 4 files changed, 110 insertions(+), 112 deletions(-) >>>>> >>>>> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c >>>>> index 6e06dd774a..015beacf5f 100644 >>>>> --- a/hw/vfio/igd.c >>>>> +++ b/hw/vfio/igd.c >>>>> @@ -106,6 +106,7 @@ static int igd_gen(VFIOPCIDevice *vdev) >>>>> return -1; >>>>> } >>>>> >>>>> +#define IGD_ASLS 0xfc /* ASL Storage Register */ >>>>> #define IGD_GMCH 0x50 /* Graphics Control Register */ >>>>> #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ >>>>> #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ >>>>> @@ -138,6 +139,55 @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) >>>>> return 0; >>>>> } >>>>> >>>>> +/* >>>>> + * The OpRegion includes the Video BIOS Table, which seems important for >>>>> + * telling the driver what sort of outputs it has. Without this, the device >>>>> + * may work in the guest, but we may not get output. This also requires BIOS >>>>> + * support to reserve and populate a section of guest memory sufficient for >>>>> + * the table and to write the base address of that memory to the ASLS register >>>>> + * of the IGD device. >>>>> + */ >>>>> +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, >>>>> + struct vfio_region_info *info, >>>>> + Error **errp) >>>>> +{ >>>>> + int ret; >>>>> + >>>>> + vdev->igd_opregion = g_malloc0(info->size); >>>>> + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, >>>>> + info->size, info->offset); >>>>> + if (ret != info->size) { >>>>> + error_setg(errp, "failed to read IGD OpRegion"); >>>>> + g_free(vdev->igd_opregion); >>>>> + vdev->igd_opregion = NULL; >>>>> + return false; >>>>> + } >>>>> + >>>>> + /* >>>>> + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to >>>>> + * allocate 32bit reserved memory for, copy these contents into, and write >>>>> + * the reserved memory base address to the device ASLS register at 0xFC. >>>>> + * Alignment of this reserved region seems flexible, but using a 4k page >>>>> + * alignment seems to work well. This interface assumes a single IGD >>>>> + * device, which may be at VM address 00:02.0 in legacy mode or another >>>>> + * address in UPT mode. >>>>> + * >>>>> + * NB, there may be future use cases discovered where the VM should have >>>>> + * direct interaction with the host OpRegion, in which case the write to >>>>> + * the ASLS register would trigger MemoryRegion setup to enable that. >>>>> + */ >>>>> + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", >>>>> + vdev->igd_opregion, info->size); >>>>> + >>>>> + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); >>>>> + >>>>> + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); >>>>> + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); >>>>> + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); >>>>> + >>>>> + return true; >>>>> +} >>>>> + >>>>> /* >>>>> * The rather short list of registers that we copy from the host devices. >>>>> * The LPC/ISA bridge values are definitely needed to support the vBIOS, the >>>>> @@ -339,29 +389,83 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) >>>>> QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); >>>>> } >>>>> >>>>> +static bool vfio_igd_try_enable_opregion(VFIOPCIDevice *vdev, Error **errp) >>>>> +{ >>>>> + g_autofree struct vfio_region_info *opregion = NULL; >>>>> + int ret; >>>>> + >>>>> + /* >>>>> + * Hotplugging is not supprted for both opregion access and legacy mode. >>>>> + * For legacy mode, we also need to mark the ROM failed. >>>>> + */ >>>> >>>> The explanation was a little better in the removed comment. >>>> >>>>> + if (vdev->pdev.qdev.hotplugged) { >>>>> + vdev->rom_read_failed = true; >>>>> + error_setg(errp, >>>>> + "IGD OpRegion is not supported on hotplugged device"); >>>> >>>> As was the error log. >>>> >>>>> + return false; >>>>> + } >>>>> + >>>>> + ret = vfio_get_dev_region_info(&vdev->vbasedev, >>>>> + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >>>>> + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); >>>>> + if (ret) { >>>>> + error_setg_errno(errp, -ret, >>>>> + "device does not supports IGD OpRegion feature"); >>>>> + return false; >>>>> + } >>>>> + >>>>> + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { >>>>> + return false; >>>>> + } >>>>> + >>>>> + return true; >>>>> +} >>>>> + >>>>> bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >>>>> - Error **errp G_GNUC_UNUSED) >>>>> + Error **errp) >>>>> { >>>>> g_autofree struct vfio_region_info *rom = NULL; >>>>> - g_autofree struct vfio_region_info *opregion = NULL; >>>>> g_autofree struct vfio_region_info *host = NULL; >>>>> g_autofree struct vfio_region_info *lpc = NULL; >>>>> + PCIBus *bus; >>>>> PCIDevice *lpc_bridge; >>>>> int ret, gen; >>>>> + bool legacy_mode, enable_opregion; >>>>> uint64_t gms_size; >>>>> uint64_t *bdsm_size; >>>>> uint32_t gmch; >>>>> Error *err = NULL; >>>>> >>>>> + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || >>>>> + !vfio_is_vga(vdev)) { >>>>> + return true; >>>>> + } >>>>> + >>>>> /* >>>>> * This must be an Intel VGA device at address 00:02.0 for us to even >>>>> * consider enabling legacy mode. The vBIOS has dependencies on the >>>>> * PCI bus address. >>>>> */ >>>>> - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || >>>>> - !vfio_is_vga(vdev) || >>>>> - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), >>>>> - 0, PCI_DEVFN(0x2, 0))) { >>>>> + bus = pci_device_root_bus(&vdev->pdev); >>>>> + legacy_mode = (&vdev->pdev == pci_find_device(bus, 0, PCI_DEVFN(0x2, 0))); >>>>> + enable_opregion = (vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION); >>>>> + >>>>> + if (!enable_opregion && !legacy_mode) { >>>>> + return true; >>>>> + } >>>>> + >>>>> + if (!vfio_igd_try_enable_opregion(vdev, &err)) { >>>>> + if (enable_opregion) { >>>>> + error_propagate(errp, err); >>>>> + return false; >>>>> + } else if (legacy_mode) { >>>>> + error_append_hint(&err, "IGD legacy mode disabled\n"); >>>>> + error_report_err(err); >>>>> + return true; >>>>> + } >>>>> + } >>>>> + >>>>> + if (!legacy_mode) { >>>>> return true; >>>>> } >>>>> >>>>> @@ -404,30 +508,10 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >>>>> return true; >>>>> } >>>>> >>>>> - /* >>>>> - * Ignore the hotplug corner case, mark the ROM failed, we can't >>>>> - * create the devices we need for legacy mode in the hotplug scenario. >>>>> - */ >>>>> - if (vdev->pdev.qdev.hotplugged) { >>>>> - error_report("IGD device %s hotplugged, ROM disabled, " >>>>> - "legacy mode disabled", vdev->vbasedev.name); >>>>> - vdev->rom_read_failed = true; >>>>> - return true; >>>>> - } >>>>> - >>>>> /* >>>>> * Check whether we have all the vfio device specific regions to >>>>> * support legacy mode (added in Linux v4.6). If not, bail. >>>>> */ >>>>> And we're disassociating opregion setup from this useful comment. >>>> >>>> What are we actually accomplishing here? What specific code >>>> duplication is eliminated? >>> >>> This patch is designed for moving the opregion quirk in vfio_realize() >>> to igd.c, for better isolation of the igd-specific code. Legacy mode >>> also need to initialize opregion as x-igd-opregion=on option. These >>> code are almost the same, except legacy mode continues on error, while >>> x-igd-opregion fails. >>> >>> I am going to clearify that in the commit message of v3. >>> >>>> Why is it important to move all this code to igd.c? >> >> x-igd-opreqion quirk is currently located in pci-quirks.c, which is not >> controlled by CONFIG_VFIO_IGD, moving it to igd.c prevents building >> that unnecessary code in certain binaries, for example, non x86 builds. >> >>>> It's really difficult to untangle this refactor, I think it could be >>>> done more iteratively, if it's really even beneficial. Thanks, >>>> >>>> Alex >>> >>> Agreed. Actually I'd like to totally remove the "legacy mode" and "UPT >>> mode" concept in future, my proposal is: >>> * Emulate and initialize ASLS and BDSM register unconditionally. These >>> registers holds HPA, keeping the old value to guest is not a good >>> idea >>> * Make the host bridge and LPC bridge ID quirks optional like OpRegion. >>> Recent Linux kernel and Windows driver seems not relying on it. This >>> enables IGD passthrough on Q35 machines, but probably without UEFI >>> GOP or VBIOS support, as it is observed they require specific LPC >>> bridge DID to work. >>> * Remove the requirement of IGD device class being VGA controller, this >>> was previous discussed in my kernel change [1] >>> * Update the document >>> >>> It would time consuming to implement all them, coding is not difficult, >>> but I have to verify my change on diffrent platforms. And they are out >>> of this patchset's scope I think. I personally perfers doing it in a >>> future patchset. >>> >>> [1] https://lore.kernel.org/all/20250123163416.7653-1-tomitamoeko@gmail.com/ >>> >>> Thanks, >>> Moeko >> >> Please let me know if you have any thoughts or suggestions, in case >> you missed the previous mail. > > TBH, I'm surprised there's so much interest in direct assignment of > igd. I'd be curious in your motivation, if you can share it. I was setting up a windows guest on the linux box in my university lab since some software used in the university only supports Windows :(. As the linux box only runs background services like nfs share, and I only ssh to it for coding. I'd like to passthrough the GPU to guset so it can have baremetal-like GUI experience. During setup, I found there were limitations and conflicting tutorials, which makes me curious about how it actually works. > Regardless, it's nice to see it updated for newer hardware and I don't > mind the goal of isolating the code so it can be disabled on other > archs, so long as we can do so in small, logical steps that are easy to > follow. > > At this point, the idea of legacy vs UPT might only exist in QEMU. > There are going to be some challenges to avoid breaking existing VM > command lines if the host and LPC bridge quirks become optional. There > are a couple x-igd- options that we're free to break as they've always > been experimental, but the implicit LPC bridge and host bridge quirks > need to be considered carefully. The fact that "legacy" mode has never > previously worked on q35 could mean that we can tie those quirks to a > new experimental option that's off by default and only enabled for > 440fx machine types. Currently legacy mode requires creating a dummy LPC bridge at 00:1f.0, but on q35, there is already an ICH9 LPC there. In my trials, LPC is not a must for linux guests and windows guests with recent driver. Only UEFI GOP requires it. Making it as another x-igd- option will give more flexibility I believe. "Legacy" mode implicitly means enabling the options below I think * x-igd-opregion=on * dummy lpc bridge, as well as host bridge ids * x-vga=on, not a must, but code tries to enable VGA access Remoing legacy mode while keeping the current behavior is challenging, as legacy mode is implicitly enabled by addr=0x2 and doesn't fail if any condition is not met. It seems we cannot replace legacy mode in a completely clean mannar, my proposal for the default values are: | config | x-igd-opregion | lpc quirk | |-----------------|----------------|-----------| | i440fx,addr=0x2 | yes | yes | | i440fx,others | no | no | | q35 | no | no | However, it doesn't handle the cases legacy mode fails halfway :(, any suggestions would be greatly appreciated. > I'm glad you included the documentation update in your list, it's > clearly out of date, as is some of my knowledge regarding guest driver > requirements. I hope we can make some progress on uefi support as well, > as that's essentially a requirement for newer guests. If we can't get > the code upstream into edk2, maybe we can at least document steps for > others to create images. Thanks, Sure. I will include it in the documentation update. > Alex >
On 1/31/25 18:15, Cédric Le Goater wrote: > On 1/30/25 21:41, Alex Williamson wrote: >> On Fri, 31 Jan 2025 02:33:03 +0800 >> Tomita Moeko <tomitamoeko@gmail.com> wrote: >> >>> On 1/25/25 15:42, Tomita Moeko wrote: >>>> On 1/25/25 05:13, Alex Williamson wrote: >>>>> On Sat, 25 Jan 2025 03:12:45 +0800 >>>>> Tomita Moeko <tomitamoeko@gmail.com> wrote: >>>>> >>>>>> Both enable opregion option (x-igd-opregion) and legacy mode require >>>>>> setting up OpRegion copy for IGD devices. Move x-igd-opregion handler >>>>>> in vfio_realize() to vfio_probe_igd_config_quirk() to elimate duplicate >>>>>> code. Finally we moved all the IGD-related code into igd.c. >>>>>> >>>>>> Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> >>>>>> --- >>>>>> hw/vfio/igd.c | 143 +++++++++++++++++++++++++++++++++---------- >>>>>> hw/vfio/pci-quirks.c | 50 --------------- >>>>>> hw/vfio/pci.c | 25 -------- >>>>>> hw/vfio/pci.h | 4 -- >>>>>> 4 files changed, 110 insertions(+), 112 deletions(-) >>>>>> >>>>>> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c >>>>>> index 6e06dd774a..015beacf5f 100644 >>>>>> --- a/hw/vfio/igd.c >>>>>> +++ b/hw/vfio/igd.c >>>>>> @@ -106,6 +106,7 @@ static int igd_gen(VFIOPCIDevice *vdev) >>>>>> return -1; >>>>>> } >>>>>> +#define IGD_ASLS 0xfc /* ASL Storage Register */ >>>>>> #define IGD_GMCH 0x50 /* Graphics Control Register */ >>>>>> #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ >>>>>> #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ >>>>>> @@ -138,6 +139,55 @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) >>>>>> return 0; >>>>>> } >>>>>> +/* >>>>>> + * The OpRegion includes the Video BIOS Table, which seems important for >>>>>> + * telling the driver what sort of outputs it has. Without this, the device >>>>>> + * may work in the guest, but we may not get output. This also requires BIOS >>>>>> + * support to reserve and populate a section of guest memory sufficient for >>>>>> + * the table and to write the base address of that memory to the ASLS register >>>>>> + * of the IGD device. >>>>>> + */ >>>>>> +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, >>>>>> + struct vfio_region_info *info, >>>>>> + Error **errp) >>>>>> +{ >>>>>> + int ret; >>>>>> + >>>>>> + vdev->igd_opregion = g_malloc0(info->size); >>>>>> + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, >>>>>> + info->size, info->offset); >>>>>> + if (ret != info->size) { >>>>>> + error_setg(errp, "failed to read IGD OpRegion"); >>>>>> + g_free(vdev->igd_opregion); >>>>>> + vdev->igd_opregion = NULL; >>>>>> + return false; >>>>>> + } >>>>>> + >>>>>> + /* >>>>>> + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to >>>>>> + * allocate 32bit reserved memory for, copy these contents into, and write >>>>>> + * the reserved memory base address to the device ASLS register at 0xFC. >>>>>> + * Alignment of this reserved region seems flexible, but using a 4k page >>>>>> + * alignment seems to work well. This interface assumes a single IGD >>>>>> + * device, which may be at VM address 00:02.0 in legacy mode or another >>>>>> + * address in UPT mode. >>>>>> + * >>>>>> + * NB, there may be future use cases discovered where the VM should have >>>>>> + * direct interaction with the host OpRegion, in which case the write to >>>>>> + * the ASLS register would trigger MemoryRegion setup to enable that. >>>>>> + */ >>>>>> + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", >>>>>> + vdev->igd_opregion, info->size); >>>>>> + >>>>>> + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); >>>>>> + >>>>>> + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); >>>>>> + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); >>>>>> + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); >>>>>> + >>>>>> + return true; >>>>>> +} >>>>>> + >>>>>> /* >>>>>> * The rather short list of registers that we copy from the host devices. >>>>>> * The LPC/ISA bridge values are definitely needed to support the vBIOS, the >>>>>> @@ -339,29 +389,83 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) >>>>>> QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); >>>>>> } >>>>>> +static bool vfio_igd_try_enable_opregion(VFIOPCIDevice *vdev, Error **errp) >>>>>> +{ >>>>>> + g_autofree struct vfio_region_info *opregion = NULL; >>>>>> + int ret; >>>>>> + >>>>>> + /* >>>>>> + * Hotplugging is not supprted for both opregion access and legacy mode. >>>>>> + * For legacy mode, we also need to mark the ROM failed. >>>>>> + */ >>>>> >>>>> The explanation was a little better in the removed comment. >>>>> >>>>>> + if (vdev->pdev.qdev.hotplugged) { >>>>>> + vdev->rom_read_failed = true; >>>>>> + error_setg(errp, >>>>>> + "IGD OpRegion is not supported on hotplugged device"); >>>>> >>>>> As was the error log. >>>>> >>>>>> + return false; >>>>>> + } >>>>>> + >>>>>> + ret = vfio_get_dev_region_info(&vdev->vbasedev, >>>>>> + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, >>>>>> + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); >>>>>> + if (ret) { >>>>>> + error_setg_errno(errp, -ret, >>>>>> + "device does not supports IGD OpRegion feature"); >>>>>> + return false; >>>>>> + } >>>>>> + >>>>>> + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { >>>>>> + return false; >>>>>> + } >>>>>> + >>>>>> + return true; >>>>>> +} >>>>>> + >>>>>> bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >>>>>> - Error **errp G_GNUC_UNUSED) >>>>>> + Error **errp) >>>>>> { >>>>>> g_autofree struct vfio_region_info *rom = NULL; >>>>>> - g_autofree struct vfio_region_info *opregion = NULL; >>>>>> g_autofree struct vfio_region_info *host = NULL; >>>>>> g_autofree struct vfio_region_info *lpc = NULL; >>>>>> + PCIBus *bus; >>>>>> PCIDevice *lpc_bridge; >>>>>> int ret, gen; >>>>>> + bool legacy_mode, enable_opregion; >>>>>> uint64_t gms_size; >>>>>> uint64_t *bdsm_size; >>>>>> uint32_t gmch; >>>>>> Error *err = NULL; >>>>>> + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || >>>>>> + !vfio_is_vga(vdev)) { >>>>>> + return true; >>>>>> + } >>>>>> + >>>>>> /* >>>>>> * This must be an Intel VGA device at address 00:02.0 for us to even >>>>>> * consider enabling legacy mode. The vBIOS has dependencies on the >>>>>> * PCI bus address. >>>>>> */ >>>>>> - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || >>>>>> - !vfio_is_vga(vdev) || >>>>>> - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), >>>>>> - 0, PCI_DEVFN(0x2, 0))) { >>>>>> + bus = pci_device_root_bus(&vdev->pdev); >>>>>> + legacy_mode = (&vdev->pdev == pci_find_device(bus, 0, PCI_DEVFN(0x2, 0))); >>>>>> + enable_opregion = (vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION); >>>>>> + >>>>>> + if (!enable_opregion && !legacy_mode) { >>>>>> + return true; >>>>>> + } >>>>>> + >>>>>> + if (!vfio_igd_try_enable_opregion(vdev, &err)) { >>>>>> + if (enable_opregion) { >>>>>> + error_propagate(errp, err); >>>>>> + return false; >>>>>> + } else if (legacy_mode) { >>>>>> + error_append_hint(&err, "IGD legacy mode disabled\n"); >>>>>> + error_report_err(err); >>>>>> + return true; >>>>>> + } >>>>>> + } >>>>>> + >>>>>> + if (!legacy_mode) { >>>>>> return true; >>>>>> } >>>>>> @@ -404,30 +508,10 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, >>>>>> return true; >>>>>> } >>>>>> - /* >>>>>> - * Ignore the hotplug corner case, mark the ROM failed, we can't >>>>>> - * create the devices we need for legacy mode in the hotplug scenario. >>>>>> - */ >>>>>> - if (vdev->pdev.qdev.hotplugged) { >>>>>> - error_report("IGD device %s hotplugged, ROM disabled, " >>>>>> - "legacy mode disabled", vdev->vbasedev.name); >>>>>> - vdev->rom_read_failed = true; >>>>>> - return true; >>>>>> - } >>>>>> - >>>>>> /* >>>>>> * Check whether we have all the vfio device specific regions to >>>>>> * support legacy mode (added in Linux v4.6). If not, bail. >>>>>> */ >>>>>> And we're disassociating opregion setup from this useful comment. >>>>> >>>>> What are we actually accomplishing here? What specific code >>>>> duplication is eliminated? >>>> >>>> This patch is designed for moving the opregion quirk in vfio_realize() >>>> to igd.c, for better isolation of the igd-specific code. Legacy mode >>>> also need to initialize opregion as x-igd-opregion=on option. These >>>> code are almost the same, except legacy mode continues on error, while >>>> x-igd-opregion fails. >>>> >>>> I am going to clearify that in the commit message of v3. >>>> >>>>> Why is it important to move all this code to igd.c? >>> >>> x-igd-opreqion quirk is currently located in pci-quirks.c, which is not >>> controlled by CONFIG_VFIO_IGD, moving it to igd.c prevents building >>> that unnecessary code in certain binaries, for example, non x86 builds. >>> >>>>> It's really difficult to untangle this refactor, I think it could be >>>>> done more iteratively, if it's really even beneficial. Thanks, >>>>> >>>>> Alex >>>> >>>> Agreed. Actually I'd like to totally remove the "legacy mode" and "UPT >>>> mode" concept in future, my proposal is: >>>> * Emulate and initialize ASLS and BDSM register unconditionally. These >>>> registers holds HPA, keeping the old value to guest is not a good >>>> idea >>>> * Make the host bridge and LPC bridge ID quirks optional like OpRegion. >>>> Recent Linux kernel and Windows driver seems not relying on it. This >>>> enables IGD passthrough on Q35 machines, but probably without UEFI >>>> GOP or VBIOS support, as it is observed they require specific LPC >>>> bridge DID to work. >>>> * Remove the requirement of IGD device class being VGA controller, this >>>> was previous discussed in my kernel change [1] >>>> * Update the document >>>> >>>> It would time consuming to implement all them, coding is not difficult, >>>> but I have to verify my change on diffrent platforms. And they are out >>>> of this patchset's scope I think. I personally perfers doing it in a >>>> future patchset. >>>> >>>> [1] https://lore.kernel.org/all/20250123163416.7653-1-tomitamoeko@gmail.com/ >>>> >>>> Thanks, >>>> Moeko >>> >>> Please let me know if you have any thoughts or suggestions, in case >>> you missed the previous mail. >> >> TBH, I'm surprised there's so much interest in direct assignment of >> igd. I'd be curious in your motivation, if you can share it. >> >> Regardless, it's nice to see it updated for newer hardware and I don't >> mind the goal of isolating the code so it can be disabled on other >> archs, so long as we can do so in small, logical steps that are easy to >> follow. >> >> At this point, the idea of legacy vs UPT might only exist in QEMU. >> There are going to be some challenges to avoid breaking existing VM >> command lines if the host and LPC bridge quirks become optional. There >> are a couple x-igd- options that we're free to break as they've always >> been experimental, but the implicit LPC bridge and host bridge quirks >> need to be considered carefully. The fact that "legacy" mode has never >> previously worked on q35 could mean that we can tie those quirks to a >> new experimental option that's off by default and only enabled for >> 440fx machine types. >> >> I'm glad you included the documentation update in your list, it's >> clearly out of date, as is some of my knowledge regarding guest driver >> requirements. > > Could we please have an update of docs/igd-assign.txt too ? > > As some point, we should consolidate all VFIO documentation under > one section. That's another topic. > >> I hope we can make some progress on uefi support as well, >> as that's essentially a requirement for newer guests. If we can't get >> the code upstream into edk2, maybe we can at least document steps for >> others to create images. Thanks, >> > > So, I am bit lot here, forgive my ignorance. > > I am seeing issues (a black screen and nothing else to report) with : > > 00:02.0 VGA compatible controller: Intel Corporation AlderLake-S GT1 (rev 0c) > > using uefi, seabios, pc or q35 does not change the result. > > > However, it works fine with a uefi q35 machine using : > > 00:02.0 VGA compatible controller: Intel Corporation Alder Lake-N [UHD Graphics] > > How can I dig into the first issue ? > If you are running a linux guest, `dmesg | grep i915` in guest is always a good start. Adding `drm_debug` to kernel cmdline logs more details. I mainly uses `drm_debug=0x6` for igd passthrough (Enable Driver and KMS logging). https://docs.kernel.org/gpu/drm-internals.html > Also, if we know that there are platform requirements for IGD assignment to work, we > should try to verify that they are met when the machine boots. Well noted. > Thanks, > > C. > >
diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c index 6e06dd774a..015beacf5f 100644 --- a/hw/vfio/igd.c +++ b/hw/vfio/igd.c @@ -106,6 +106,7 @@ static int igd_gen(VFIOPCIDevice *vdev) return -1; } +#define IGD_ASLS 0xfc /* ASL Storage Register */ #define IGD_GMCH 0x50 /* Graphics Control Register */ #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */ #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and later */ @@ -138,6 +139,55 @@ static uint64_t igd_stolen_memory_size(int gen, uint32_t gmch) return 0; } +/* + * The OpRegion includes the Video BIOS Table, which seems important for + * telling the driver what sort of outputs it has. Without this, the device + * may work in the guest, but we may not get output. This also requires BIOS + * support to reserve and populate a section of guest memory sufficient for + * the table and to write the base address of that memory to the ASLS register + * of the IGD device. + */ +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, + struct vfio_region_info *info, + Error **errp) +{ + int ret; + + vdev->igd_opregion = g_malloc0(info->size); + ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, + info->size, info->offset); + if (ret != info->size) { + error_setg(errp, "failed to read IGD OpRegion"); + g_free(vdev->igd_opregion); + vdev->igd_opregion = NULL; + return false; + } + + /* + * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to + * allocate 32bit reserved memory for, copy these contents into, and write + * the reserved memory base address to the device ASLS register at 0xFC. + * Alignment of this reserved region seems flexible, but using a 4k page + * alignment seems to work well. This interface assumes a single IGD + * device, which may be at VM address 00:02.0 in legacy mode or another + * address in UPT mode. + * + * NB, there may be future use cases discovered where the VM should have + * direct interaction with the host OpRegion, in which case the write to + * the ASLS register would trigger MemoryRegion setup to enable that. + */ + fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", + vdev->igd_opregion, info->size); + + trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); + + pci_set_long(vdev->pdev.config + IGD_ASLS, 0); + pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); + pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); + + return true; +} + /* * The rather short list of registers that we copy from the host devices. * The LPC/ISA bridge values are definitely needed to support the vBIOS, the @@ -339,29 +389,83 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr) QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next); } +static bool vfio_igd_try_enable_opregion(VFIOPCIDevice *vdev, Error **errp) +{ + g_autofree struct vfio_region_info *opregion = NULL; + int ret; + + /* + * Hotplugging is not supprted for both opregion access and legacy mode. + * For legacy mode, we also need to mark the ROM failed. + */ + if (vdev->pdev.qdev.hotplugged) { + vdev->rom_read_failed = true; + error_setg(errp, + "IGD OpRegion is not supported on hotplugged device"); + return false; + } + + ret = vfio_get_dev_region_info(&vdev->vbasedev, + VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); + if (ret) { + error_setg_errno(errp, -ret, + "device does not supports IGD OpRegion feature"); + return false; + } + + if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { + return false; + } + + return true; +} + bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, - Error **errp G_GNUC_UNUSED) + Error **errp) { g_autofree struct vfio_region_info *rom = NULL; - g_autofree struct vfio_region_info *opregion = NULL; g_autofree struct vfio_region_info *host = NULL; g_autofree struct vfio_region_info *lpc = NULL; + PCIBus *bus; PCIDevice *lpc_bridge; int ret, gen; + bool legacy_mode, enable_opregion; uint64_t gms_size; uint64_t *bdsm_size; uint32_t gmch; Error *err = NULL; + if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || + !vfio_is_vga(vdev)) { + return true; + } + /* * This must be an Intel VGA device at address 00:02.0 for us to even * consider enabling legacy mode. The vBIOS has dependencies on the * PCI bus address. */ - if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) || - !vfio_is_vga(vdev) || - &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev), - 0, PCI_DEVFN(0x2, 0))) { + bus = pci_device_root_bus(&vdev->pdev); + legacy_mode = (&vdev->pdev == pci_find_device(bus, 0, PCI_DEVFN(0x2, 0))); + enable_opregion = (vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION); + + if (!enable_opregion && !legacy_mode) { + return true; + } + + if (!vfio_igd_try_enable_opregion(vdev, &err)) { + if (enable_opregion) { + error_propagate(errp, err); + return false; + } else if (legacy_mode) { + error_append_hint(&err, "IGD legacy mode disabled\n"); + error_report_err(err); + return true; + } + } + + if (!legacy_mode) { return true; } @@ -404,30 +508,10 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, return true; } - /* - * Ignore the hotplug corner case, mark the ROM failed, we can't - * create the devices we need for legacy mode in the hotplug scenario. - */ - if (vdev->pdev.qdev.hotplugged) { - error_report("IGD device %s hotplugged, ROM disabled, " - "legacy mode disabled", vdev->vbasedev.name); - vdev->rom_read_failed = true; - return true; - } - /* * Check whether we have all the vfio device specific regions to * support legacy mode (added in Linux v4.6). If not, bail. */ - ret = vfio_get_dev_region_info(&vdev->vbasedev, - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); - if (ret) { - error_report("IGD device %s does not support OpRegion access," - "legacy mode disabled", vdev->vbasedev.name); - return true; - } - ret = vfio_get_dev_region_info(&vdev->vbasedev, VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host); @@ -476,13 +560,6 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev, return true; } - /* Setup OpRegion access */ - if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) { - error_append_hint(&err, "IGD legacy mode disabled\n"); - error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name); - return true; - } - /* * Allow user to override dsm size using x-igd-gms option, in multiples of * 32MiB. This option should only be used when the desired size cannot be diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c index b8379cb512..f2b37910f0 100644 --- a/hw/vfio/pci-quirks.c +++ b/hw/vfio/pci-quirks.c @@ -1114,56 +1114,6 @@ static void vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr) trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name); } -#define IGD_ASLS 0xfc /* ASL Storage Register */ - -/* - * The OpRegion includes the Video BIOS Table, which seems important for - * telling the driver what sort of outputs it has. Without this, the device - * may work in the guest, but we may not get output. This also requires BIOS - * support to reserve and populate a section of guest memory sufficient for - * the table and to write the base address of that memory to the ASLS register - * of the IGD device. - */ -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, - struct vfio_region_info *info, Error **errp) -{ - int ret; - - vdev->igd_opregion = g_malloc0(info->size); - ret = pread(vdev->vbasedev.fd, vdev->igd_opregion, - info->size, info->offset); - if (ret != info->size) { - error_setg(errp, "failed to read IGD OpRegion"); - g_free(vdev->igd_opregion); - vdev->igd_opregion = NULL; - return false; - } - - /* - * Provide fw_cfg with a copy of the OpRegion which the VM firmware is to - * allocate 32bit reserved memory for, copy these contents into, and write - * the reserved memory base address to the device ASLS register at 0xFC. - * Alignment of this reserved region seems flexible, but using a 4k page - * alignment seems to work well. This interface assumes a single IGD - * device, which may be at VM address 00:02.0 in legacy mode or another - * address in UPT mode. - * - * NB, there may be future use cases discovered where the VM should have - * direct interaction with the host OpRegion, in which case the write to - * the ASLS register would trigger MemoryRegion setup to enable that. - */ - fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion", - vdev->igd_opregion, info->size); - - trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name); - - pci_set_long(vdev->pdev.config + IGD_ASLS, 0); - pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0); - pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0); - - return true; -} - /* * Common quirk probe entry points. */ diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index e624ae56c4..1b90c78c5a 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3136,31 +3136,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bar_quirk_setup(vdev, i); } - if (!vdev->igd_opregion && - vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) { - g_autofree struct vfio_region_info *opregion = NULL; - - if (vdev->pdev.qdev.hotplugged) { - error_setg(errp, - "cannot support IGD OpRegion feature on hotplugged " - "device"); - goto out_unset_idev; - } - - ret = vfio_get_dev_region_info(vbasedev, - VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL, - VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion); - if (ret) { - error_setg_errno(errp, -ret, - "does not support requested IGD OpRegion feature"); - goto out_unset_idev; - } - - if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) { - goto out_unset_idev; - } - } - /* QEMU emulates all of MSI & MSIX */ if (pdev->cap_present & QEMU_PCI_CAP_MSIX) { memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff, diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 5c64de0270..b9e920a6b4 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -229,10 +229,6 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice *vdev, bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp); -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev, - struct vfio_region_info *info, - Error **errp); - void vfio_display_reset(VFIOPCIDevice *vdev); bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp); void vfio_display_finalize(VFIOPCIDevice *vdev);
Both enable opregion option (x-igd-opregion) and legacy mode require setting up OpRegion copy for IGD devices. Move x-igd-opregion handler in vfio_realize() to vfio_probe_igd_config_quirk() to elimate duplicate code. Finally we moved all the IGD-related code into igd.c. Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com> --- hw/vfio/igd.c | 143 +++++++++++++++++++++++++++++++++---------- hw/vfio/pci-quirks.c | 50 --------------- hw/vfio/pci.c | 25 -------- hw/vfio/pci.h | 4 -- 4 files changed, 110 insertions(+), 112 deletions(-)