diff mbox series

[4/7] vfio/igd: add new bar0 quirk to emulate BDSM mirror

Message ID 20240822111819.34306-5-c.koehne@beckhoff.com (mailing list archive)
State New, archived
Headers show
Series vfio/igd: add passthrough support for IGDs of gen 11 and later | expand

Commit Message

Corvin Köhne Aug. 22, 2024, 11:08 a.m. UTC
The BDSM register is mirrored into MMIO space at least for gen 11 and
later devices. Unfortunately, the Windows driver reads the register
value from MMIO space instead of PCI config space for those devices [1].
Therefore, we either have to keep a 1:1 mapping for the host and guest
address or we have to emulate the MMIO register too. Using the igd in
legacy mode is already hard due to it's many constraints. Keeping a 1:1
mapping may not work in all cases and makes it even harder to use. An
MMIO emulation has to trap the whole MMIO page. This makes accesses to
this page slower compared to using second level address translation.
Nevertheless, it doesn't have any constraints and I haven't noticed any
performance degradation yet making it a better solution.

[1] https://github.com/projectacrn/acrn-hypervisor/blob/5c351bee0f6ae46250eefc07f44b4a31e770f3cf/devicemodel/hw/pci/passthrough.c#L650-L653

Signed-off-by: Corvin Köhne <c.koehne@beckhoff.com>
---
 hw/vfio/igd.c        | 97 ++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/pci-quirks.c |  1 +
 hw/vfio/pci.h        |  1 +
 3 files changed, 99 insertions(+)

Comments

Alex Williamson Aug. 26, 2024, 4:35 p.m. UTC | #1
On Thu, 22 Aug 2024 13:08:29 +0200
Corvin Köhne <c.koehne@beckhoff.com> wrote:

> The BDSM register is mirrored into MMIO space at least for gen 11 and
> later devices. Unfortunately, the Windows driver reads the register
> value from MMIO space instead of PCI config space for those devices [1].
> Therefore, we either have to keep a 1:1 mapping for the host and guest
> address or we have to emulate the MMIO register too. Using the igd in
> legacy mode is already hard due to it's many constraints. Keeping a 1:1
> mapping may not work in all cases and makes it even harder to use. An
> MMIO emulation has to trap the whole MMIO page. This makes accesses to
> this page slower compared to using second level address translation.
> Nevertheless, it doesn't have any constraints and I haven't noticed any
> performance degradation yet making it a better solution.
> 
> [1] https://github.com/projectacrn/acrn-hypervisor/blob/5c351bee0f6ae46250eefc07f44b4a31e770f3cf/devicemodel/hw/pci/passthrough.c#L650-L653
> 
> Signed-off-by: Corvin Köhne <c.koehne@beckhoff.com>
> ---
>  hw/vfio/igd.c        | 97 ++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/pci-quirks.c |  1 +
>  hw/vfio/pci.h        |  1 +
>  3 files changed, 99 insertions(+)
> 
> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
> index 0b6533bbf7..863b58565e 100644
> --- a/hw/vfio/igd.c
> +++ b/hw/vfio/igd.c
> @@ -374,6 +374,103 @@ static const MemoryRegionOps vfio_igd_index_quirk = {
>      .endianness = DEVICE_LITTLE_ENDIAN,
>  };
>  
> +#define IGD_BDSM_MMIO_OFFSET 0x1080C0
> +
> +static uint64_t vfio_igd_quirk_bdsm_read(void *opaque,
> +                                          hwaddr addr, unsigned size)
> +{
> +    VFIOPCIDevice *vdev = opaque;
> +    uint64_t offset;
> +
> +    offset = IGD_BDSM_GEN11 + addr;
> +
> +    switch (size) {
> +    case 1:
> +        return pci_get_byte(vdev->pdev.config + offset);
> +    case 2:
> +        return le16_to_cpu(pci_get_word(vdev->pdev.config + offset));
> +    case 4:
> +        return le32_to_cpu(pci_get_long(vdev->pdev.config + offset));
> +    case 8:
> +        return le64_to_cpu(pci_get_quad(vdev->pdev.config + offset));
> +    default:
> +        hw_error("igd: unsupported read size, %u bytes", size);
> +        break;
> +    }
> +
> +    return 0;
> +}
> +
> +static void vfio_igd_quirk_bdsm_write(void *opaque, hwaddr addr,
> +                                       uint64_t data, unsigned size)
> +{
> +    VFIOPCIDevice *vdev = opaque;
> +    uint64_t offset;
> +
> +    offset = IGD_BDSM_GEN11 + addr;
> +
> +    switch (size) {
> +    case 1:
> +        pci_set_byte(vdev->pdev.config + offset, data);
> +        break;
> +    case 2:
> +        pci_set_word(vdev->pdev.config + offset, data);
> +        break;
> +    case 4:
> +        pci_set_long(vdev->pdev.config + offset, data);
> +        break;
> +    case 8:
> +        pci_set_quad(vdev->pdev.config + offset, data);
> +        break;
> +    default:
> +        hw_error("igd: unsupported read size, %u bytes", size);
> +        break;
> +    }
> +}

If we have the leXX_to_cpu() in the read path, don't we need
cpu_to_leXX() in the write path?  Maybe we should in fact just get rid
of all of them since we're quirking a device that's specific to a
little endian architecture and we're defining the memory region as
little endian, but minimally we should be consistent.

> +
> +static const MemoryRegionOps vfio_igd_bdsm_quirk = {
> +    .read = vfio_igd_quirk_bdsm_read,
> +    .write = vfio_igd_quirk_bdsm_write,
> +    .endianness = DEVICE_LITTLE_ENDIAN,
> +};
> +
> +void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
> +{
> +    VFIOQuirk *quirk;
> +    int gen;
> +
> +    /*
> +     * This must be an Intel VGA device at address 00:02.0 for us to even
> +     * consider enabling legacy mode. Some driver have dependencies on the PCI
> +     * bus address.
> +     */
> +    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
> +        !vfio_is_vga(vdev) || nr != 0 ||
> +        &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
> +                                       0, PCI_DEVFN(0x2, 0))) {
> +        return;
> +    }
> +
> +    /*
> +     * Only on IGD devices of gen 11 and above, the BDSM register is mirrored
> +     * into MMIO space and read from MMIO space by the Windows driver.
> +     */
> +    gen = igd_gen(vdev);
> +    if (gen < 11) {
> +        return;
> +    }
> +
> +    quirk = vfio_quirk_alloc(1);
> +    quirk->data = vdev;
> +
> +    memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_bdsm_quirk,
> +                          vdev, "vfio-igd-bdsm-quirk", 8);
> +    memory_region_add_subregion_overlap(vdev->bars[0].region.mem, 0x1080C0,

Use your macro here, IGD_BDSM_MMIO_OFFSET.  Thanks,

Alex

PS - please drop the confidential email warning signature when posting
to public lists.

> +                                        &quirk->mem[0], 1);
> +
> +    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> +}
> +
>  void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
>  {
>      g_autofree struct vfio_region_info *rom = NULL;
> diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
> index 39dae72497..d37f722cce 100644
> --- a/hw/vfio/pci-quirks.c
> +++ b/hw/vfio/pci-quirks.c
> @@ -1259,6 +1259,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
>      vfio_probe_nvidia_bar0_quirk(vdev, nr);
>      vfio_probe_rtl8168_bar2_quirk(vdev, nr);
>  #ifdef CONFIG_VFIO_IGD
> +    vfio_probe_igd_bar0_quirk(vdev, nr);
>      vfio_probe_igd_bar4_quirk(vdev, nr);
>  #endif
>  }
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index bf67df2fbc..5ad090a229 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -215,6 +215,7 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
>  bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
>  void vfio_quirk_reset(VFIOPCIDevice *vdev);
>  VFIOQuirk *vfio_quirk_alloc(int nr_mem);
> +void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr);
>  void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
>  
>  extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
Corvin Köhne Aug. 28, 2024, 10:40 a.m. UTC | #2
On Mon, 2024-08-26 at 10:35 -0600, Alex Williamson wrote:
> CAUTION: External Email!!
> On Thu, 22 Aug 2024 13:08:29 +0200
> Corvin Köhne <c.koehne@beckhoff.com> wrote:
> 
> > The BDSM register is mirrored into MMIO space at least for gen 11
> > and
> > later devices. Unfortunately, the Windows driver reads the register
> > value from MMIO space instead of PCI config space for those devices
> > [1].
> > Therefore, we either have to keep a 1:1 mapping for the host and
> > guest
> > address or we have to emulate the MMIO register too. Using the igd
> > in
> > legacy mode is already hard due to it's many constraints. Keeping a
> > 1:1
> > mapping may not work in all cases and makes it even harder to use.
> > An
> > MMIO emulation has to trap the whole MMIO page. This makes accesses
> > to
> > this page slower compared to using second level address
> > translation.
> > Nevertheless, it doesn't have any constraints and I haven't noticed
> > any
> > performance degradation yet making it a better solution.
> > 
> > [1]
> > https://github.com/projectacrn/acrn-hypervisor/blob/5c351bee0f6ae46250eefc07f44b4a31e770f3cf/devicemodel/hw/pci/passthrough.c#L650-L653
> > 
> > Signed-off-by: Corvin Köhne <c.koehne@beckhoff.com>
> > ---
> >  hw/vfio/igd.c        | 97
> > ++++++++++++++++++++++++++++++++++++++++++++
> >  hw/vfio/pci-quirks.c |  1 +
> >  hw/vfio/pci.h        |  1 +
> >  3 files changed, 99 insertions(+)
> > 
> > diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
> > index 0b6533bbf7..863b58565e 100644
> > --- a/hw/vfio/igd.c
> > +++ b/hw/vfio/igd.c
> > @@ -374,6 +374,103 @@ static const MemoryRegionOps
> > vfio_igd_index_quirk = {
> >      .endianness = DEVICE_LITTLE_ENDIAN,
> >  };
> >  
> > +#define IGD_BDSM_MMIO_OFFSET 0x1080C0
> > +
> > +static uint64_t vfio_igd_quirk_bdsm_read(void *opaque,
> > +                                          hwaddr addr, unsigned
> > size)
> > +{
> > +    VFIOPCIDevice *vdev = opaque;
> > +    uint64_t offset;
> > +
> > +    offset = IGD_BDSM_GEN11 + addr;
> > +
> > +    switch (size) {
> > +    case 1:
> > +        return pci_get_byte(vdev->pdev.config + offset);
> > +    case 2:
> > +        return le16_to_cpu(pci_get_word(vdev->pdev.config +
> > offset));
> > +    case 4:
> > +        return le32_to_cpu(pci_get_long(vdev->pdev.config +
> > offset));
> > +    case 8:
> > +        return le64_to_cpu(pci_get_quad(vdev->pdev.config +
> > offset));
> > +    default:
> > +        hw_error("igd: unsupported read size, %u bytes", size);
> > +        break;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static void vfio_igd_quirk_bdsm_write(void *opaque, hwaddr addr,
> > +                                       uint64_t data, unsigned
> > size)
> > +{
> > +    VFIOPCIDevice *vdev = opaque;
> > +    uint64_t offset;
> > +
> > +    offset = IGD_BDSM_GEN11 + addr;
> > +
> > +    switch (size) {
> > +    case 1:
> > +        pci_set_byte(vdev->pdev.config + offset, data);
> > +        break;
> > +    case 2:
> > +        pci_set_word(vdev->pdev.config + offset, data);
> > +        break;
> > +    case 4:
> > +        pci_set_long(vdev->pdev.config + offset, data);
> > +        break;
> > +    case 8:
> > +        pci_set_quad(vdev->pdev.config + offset, data);
> > +        break;
> > +    default:
> > +        hw_error("igd: unsupported read size, %u bytes", size);
> > +        break;
> > +    }
> > +}
> 
> If we have the leXX_to_cpu() in the read path, don't we need
> cpu_to_leXX() in the write path?  Maybe we should in fact just get
> rid
> of all of them since we're quirking a device that's specific to a
> little endian architecture and we're defining the memory region as
> little endian, but minimally we should be consistent.
> 

Will drop leXX_to_cpu in the read path.

> > +
> > +static const MemoryRegionOps vfio_igd_bdsm_quirk = {
> > +    .read = vfio_igd_quirk_bdsm_read,
> > +    .write = vfio_igd_quirk_bdsm_write,
> > +    .endianness = DEVICE_LITTLE_ENDIAN,
> > +};
> > +
> > +void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
> > +{
> > +    VFIOQuirk *quirk;
> > +    int gen;
> > +
> > +    /*
> > +     * This must be an Intel VGA device at address 00:02.0 for us
> > to even
> > +     * consider enabling legacy mode. Some driver have
> > dependencies on the PCI
> > +     * bus address.
> > +     */
> > +    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
> > +        !vfio_is_vga(vdev) || nr != 0 ||
> > +        &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev-
> > >pdev),
> > +                                       0, PCI_DEVFN(0x2, 0))) {
> > +        return;
> > +    }
> > +
> > +    /*
> > +     * Only on IGD devices of gen 11 and above, the BDSM register
> > is mirrored
> > +     * into MMIO space and read from MMIO space by the Windows
> > driver.
> > +     */
> > +    gen = igd_gen(vdev);
> > +    if (gen < 11) {
> > +        return;
> > +    }
> > +
> > +    quirk = vfio_quirk_alloc(1);
> > +    quirk->data = vdev;
> > +
> > +    memory_region_init_io(&quirk->mem[0], OBJECT(vdev),
> > &vfio_igd_bdsm_quirk,
> > +                          vdev, "vfio-igd-bdsm-quirk", 8);
> > +    memory_region_add_subregion_overlap(vdev->bars[0].region.mem,
> > 0x1080C0,
> 
> Use your macro here, IGD_BDSM_MMIO_OFFSET.  Thanks,
> 

Thanks for catching.

> Alex
> 
> PS - please drop the confidential email warning signature when
> posting
> to public lists.
> 

Sry for the noise. I can't drop it, so I'm going to use another mail
address to post my patches.

> > +                                        &quirk->mem[0], 1);
> > +
> > +    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
> > +}
> > +
> >  void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
> >  {
> >      g_autofree struct vfio_region_info *rom = NULL;
> > diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
> > index 39dae72497..d37f722cce 100644
> > --- a/hw/vfio/pci-quirks.c
> > +++ b/hw/vfio/pci-quirks.c
> > @@ -1259,6 +1259,7 @@ void vfio_bar_quirk_setup(VFIOPCIDevice
> > *vdev, int nr)
> >      vfio_probe_nvidia_bar0_quirk(vdev, nr);
> >      vfio_probe_rtl8168_bar2_quirk(vdev, nr);
> >  #ifdef CONFIG_VFIO_IGD
> > +    vfio_probe_igd_bar0_quirk(vdev, nr);
> >      vfio_probe_igd_bar4_quirk(vdev, nr);
> >  #endif
> >  }
> > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > index bf67df2fbc..5ad090a229 100644
> > --- a/hw/vfio/pci.h
> > +++ b/hw/vfio/pci.h
> > @@ -215,6 +215,7 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice
> > *vdev);
> >  bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
> >  void vfio_quirk_reset(VFIOPCIDevice *vdev);
> >  VFIOQuirk *vfio_quirk_alloc(int nr_mem);
> > +void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr);
> >  void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
> >  
> >  extern const PropertyInfo qdev_prop_nv_gpudirect_clique;
> 
>
Corvin Köhne Aug. 28, 2024, 12:50 p.m. UTC | #3
On Wed, 2024-08-28 at 12:40 +0200, Corvin Köhne wrote:
> On Mon, 2024-08-26 at 10:35 -0600, Alex Williamson wrote:
> > 
> > PS - please drop the confidential email warning signature when
> > posting
> > to public lists.
> > 
> 
> Sry for the noise. I can't drop it, so I'm going to use another mail
> address to post my patches.
> 
> > 

Argh, forgot updating my send-email config when resending the patch
series. Should I resend it again?
Cédric Le Goater Aug. 28, 2024, 1:08 p.m. UTC | #4
On 8/28/24 14:50, Corvin Köhne wrote:
> On Wed, 2024-08-28 at 12:40 +0200, Corvin Köhne wrote:
>> On Mon, 2024-08-26 at 10:35 -0600, Alex Williamson wrote:
>>>
>>> PS - please drop the confidential email warning signature when
>>> posting
>>> to public lists.
>>>
>>
>> Sry for the noise. I can't drop it, so I'm going to use another mail
>> address to post my patches.
>>
>>>
> 
> Argh, forgot updating my send-email config when resending the patch
> series. Should I resend it again?

Please do because the result is not compatible with the tools we use
to extract patches (b4).

Thanks,

C.
diff mbox series

Patch

diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
index 0b6533bbf7..863b58565e 100644
--- a/hw/vfio/igd.c
+++ b/hw/vfio/igd.c
@@ -374,6 +374,103 @@  static const MemoryRegionOps vfio_igd_index_quirk = {
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+#define IGD_BDSM_MMIO_OFFSET 0x1080C0
+
+static uint64_t vfio_igd_quirk_bdsm_read(void *opaque,
+                                          hwaddr addr, unsigned size)
+{
+    VFIOPCIDevice *vdev = opaque;
+    uint64_t offset;
+
+    offset = IGD_BDSM_GEN11 + addr;
+
+    switch (size) {
+    case 1:
+        return pci_get_byte(vdev->pdev.config + offset);
+    case 2:
+        return le16_to_cpu(pci_get_word(vdev->pdev.config + offset));
+    case 4:
+        return le32_to_cpu(pci_get_long(vdev->pdev.config + offset));
+    case 8:
+        return le64_to_cpu(pci_get_quad(vdev->pdev.config + offset));
+    default:
+        hw_error("igd: unsupported read size, %u bytes", size);
+        break;
+    }
+
+    return 0;
+}
+
+static void vfio_igd_quirk_bdsm_write(void *opaque, hwaddr addr,
+                                       uint64_t data, unsigned size)
+{
+    VFIOPCIDevice *vdev = opaque;
+    uint64_t offset;
+
+    offset = IGD_BDSM_GEN11 + addr;
+
+    switch (size) {
+    case 1:
+        pci_set_byte(vdev->pdev.config + offset, data);
+        break;
+    case 2:
+        pci_set_word(vdev->pdev.config + offset, data);
+        break;
+    case 4:
+        pci_set_long(vdev->pdev.config + offset, data);
+        break;
+    case 8:
+        pci_set_quad(vdev->pdev.config + offset, data);
+        break;
+    default:
+        hw_error("igd: unsupported read size, %u bytes", size);
+        break;
+    }
+}
+
+static const MemoryRegionOps vfio_igd_bdsm_quirk = {
+    .read = vfio_igd_quirk_bdsm_read,
+    .write = vfio_igd_quirk_bdsm_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr)
+{
+    VFIOQuirk *quirk;
+    int gen;
+
+    /*
+     * This must be an Intel VGA device at address 00:02.0 for us to even
+     * consider enabling legacy mode. Some driver have dependencies on the PCI
+     * bus address.
+     */
+    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
+        !vfio_is_vga(vdev) || nr != 0 ||
+        &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
+                                       0, PCI_DEVFN(0x2, 0))) {
+        return;
+    }
+
+    /*
+     * Only on IGD devices of gen 11 and above, the BDSM register is mirrored
+     * into MMIO space and read from MMIO space by the Windows driver.
+     */
+    gen = igd_gen(vdev);
+    if (gen < 11) {
+        return;
+    }
+
+    quirk = vfio_quirk_alloc(1);
+    quirk->data = vdev;
+
+    memory_region_init_io(&quirk->mem[0], OBJECT(vdev), &vfio_igd_bdsm_quirk,
+                          vdev, "vfio-igd-bdsm-quirk", 8);
+    memory_region_add_subregion_overlap(vdev->bars[0].region.mem, 0x1080C0,
+                                        &quirk->mem[0], 1);
+
+    QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, quirk, next);
+}
+
 void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr)
 {
     g_autofree struct vfio_region_info *rom = NULL;
diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index 39dae72497..d37f722cce 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -1259,6 +1259,7 @@  void vfio_bar_quirk_setup(VFIOPCIDevice *vdev, int nr)
     vfio_probe_nvidia_bar0_quirk(vdev, nr);
     vfio_probe_rtl8168_bar2_quirk(vdev, nr);
 #ifdef CONFIG_VFIO_IGD
+    vfio_probe_igd_bar0_quirk(vdev, nr);
     vfio_probe_igd_bar4_quirk(vdev, nr);
 #endif
 }
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index bf67df2fbc..5ad090a229 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -215,6 +215,7 @@  void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev);
 bool vfio_add_virt_caps(VFIOPCIDevice *vdev, Error **errp);
 void vfio_quirk_reset(VFIOPCIDevice *vdev);
 VFIOQuirk *vfio_quirk_alloc(int nr_mem);
+void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, int nr);
 void vfio_probe_igd_bar4_quirk(VFIOPCIDevice *vdev, int nr);
 
 extern const PropertyInfo qdev_prop_nv_gpudirect_clique;