diff mbox

[RFC,v2,7/7] vfio/pci: Find and expose Intel IGD OpRegion

Message ID 20160202200845.5260.64183.stgit@gimli.home (mailing list archive)
State New, archived
Headers show

Commit Message

Alex Williamson Feb. 2, 2016, 8:09 p.m. UTC
This is provided via a device specific region, look for it on Intel
VGA class devices.  Our default mechanism to expose this to the BIOS
is via fw_cfg where it's expected that the BIOS will copy the data
into a reserved RAM area and update the ASL Storage register to
reference the GPA of that buffer.  We also support directly mapping
the OpRegion through to the host in response to the ASL Storage
register write, which makes the data "live" and potentially provides
write access should the underlying vfio region support writes.  This
option is automatically enabled if we somehow don't support fw_cfg (is
this a good idea?).

Register 0x5C is the base of the stolen memory region (BDSM).
Emulating this register and setting it to zero prevents the driver
from trying to blindly use this address, which might be in-use RAM in
the VM.  This avoids DMAR faults, or potentially VM memory corruption,
but we likely need a better solution, this is just a hack.  It also
avoids framebuffer corruption.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---

Note - Sending v2 of only 7/7, applies in place of v1 7/7.

 hw/vfio/common.c              |    2 -
 hw/vfio/pci.c                 |   99 +++++++++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h                 |    7 +++
 include/hw/vfio/vfio-common.h |    2 +
 4 files changed, 109 insertions(+), 1 deletion(-)

Comments

Gerd Hoffman Feb. 3, 2016, 9:29 a.m. UTC | #1
On Di, 2016-02-02 at 13:09 -0700, Alex Williamson wrote:
> This is provided via a device specific region, look for it on Intel
> VGA class devices.  Our default mechanism to expose this to the BIOS
> is via fw_cfg where it's expected that the BIOS will copy the data
> into a reserved RAM area and update the ASL Storage register to
> reference the GPA of that buffer.

>   We also support directly mapping
> the OpRegion through to the host in response to the ASL Storage
> register write, which makes the data "live" and potentially provides
> write access should the underlying vfio region support writes.

This should better be splitted into a separate patch.

> This
> option is automatically enabled if we somehow don't support fw_cfg (is
> this a good idea?).

I think this can't happen.  And even in case it can: we have bigger
problems than the opregion then.

cheers,
  Gerd
Alex Williamson Feb. 3, 2016, 7:52 p.m. UTC | #2
On Wed, 2016-02-03 at 10:29 +0100, Gerd Hoffmann wrote:
> On Di, 2016-02-02 at 13:09 -0700, Alex Williamson wrote:
> > This is provided via a device specific region, look for it on Intel
> > VGA class devices.  Our default mechanism to expose this to the BIOS
> > is via fw_cfg where it's expected that the BIOS will copy the data
> > into a reserved RAM area and update the ASL Storage register to
> > reference the GPA of that buffer.

> >   We also support directly mapping
> > the OpRegion through to the host in response to the ASL Storage
> > register write, which makes the data "live" and potentially provides
> > write access should the underlying vfio region support writes.

> This should better be splitted into a separate patch.

Ok.  Perhaps I'll drop it for now and make the kernel-level vfio code
expose the OpRegion as read-only as a start.  It's easy enough to dig
out of the mail archives and add back later if we want.

> > This
> > option is automatically enabled if we somehow don't support fw_cfg (is
> > this a good idea?).

> I think this can't happen.  And even in case it can: we have bigger
> problems than the opregion then.

A few of the other fw_cfg_find() users seem to handle the error case,
which is why I ask.  Thanks,

Alex
diff mbox

Patch

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 879a657..c201bee 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -493,7 +493,7 @@  static void vfio_listener_release(VFIOContainer *container)
     memory_listener_unregister(&container->listener);
 }
 
-static struct vfio_info_cap_header *
+struct vfio_info_cap_header *
 vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id)
 {
     struct vfio_info_cap_header *hdr;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8e20781..97362f9 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -23,6 +23,7 @@ 
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 
+#include "hw/nvram/fw_cfg.h"
 #include "hw/pci/msi.h"
 #include "hw/pci/msix.h"
 #include "hw/pci/pci_bridge.h"
@@ -34,6 +35,8 @@ 
 #include "trace.h"
 
 #define MSIX_CAP_LENGTH 12
+#define IGD_OPREGION 0xFC /* ASL Storage register */
+#define IGD_BDSM 0x5C     /* Base Data of Stolen Memory register */
 
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
@@ -1108,6 +1111,25 @@  void vfio_pci_write_config(PCIDevice *pdev,
         } else if (was_enabled && !is_enabled) {
             vfio_msix_disable(vdev);
         }
+    } else if (vdev->igd_opregion && vdev->igd_opregion_map &&
+               ranges_overlap(addr, len, IGD_OPREGION, 4)) {
+        uint32_t orig, cur;
+
+        orig = pci_get_long(pdev->config + IGD_OPREGION);
+        pci_default_write_config(pdev, addr, val, len);
+        cur = pci_get_long(pdev->config + IGD_OPREGION);
+
+        if (cur != orig) {
+            if (orig) {
+                memory_region_del_subregion(get_system_memory(),
+                                            vdev->igd_opregion->region->mem);
+            }
+
+            if (cur) {
+                memory_region_add_subregion(get_system_memory(), cur,
+                                            vdev->igd_opregion->region->mem);
+            }
+        }
     } else {
         /* Write everything to QEMU to keep emulated bits correct */
         pci_default_write_config(pdev, addr, val, len);
@@ -1459,6 +1481,10 @@  static void vfio_bars_exit(VFIOPCIDevice *vdev)
         pci_unregister_vga(&vdev->pdev);
         vfio_vga_quirk_exit(vdev);
     }
+
+    if (vdev->igd_opregion) {
+        vfio_region_exit(vdev->igd_opregion->region);
+    }
 }
 
 static void vfio_bars_finalize(VFIOPCIDevice *vdev)
@@ -1477,6 +1503,13 @@  static void vfio_bars_finalize(VFIOPCIDevice *vdev)
         }
         g_free(vdev->vga);
     }
+
+    if (vdev->igd_opregion) {
+        vfio_region_finalize(vdev->igd_opregion->region);
+        g_free(vdev->igd_opregion->region);
+        g_free(vdev->igd_opregion->data);
+        g_free(vdev->igd_opregion);
+    }
 }
 
 /*
@@ -2123,6 +2156,64 @@  static int vfio_populate_device(VFIOPCIDevice *vdev)
         QLIST_INIT(&vdev->vga->region[QEMU_PCI_VGA_IO_HI].quirks);
     }
 
+    if (vbasedev->num_regions > VFIO_PCI_NUM_REGIONS) {
+        for (i = VFIO_PCI_NUM_REGIONS; i < vbasedev->num_regions; i++) {
+            struct vfio_info_cap_header *hdr;
+            struct vfio_region_info_cap_type *type;
+
+            ret = vfio_get_region_info(vbasedev, i, &reg_info);
+            if (ret) {
+                continue;
+            }
+
+            hdr = vfio_get_region_info_cap(reg_info, VFIO_REGION_INFO_CAP_TYPE);
+            if (!hdr) {
+                g_free(reg_info);
+                continue;
+            }
+
+            type = container_of(hdr, struct vfio_region_info_cap_type, header);
+            if (type->type ==
+                (VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL) &&
+                type->subtype == VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION) {
+                VFIORegion *region;
+                char *name;
+                FWCfgState *fw_cfg;
+
+                vdev->igd_opregion = g_new0(VFIOIGDOpRegion, 1);
+                region = vdev->igd_opregion->region = g_new0(VFIORegion, 1);
+                name = g_strdup_printf("%s IGD OpRegion", vbasedev->name);
+
+                ret = vfio_region_setup(OBJECT(vdev), vbasedev,
+                                        region, i, name);
+                g_free(name);
+
+                if (ret) {
+                    error_report("vfio: Error setting up IGD OpRegion\n");
+                    goto error;
+                }
+
+                fw_cfg = fw_cfg_find();
+                if (fw_cfg) {
+		    vdev->igd_opregion->data = g_malloc0(region->size);
+                    ret = pread(vdev->vbasedev.fd, vdev->igd_opregion->data,
+                                region->size, region->fd_offset);
+                    if (ret != region->size) {
+                        error_report("vfio: Error reading IGD OpRegion\n");
+                        goto error;
+                    }
+
+                    fw_cfg_add_file(fw_cfg, "etc/igd-opregion",
+                                    vdev->igd_opregion->data, region->size);
+                } else {
+                    vdev->igd_opregion_map = true;
+                }
+            }
+
+            g_free(reg_info);
+        }
+    }
+
     irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
 
     ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
@@ -2525,6 +2616,12 @@  static int vfio_initfn(PCIDevice *pdev)
                vdev->msi_cap_size);
     }
 
+    if (vdev->igd_opregion) {
+        pci_set_long(vdev->pdev.config + IGD_OPREGION, 0);
+        pci_set_long(vdev->pdev.wmask + IGD_OPREGION, ~0);
+        vfio_add_emulated_long(vdev, IGD_BDSM, 0, ~0);
+    }
+
     if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
         vdev->intx.mmap_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                                   vfio_intx_mmap_enable, vdev);
@@ -2635,6 +2732,8 @@  static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
     DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
     DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
+    DEFINE_PROP_BOOL("x-igd-opregion-map",
+                     VFIOPCIDevice, igd_opregion_map, false),
     DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice, vendor_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice, device_id, PCI_ANY_ID),
     DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index b8a7189..a24e6f5 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -98,6 +98,11 @@  typedef struct VFIOMSIXInfo {
     unsigned long *pending;
 } VFIOMSIXInfo;
 
+typedef struct VFIOIGDOpRegion {
+    void *data;
+    VFIORegion *region;
+} VFIOIGDOpRegion;
+
 typedef struct VFIOPCIDevice {
     PCIDevice pdev;
     VFIODevice vbasedev;
@@ -115,6 +120,7 @@  typedef struct VFIOPCIDevice {
     int interrupt; /* Current interrupt type */
     VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
     VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
+    VFIOIGDOpRegion *igd_opregion;
     PCIHostDeviceAddress host;
     EventNotifier err_notifier;
     EventNotifier req_notifier;
@@ -139,6 +145,7 @@  typedef struct VFIOPCIDevice {
     bool no_kvm_intx;
     bool no_kvm_msi;
     bool no_kvm_msix;
+    bool igd_opregion_map;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 594905a..45ba6a3 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -146,6 +146,8 @@  int vfio_get_device(VFIOGroup *group, const char *name,
                     VFIODevice *vbasedev);
 int vfio_get_region_info(VFIODevice *vbasedev, int index,
                          struct vfio_region_info **info);
+struct vfio_info_cap_header *
+    vfio_get_region_info_cap(struct vfio_region_info *info, uint16_t id);
 
 extern const MemoryRegionOps vfio_region_ops;
 extern QLIST_HEAD(vfio_group_head, VFIOGroup) vfio_group_list;