Message ID | 1562665760-26158-5-git-send-email-kwankhede@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add migration support for VFIO device | expand |
* Kirti Wankhede (kwankhede@nvidia.com) wrote: > These functions save and restore PCI device specific data - config > space of PCI device. > Tested save and restore with MSI and MSIX type. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > hw/vfio/pci.c | 114 ++++++++++++++++++++++++++++++++++++++++++ > include/hw/vfio/vfio-common.h | 2 + > 2 files changed, 116 insertions(+) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index de0d286fc9dd..5fe4f8076cac 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) > return OBJECT(vdev); > } > > +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) > +{ > + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > + PCIDevice *pdev = &vdev->pdev; > + uint16_t pci_cmd; > + int i; > + > + for (i = 0; i < PCI_ROM_SLOT; i++) { > + uint32_t bar; > + > + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4); > + qemu_put_be32(f, bar); > + } > + > + qemu_put_be32(f, vdev->interrupt); > + if (vdev->interrupt == VFIO_INT_MSI) { > + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > + bool msi_64bit; > + > + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > + 2); > + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > + > + msi_addr_lo = pci_default_read_config(pdev, > + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); > + qemu_put_be32(f, msi_addr_lo); > + > + if (msi_64bit) { > + msi_addr_hi = pci_default_read_config(pdev, > + pdev->msi_cap + PCI_MSI_ADDRESS_HI, > + 4); > + } > + qemu_put_be32(f, msi_addr_hi); > + > + msi_data = pci_default_read_config(pdev, > + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), > + 2); > + qemu_put_be32(f, msi_data); > + } else if (vdev->interrupt == VFIO_INT_MSIX) { > + uint16_t offset; > + > + /* save enable bit and maskall bit */ > + offset = pci_default_read_config(pdev, > + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); > + qemu_put_be16(f, offset); > + msix_save(pdev, f); > + } > + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > + qemu_put_be16(f, pci_cmd); > +} > + > +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) > +{ > + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > + PCIDevice *pdev = &vdev->pdev; > + uint32_t interrupt_type; > + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > + uint16_t pci_cmd; > + bool msi_64bit; > + int i; > + > + /* retore pci bar configuration */ > + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > + vfio_pci_write_config(pdev, PCI_COMMAND, > + pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2); > + for (i = 0; i < PCI_ROM_SLOT; i++) { > + uint32_t bar = qemu_get_be32(f); > + > + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4); > + } Is it possible to validate the bar's at all? We just had a bug on a virtual device where one version was asking for a larger bar than the other; our validation caught this in some cases so we could tell that the guest had a BAR that was aligned at the wrong alignment. > + vfio_pci_write_config(pdev, PCI_COMMAND, > + pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2); Can you explain what this is for? You write the command register at the end of the function with the original value; there's no guarantee that the device is using IO for example, so ORing it seems odd. Also, are the other flags in COMMAND safe at this point - e.g. what about interrupts and stuff? > + interrupt_type = qemu_get_be32(f); > + > + if (interrupt_type == VFIO_INT_MSI) { > + /* restore msi configuration */ > + msi_flags = pci_default_read_config(pdev, > + pdev->msi_cap + PCI_MSI_FLAGS, 2); > + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > + > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > + msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2); > + > + msi_addr_lo = qemu_get_be32(f); > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, > + msi_addr_lo, 4); > + > + msi_addr_hi = qemu_get_be32(f); > + if (msi_64bit) { > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, > + msi_addr_hi, 4); > + } > + msi_data = qemu_get_be32(f); > + vfio_pci_write_config(pdev, > + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), > + msi_data, 2); > + > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > + msi_flags | PCI_MSI_FLAGS_ENABLE, 2); > + } else if (interrupt_type == VFIO_INT_MSIX) { > + uint16_t offset = qemu_get_be16(f); > + > + /* load enable bit and maskall bit */ > + vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1, > + offset, 2); > + msix_load(pdev, f); > + } > + pci_cmd = qemu_get_be16(f); > + vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2); > +} > + > static VFIODeviceOps vfio_pci_ops = { > .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, > .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, > .vfio_eoi = vfio_intx_eoi, > .vfio_get_object = vfio_pci_get_object, > + .vfio_save_config = vfio_pci_save_config, > + .vfio_load_config = vfio_pci_load_config, > }; > > int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 771b6d59a3db..ee72bd984a36 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -120,6 +120,8 @@ struct VFIODeviceOps { > int (*vfio_hot_reset_multi)(VFIODevice *vdev); > void (*vfio_eoi)(VFIODevice *vdev); > Object *(*vfio_get_object)(VFIODevice *vdev); > + void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); > + void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); > }; > > typedef struct VFIOGroup { > -- > 2.7.0 > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Tue, 9 Jul 2019 15:19:11 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > These functions save and restore PCI device specific data - config > space of PCI device. > Tested save and restore with MSI and MSIX type. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > hw/vfio/pci.c | 114 ++++++++++++++++++++++++++++++++++++++++++ > include/hw/vfio/vfio-common.h | 2 + > 2 files changed, 116 insertions(+) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index de0d286fc9dd..5fe4f8076cac 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) > return OBJECT(vdev); > } > > +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) > +{ > + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > + PCIDevice *pdev = &vdev->pdev; > + uint16_t pci_cmd; > + int i; > + > + for (i = 0; i < PCI_ROM_SLOT; i++) { > + uint32_t bar; > + > + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4); > + qemu_put_be32(f, bar); > + } > + > + qemu_put_be32(f, vdev->interrupt); > + if (vdev->interrupt == VFIO_INT_MSI) { > + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > + bool msi_64bit; > + > + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > + 2); > + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > + > + msi_addr_lo = pci_default_read_config(pdev, > + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); > + qemu_put_be32(f, msi_addr_lo); > + > + if (msi_64bit) { > + msi_addr_hi = pci_default_read_config(pdev, > + pdev->msi_cap + PCI_MSI_ADDRESS_HI, > + 4); > + } > + qemu_put_be32(f, msi_addr_hi); > + > + msi_data = pci_default_read_config(pdev, > + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), > + 2); > + qemu_put_be32(f, msi_data); > + } else if (vdev->interrupt == VFIO_INT_MSIX) { > + uint16_t offset; > + > + /* save enable bit and maskall bit */ > + offset = pci_default_read_config(pdev, > + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); > + qemu_put_be16(f, offset); > + msix_save(pdev, f); > + } > + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > + qemu_put_be16(f, pci_cmd); > +} > + > +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) > +{ > + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > + PCIDevice *pdev = &vdev->pdev; > + uint32_t interrupt_type; > + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > + uint16_t pci_cmd; > + bool msi_64bit; > + int i; > + > + /* retore pci bar configuration */ > + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > + vfio_pci_write_config(pdev, PCI_COMMAND, > + pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2); > + for (i = 0; i < PCI_ROM_SLOT; i++) { > + uint32_t bar = qemu_get_be32(f); > + > + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4); > + } > + vfio_pci_write_config(pdev, PCI_COMMAND, > + pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2); > + > + interrupt_type = qemu_get_be32(f); > + > + if (interrupt_type == VFIO_INT_MSI) { > + /* restore msi configuration */ > + msi_flags = pci_default_read_config(pdev, > + pdev->msi_cap + PCI_MSI_FLAGS, 2); > + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > + > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > + msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2); > + > + msi_addr_lo = qemu_get_be32(f); > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, > + msi_addr_lo, 4); > + > + msi_addr_hi = qemu_get_be32(f); > + if (msi_64bit) { > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, > + msi_addr_hi, 4); > + } > + msi_data = qemu_get_be32(f); > + vfio_pci_write_config(pdev, > + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), > + msi_data, 2); > + > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > + msi_flags | PCI_MSI_FLAGS_ENABLE, 2); > + } else if (interrupt_type == VFIO_INT_MSIX) { > + uint16_t offset = qemu_get_be16(f); > + > + /* load enable bit and maskall bit */ > + vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1, > + offset, 2); > + msix_load(pdev, f); > + } > + pci_cmd = qemu_get_be16(f); > + vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2); > +} Pardon my migration ignorance, but there are bound to be more fields and capabilities that get migrated over time. How does this get extended to support that and maintain backwards compatibility? Thanks, Alex > + > static VFIODeviceOps vfio_pci_ops = { > .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, > .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, > .vfio_eoi = vfio_intx_eoi, > .vfio_get_object = vfio_pci_get_object, > + .vfio_save_config = vfio_pci_save_config, > + .vfio_load_config = vfio_pci_load_config, > }; > > int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index 771b6d59a3db..ee72bd984a36 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -120,6 +120,8 @@ struct VFIODeviceOps { > int (*vfio_hot_reset_multi)(VFIODevice *vdev); > void (*vfio_eoi)(VFIODevice *vdev); > Object *(*vfio_get_object)(VFIODevice *vdev); > + void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); > + void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); > }; > > typedef struct VFIOGroup {
* Alex Williamson (alex.williamson@redhat.com) wrote: > On Tue, 9 Jul 2019 15:19:11 +0530 > Kirti Wankhede <kwankhede@nvidia.com> wrote: > > > These functions save and restore PCI device specific data - config > > space of PCI device. > > Tested save and restore with MSI and MSIX type. > > > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > > Reviewed-by: Neo Jia <cjia@nvidia.com> > > --- > > hw/vfio/pci.c | 114 ++++++++++++++++++++++++++++++++++++++++++ > > include/hw/vfio/vfio-common.h | 2 + > > 2 files changed, 116 insertions(+) > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > index de0d286fc9dd..5fe4f8076cac 100644 > > --- a/hw/vfio/pci.c > > +++ b/hw/vfio/pci.c > > @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) > > return OBJECT(vdev); > > } > > > > +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) > > +{ > > + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > > + PCIDevice *pdev = &vdev->pdev; > > + uint16_t pci_cmd; > > + int i; > > + > > + for (i = 0; i < PCI_ROM_SLOT; i++) { > > + uint32_t bar; > > + > > + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4); > > + qemu_put_be32(f, bar); > > + } > > + > > + qemu_put_be32(f, vdev->interrupt); > > + if (vdev->interrupt == VFIO_INT_MSI) { > > + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > > + bool msi_64bit; > > + > > + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > > + 2); > > + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > > + > > + msi_addr_lo = pci_default_read_config(pdev, > > + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); > > + qemu_put_be32(f, msi_addr_lo); > > + > > + if (msi_64bit) { > > + msi_addr_hi = pci_default_read_config(pdev, > > + pdev->msi_cap + PCI_MSI_ADDRESS_HI, > > + 4); > > + } > > + qemu_put_be32(f, msi_addr_hi); > > + > > + msi_data = pci_default_read_config(pdev, > > + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), > > + 2); > > + qemu_put_be32(f, msi_data); > > + } else if (vdev->interrupt == VFIO_INT_MSIX) { > > + uint16_t offset; > > + > > + /* save enable bit and maskall bit */ > > + offset = pci_default_read_config(pdev, > > + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); > > + qemu_put_be16(f, offset); > > + msix_save(pdev, f); > > + } > > + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > > + qemu_put_be16(f, pci_cmd); > > +} > > + > > +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) > > +{ > > + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > > + PCIDevice *pdev = &vdev->pdev; > > + uint32_t interrupt_type; > > + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > > + uint16_t pci_cmd; > > + bool msi_64bit; > > + int i; > > + > > + /* retore pci bar configuration */ > > + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > > + vfio_pci_write_config(pdev, PCI_COMMAND, > > + pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2); > > + for (i = 0; i < PCI_ROM_SLOT; i++) { > > + uint32_t bar = qemu_get_be32(f); > > + > > + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4); > > + } > > + vfio_pci_write_config(pdev, PCI_COMMAND, > > + pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2); > > + > > + interrupt_type = qemu_get_be32(f); > > + > > + if (interrupt_type == VFIO_INT_MSI) { > > + /* restore msi configuration */ > > + msi_flags = pci_default_read_config(pdev, > > + pdev->msi_cap + PCI_MSI_FLAGS, 2); > > + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > > + > > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > > + msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2); > > + > > + msi_addr_lo = qemu_get_be32(f); > > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, > > + msi_addr_lo, 4); > > + > > + msi_addr_hi = qemu_get_be32(f); > > + if (msi_64bit) { > > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, > > + msi_addr_hi, 4); > > + } > > + msi_data = qemu_get_be32(f); > > + vfio_pci_write_config(pdev, > > + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), > > + msi_data, 2); > > + > > + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > > + msi_flags | PCI_MSI_FLAGS_ENABLE, 2); > > + } else if (interrupt_type == VFIO_INT_MSIX) { > > + uint16_t offset = qemu_get_be16(f); > > + > > + /* load enable bit and maskall bit */ > > + vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1, > > + offset, 2); > > + msix_load(pdev, f); > > + } > > + pci_cmd = qemu_get_be16(f); > > + vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2); > > +} > > Pardon my migration ignorance, but there are bound to be more fields > and capabilities that get migrated over time. How does this get > extended to support that and maintain backwards compatibility? Thanks, You normally tie those fields to a property on your device and set the property in the machine type so that newer machine types send the extra fields. Dave > Alex > > > + > > static VFIODeviceOps vfio_pci_ops = { > > .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, > > .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, > > .vfio_eoi = vfio_intx_eoi, > > .vfio_get_object = vfio_pci_get_object, > > + .vfio_save_config = vfio_pci_save_config, > > + .vfio_load_config = vfio_pci_load_config, > > }; > > > > int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) > > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > > index 771b6d59a3db..ee72bd984a36 100644 > > --- a/include/hw/vfio/vfio-common.h > > +++ b/include/hw/vfio/vfio-common.h > > @@ -120,6 +120,8 @@ struct VFIODeviceOps { > > int (*vfio_hot_reset_multi)(VFIODevice *vdev); > > void (*vfio_eoi)(VFIODevice *vdev); > > Object *(*vfio_get_object)(VFIODevice *vdev); > > + void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); > > + void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); > > }; > > > > typedef struct VFIOGroup { > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Sorry for delay to respond. On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote: > * Kirti Wankhede (kwankhede@nvidia.com) wrote: >> These functions save and restore PCI device specific data - config >> space of PCI device. >> Tested save and restore with MSI and MSIX type. >> >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >> Reviewed-by: Neo Jia <cjia@nvidia.com> >> --- >> hw/vfio/pci.c | 114 ++++++++++++++++++++++++++++++++++++++++++ >> include/hw/vfio/vfio-common.h | 2 + >> 2 files changed, 116 insertions(+) >> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index de0d286fc9dd..5fe4f8076cac 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) >> return OBJECT(vdev); >> } >> >> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) >> +{ >> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); >> + PCIDevice *pdev = &vdev->pdev; >> + uint16_t pci_cmd; >> + int i; >> + >> + for (i = 0; i < PCI_ROM_SLOT; i++) { >> + uint32_t bar; >> + >> + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4); >> + qemu_put_be32(f, bar); >> + } >> + >> + qemu_put_be32(f, vdev->interrupt); >> + if (vdev->interrupt == VFIO_INT_MSI) { >> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; >> + bool msi_64bit; >> + >> + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, >> + 2); >> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); >> + >> + msi_addr_lo = pci_default_read_config(pdev, >> + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); >> + qemu_put_be32(f, msi_addr_lo); >> + >> + if (msi_64bit) { >> + msi_addr_hi = pci_default_read_config(pdev, >> + pdev->msi_cap + PCI_MSI_ADDRESS_HI, >> + 4); >> + } >> + qemu_put_be32(f, msi_addr_hi); >> + >> + msi_data = pci_default_read_config(pdev, >> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), >> + 2); >> + qemu_put_be32(f, msi_data); >> + } else if (vdev->interrupt == VFIO_INT_MSIX) { >> + uint16_t offset; >> + >> + /* save enable bit and maskall bit */ >> + offset = pci_default_read_config(pdev, >> + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); >> + qemu_put_be16(f, offset); >> + msix_save(pdev, f); >> + } >> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); >> + qemu_put_be16(f, pci_cmd); >> +} >> + >> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) >> +{ >> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); >> + PCIDevice *pdev = &vdev->pdev; >> + uint32_t interrupt_type; >> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; >> + uint16_t pci_cmd; >> + bool msi_64bit; >> + int i; >> + >> + /* retore pci bar configuration */ >> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); >> + vfio_pci_write_config(pdev, PCI_COMMAND, >> + pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2); >> + for (i = 0; i < PCI_ROM_SLOT; i++) { >> + uint32_t bar = qemu_get_be32(f); >> + >> + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4); >> + } > > Is it possible to validate the bar's at all? We just had a bug on a > virtual device where one version was asking for a larger bar than the > other; our validation caught this in some cases so we could tell that > the guest had a BAR that was aligned at the wrong alignment. > "Validate the bars" does that means validate size of bars? >> + vfio_pci_write_config(pdev, PCI_COMMAND, >> + pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2); > > Can you explain what this is for? You write the command register at the > end of the function with the original value; there's no guarantee that > the device is using IO for example, so ORing it seems odd. > IO space and memory space accesses are disabled before writing BAR addresses, only those are enabled here. > Also, are the other flags in COMMAND safe at this point - e.g. what > about interrupts and stuff? > COMMAND registers is saved from stop-and-copy phase, interrupt should be disabled, then restoring here when vCPU are not yet running. >> + interrupt_type = qemu_get_be32(f); >> + >> + if (interrupt_type == VFIO_INT_MSI) { >> + /* restore msi configuration */ >> + msi_flags = pci_default_read_config(pdev, >> + pdev->msi_cap + PCI_MSI_FLAGS, 2); >> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); >> + >> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, >> + msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2); >> + >> + msi_addr_lo = qemu_get_be32(f); >> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, >> + msi_addr_lo, 4); >> + >> + msi_addr_hi = qemu_get_be32(f); >> + if (msi_64bit) { >> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, >> + msi_addr_hi, 4); >> + } >> + msi_data = qemu_get_be32(f); >> + vfio_pci_write_config(pdev, >> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), >> + msi_data, 2); >> + >> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, >> + msi_flags | PCI_MSI_FLAGS_ENABLE, 2); >> + } else if (interrupt_type == VFIO_INT_MSIX) { >> + uint16_t offset = qemu_get_be16(f); >> + >> + /* load enable bit and maskall bit */ >> + vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1, >> + offset, 2); >> + msix_load(pdev, f); >> + } >> + pci_cmd = qemu_get_be16(f); >> + vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2); >> +} >> + >> static VFIODeviceOps vfio_pci_ops = { >> .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, >> .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, >> .vfio_eoi = vfio_intx_eoi, >> .vfio_get_object = vfio_pci_get_object, >> + .vfio_save_config = vfio_pci_save_config, >> + .vfio_load_config = vfio_pci_load_config, >> }; >> >> int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >> index 771b6d59a3db..ee72bd984a36 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -120,6 +120,8 @@ struct VFIODeviceOps { >> int (*vfio_hot_reset_multi)(VFIODevice *vdev); >> void (*vfio_eoi)(VFIODevice *vdev); >> Object *(*vfio_get_object)(VFIODevice *vdev); >> + void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); >> + void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); >> }; >> >> typedef struct VFIOGroup { >> -- >> 2.7.0 >> > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK >
* Kirti Wankhede (kwankhede@nvidia.com) wrote: > Sorry for delay to respond. > > On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote: > > * Kirti Wankhede (kwankhede@nvidia.com) wrote: > >> These functions save and restore PCI device specific data - config > >> space of PCI device. > >> Tested save and restore with MSI and MSIX type. > >> > >> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > >> Reviewed-by: Neo Jia <cjia@nvidia.com> > >> --- > >> hw/vfio/pci.c | 114 ++++++++++++++++++++++++++++++++++++++++++ > >> include/hw/vfio/vfio-common.h | 2 + > >> 2 files changed, 116 insertions(+) > >> > >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > >> index de0d286fc9dd..5fe4f8076cac 100644 > >> --- a/hw/vfio/pci.c > >> +++ b/hw/vfio/pci.c > >> @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) > >> return OBJECT(vdev); > >> } > >> > >> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) > >> +{ > >> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > >> + PCIDevice *pdev = &vdev->pdev; > >> + uint16_t pci_cmd; > >> + int i; > >> + > >> + for (i = 0; i < PCI_ROM_SLOT; i++) { > >> + uint32_t bar; > >> + > >> + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4); > >> + qemu_put_be32(f, bar); > >> + } > >> + > >> + qemu_put_be32(f, vdev->interrupt); > >> + if (vdev->interrupt == VFIO_INT_MSI) { > >> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > >> + bool msi_64bit; > >> + > >> + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > >> + 2); > >> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > >> + > >> + msi_addr_lo = pci_default_read_config(pdev, > >> + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); > >> + qemu_put_be32(f, msi_addr_lo); > >> + > >> + if (msi_64bit) { > >> + msi_addr_hi = pci_default_read_config(pdev, > >> + pdev->msi_cap + PCI_MSI_ADDRESS_HI, > >> + 4); > >> + } > >> + qemu_put_be32(f, msi_addr_hi); > >> + > >> + msi_data = pci_default_read_config(pdev, > >> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), > >> + 2); > >> + qemu_put_be32(f, msi_data); > >> + } else if (vdev->interrupt == VFIO_INT_MSIX) { > >> + uint16_t offset; > >> + > >> + /* save enable bit and maskall bit */ > >> + offset = pci_default_read_config(pdev, > >> + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); > >> + qemu_put_be16(f, offset); > >> + msix_save(pdev, f); > >> + } > >> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > >> + qemu_put_be16(f, pci_cmd); > >> +} > >> + > >> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) > >> +{ > >> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > >> + PCIDevice *pdev = &vdev->pdev; > >> + uint32_t interrupt_type; > >> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > >> + uint16_t pci_cmd; > >> + bool msi_64bit; > >> + int i; > >> + > >> + /* retore pci bar configuration */ > >> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > >> + vfio_pci_write_config(pdev, PCI_COMMAND, > >> + pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2); > >> + for (i = 0; i < PCI_ROM_SLOT; i++) { > >> + uint32_t bar = qemu_get_be32(f); > >> + > >> + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4); > >> + } > > > > Is it possible to validate the bar's at all? We just had a bug on a > > virtual device where one version was asking for a larger bar than the > > other; our validation caught this in some cases so we could tell that > > the guest had a BAR that was aligned at the wrong alignment. > > > > "Validate the bars" does that means validate size of bars? I meant validate the address programmed into the BAR against the size, assuming you know the size; e.g. if it's a 128MB BAR, then make sure the address programmed in is 128MB aligned. > >> + vfio_pci_write_config(pdev, PCI_COMMAND, > >> + pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2); > > > > Can you explain what this is for? You write the command register at the > > end of the function with the original value; there's no guarantee that > > the device is using IO for example, so ORing it seems odd. > > > > IO space and memory space accesses are disabled before writing BAR > addresses, only those are enabled here. But do you need to enable them here, or can it wait until the pci_cmd write at the end of the function? > > Also, are the other flags in COMMAND safe at this point - e.g. what > > about interrupts and stuff? > > > > COMMAND registers is saved from stop-and-copy phase, interrupt should be > disabled, then restoring here when vCPU are not yet running. Dave > >> + interrupt_type = qemu_get_be32(f); > >> + > >> + if (interrupt_type == VFIO_INT_MSI) { > >> + /* restore msi configuration */ > >> + msi_flags = pci_default_read_config(pdev, > >> + pdev->msi_cap + PCI_MSI_FLAGS, 2); > >> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > >> + > >> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > >> + msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2); > >> + > >> + msi_addr_lo = qemu_get_be32(f); > >> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, > >> + msi_addr_lo, 4); > >> + > >> + msi_addr_hi = qemu_get_be32(f); > >> + if (msi_64bit) { > >> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, > >> + msi_addr_hi, 4); > >> + } > >> + msi_data = qemu_get_be32(f); > >> + vfio_pci_write_config(pdev, > >> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), > >> + msi_data, 2); > >> + > >> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > >> + msi_flags | PCI_MSI_FLAGS_ENABLE, 2); > >> + } else if (interrupt_type == VFIO_INT_MSIX) { > >> + uint16_t offset = qemu_get_be16(f); > >> + > >> + /* load enable bit and maskall bit */ > >> + vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1, > >> + offset, 2); > >> + msix_load(pdev, f); > >> + } > >> + pci_cmd = qemu_get_be16(f); > >> + vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2); > >> +} > >> + > >> static VFIODeviceOps vfio_pci_ops = { > >> .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, > >> .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, > >> .vfio_eoi = vfio_intx_eoi, > >> .vfio_get_object = vfio_pci_get_object, > >> + .vfio_save_config = vfio_pci_save_config, > >> + .vfio_load_config = vfio_pci_load_config, > >> }; > >> > >> int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) > >> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > >> index 771b6d59a3db..ee72bd984a36 100644 > >> --- a/include/hw/vfio/vfio-common.h > >> +++ b/include/hw/vfio/vfio-common.h > >> @@ -120,6 +120,8 @@ struct VFIODeviceOps { > >> int (*vfio_hot_reset_multi)(VFIODevice *vdev); > >> void (*vfio_eoi)(VFIODevice *vdev); > >> Object *(*vfio_get_object)(VFIODevice *vdev); > >> + void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); > >> + void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); > >> }; > >> > >> typedef struct VFIOGroup { > >> -- > >> 2.7.0 > >> > > -- > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 8/22/2019 3:02 PM, Dr. David Alan Gilbert wrote: > * Kirti Wankhede (kwankhede@nvidia.com) wrote: >> Sorry for delay to respond. >> >> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote: >>> * Kirti Wankhede (kwankhede@nvidia.com) wrote: >>>> These functions save and restore PCI device specific data - config >>>> space of PCI device. >>>> Tested save and restore with MSI and MSIX type. >>>> >>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> >>>> Reviewed-by: Neo Jia <cjia@nvidia.com> >>>> --- >>>> hw/vfio/pci.c | 114 ++++++++++++++++++++++++++++++++++++++++++ >>>> include/hw/vfio/vfio-common.h | 2 + >>>> 2 files changed, 116 insertions(+) >>>> >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >>>> index de0d286fc9dd..5fe4f8076cac 100644 >>>> --- a/hw/vfio/pci.c >>>> +++ b/hw/vfio/pci.c >>>> @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) >>>> return OBJECT(vdev); >>>> } >>>> >>>> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) >>>> +{ >>>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); >>>> + PCIDevice *pdev = &vdev->pdev; >>>> + uint16_t pci_cmd; >>>> + int i; >>>> + >>>> + for (i = 0; i < PCI_ROM_SLOT; i++) { >>>> + uint32_t bar; >>>> + >>>> + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4); >>>> + qemu_put_be32(f, bar); >>>> + } >>>> + >>>> + qemu_put_be32(f, vdev->interrupt); >>>> + if (vdev->interrupt == VFIO_INT_MSI) { >>>> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; >>>> + bool msi_64bit; >>>> + >>>> + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, >>>> + 2); >>>> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); >>>> + >>>> + msi_addr_lo = pci_default_read_config(pdev, >>>> + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); >>>> + qemu_put_be32(f, msi_addr_lo); >>>> + >>>> + if (msi_64bit) { >>>> + msi_addr_hi = pci_default_read_config(pdev, >>>> + pdev->msi_cap + PCI_MSI_ADDRESS_HI, >>>> + 4); >>>> + } >>>> + qemu_put_be32(f, msi_addr_hi); >>>> + >>>> + msi_data = pci_default_read_config(pdev, >>>> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), >>>> + 2); >>>> + qemu_put_be32(f, msi_data); >>>> + } else if (vdev->interrupt == VFIO_INT_MSIX) { >>>> + uint16_t offset; >>>> + >>>> + /* save enable bit and maskall bit */ >>>> + offset = pci_default_read_config(pdev, >>>> + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); >>>> + qemu_put_be16(f, offset); >>>> + msix_save(pdev, f); >>>> + } >>>> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); >>>> + qemu_put_be16(f, pci_cmd); >>>> +} >>>> + >>>> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) >>>> +{ >>>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); >>>> + PCIDevice *pdev = &vdev->pdev; >>>> + uint32_t interrupt_type; >>>> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; >>>> + uint16_t pci_cmd; >>>> + bool msi_64bit; >>>> + int i; >>>> + >>>> + /* retore pci bar configuration */ >>>> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); >>>> + vfio_pci_write_config(pdev, PCI_COMMAND, >>>> + pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2); >>>> + for (i = 0; i < PCI_ROM_SLOT; i++) { >>>> + uint32_t bar = qemu_get_be32(f); >>>> + >>>> + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4); >>>> + } >>> >>> Is it possible to validate the bar's at all? We just had a bug on a >>> virtual device where one version was asking for a larger bar than the >>> other; our validation caught this in some cases so we could tell that >>> the guest had a BAR that was aligned at the wrong alignment. >>> >> >> "Validate the bars" does that means validate size of bars? > > I meant validate the address programmed into the BAR against the size, > assuming you know the size; e.g. if it's a 128MB BAR, then make sure the > address programmed in is 128MB aligned. > If this validation fails, migration resume should fail, right? >>>> + vfio_pci_write_config(pdev, PCI_COMMAND, >>>> + pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2); >>> >>> Can you explain what this is for? You write the command register at the >>> end of the function with the original value; there's no guarantee that >>> the device is using IO for example, so ORing it seems odd. >>> >> >> IO space and memory space accesses are disabled before writing BAR >> addresses, only those are enabled here. > > But do you need to enable them here, or can it wait until the pci_cmd > write at the end of the function? > Ok, it can wait. Thanks, Kirti >>> Also, are the other flags in COMMAND safe at this point - e.g. what >>> about interrupts and stuff? >>> >> >> COMMAND registers is saved from stop-and-copy phase, interrupt should be >> disabled, then restoring here when vCPU are not yet running. > > Dave > >>>> + interrupt_type = qemu_get_be32(f); >>>> + >>>> + if (interrupt_type == VFIO_INT_MSI) { >>>> + /* restore msi configuration */ >>>> + msi_flags = pci_default_read_config(pdev, >>>> + pdev->msi_cap + PCI_MSI_FLAGS, 2); >>>> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); >>>> + >>>> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, >>>> + msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2); >>>> + >>>> + msi_addr_lo = qemu_get_be32(f); >>>> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, >>>> + msi_addr_lo, 4); >>>> + >>>> + msi_addr_hi = qemu_get_be32(f); >>>> + if (msi_64bit) { >>>> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, >>>> + msi_addr_hi, 4); >>>> + } >>>> + msi_data = qemu_get_be32(f); >>>> + vfio_pci_write_config(pdev, >>>> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), >>>> + msi_data, 2); >>>> + >>>> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, >>>> + msi_flags | PCI_MSI_FLAGS_ENABLE, 2); >>>> + } else if (interrupt_type == VFIO_INT_MSIX) { >>>> + uint16_t offset = qemu_get_be16(f); >>>> + >>>> + /* load enable bit and maskall bit */ >>>> + vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1, >>>> + offset, 2); >>>> + msix_load(pdev, f); >>>> + } >>>> + pci_cmd = qemu_get_be16(f); >>>> + vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2); >>>> +} >>>> + >>>> static VFIODeviceOps vfio_pci_ops = { >>>> .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, >>>> .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, >>>> .vfio_eoi = vfio_intx_eoi, >>>> .vfio_get_object = vfio_pci_get_object, >>>> + .vfio_save_config = vfio_pci_save_config, >>>> + .vfio_load_config = vfio_pci_load_config, >>>> }; >>>> >>>> int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) >>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h >>>> index 771b6d59a3db..ee72bd984a36 100644 >>>> --- a/include/hw/vfio/vfio-common.h >>>> +++ b/include/hw/vfio/vfio-common.h >>>> @@ -120,6 +120,8 @@ struct VFIODeviceOps { >>>> int (*vfio_hot_reset_multi)(VFIODevice *vdev); >>>> void (*vfio_eoi)(VFIODevice *vdev); >>>> Object *(*vfio_get_object)(VFIODevice *vdev); >>>> + void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); >>>> + void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); >>>> }; >>>> >>>> typedef struct VFIOGroup { >>>> -- >>>> 2.7.0 >>>> >>> -- >>> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK >>> > -- > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK >
* Kirti Wankhede (kwankhede@nvidia.com) wrote: > > > On 8/22/2019 3:02 PM, Dr. David Alan Gilbert wrote: > > * Kirti Wankhede (kwankhede@nvidia.com) wrote: > >> Sorry for delay to respond. > >> > >> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote: > >>> * Kirti Wankhede (kwankhede@nvidia.com) wrote: > >>>> These functions save and restore PCI device specific data - config > >>>> space of PCI device. > >>>> Tested save and restore with MSI and MSIX type. > >>>> > >>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > >>>> Reviewed-by: Neo Jia <cjia@nvidia.com> > >>>> --- > >>>> hw/vfio/pci.c | 114 ++++++++++++++++++++++++++++++++++++++++++ > >>>> include/hw/vfio/vfio-common.h | 2 + > >>>> 2 files changed, 116 insertions(+) > >>>> > >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > >>>> index de0d286fc9dd..5fe4f8076cac 100644 > >>>> --- a/hw/vfio/pci.c > >>>> +++ b/hw/vfio/pci.c > >>>> @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) > >>>> return OBJECT(vdev); > >>>> } > >>>> > >>>> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) > >>>> +{ > >>>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > >>>> + PCIDevice *pdev = &vdev->pdev; > >>>> + uint16_t pci_cmd; > >>>> + int i; > >>>> + > >>>> + for (i = 0; i < PCI_ROM_SLOT; i++) { > >>>> + uint32_t bar; > >>>> + > >>>> + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4); > >>>> + qemu_put_be32(f, bar); > >>>> + } > >>>> + > >>>> + qemu_put_be32(f, vdev->interrupt); > >>>> + if (vdev->interrupt == VFIO_INT_MSI) { > >>>> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > >>>> + bool msi_64bit; > >>>> + > >>>> + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > >>>> + 2); > >>>> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > >>>> + > >>>> + msi_addr_lo = pci_default_read_config(pdev, > >>>> + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); > >>>> + qemu_put_be32(f, msi_addr_lo); > >>>> + > >>>> + if (msi_64bit) { > >>>> + msi_addr_hi = pci_default_read_config(pdev, > >>>> + pdev->msi_cap + PCI_MSI_ADDRESS_HI, > >>>> + 4); > >>>> + } > >>>> + qemu_put_be32(f, msi_addr_hi); > >>>> + > >>>> + msi_data = pci_default_read_config(pdev, > >>>> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), > >>>> + 2); > >>>> + qemu_put_be32(f, msi_data); > >>>> + } else if (vdev->interrupt == VFIO_INT_MSIX) { > >>>> + uint16_t offset; > >>>> + > >>>> + /* save enable bit and maskall bit */ > >>>> + offset = pci_default_read_config(pdev, > >>>> + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); > >>>> + qemu_put_be16(f, offset); > >>>> + msix_save(pdev, f); > >>>> + } > >>>> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > >>>> + qemu_put_be16(f, pci_cmd); > >>>> +} > >>>> + > >>>> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) > >>>> +{ > >>>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); > >>>> + PCIDevice *pdev = &vdev->pdev; > >>>> + uint32_t interrupt_type; > >>>> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > >>>> + uint16_t pci_cmd; > >>>> + bool msi_64bit; > >>>> + int i; > >>>> + > >>>> + /* retore pci bar configuration */ > >>>> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > >>>> + vfio_pci_write_config(pdev, PCI_COMMAND, > >>>> + pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2); > >>>> + for (i = 0; i < PCI_ROM_SLOT; i++) { > >>>> + uint32_t bar = qemu_get_be32(f); > >>>> + > >>>> + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4); > >>>> + } > >>> > >>> Is it possible to validate the bar's at all? We just had a bug on a > >>> virtual device where one version was asking for a larger bar than the > >>> other; our validation caught this in some cases so we could tell that > >>> the guest had a BAR that was aligned at the wrong alignment. > >>> > >> > >> "Validate the bars" does that means validate size of bars? > > > > I meant validate the address programmed into the BAR against the size, > > assuming you know the size; e.g. if it's a 128MB BAR, then make sure the > > address programmed in is 128MB aligned. > > > > If this validation fails, migration resume should fail, right? Yes I think so; if you've got a device that wants 128MB alignment and someone gives you a non-aligned address, who knows what will happen. > > >>>> + vfio_pci_write_config(pdev, PCI_COMMAND, > >>>> + pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2); > >>> > >>> Can you explain what this is for? You write the command register at the > >>> end of the function with the original value; there's no guarantee that > >>> the device is using IO for example, so ORing it seems odd. > >>> > >> > >> IO space and memory space accesses are disabled before writing BAR > >> addresses, only those are enabled here. > > > > But do you need to enable them here, or can it wait until the pci_cmd > > write at the end of the function? > > > > Ok, it can wait. Great. Dave > Thanks, > Kirti > > > >>> Also, are the other flags in COMMAND safe at this point - e.g. what > >>> about interrupts and stuff? > >>> > >> > >> COMMAND registers is saved from stop-and-copy phase, interrupt should be > >> disabled, then restoring here when vCPU are not yet running. > > > > Dave > > > >>>> + interrupt_type = qemu_get_be32(f); > >>>> + > >>>> + if (interrupt_type == VFIO_INT_MSI) { > >>>> + /* restore msi configuration */ > >>>> + msi_flags = pci_default_read_config(pdev, > >>>> + pdev->msi_cap + PCI_MSI_FLAGS, 2); > >>>> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > >>>> + > >>>> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > >>>> + msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2); > >>>> + > >>>> + msi_addr_lo = qemu_get_be32(f); > >>>> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, > >>>> + msi_addr_lo, 4); > >>>> + > >>>> + msi_addr_hi = qemu_get_be32(f); > >>>> + if (msi_64bit) { > >>>> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, > >>>> + msi_addr_hi, 4); > >>>> + } > >>>> + msi_data = qemu_get_be32(f); > >>>> + vfio_pci_write_config(pdev, > >>>> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), > >>>> + msi_data, 2); > >>>> + > >>>> + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, > >>>> + msi_flags | PCI_MSI_FLAGS_ENABLE, 2); > >>>> + } else if (interrupt_type == VFIO_INT_MSIX) { > >>>> + uint16_t offset = qemu_get_be16(f); > >>>> + > >>>> + /* load enable bit and maskall bit */ > >>>> + vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1, > >>>> + offset, 2); > >>>> + msix_load(pdev, f); > >>>> + } > >>>> + pci_cmd = qemu_get_be16(f); > >>>> + vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2); > >>>> +} > >>>> + > >>>> static VFIODeviceOps vfio_pci_ops = { > >>>> .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, > >>>> .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, > >>>> .vfio_eoi = vfio_intx_eoi, > >>>> .vfio_get_object = vfio_pci_get_object, > >>>> + .vfio_save_config = vfio_pci_save_config, > >>>> + .vfio_load_config = vfio_pci_load_config, > >>>> }; > >>>> > >>>> int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) > >>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > >>>> index 771b6d59a3db..ee72bd984a36 100644 > >>>> --- a/include/hw/vfio/vfio-common.h > >>>> +++ b/include/hw/vfio/vfio-common.h > >>>> @@ -120,6 +120,8 @@ struct VFIODeviceOps { > >>>> int (*vfio_hot_reset_multi)(VFIODevice *vdev); > >>>> void (*vfio_eoi)(VFIODevice *vdev); > >>>> Object *(*vfio_get_object)(VFIODevice *vdev); > >>>> + void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); > >>>> + void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); > >>>> }; > >>>> > >>>> typedef struct VFIOGroup { > >>>> -- > >>>> 2.7.0 > >>>> > >>> -- > >>> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > >>> > > -- > > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK > > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com] > Sent: Friday, August 23, 2019 3:13 AM > > * Kirti Wankhede (kwankhede@nvidia.com) wrote: > > > > > > On 8/22/2019 3:02 PM, Dr. David Alan Gilbert wrote: > > > * Kirti Wankhede (kwankhede@nvidia.com) wrote: > > >> Sorry for delay to respond. > > >> > > >> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote: > > >>> * Kirti Wankhede (kwankhede@nvidia.com) wrote: > > >>>> These functions save and restore PCI device specific data - config > > >>>> space of PCI device. > > >>>> Tested save and restore with MSI and MSIX type. > > >>>> > > >>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > > >>>> Reviewed-by: Neo Jia <cjia@nvidia.com> > > >>>> --- > > >>>> hw/vfio/pci.c | 114 > ++++++++++++++++++++++++++++++++++++++++++ > > >>>> include/hw/vfio/vfio-common.h | 2 + > > >>>> 2 files changed, 116 insertions(+) > > >>>> > > >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > >>>> index de0d286fc9dd..5fe4f8076cac 100644 > > >>>> --- a/hw/vfio/pci.c > > >>>> +++ b/hw/vfio/pci.c > > >>>> @@ -2395,11 +2395,125 @@ static Object > *vfio_pci_get_object(VFIODevice *vbasedev) > > >>>> return OBJECT(vdev); > > >>>> } > > >>>> > > >>>> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) > > >>>> +{ > > >>>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, > vbasedev); > > >>>> + PCIDevice *pdev = &vdev->pdev; > > >>>> + uint16_t pci_cmd; > > >>>> + int i; > > >>>> + > > >>>> + for (i = 0; i < PCI_ROM_SLOT; i++) { > > >>>> + uint32_t bar; > > >>>> + > > >>>> + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * > 4, 4); > > >>>> + qemu_put_be32(f, bar); > > >>>> + } > > >>>> + > > >>>> + qemu_put_be32(f, vdev->interrupt); > > >>>> + if (vdev->interrupt == VFIO_INT_MSI) { > > >>>> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > > >>>> + bool msi_64bit; > > >>>> + > > >>>> + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + > PCI_MSI_FLAGS, > > >>>> + 2); > > >>>> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > > >>>> + > > >>>> + msi_addr_lo = pci_default_read_config(pdev, > > >>>> + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); > > >>>> + qemu_put_be32(f, msi_addr_lo); > > >>>> + > > >>>> + if (msi_64bit) { > > >>>> + msi_addr_hi = pci_default_read_config(pdev, > > >>>> + pdev->msi_cap + PCI_MSI_ADDRESS_HI, > > >>>> + 4); > > >>>> + } > > >>>> + qemu_put_be32(f, msi_addr_hi); > > >>>> + > > >>>> + msi_data = pci_default_read_config(pdev, > > >>>> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : > PCI_MSI_DATA_32), > > >>>> + 2); > > >>>> + qemu_put_be32(f, msi_data); > > >>>> + } else if (vdev->interrupt == VFIO_INT_MSIX) { > > >>>> + uint16_t offset; > > >>>> + > > >>>> + /* save enable bit and maskall bit */ > > >>>> + offset = pci_default_read_config(pdev, > > >>>> + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); > > >>>> + qemu_put_be16(f, offset); > > >>>> + msix_save(pdev, f); > > >>>> + } > > >>>> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > > >>>> + qemu_put_be16(f, pci_cmd); > > >>>> +} > > >>>> + > > >>>> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) > > >>>> +{ > > >>>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, > vbasedev); > > >>>> + PCIDevice *pdev = &vdev->pdev; > > >>>> + uint32_t interrupt_type; > > >>>> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > > >>>> + uint16_t pci_cmd; > > >>>> + bool msi_64bit; > > >>>> + int i; > > >>>> + > > >>>> + /* retore pci bar configuration */ > > >>>> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > > >>>> + vfio_pci_write_config(pdev, PCI_COMMAND, > > >>>> + pci_cmd & (!(PCI_COMMAND_IO | > PCI_COMMAND_MEMORY)), 2); > > >>>> + for (i = 0; i < PCI_ROM_SLOT; i++) { > > >>>> + uint32_t bar = qemu_get_be32(f); > > >>>> + > > >>>> + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4); > > >>>> + } > > >>> > > >>> Is it possible to validate the bar's at all? We just had a bug on a > > >>> virtual device where one version was asking for a larger bar than the > > >>> other; our validation caught this in some cases so we could tell that > > >>> the guest had a BAR that was aligned at the wrong alignment. I'm a bit confused here. Did you mean that src and dest include different versions of the virtual device which implements different BAR size? If that is the case, shouldn't the migration fail at the start when doing compatibility check? > > >>> > > >> > > >> "Validate the bars" does that means validate size of bars? > > > > > > I meant validate the address programmed into the BAR against the size, > > > assuming you know the size; e.g. if it's a 128MB BAR, then make sure the > > > address programmed in is 128MB aligned. > > > > > > > If this validation fails, migration resume should fail, right? > > Yes I think so; if you've got a device that wants 128MB alignment and > someone gives you a non-aligned address, who knows what will happen. If misalignment is really caused by the guest, shouldn't we just follow the hardware behavior, i.e. hard-wiring the lower bits to 0 before updating the cfg space? Thanks Kevin
* Tian, Kevin (kevin.tian@intel.com) wrote: > > From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com] > > Sent: Friday, August 23, 2019 3:13 AM > > > > * Kirti Wankhede (kwankhede@nvidia.com) wrote: > > > > > > > > > On 8/22/2019 3:02 PM, Dr. David Alan Gilbert wrote: > > > > * Kirti Wankhede (kwankhede@nvidia.com) wrote: > > > >> Sorry for delay to respond. > > > >> > > > >> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote: > > > >>> * Kirti Wankhede (kwankhede@nvidia.com) wrote: > > > >>>> These functions save and restore PCI device specific data - config > > > >>>> space of PCI device. > > > >>>> Tested save and restore with MSI and MSIX type. > > > >>>> > > > >>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > > > >>>> Reviewed-by: Neo Jia <cjia@nvidia.com> > > > >>>> --- > > > >>>> hw/vfio/pci.c | 114 > > ++++++++++++++++++++++++++++++++++++++++++ > > > >>>> include/hw/vfio/vfio-common.h | 2 + > > > >>>> 2 files changed, 116 insertions(+) > > > >>>> > > > >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > > >>>> index de0d286fc9dd..5fe4f8076cac 100644 > > > >>>> --- a/hw/vfio/pci.c > > > >>>> +++ b/hw/vfio/pci.c > > > >>>> @@ -2395,11 +2395,125 @@ static Object > > *vfio_pci_get_object(VFIODevice *vbasedev) > > > >>>> return OBJECT(vdev); > > > >>>> } > > > >>>> > > > >>>> +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) > > > >>>> +{ > > > >>>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, > > vbasedev); > > > >>>> + PCIDevice *pdev = &vdev->pdev; > > > >>>> + uint16_t pci_cmd; > > > >>>> + int i; > > > >>>> + > > > >>>> + for (i = 0; i < PCI_ROM_SLOT; i++) { > > > >>>> + uint32_t bar; > > > >>>> + > > > >>>> + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * > > 4, 4); > > > >>>> + qemu_put_be32(f, bar); > > > >>>> + } > > > >>>> + > > > >>>> + qemu_put_be32(f, vdev->interrupt); > > > >>>> + if (vdev->interrupt == VFIO_INT_MSI) { > > > >>>> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > > > >>>> + bool msi_64bit; > > > >>>> + > > > >>>> + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + > > PCI_MSI_FLAGS, > > > >>>> + 2); > > > >>>> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > > > >>>> + > > > >>>> + msi_addr_lo = pci_default_read_config(pdev, > > > >>>> + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); > > > >>>> + qemu_put_be32(f, msi_addr_lo); > > > >>>> + > > > >>>> + if (msi_64bit) { > > > >>>> + msi_addr_hi = pci_default_read_config(pdev, > > > >>>> + pdev->msi_cap + PCI_MSI_ADDRESS_HI, > > > >>>> + 4); > > > >>>> + } > > > >>>> + qemu_put_be32(f, msi_addr_hi); > > > >>>> + > > > >>>> + msi_data = pci_default_read_config(pdev, > > > >>>> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : > > PCI_MSI_DATA_32), > > > >>>> + 2); > > > >>>> + qemu_put_be32(f, msi_data); > > > >>>> + } else if (vdev->interrupt == VFIO_INT_MSIX) { > > > >>>> + uint16_t offset; > > > >>>> + > > > >>>> + /* save enable bit and maskall bit */ > > > >>>> + offset = pci_default_read_config(pdev, > > > >>>> + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); > > > >>>> + qemu_put_be16(f, offset); > > > >>>> + msix_save(pdev, f); > > > >>>> + } > > > >>>> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > > > >>>> + qemu_put_be16(f, pci_cmd); > > > >>>> +} > > > >>>> + > > > >>>> +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) > > > >>>> +{ > > > >>>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, > > vbasedev); > > > >>>> + PCIDevice *pdev = &vdev->pdev; > > > >>>> + uint32_t interrupt_type; > > > >>>> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > > > >>>> + uint16_t pci_cmd; > > > >>>> + bool msi_64bit; > > > >>>> + int i; > > > >>>> + > > > >>>> + /* retore pci bar configuration */ > > > >>>> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > > > >>>> + vfio_pci_write_config(pdev, PCI_COMMAND, > > > >>>> + pci_cmd & (!(PCI_COMMAND_IO | > > PCI_COMMAND_MEMORY)), 2); > > > >>>> + for (i = 0; i < PCI_ROM_SLOT; i++) { > > > >>>> + uint32_t bar = qemu_get_be32(f); > > > >>>> + > > > >>>> + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4); > > > >>>> + } > > > >>> > > > >>> Is it possible to validate the bar's at all? We just had a bug on a > > > >>> virtual device where one version was asking for a larger bar than the > > > >>> other; our validation caught this in some cases so we could tell that > > > >>> the guest had a BAR that was aligned at the wrong alignment. > > I'm a bit confused here. Did you mean that src and dest include > different versions of the virtual device which implements different > BAR size? If that is the case, shouldn't the migration fail at the start > when doing compatibility check? It was a mistake where the destination had accidentally changed the BAR size; checking the alignment was the only check that failed. > > > >>> > > > >> > > > >> "Validate the bars" does that means validate size of bars? > > > > > > > > I meant validate the address programmed into the BAR against the size, > > > > assuming you know the size; e.g. if it's a 128MB BAR, then make sure the > > > > address programmed in is 128MB aligned. > > > > > > > > > > If this validation fails, migration resume should fail, right? > > > > Yes I think so; if you've got a device that wants 128MB alignment and > > someone gives you a non-aligned address, who knows what will happen. > > If misalignment is really caused by the guest, shouldn't we just follow > the hardware behavior, i.e. hard-wiring the lower bits to 0 before > updating the cfg space? That should already happen on the source; but when loading a migration stream I try and be very untrusting; so it's good to check that the destination devices idea of the BAR matches what the register has. Dave > Thanks > Kevin -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com] > Sent: Friday, August 23, 2019 5:27 PM > > * Tian, Kevin (kevin.tian@intel.com) wrote: > > > From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com] > > > Sent: Friday, August 23, 2019 3:13 AM > > > > > > * Kirti Wankhede (kwankhede@nvidia.com) wrote: > > > > > > > > > > > > On 8/22/2019 3:02 PM, Dr. David Alan Gilbert wrote: > > > > > * Kirti Wankhede (kwankhede@nvidia.com) wrote: > > > > >> Sorry for delay to respond. > > > > >> > > > > >> On 7/11/2019 5:37 PM, Dr. David Alan Gilbert wrote: > > > > >>> * Kirti Wankhede (kwankhede@nvidia.com) wrote: > > > > >>>> These functions save and restore PCI device specific data - config > > > > >>>> space of PCI device. > > > > >>>> Tested save and restore with MSI and MSIX type. > > > > >>>> > > > > >>>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > > > > >>>> Reviewed-by: Neo Jia <cjia@nvidia.com> > > > > >>>> --- > > > > >>>> hw/vfio/pci.c | 114 > > > ++++++++++++++++++++++++++++++++++++++++++ > > > > >>>> include/hw/vfio/vfio-common.h | 2 + > > > > >>>> 2 files changed, 116 insertions(+) > > > > >>>> > > > > >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > > > >>>> index de0d286fc9dd..5fe4f8076cac 100644 > > > > >>>> --- a/hw/vfio/pci.c > > > > >>>> +++ b/hw/vfio/pci.c > > > > >>>> @@ -2395,11 +2395,125 @@ static Object > > > *vfio_pci_get_object(VFIODevice *vbasedev) > > > > >>>> return OBJECT(vdev); > > > > >>>> } > > > > >>>> > > > > >>>> +static void vfio_pci_save_config(VFIODevice *vbasedev, > QEMUFile *f) > > > > >>>> +{ > > > > >>>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, > > > vbasedev); > > > > >>>> + PCIDevice *pdev = &vdev->pdev; > > > > >>>> + uint16_t pci_cmd; > > > > >>>> + int i; > > > > >>>> + > > > > >>>> + for (i = 0; i < PCI_ROM_SLOT; i++) { > > > > >>>> + uint32_t bar; > > > > >>>> + > > > > >>>> + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + > i * > > > 4, 4); > > > > >>>> + qemu_put_be32(f, bar); > > > > >>>> + } > > > > >>>> + > > > > >>>> + qemu_put_be32(f, vdev->interrupt); > > > > >>>> + if (vdev->interrupt == VFIO_INT_MSI) { > > > > >>>> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > > > > >>>> + bool msi_64bit; > > > > >>>> + > > > > >>>> + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + > > > PCI_MSI_FLAGS, > > > > >>>> + 2); > > > > >>>> + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); > > > > >>>> + > > > > >>>> + msi_addr_lo = pci_default_read_config(pdev, > > > > >>>> + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); > > > > >>>> + qemu_put_be32(f, msi_addr_lo); > > > > >>>> + > > > > >>>> + if (msi_64bit) { > > > > >>>> + msi_addr_hi = pci_default_read_config(pdev, > > > > >>>> + pdev->msi_cap + PCI_MSI_ADDRESS_HI, > > > > >>>> + 4); > > > > >>>> + } > > > > >>>> + qemu_put_be32(f, msi_addr_hi); > > > > >>>> + > > > > >>>> + msi_data = pci_default_read_config(pdev, > > > > >>>> + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : > > > PCI_MSI_DATA_32), > > > > >>>> + 2); > > > > >>>> + qemu_put_be32(f, msi_data); > > > > >>>> + } else if (vdev->interrupt == VFIO_INT_MSIX) { > > > > >>>> + uint16_t offset; > > > > >>>> + > > > > >>>> + /* save enable bit and maskall bit */ > > > > >>>> + offset = pci_default_read_config(pdev, > > > > >>>> + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); > > > > >>>> + qemu_put_be16(f, offset); > > > > >>>> + msix_save(pdev, f); > > > > >>>> + } > > > > >>>> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > > > > >>>> + qemu_put_be16(f, pci_cmd); > > > > >>>> +} > > > > >>>> + > > > > >>>> +static void vfio_pci_load_config(VFIODevice *vbasedev, > QEMUFile *f) > > > > >>>> +{ > > > > >>>> + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, > > > vbasedev); > > > > >>>> + PCIDevice *pdev = &vdev->pdev; > > > > >>>> + uint32_t interrupt_type; > > > > >>>> + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; > > > > >>>> + uint16_t pci_cmd; > > > > >>>> + bool msi_64bit; > > > > >>>> + int i; > > > > >>>> + > > > > >>>> + /* retore pci bar configuration */ > > > > >>>> + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); > > > > >>>> + vfio_pci_write_config(pdev, PCI_COMMAND, > > > > >>>> + pci_cmd & (!(PCI_COMMAND_IO | > > > PCI_COMMAND_MEMORY)), 2); > > > > >>>> + for (i = 0; i < PCI_ROM_SLOT; i++) { > > > > >>>> + uint32_t bar = qemu_get_be32(f); > > > > >>>> + > > > > >>>> + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, > 4); > > > > >>>> + } > > > > >>> > > > > >>> Is it possible to validate the bar's at all? We just had a bug on a > > > > >>> virtual device where one version was asking for a larger bar than the > > > > >>> other; our validation caught this in some cases so we could tell that > > > > >>> the guest had a BAR that was aligned at the wrong alignment. > > > > I'm a bit confused here. Did you mean that src and dest include > > different versions of the virtual device which implements different > > BAR size? If that is the case, shouldn't the migration fail at the start > > when doing compatibility check? > > It was a mistake where the destination had accidentally changed the BAR > size; checking the alignment was the only check that failed. > > > > > >>> > > > > >> > > > > >> "Validate the bars" does that means validate size of bars? > > > > > > > > > > I meant validate the address programmed into the BAR against the size, > > > > > assuming you know the size; e.g. if it's a 128MB BAR, then make sure > the > > > > > address programmed in is 128MB aligned. > > > > > > > > > > > > > If this validation fails, migration resume should fail, right? > > > > > > Yes I think so; if you've got a device that wants 128MB alignment and > > > someone gives you a non-aligned address, who knows what will happen. > > > > If misalignment is really caused by the guest, shouldn't we just follow > > the hardware behavior, i.e. hard-wiring the lower bits to 0 before > > updating the cfg space? > > That should already happen on the source; but when loading a migration > stream I try and be very untrusting; so it's good to check that the > destination devices idea of the BAR matches what the register has. > OK, it makes sense. Thanks Kevin
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index de0d286fc9dd..5fe4f8076cac 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2395,11 +2395,125 @@ static Object *vfio_pci_get_object(VFIODevice *vbasedev) return OBJECT(vdev); } +static void vfio_pci_save_config(VFIODevice *vbasedev, QEMUFile *f) +{ + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); + PCIDevice *pdev = &vdev->pdev; + uint16_t pci_cmd; + int i; + + for (i = 0; i < PCI_ROM_SLOT; i++) { + uint32_t bar; + + bar = pci_default_read_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, 4); + qemu_put_be32(f, bar); + } + + qemu_put_be32(f, vdev->interrupt); + if (vdev->interrupt == VFIO_INT_MSI) { + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; + bool msi_64bit; + + msi_flags = pci_default_read_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, + 2); + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); + + msi_addr_lo = pci_default_read_config(pdev, + pdev->msi_cap + PCI_MSI_ADDRESS_LO, 4); + qemu_put_be32(f, msi_addr_lo); + + if (msi_64bit) { + msi_addr_hi = pci_default_read_config(pdev, + pdev->msi_cap + PCI_MSI_ADDRESS_HI, + 4); + } + qemu_put_be32(f, msi_addr_hi); + + msi_data = pci_default_read_config(pdev, + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), + 2); + qemu_put_be32(f, msi_data); + } else if (vdev->interrupt == VFIO_INT_MSIX) { + uint16_t offset; + + /* save enable bit and maskall bit */ + offset = pci_default_read_config(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS + 1, 2); + qemu_put_be16(f, offset); + msix_save(pdev, f); + } + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); + qemu_put_be16(f, pci_cmd); +} + +static void vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f) +{ + VFIOPCIDevice *vdev = container_of(vbasedev, VFIOPCIDevice, vbasedev); + PCIDevice *pdev = &vdev->pdev; + uint32_t interrupt_type; + uint32_t msi_flags, msi_addr_lo, msi_addr_hi = 0, msi_data; + uint16_t pci_cmd; + bool msi_64bit; + int i; + + /* retore pci bar configuration */ + pci_cmd = pci_default_read_config(pdev, PCI_COMMAND, 2); + vfio_pci_write_config(pdev, PCI_COMMAND, + pci_cmd & (!(PCI_COMMAND_IO | PCI_COMMAND_MEMORY)), 2); + for (i = 0; i < PCI_ROM_SLOT; i++) { + uint32_t bar = qemu_get_be32(f); + + vfio_pci_write_config(pdev, PCI_BASE_ADDRESS_0 + i * 4, bar, 4); + } + vfio_pci_write_config(pdev, PCI_COMMAND, + pci_cmd | PCI_COMMAND_IO | PCI_COMMAND_MEMORY, 2); + + interrupt_type = qemu_get_be32(f); + + if (interrupt_type == VFIO_INT_MSI) { + /* restore msi configuration */ + msi_flags = pci_default_read_config(pdev, + pdev->msi_cap + PCI_MSI_FLAGS, 2); + msi_64bit = (msi_flags & PCI_MSI_FLAGS_64BIT); + + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, + msi_flags & (!PCI_MSI_FLAGS_ENABLE), 2); + + msi_addr_lo = qemu_get_be32(f); + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_LO, + msi_addr_lo, 4); + + msi_addr_hi = qemu_get_be32(f); + if (msi_64bit) { + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_ADDRESS_HI, + msi_addr_hi, 4); + } + msi_data = qemu_get_be32(f); + vfio_pci_write_config(pdev, + pdev->msi_cap + (msi_64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32), + msi_data, 2); + + vfio_pci_write_config(pdev, pdev->msi_cap + PCI_MSI_FLAGS, + msi_flags | PCI_MSI_FLAGS_ENABLE, 2); + } else if (interrupt_type == VFIO_INT_MSIX) { + uint16_t offset = qemu_get_be16(f); + + /* load enable bit and maskall bit */ + vfio_pci_write_config(pdev, pdev->msix_cap + PCI_MSIX_FLAGS + 1, + offset, 2); + msix_load(pdev, f); + } + pci_cmd = qemu_get_be16(f); + vfio_pci_write_config(pdev, PCI_COMMAND, pci_cmd, 2); +} + static VFIODeviceOps vfio_pci_ops = { .vfio_compute_needs_reset = vfio_pci_compute_needs_reset, .vfio_hot_reset_multi = vfio_pci_hot_reset_multi, .vfio_eoi = vfio_intx_eoi, .vfio_get_object = vfio_pci_get_object, + .vfio_save_config = vfio_pci_save_config, + .vfio_load_config = vfio_pci_load_config, }; int vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp) diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h index 771b6d59a3db..ee72bd984a36 100644 --- a/include/hw/vfio/vfio-common.h +++ b/include/hw/vfio/vfio-common.h @@ -120,6 +120,8 @@ struct VFIODeviceOps { int (*vfio_hot_reset_multi)(VFIODevice *vdev); void (*vfio_eoi)(VFIODevice *vdev); Object *(*vfio_get_object)(VFIODevice *vdev); + void (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f); + void (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f); }; typedef struct VFIOGroup {