Message ID | 20221028122629.3269-2-akihiko.odaki@daynix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | pci: Abort if pci_add_capability fails | expand |
On Fri, 28 Oct 2022 21:26:13 +0900 Akihiko Odaki <akihiko.odaki@daynix.com> wrote: > vfio_add_std_cap() is designed to ensure that capabilities do not > overlap, but it failed to do so for MSI and MSI-X capabilities. > > Ensure MSI and MSI-X capabilities do not overlap with others by omitting > other overlapping capabilities. > > Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> > --- > hw/vfio/pci.c | 63 +++++++++++++++++++++++++++++++++++++++++++-------- > hw/vfio/pci.h | 3 +++ > 2 files changed, 56 insertions(+), 10 deletions(-) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 939dcc3d4a..36c8f3dc85 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1278,23 +1278,42 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) > } > } > > -static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > +static void vfio_msi_early_setup(VFIOPCIDevice *vdev, Error **errp) > { > uint16_t ctrl; > - bool msi_64bit, msi_maskbit; > - int ret, entries; > - Error *err = NULL; > + uint8_t pos; > + > + pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSI); > + if (!pos) { > + return; > + } > > if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), > vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { > error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); > - return -errno; > + return; > } > - ctrl = le16_to_cpu(ctrl); > + vdev->msi_pos = pos; > + vdev->msi_ctrl = le16_to_cpu(ctrl); > > - msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT); > - msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); > - entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); > + vdev->msi_cap_size = 0xa; > + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT)) { > + vdev->msi_cap_size += 0xa; > + } > + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT)) { > + vdev->msi_cap_size += 0x4; > + } > +} > + > +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > +{ > + bool msi_64bit, msi_maskbit; > + int ret, entries; > + Error *err = NULL; > + > + msi_64bit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT); > + msi_maskbit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT); > + entries = 1 << ((vdev->msi_ctrl & PCI_MSI_FLAGS_QMASK) >> 1); > > trace_vfio_msi_setup(vdev->vbasedev.name, pos); > > @@ -1306,7 +1325,6 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > error_propagate_prepend(errp, err, "msi_init failed: "); > return ret; > } > - vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); > > return 0; > } > @@ -1524,6 +1542,7 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) > pba = le32_to_cpu(pba); > > msix = g_malloc0(sizeof(*msix)); > + msix->pos = pos; > msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK; > msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK; > msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK; > @@ -2025,6 +2044,24 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) > } > } > > + if (cap_id != PCI_CAP_ID_MSI && > + range_covers_byte(vdev->msi_pos, vdev->msi_cap_size, pos)) { > + warn_report(VFIO_MSG_PREFIX > + "A capability overlaps with MSI, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", > + vdev->vbasedev.name, cap_id, pos, > + vdev->msi_pos, vdev->msi_pos + vdev->msi_cap_size); > + return 0; > + } > + > + if (cap_id != PCI_CAP_ID_MSIX && vdev->msix && > + range_covers_byte(vdev->msix->pos, MSIX_CAP_LENGTH, pos)) { > + warn_report(VFIO_MSG_PREFIX > + "A capability overlaps with MSI-X, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", > + vdev->vbasedev.name, cap_id, pos, > + vdev->msix->pos, vdev->msix->pos + MSIX_CAP_LENGTH); > + return 0; > + } Capabilities are not a single byte, the fact that it doesn't start within the MSI or MSI-X capability is not a sufficient test. We're also choosing to prioritize MSI and MSI-X capabilities by protecting that range rather than the existing behavior where we'd drop those capabilities if they overlap with another capability that has already been placed. There are merits to both approaches, but I don't see any justification here to change the current behavior. Isn't the most similar behavior to existing to pass the available size to vfio_msi[x]_setup() and return an errno if the size would be exceeded? Something like below (untested, and requires exporting msi_cap_sizeof()). Thanks, Alex diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 939dcc3d4a9e..485f9bc5102d 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1278,11 +1278,13 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) } } -static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, + uint8_t size, Error **errp) { uint16_t ctrl; bool msi_64bit, msi_maskbit; int ret, entries; + uint8_t msi_cap_size; Error *err = NULL; if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), @@ -1295,6 +1297,10 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT); msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); + msi_cap_size = msi_cap_sizeof(ctrl); + + if (msi_cap_size > size) + return -ENOSPC; trace_vfio_msi_setup(vdev->vbasedev.name, pos); @@ -1306,7 +1312,7 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) error_propagate_prepend(errp, err, "msi_init failed: "); return ret; } - vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); + vdev->msi_cap_size = msi_cap_size; return 0; } @@ -1570,11 +1576,15 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) vfio_pci_relocate_msix(vdev, errp); } -static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) +static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, + uint8_t size, Error **errp) { int ret; Error *err = NULL; + if (MSIX_CAP_LENGTH > size) + return -ENOSPC; + vdev->msix->pending = g_new0(unsigned long, BITS_TO_LONGS(vdev->msix->entries)); ret = msix_init(&vdev->pdev, vdev->msix->entries, @@ -2033,14 +2043,14 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) switch (cap_id) { case PCI_CAP_ID_MSI: - ret = vfio_msi_setup(vdev, pos, errp); + ret = vfio_msi_setup(vdev, pos, size, errp); break; case PCI_CAP_ID_EXP: vfio_check_pcie_flr(vdev, pos); ret = vfio_setup_pcie_cap(vdev, pos, size, errp); break; case PCI_CAP_ID_MSIX: - ret = vfio_msix_setup(vdev, pos, errp); + ret = vfio_msix_setup(vdev, pos, size, errp); break; case PCI_CAP_ID_PM: vfio_check_pm_reset(vdev, pos);
On 2022/10/28 23:16, Alex Williamson wrote: > On Fri, 28 Oct 2022 21:26:13 +0900 > Akihiko Odaki <akihiko.odaki@daynix.com> wrote: > >> vfio_add_std_cap() is designed to ensure that capabilities do not >> overlap, but it failed to do so for MSI and MSI-X capabilities. >> >> Ensure MSI and MSI-X capabilities do not overlap with others by omitting >> other overlapping capabilities. >> >> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> >> --- >> hw/vfio/pci.c | 63 +++++++++++++++++++++++++++++++++++++++++++-------- >> hw/vfio/pci.h | 3 +++ >> 2 files changed, 56 insertions(+), 10 deletions(-) >> >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >> index 939dcc3d4a..36c8f3dc85 100644 >> --- a/hw/vfio/pci.c >> +++ b/hw/vfio/pci.c >> @@ -1278,23 +1278,42 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) >> } >> } >> >> -static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) >> +static void vfio_msi_early_setup(VFIOPCIDevice *vdev, Error **errp) >> { >> uint16_t ctrl; >> - bool msi_64bit, msi_maskbit; >> - int ret, entries; >> - Error *err = NULL; >> + uint8_t pos; >> + >> + pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSI); >> + if (!pos) { >> + return; >> + } >> >> if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), >> vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { >> error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); >> - return -errno; >> + return; >> } >> - ctrl = le16_to_cpu(ctrl); >> + vdev->msi_pos = pos; >> + vdev->msi_ctrl = le16_to_cpu(ctrl); >> >> - msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT); >> - msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); >> - entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); >> + vdev->msi_cap_size = 0xa; >> + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT)) { >> + vdev->msi_cap_size += 0xa; >> + } >> + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT)) { >> + vdev->msi_cap_size += 0x4; >> + } >> +} >> + >> +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) >> +{ >> + bool msi_64bit, msi_maskbit; >> + int ret, entries; >> + Error *err = NULL; >> + >> + msi_64bit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT); >> + msi_maskbit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT); >> + entries = 1 << ((vdev->msi_ctrl & PCI_MSI_FLAGS_QMASK) >> 1); >> >> trace_vfio_msi_setup(vdev->vbasedev.name, pos); >> >> @@ -1306,7 +1325,6 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) >> error_propagate_prepend(errp, err, "msi_init failed: "); >> return ret; >> } >> - vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); >> >> return 0; >> } >> @@ -1524,6 +1542,7 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) >> pba = le32_to_cpu(pba); >> >> msix = g_malloc0(sizeof(*msix)); >> + msix->pos = pos; >> msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK; >> msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK; >> msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK; >> @@ -2025,6 +2044,24 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) >> } >> } >> >> + if (cap_id != PCI_CAP_ID_MSI && >> + range_covers_byte(vdev->msi_pos, vdev->msi_cap_size, pos)) { >> + warn_report(VFIO_MSG_PREFIX >> + "A capability overlaps with MSI, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", >> + vdev->vbasedev.name, cap_id, pos, >> + vdev->msi_pos, vdev->msi_pos + vdev->msi_cap_size); >> + return 0; >> + } >> + >> + if (cap_id != PCI_CAP_ID_MSIX && vdev->msix && >> + range_covers_byte(vdev->msix->pos, MSIX_CAP_LENGTH, pos)) { >> + warn_report(VFIO_MSG_PREFIX >> + "A capability overlaps with MSI-X, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", >> + vdev->vbasedev.name, cap_id, pos, >> + vdev->msix->pos, vdev->msix->pos + MSIX_CAP_LENGTH); >> + return 0; >> + } > > Capabilities are not a single byte, the fact that it doesn't start > within the MSI or MSI-X capability is not a sufficient test. We're > also choosing to prioritize MSI and MSI-X capabilities by protecting > that range rather than the existing behavior where we'd drop those > capabilities if they overlap with another capability that has already > been placed. There are merits to both approaches, but I don't see any > justification here to change the current behavior. > > Isn't the most similar behavior to existing to pass the available size > to vfio_msi[x]_setup() and return an errno if the size would be > exceeded? Something like below (untested, and requires exporting > msi_cap_sizeof()). Thanks, It only tests the beginning of the capability currently being added because its end is determined by vfio_std_cap_max_size() so that the overlap does not happen. A comment in vfio_add_std_cap() says: > /* > * If it becomes important to configure capabilities to their actual > * size, use this as the default when it's something we don't recognize. > * Since QEMU doesn't actually handle many of the config accesses, > * exact size doesn't seem worthwhile. > */ My understanding of the problem is that while clipping is performed when overlapping two capabilities other than MSI and MSI-X according to the comment, the clipping does not happen when one of the overlapping capability is MSI or MSI-X. According to that, the correct way to fix is to perform clipping also in such a case. As QEMU actually handles the config acccesses for MSI and MSI-X, MSI and MSI-X are always priotized over the other capabilities. Regards, Akihiko Odaki > > Alex > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index 939dcc3d4a9e..485f9bc5102d 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -1278,11 +1278,13 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) > } > } > > -static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, > + uint8_t size, Error **errp) > { > uint16_t ctrl; > bool msi_64bit, msi_maskbit; > int ret, entries; > + uint8_t msi_cap_size; > Error *err = NULL; > > if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), > @@ -1295,6 +1297,10 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT); > msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); > entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); > + msi_cap_size = msi_cap_sizeof(ctrl); > + > + if (msi_cap_size > size) > + return -ENOSPC; > > trace_vfio_msi_setup(vdev->vbasedev.name, pos); > > @@ -1306,7 +1312,7 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > error_propagate_prepend(errp, err, "msi_init failed: "); > return ret; > } > - vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); > + vdev->msi_cap_size = msi_cap_size; > > return 0; > } > @@ -1570,11 +1576,15 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) > vfio_pci_relocate_msix(vdev, errp); > } > > -static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > +static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, > + uint8_t size, Error **errp) > { > int ret; > Error *err = NULL; > > + if (MSIX_CAP_LENGTH > size) > + return -ENOSPC; > + > vdev->msix->pending = g_new0(unsigned long, > BITS_TO_LONGS(vdev->msix->entries)); > ret = msix_init(&vdev->pdev, vdev->msix->entries, > @@ -2033,14 +2043,14 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) > > switch (cap_id) { > case PCI_CAP_ID_MSI: > - ret = vfio_msi_setup(vdev, pos, errp); > + ret = vfio_msi_setup(vdev, pos, size, errp); > break; > case PCI_CAP_ID_EXP: > vfio_check_pcie_flr(vdev, pos); > ret = vfio_setup_pcie_cap(vdev, pos, size, errp); > break; > case PCI_CAP_ID_MSIX: > - ret = vfio_msix_setup(vdev, pos, errp); > + ret = vfio_msix_setup(vdev, pos, size, errp); > break; > case PCI_CAP_ID_PM: > vfio_check_pm_reset(vdev, pos); >
On Sat, 29 Oct 2022 01:12:11 +0900 Akihiko Odaki <akihiko.odaki@daynix.com> wrote: > On 2022/10/28 23:16, Alex Williamson wrote: > > On Fri, 28 Oct 2022 21:26:13 +0900 > > Akihiko Odaki <akihiko.odaki@daynix.com> wrote: > > > >> vfio_add_std_cap() is designed to ensure that capabilities do not > >> overlap, but it failed to do so for MSI and MSI-X capabilities. > >> > >> Ensure MSI and MSI-X capabilities do not overlap with others by omitting > >> other overlapping capabilities. > >> > >> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> > >> --- > >> hw/vfio/pci.c | 63 +++++++++++++++++++++++++++++++++++++++++++-------- > >> hw/vfio/pci.h | 3 +++ > >> 2 files changed, 56 insertions(+), 10 deletions(-) > >> > >> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > >> index 939dcc3d4a..36c8f3dc85 100644 > >> --- a/hw/vfio/pci.c > >> +++ b/hw/vfio/pci.c > >> @@ -1278,23 +1278,42 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) > >> } > >> } > >> > >> -static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > >> +static void vfio_msi_early_setup(VFIOPCIDevice *vdev, Error **errp) > >> { > >> uint16_t ctrl; > >> - bool msi_64bit, msi_maskbit; > >> - int ret, entries; > >> - Error *err = NULL; > >> + uint8_t pos; > >> + > >> + pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSI); > >> + if (!pos) { > >> + return; > >> + } > >> > >> if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), > >> vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { > >> error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); > >> - return -errno; > >> + return; > >> } > >> - ctrl = le16_to_cpu(ctrl); > >> + vdev->msi_pos = pos; > >> + vdev->msi_ctrl = le16_to_cpu(ctrl); > >> > >> - msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT); > >> - msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); > >> - entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); > >> + vdev->msi_cap_size = 0xa; > >> + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT)) { > >> + vdev->msi_cap_size += 0xa; > >> + } > >> + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT)) { > >> + vdev->msi_cap_size += 0x4; > >> + } > >> +} > >> + > >> +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > >> +{ > >> + bool msi_64bit, msi_maskbit; > >> + int ret, entries; > >> + Error *err = NULL; > >> + > >> + msi_64bit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT); > >> + msi_maskbit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT); > >> + entries = 1 << ((vdev->msi_ctrl & PCI_MSI_FLAGS_QMASK) >> 1); > >> > >> trace_vfio_msi_setup(vdev->vbasedev.name, pos); > >> > >> @@ -1306,7 +1325,6 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) > >> error_propagate_prepend(errp, err, "msi_init failed: "); > >> return ret; > >> } > >> - vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); > >> > >> return 0; > >> } > >> @@ -1524,6 +1542,7 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) > >> pba = le32_to_cpu(pba); > >> > >> msix = g_malloc0(sizeof(*msix)); > >> + msix->pos = pos; > >> msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK; > >> msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK; > >> msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK; > >> @@ -2025,6 +2044,24 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) > >> } > >> } > >> > >> + if (cap_id != PCI_CAP_ID_MSI && > >> + range_covers_byte(vdev->msi_pos, vdev->msi_cap_size, pos)) { > >> + warn_report(VFIO_MSG_PREFIX > >> + "A capability overlaps with MSI, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", > >> + vdev->vbasedev.name, cap_id, pos, > >> + vdev->msi_pos, vdev->msi_pos + vdev->msi_cap_size); > >> + return 0; > >> + } > >> + > >> + if (cap_id != PCI_CAP_ID_MSIX && vdev->msix && > >> + range_covers_byte(vdev->msix->pos, MSIX_CAP_LENGTH, pos)) { > >> + warn_report(VFIO_MSG_PREFIX > >> + "A capability overlaps with MSI-X, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", > >> + vdev->vbasedev.name, cap_id, pos, > >> + vdev->msix->pos, vdev->msix->pos + MSIX_CAP_LENGTH); > >> + return 0; > >> + } > > > > Capabilities are not a single byte, the fact that it doesn't start > > within the MSI or MSI-X capability is not a sufficient test. We're > > also choosing to prioritize MSI and MSI-X capabilities by protecting > > that range rather than the existing behavior where we'd drop those > > capabilities if they overlap with another capability that has already > > been placed. There are merits to both approaches, but I don't see any > > justification here to change the current behavior. > > > > Isn't the most similar behavior to existing to pass the available size > > to vfio_msi[x]_setup() and return an errno if the size would be > > exceeded? Something like below (untested, and requires exporting > > msi_cap_sizeof()). Thanks, > > It only tests the beginning of the capability currently being added > because its end is determined by vfio_std_cap_max_size() so that the > overlap does not happen. > > A comment in vfio_add_std_cap() says: > > /* > > * If it becomes important to configure capabilities to their actual > > * size, use this as the default when it's something we don't > recognize. > > * Since QEMU doesn't actually handle many of the config accesses, > > * exact size doesn't seem worthwhile. > > */ > > My understanding of the problem is that while clipping is performed when > overlapping two capabilities other than MSI and MSI-X according to the > comment, the clipping does not happen when one of the overlapping > capability is MSI or MSI-X. > > According to that, the correct way to fix is to perform clipping also in > such a case. As QEMU actually handles the config acccesses for MSI and > MSI-X, MSI and MSI-X are always priotized over the other capabilities. Here's a scenario, a vendor ships a device with an MSI capability where the MSI control register reports per vector masking, but the packing of the capabilities is such that the next capability doesn't allow for the mask and pending bits registers. Currently, depending on the order we add them, pci_add_capability() will fail for either the MSI capability or the encroaching capability. This failure will propagate back to vfio_realize and we'll fail to instantiate the device. To make this scenario even a bit more pathological, let's assume the encroaching capability is MSI-X. As proposed here, we'd drop the MSI-X capability because it's starting position lies within our expectation of the extent of the MSI capability, and we'd allow the device to initialize with only MSI. Was that intentional? Was that a good choice? What if the driver only supports MSI-X? We've subtly, perhaps unintentionally, changed the policy based on some notion of prioritizing certain capabilities over others. The intent of vfio_std_cap_max_size() is not to intentionally clip ranges, it's only meant to simplify defining the extent of a capability to be bounded by the nearest capability after it in config space. Currently we rely on a combination of our own range management and the overlap detection in pci_add_capability() to generate a device instantiation failure. If it's deemed worthwhile to remove the latter, and that is the extent of the focus of this series, let's not go dabbling into defining new priority schemes for capabilities and defining certain overlap scenarios as arbitrarily continue'able. Thanks, Alex
On 2022/10/29 4:23, Alex Williamson wrote: > On Sat, 29 Oct 2022 01:12:11 +0900 > Akihiko Odaki <akihiko.odaki@daynix.com> wrote: > >> On 2022/10/28 23:16, Alex Williamson wrote: >>> On Fri, 28 Oct 2022 21:26:13 +0900 >>> Akihiko Odaki <akihiko.odaki@daynix.com> wrote: >>> >>>> vfio_add_std_cap() is designed to ensure that capabilities do not >>>> overlap, but it failed to do so for MSI and MSI-X capabilities. >>>> >>>> Ensure MSI and MSI-X capabilities do not overlap with others by omitting >>>> other overlapping capabilities. >>>> >>>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> >>>> --- >>>> hw/vfio/pci.c | 63 +++++++++++++++++++++++++++++++++++++++++++-------- >>>> hw/vfio/pci.h | 3 +++ >>>> 2 files changed, 56 insertions(+), 10 deletions(-) >>>> >>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c >>>> index 939dcc3d4a..36c8f3dc85 100644 >>>> --- a/hw/vfio/pci.c >>>> +++ b/hw/vfio/pci.c >>>> @@ -1278,23 +1278,42 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) >>>> } >>>> } >>>> >>>> -static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) >>>> +static void vfio_msi_early_setup(VFIOPCIDevice *vdev, Error **errp) >>>> { >>>> uint16_t ctrl; >>>> - bool msi_64bit, msi_maskbit; >>>> - int ret, entries; >>>> - Error *err = NULL; >>>> + uint8_t pos; >>>> + >>>> + pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSI); >>>> + if (!pos) { >>>> + return; >>>> + } >>>> >>>> if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), >>>> vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { >>>> error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); >>>> - return -errno; >>>> + return; >>>> } >>>> - ctrl = le16_to_cpu(ctrl); >>>> + vdev->msi_pos = pos; >>>> + vdev->msi_ctrl = le16_to_cpu(ctrl); >>>> >>>> - msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT); >>>> - msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); >>>> - entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); >>>> + vdev->msi_cap_size = 0xa; >>>> + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT)) { >>>> + vdev->msi_cap_size += 0xa; >>>> + } >>>> + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT)) { >>>> + vdev->msi_cap_size += 0x4; >>>> + } >>>> +} >>>> + >>>> +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) >>>> +{ >>>> + bool msi_64bit, msi_maskbit; >>>> + int ret, entries; >>>> + Error *err = NULL; >>>> + >>>> + msi_64bit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT); >>>> + msi_maskbit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT); >>>> + entries = 1 << ((vdev->msi_ctrl & PCI_MSI_FLAGS_QMASK) >> 1); >>>> >>>> trace_vfio_msi_setup(vdev->vbasedev.name, pos); >>>> >>>> @@ -1306,7 +1325,6 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) >>>> error_propagate_prepend(errp, err, "msi_init failed: "); >>>> return ret; >>>> } >>>> - vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); >>>> >>>> return 0; >>>> } >>>> @@ -1524,6 +1542,7 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) >>>> pba = le32_to_cpu(pba); >>>> >>>> msix = g_malloc0(sizeof(*msix)); >>>> + msix->pos = pos; >>>> msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK; >>>> msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK; >>>> msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK; >>>> @@ -2025,6 +2044,24 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) >>>> } >>>> } >>>> >>>> + if (cap_id != PCI_CAP_ID_MSI && >>>> + range_covers_byte(vdev->msi_pos, vdev->msi_cap_size, pos)) { >>>> + warn_report(VFIO_MSG_PREFIX >>>> + "A capability overlaps with MSI, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", >>>> + vdev->vbasedev.name, cap_id, pos, >>>> + vdev->msi_pos, vdev->msi_pos + vdev->msi_cap_size); >>>> + return 0; >>>> + } >>>> + >>>> + if (cap_id != PCI_CAP_ID_MSIX && vdev->msix && >>>> + range_covers_byte(vdev->msix->pos, MSIX_CAP_LENGTH, pos)) { >>>> + warn_report(VFIO_MSG_PREFIX >>>> + "A capability overlaps with MSI-X, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", >>>> + vdev->vbasedev.name, cap_id, pos, >>>> + vdev->msix->pos, vdev->msix->pos + MSIX_CAP_LENGTH); >>>> + return 0; >>>> + } >>> >>> Capabilities are not a single byte, the fact that it doesn't start >>> within the MSI or MSI-X capability is not a sufficient test. We're >>> also choosing to prioritize MSI and MSI-X capabilities by protecting >>> that range rather than the existing behavior where we'd drop those >>> capabilities if they overlap with another capability that has already >>> been placed. There are merits to both approaches, but I don't see any >>> justification here to change the current behavior. >>> >>> Isn't the most similar behavior to existing to pass the available size >>> to vfio_msi[x]_setup() and return an errno if the size would be >>> exceeded? Something like below (untested, and requires exporting >>> msi_cap_sizeof()). Thanks, >> >> It only tests the beginning of the capability currently being added >> because its end is determined by vfio_std_cap_max_size() so that the >> overlap does not happen. >> >> A comment in vfio_add_std_cap() says: >> > /* >> > * If it becomes important to configure capabilities to their actual >> > * size, use this as the default when it's something we don't >> recognize. >> > * Since QEMU doesn't actually handle many of the config accesses, >> > * exact size doesn't seem worthwhile. >> > */ >> >> My understanding of the problem is that while clipping is performed when >> overlapping two capabilities other than MSI and MSI-X according to the >> comment, the clipping does not happen when one of the overlapping >> capability is MSI or MSI-X. >> >> According to that, the correct way to fix is to perform clipping also in >> such a case. As QEMU actually handles the config acccesses for MSI and >> MSI-X, MSI and MSI-X are always priotized over the other capabilities. > > Here's a scenario, a vendor ships a device with an MSI capability where > the MSI control register reports per vector masking, but the packing of > the capabilities is such that the next capability doesn't allow for the > mask and pending bits registers. Currently, depending on the order we > add them, pci_add_capability() will fail for either the MSI capability > or the encroaching capability. This failure will propagate back to > vfio_realize and we'll fail to instantiate the device. To make this > scenario even a bit more pathological, let's assume the encroaching > capability is MSI-X. > > As proposed here, we'd drop the MSI-X capability because it's starting > position lies within our expectation of the extent of the MSI > capability, and we'd allow the device to initialize with only MSI. > Was that intentional? Was that a good choice? What if the driver > only supports MSI-X? We've subtly, perhaps unintentionally, changed > the policy based on some notion of prioritizing certain capabilities > over others. > > The intent of vfio_std_cap_max_size() is not to intentionally > clip ranges, it's only meant to simplify defining the extent of a > capability to be bounded by the nearest capability after it in config > space. > > Currently we rely on a combination of our own range management and the > overlap detection in pci_add_capability() to generate a device > instantiation failure. If it's deemed worthwhile to remove the latter, > and that is the extent of the focus of this series, let's not go > dabbling into defining new priority schemes for capabilities and > defining certain overlap scenarios as arbitrarily continue'able. > Thanks, > > Alex > You are right. I missed the part that vfio_std_cap_max_size() is not intended to clip ranges. That invalidates reasoning to continue when MSI/MSI-X capability overlaps with another. I have sent v6 to make the cases error. Thanks for reviewing and pointing this out. Regards, Akihiko Odaki
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 939dcc3d4a..36c8f3dc85 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -1278,23 +1278,42 @@ static void vfio_disable_interrupts(VFIOPCIDevice *vdev) } } -static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) +static void vfio_msi_early_setup(VFIOPCIDevice *vdev, Error **errp) { uint16_t ctrl; - bool msi_64bit, msi_maskbit; - int ret, entries; - Error *err = NULL; + uint8_t pos; + + pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSI); + if (!pos) { + return; + } if (pread(vdev->vbasedev.fd, &ctrl, sizeof(ctrl), vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) { error_setg_errno(errp, errno, "failed reading MSI PCI_CAP_FLAGS"); - return -errno; + return; } - ctrl = le16_to_cpu(ctrl); + vdev->msi_pos = pos; + vdev->msi_ctrl = le16_to_cpu(ctrl); - msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT); - msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT); - entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1); + vdev->msi_cap_size = 0xa; + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT)) { + vdev->msi_cap_size += 0xa; + } + if ((vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT)) { + vdev->msi_cap_size += 0x4; + } +} + +static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) +{ + bool msi_64bit, msi_maskbit; + int ret, entries; + Error *err = NULL; + + msi_64bit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_64BIT); + msi_maskbit = !!(vdev->msi_ctrl & PCI_MSI_FLAGS_MASKBIT); + entries = 1 << ((vdev->msi_ctrl & PCI_MSI_FLAGS_QMASK) >> 1); trace_vfio_msi_setup(vdev->vbasedev.name, pos); @@ -1306,7 +1325,6 @@ static int vfio_msi_setup(VFIOPCIDevice *vdev, int pos, Error **errp) error_propagate_prepend(errp, err, "msi_init failed: "); return ret; } - vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0); return 0; } @@ -1524,6 +1542,7 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, Error **errp) pba = le32_to_cpu(pba); msix = g_malloc0(sizeof(*msix)); + msix->pos = pos; msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK; msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK; msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK; @@ -2025,6 +2044,24 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) } } + if (cap_id != PCI_CAP_ID_MSI && + range_covers_byte(vdev->msi_pos, vdev->msi_cap_size, pos)) { + warn_report(VFIO_MSG_PREFIX + "A capability overlaps with MSI, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", + vdev->vbasedev.name, cap_id, pos, + vdev->msi_pos, vdev->msi_pos + vdev->msi_cap_size); + return 0; + } + + if (cap_id != PCI_CAP_ID_MSIX && vdev->msix && + range_covers_byte(vdev->msix->pos, MSIX_CAP_LENGTH, pos)) { + warn_report(VFIO_MSG_PREFIX + "A capability overlaps with MSI-X, ignoring (%" PRIu8 " @ %" PRIu8 " in [%" PRIu8 ", %" PRIu8 "))", + vdev->vbasedev.name, cap_id, pos, + vdev->msix->pos, vdev->msix->pos + MSIX_CAP_LENGTH); + return 0; + } + /* Scale down size, esp in case virt caps were added above */ size = MIN(size, vfio_std_cap_max_size(pdev, pos)); @@ -3037,6 +3074,12 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) vfio_bars_prepare(vdev); + vfio_msi_early_setup(vdev, &err); + if (err) { + error_propagate(errp, err); + goto error; + } + vfio_msix_early_setup(vdev, &err); if (err) { error_propagate(errp, err); diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h index 7c236a52f4..9ae0278058 100644 --- a/hw/vfio/pci.h +++ b/hw/vfio/pci.h @@ -107,6 +107,7 @@ enum { /* Cache of MSI-X setup */ typedef struct VFIOMSIXInfo { + uint8_t pos; uint8_t table_bar; uint8_t pba_bar; uint16_t entries; @@ -128,6 +129,8 @@ struct VFIOPCIDevice { unsigned int rom_size; off_t rom_offset; /* Offset of ROM region within device fd */ void *rom; + uint8_t msi_pos; + uint16_t msi_ctrl; int msi_cap_size; VFIOMSIVector *msi_vectors; VFIOMSIXInfo *msix;
vfio_add_std_cap() is designed to ensure that capabilities do not overlap, but it failed to do so for MSI and MSI-X capabilities. Ensure MSI and MSI-X capabilities do not overlap with others by omitting other overlapping capabilities. Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> --- hw/vfio/pci.c | 63 +++++++++++++++++++++++++++++++++++++++++++-------- hw/vfio/pci.h | 3 +++ 2 files changed, 56 insertions(+), 10 deletions(-)