Message ID | 20231012220854.2736994-11-volodymyr_babchuk@epam.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | PCI devices passthrough on Arm, part 3 | expand |
On Thu, Oct 12, 2023 at 10:09:17PM +0000, Volodymyr Babchuk wrote: > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > > Instead of handling a single range set, that contains all the memory > regions of all the BARs and ROM, have them per BAR. > As the range sets are now created when a PCI device is added and destroyed > when it is removed so make them named and accounted. > > Note that rangesets were chosen here despite there being only up to > 3 separate ranges in each set (typically just 1). But rangeset per BAR > was chosen for the ease of implementation and existing code re-usability. > > This is in preparation of making non-identity mappings in p2m for the MMIOs. > > Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> I have some minor comments below, but overall looks good, with the comments below addresses: Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> I think it's worth mentioning in the commit message that the error handling of vpci_process_pending() is slightly modified, and that vPCI handlers are no longer removed if the creation of the mappings in vpci_process_pending() fails, as that's unlikely to lead to a functional device in any case. > > --- > In v10: > - Added additional checks to vpci_process_pending() > - vpci_process_pending() now clears rangeset in case of failure > - Fixed locks in vpci_process_pending() > - Fixed coding style issues > - Fixed error handling in init_bars > In v9: > - removed d->vpci.map_pending in favor of checking v->vpci.pdev != > NULL > - printk -> gprintk > - renamed bar variable to fix shadowing > - fixed bug with iterating on remote device's BARs > - relaxed lock in vpci_process_pending > - removed stale comment > Since v6: > - update according to the new locking scheme > - remove odd fail label in modify_bars > Since v5: > - fix comments > - move rangeset allocation to init_bars and only allocate > for MAPPABLE BARs > - check for overlap with the already setup BAR ranges > Since v4: > - use named range sets for BARs (Jan) > - changes required by the new locking scheme > - updated commit message (Jan) > Since v3: > - re-work vpci_cancel_pending accordingly to the per-BAR handling > - s/num_mem_ranges/map_pending and s/uint8_t/bool > - ASSERT(bar->mem) in modify_bars > - create and destroy the rangesets on add/remove > --- > xen/drivers/vpci/header.c | 256 ++++++++++++++++++++++++++------------ > xen/drivers/vpci/vpci.c | 6 + > xen/include/xen/vpci.h | 2 +- > 3 files changed, 184 insertions(+), 80 deletions(-) > > diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c > index 40d1a07ada..5c056923ad 100644 > --- a/xen/drivers/vpci/header.c > +++ b/xen/drivers/vpci/header.c > @@ -161,63 +161,106 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd, > > bool vpci_process_pending(struct vcpu *v) > { > - if ( v->vpci.mem ) > + struct pci_dev *pdev = v->vpci.pdev; > + struct map_data data = { > + .d = v->domain, > + .map = v->vpci.cmd & PCI_COMMAND_MEMORY, > + }; > + struct vpci_header *header = NULL; > + unsigned int i; > + > + if ( !pdev ) > + return false; > + > + read_lock(&v->domain->pci_lock); > + > + if ( !pdev->vpci || (v->domain != pdev->domain) ) > { > - struct map_data data = { > - .d = v->domain, > - .map = v->vpci.cmd & PCI_COMMAND_MEMORY, > - }; > - int rc = rangeset_consume_ranges(v->vpci.mem, map_range, &data); > + read_unlock(&v->domain->pci_lock); I think you want to clear v->vpci.pdev here in order to avoid having to take the lock again and exit early from vpci_process_pending() if possible. > + return false; > + } > + > + header = &pdev->vpci->header; > + for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) > + { > + struct vpci_bar *bar = &header->bars[i]; > + int rc; > + > + if ( rangeset_is_empty(bar->mem) ) > + continue; > + > + rc = rangeset_consume_ranges(bar->mem, map_range, &data); > > if ( rc == -ERESTART ) > + { > + read_unlock(&v->domain->pci_lock); > return true; > + } > > - write_lock(&v->domain->pci_lock); > - spin_lock(&v->vpci.pdev->vpci->lock); > - /* Disable memory decoding unconditionally on failure. */ > - modify_decoding(v->vpci.pdev, > - rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd, > - !rc && v->vpci.rom_only); > - spin_unlock(&v->vpci.pdev->vpci->lock); > - > - rangeset_destroy(v->vpci.mem); > - v->vpci.mem = NULL; > if ( rc ) > - /* > - * FIXME: in case of failure remove the device from the domain. > - * Note that there might still be leftover mappings. While this is > - * safe for Dom0, for DomUs the domain will likely need to be > - * killed in order to avoid leaking stale p2m mappings on > - * failure. > - */ > - vpci_deassign_device(v->vpci.pdev); > - write_unlock(&v->domain->pci_lock); > + { > + spin_lock(&pdev->vpci->lock); > + /* Disable memory decoding unconditionally on failure. */ > + modify_decoding(pdev, v->vpci.cmd & ~PCI_COMMAND_MEMORY, > + false); > + spin_unlock(&pdev->vpci->lock); > + > + /* Clean all the rangesets */ > + for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) > + if ( !rangeset_is_empty(header->bars[i].mem) ) > + rangeset_empty(header->bars[i].mem); > + > + v->vpci.pdev = NULL; > + > + read_unlock(&v->domain->pci_lock); > + > + if ( !is_hardware_domain(v->domain) ) > + domain_crash(v->domain); > + > + return false; > + } > } > + v->vpci.pdev = NULL; > + > + spin_lock(&pdev->vpci->lock); > + modify_decoding(pdev, v->vpci.cmd, v->vpci.rom_only); > + spin_unlock(&pdev->vpci->lock); > + > + read_unlock(&v->domain->pci_lock); > > return false; > } > > static int __init apply_map(struct domain *d, const struct pci_dev *pdev, > - struct rangeset *mem, uint16_t cmd) > + uint16_t cmd) > { > struct map_data data = { .d = d, .map = true }; > - int rc; > + struct vpci_header *header = &pdev->vpci->header; > + int rc = 0; > + unsigned int i; > > ASSERT(rw_is_write_locked(&d->pci_lock)); > > - while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) == -ERESTART ) > + for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) > { > - /* > - * It's safe to drop and reacquire the lock in this context > - * without risking pdev disappearing because devices cannot be > - * removed until the initial domain has been started. > - */ > - read_unlock(&d->pci_lock); > - process_pending_softirqs(); > - read_lock(&d->pci_lock); > - } > + struct vpci_bar *bar = &header->bars[i]; > + > + if ( rangeset_is_empty(bar->mem) ) > + continue; > > - rangeset_destroy(mem); > + while ( (rc = rangeset_consume_ranges(bar->mem, map_range, > + &data)) == -ERESTART ) > + { > + /* > + * It's safe to drop and reacquire the lock in this context > + * without risking pdev disappearing because devices cannot be > + * removed until the initial domain has been started. > + */ > + write_unlock(&d->pci_lock); > + process_pending_softirqs(); > + write_lock(&d->pci_lock); > + } > + } > if ( !rc ) > modify_decoding(pdev, cmd, false); > > @@ -225,10 +268,12 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev, > } > > static void defer_map(struct domain *d, struct pci_dev *pdev, > - struct rangeset *mem, uint16_t cmd, bool rom_only) > + uint16_t cmd, bool rom_only) > { > struct vcpu *curr = current; > > + ASSERT(rw_is_write_locked(&pdev->domain->pci_lock)); Shouldn't this be part of the previous commit that introduces the usage of d->pci_lock? > + > /* > * FIXME: when deferring the {un}map the state of the device should not > * be trusted. For example the enable bit is toggled after the device > @@ -236,7 +281,6 @@ static void defer_map(struct domain *d, struct pci_dev *pdev, > * started for the same device if the domain is not well-behaved. > */ > curr->vpci.pdev = pdev; > - curr->vpci.mem = mem; > curr->vpci.cmd = cmd; > curr->vpci.rom_only = rom_only; > /* > @@ -250,33 +294,33 @@ static void defer_map(struct domain *d, struct pci_dev *pdev, > static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > { > struct vpci_header *header = &pdev->vpci->header; > - struct rangeset *mem = rangeset_new(NULL, NULL, 0); > struct pci_dev *tmp, *dev = NULL; > const struct domain *d; > const struct vpci_msix *msix = pdev->vpci->msix; > - unsigned int i; > + unsigned int i, j; > int rc; > > ASSERT(rw_is_write_locked(&pdev->domain->pci_lock)); > > - if ( !mem ) > - return -ENOMEM; > - > /* > - * Create a rangeset that represents the current device BARs memory region > - * and compare it against all the currently active BAR memory regions. If > - * an overlap is found, subtract it from the region to be mapped/unmapped. > + * Create a rangeset per BAR that represents the current device memory > + * region and compare it against all the currently active BAR memory > + * regions. If an overlap is found, subtract it from the region to be > + * mapped/unmapped. > * > - * First fill the rangeset with all the BARs of this device or with the ROM > + * First fill the rangesets with the BAR of this device or with the ROM > * BAR only, depending on whether the guest is toggling the memory decode > * bit of the command register, or the enable bit of the ROM BAR register. > */ > for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) > { > - const struct vpci_bar *bar = &header->bars[i]; > + struct vpci_bar *bar = &header->bars[i]; > unsigned long start = PFN_DOWN(bar->addr); > unsigned long end = PFN_DOWN(bar->addr + bar->size - 1); > > + if ( !bar->mem ) > + continue; > + > if ( !MAPPABLE_BAR(bar) || > (rom_only ? bar->type != VPCI_BAR_ROM > : (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) || > @@ -292,14 +336,31 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > continue; > } > > - rc = rangeset_add_range(mem, start, end); > + rc = rangeset_add_range(bar->mem, start, end); > if ( rc ) > { > printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n", > start, end, rc); > - rangeset_destroy(mem); > return rc; > } > + > + /* Check for overlap with the already setup BAR ranges. */ > + for ( j = 0; j < i; j++ ) > + { > + struct vpci_bar *prev_bar = &header->bars[j]; > + > + if ( rangeset_is_empty(prev_bar->mem) ) > + continue; > + > + rc = rangeset_remove_range(prev_bar->mem, start, end); > + if ( rc ) > + { > + gprintk(XENLOG_WARNING, > + "%pp: failed to remove overlapping range [%lx, %lx]: %d\n", > + &pdev->sbdf, start, end, rc); > + return rc; > + } > + } > } > > /* Remove any MSIX regions if present. */ > @@ -309,14 +370,21 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) + > vmsix_table_size(pdev->vpci, i) - 1); > > - rc = rangeset_remove_range(mem, start, end); > - if ( rc ) > + for ( j = 0; j < ARRAY_SIZE(header->bars); j++ ) > { > - printk(XENLOG_G_WARNING > - "Failed to remove MSIX table [%lx, %lx]: %d\n", > - start, end, rc); > - rangeset_destroy(mem); > - return rc; > + const struct vpci_bar *bar = &header->bars[j]; > + > + if ( rangeset_is_empty(bar->mem) ) > + continue; > + > + rc = rangeset_remove_range(bar->mem, start, end); > + if ( rc ) > + { > + gprintk(XENLOG_WARNING, > + "%pp: failed to remove MSIX table [%lx, %lx]: %d\n", > + &pdev->sbdf, start, end, rc); > + return rc; > + } > } > } > > @@ -356,27 +424,35 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > > for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ ) > { > - const struct vpci_bar *bar = &tmp->vpci->header.bars[i]; > - unsigned long start = PFN_DOWN(bar->addr); > - unsigned long end = PFN_DOWN(bar->addr + bar->size - 1); > - > - if ( !bar->enabled || > - !rangeset_overlaps_range(mem, start, end) || > - /* > - * If only the ROM enable bit is toggled check against > - * other BARs in the same device for overlaps, but not > - * against the same ROM BAR. > - */ > - (rom_only && tmp == pdev && bar->type == VPCI_BAR_ROM) ) > + const struct vpci_bar *remote_bar = &tmp->vpci->header.bars[i]; > + unsigned long start = PFN_DOWN(remote_bar->addr); > + unsigned long end = PFN_DOWN(remote_bar->addr + > + remote_bar->size - 1); > + > + if ( !remote_bar->enabled ) > continue; > > - rc = rangeset_remove_range(mem, start, end); > - if ( rc ) > + for ( j = 0; j < ARRAY_SIZE(header->bars); j++) > { > - printk(XENLOG_G_WARNING "Failed to remove [%lx, %lx]: %d\n", > - start, end, rc); > - rangeset_destroy(mem); > - return rc; > + const struct vpci_bar *bar = &header->bars[j]; > + > + if ( !rangeset_overlaps_range(bar->mem, start, end) || > + /* > + * If only the ROM enable bit is toggled check against > + * other BARs in the same device for overlaps, but not > + * against the same ROM BAR. > + */ > + (rom_only && tmp == pdev && bar->type == VPCI_BAR_ROM) ) Is this line slightly too long? Thanks, Roger.
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c index 40d1a07ada..5c056923ad 100644 --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -161,63 +161,106 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd, bool vpci_process_pending(struct vcpu *v) { - if ( v->vpci.mem ) + struct pci_dev *pdev = v->vpci.pdev; + struct map_data data = { + .d = v->domain, + .map = v->vpci.cmd & PCI_COMMAND_MEMORY, + }; + struct vpci_header *header = NULL; + unsigned int i; + + if ( !pdev ) + return false; + + read_lock(&v->domain->pci_lock); + + if ( !pdev->vpci || (v->domain != pdev->domain) ) { - struct map_data data = { - .d = v->domain, - .map = v->vpci.cmd & PCI_COMMAND_MEMORY, - }; - int rc = rangeset_consume_ranges(v->vpci.mem, map_range, &data); + read_unlock(&v->domain->pci_lock); + return false; + } + + header = &pdev->vpci->header; + for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) + { + struct vpci_bar *bar = &header->bars[i]; + int rc; + + if ( rangeset_is_empty(bar->mem) ) + continue; + + rc = rangeset_consume_ranges(bar->mem, map_range, &data); if ( rc == -ERESTART ) + { + read_unlock(&v->domain->pci_lock); return true; + } - write_lock(&v->domain->pci_lock); - spin_lock(&v->vpci.pdev->vpci->lock); - /* Disable memory decoding unconditionally on failure. */ - modify_decoding(v->vpci.pdev, - rc ? v->vpci.cmd & ~PCI_COMMAND_MEMORY : v->vpci.cmd, - !rc && v->vpci.rom_only); - spin_unlock(&v->vpci.pdev->vpci->lock); - - rangeset_destroy(v->vpci.mem); - v->vpci.mem = NULL; if ( rc ) - /* - * FIXME: in case of failure remove the device from the domain. - * Note that there might still be leftover mappings. While this is - * safe for Dom0, for DomUs the domain will likely need to be - * killed in order to avoid leaking stale p2m mappings on - * failure. - */ - vpci_deassign_device(v->vpci.pdev); - write_unlock(&v->domain->pci_lock); + { + spin_lock(&pdev->vpci->lock); + /* Disable memory decoding unconditionally on failure. */ + modify_decoding(pdev, v->vpci.cmd & ~PCI_COMMAND_MEMORY, + false); + spin_unlock(&pdev->vpci->lock); + + /* Clean all the rangesets */ + for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) + if ( !rangeset_is_empty(header->bars[i].mem) ) + rangeset_empty(header->bars[i].mem); + + v->vpci.pdev = NULL; + + read_unlock(&v->domain->pci_lock); + + if ( !is_hardware_domain(v->domain) ) + domain_crash(v->domain); + + return false; + } } + v->vpci.pdev = NULL; + + spin_lock(&pdev->vpci->lock); + modify_decoding(pdev, v->vpci.cmd, v->vpci.rom_only); + spin_unlock(&pdev->vpci->lock); + + read_unlock(&v->domain->pci_lock); return false; } static int __init apply_map(struct domain *d, const struct pci_dev *pdev, - struct rangeset *mem, uint16_t cmd) + uint16_t cmd) { struct map_data data = { .d = d, .map = true }; - int rc; + struct vpci_header *header = &pdev->vpci->header; + int rc = 0; + unsigned int i; ASSERT(rw_is_write_locked(&d->pci_lock)); - while ( (rc = rangeset_consume_ranges(mem, map_range, &data)) == -ERESTART ) + for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) { - /* - * It's safe to drop and reacquire the lock in this context - * without risking pdev disappearing because devices cannot be - * removed until the initial domain has been started. - */ - read_unlock(&d->pci_lock); - process_pending_softirqs(); - read_lock(&d->pci_lock); - } + struct vpci_bar *bar = &header->bars[i]; + + if ( rangeset_is_empty(bar->mem) ) + continue; - rangeset_destroy(mem); + while ( (rc = rangeset_consume_ranges(bar->mem, map_range, + &data)) == -ERESTART ) + { + /* + * It's safe to drop and reacquire the lock in this context + * without risking pdev disappearing because devices cannot be + * removed until the initial domain has been started. + */ + write_unlock(&d->pci_lock); + process_pending_softirqs(); + write_lock(&d->pci_lock); + } + } if ( !rc ) modify_decoding(pdev, cmd, false); @@ -225,10 +268,12 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev, } static void defer_map(struct domain *d, struct pci_dev *pdev, - struct rangeset *mem, uint16_t cmd, bool rom_only) + uint16_t cmd, bool rom_only) { struct vcpu *curr = current; + ASSERT(rw_is_write_locked(&pdev->domain->pci_lock)); + /* * FIXME: when deferring the {un}map the state of the device should not * be trusted. For example the enable bit is toggled after the device @@ -236,7 +281,6 @@ static void defer_map(struct domain *d, struct pci_dev *pdev, * started for the same device if the domain is not well-behaved. */ curr->vpci.pdev = pdev; - curr->vpci.mem = mem; curr->vpci.cmd = cmd; curr->vpci.rom_only = rom_only; /* @@ -250,33 +294,33 @@ static void defer_map(struct domain *d, struct pci_dev *pdev, static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) { struct vpci_header *header = &pdev->vpci->header; - struct rangeset *mem = rangeset_new(NULL, NULL, 0); struct pci_dev *tmp, *dev = NULL; const struct domain *d; const struct vpci_msix *msix = pdev->vpci->msix; - unsigned int i; + unsigned int i, j; int rc; ASSERT(rw_is_write_locked(&pdev->domain->pci_lock)); - if ( !mem ) - return -ENOMEM; - /* - * Create a rangeset that represents the current device BARs memory region - * and compare it against all the currently active BAR memory regions. If - * an overlap is found, subtract it from the region to be mapped/unmapped. + * Create a rangeset per BAR that represents the current device memory + * region and compare it against all the currently active BAR memory + * regions. If an overlap is found, subtract it from the region to be + * mapped/unmapped. * - * First fill the rangeset with all the BARs of this device or with the ROM + * First fill the rangesets with the BAR of this device or with the ROM * BAR only, depending on whether the guest is toggling the memory decode * bit of the command register, or the enable bit of the ROM BAR register. */ for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) { - const struct vpci_bar *bar = &header->bars[i]; + struct vpci_bar *bar = &header->bars[i]; unsigned long start = PFN_DOWN(bar->addr); unsigned long end = PFN_DOWN(bar->addr + bar->size - 1); + if ( !bar->mem ) + continue; + if ( !MAPPABLE_BAR(bar) || (rom_only ? bar->type != VPCI_BAR_ROM : (bar->type == VPCI_BAR_ROM && !header->rom_enabled)) || @@ -292,14 +336,31 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) continue; } - rc = rangeset_add_range(mem, start, end); + rc = rangeset_add_range(bar->mem, start, end); if ( rc ) { printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n", start, end, rc); - rangeset_destroy(mem); return rc; } + + /* Check for overlap with the already setup BAR ranges. */ + for ( j = 0; j < i; j++ ) + { + struct vpci_bar *prev_bar = &header->bars[j]; + + if ( rangeset_is_empty(prev_bar->mem) ) + continue; + + rc = rangeset_remove_range(prev_bar->mem, start, end); + if ( rc ) + { + gprintk(XENLOG_WARNING, + "%pp: failed to remove overlapping range [%lx, %lx]: %d\n", + &pdev->sbdf, start, end, rc); + return rc; + } + } } /* Remove any MSIX regions if present. */ @@ -309,14 +370,21 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) unsigned long end = PFN_DOWN(vmsix_table_addr(pdev->vpci, i) + vmsix_table_size(pdev->vpci, i) - 1); - rc = rangeset_remove_range(mem, start, end); - if ( rc ) + for ( j = 0; j < ARRAY_SIZE(header->bars); j++ ) { - printk(XENLOG_G_WARNING - "Failed to remove MSIX table [%lx, %lx]: %d\n", - start, end, rc); - rangeset_destroy(mem); - return rc; + const struct vpci_bar *bar = &header->bars[j]; + + if ( rangeset_is_empty(bar->mem) ) + continue; + + rc = rangeset_remove_range(bar->mem, start, end); + if ( rc ) + { + gprintk(XENLOG_WARNING, + "%pp: failed to remove MSIX table [%lx, %lx]: %d\n", + &pdev->sbdf, start, end, rc); + return rc; + } } } @@ -356,27 +424,35 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ ) { - const struct vpci_bar *bar = &tmp->vpci->header.bars[i]; - unsigned long start = PFN_DOWN(bar->addr); - unsigned long end = PFN_DOWN(bar->addr + bar->size - 1); - - if ( !bar->enabled || - !rangeset_overlaps_range(mem, start, end) || - /* - * If only the ROM enable bit is toggled check against - * other BARs in the same device for overlaps, but not - * against the same ROM BAR. - */ - (rom_only && tmp == pdev && bar->type == VPCI_BAR_ROM) ) + const struct vpci_bar *remote_bar = &tmp->vpci->header.bars[i]; + unsigned long start = PFN_DOWN(remote_bar->addr); + unsigned long end = PFN_DOWN(remote_bar->addr + + remote_bar->size - 1); + + if ( !remote_bar->enabled ) continue; - rc = rangeset_remove_range(mem, start, end); - if ( rc ) + for ( j = 0; j < ARRAY_SIZE(header->bars); j++) { - printk(XENLOG_G_WARNING "Failed to remove [%lx, %lx]: %d\n", - start, end, rc); - rangeset_destroy(mem); - return rc; + const struct vpci_bar *bar = &header->bars[j]; + + if ( !rangeset_overlaps_range(bar->mem, start, end) || + /* + * If only the ROM enable bit is toggled check against + * other BARs in the same device for overlaps, but not + * against the same ROM BAR. + */ + (rom_only && tmp == pdev && bar->type == VPCI_BAR_ROM) ) + continue; + + rc = rangeset_remove_range(bar->mem, start, end); + if ( rc ) + { + gprintk(XENLOG_WARNING, + "%pp: failed to remove [%lx, %lx]: %d\n", + &pdev->sbdf, start, end, rc); + return rc; + } } } } @@ -400,10 +476,10 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) * will always be to establish mappings and process all the BARs. */ ASSERT((cmd & PCI_COMMAND_MEMORY) && !rom_only); - return apply_map(pdev->domain, pdev, mem, cmd); + return apply_map(pdev->domain, pdev, cmd); } - defer_map(dev->domain, dev, mem, cmd, rom_only); + defer_map(dev->domain, dev, cmd, rom_only); return 0; } @@ -597,6 +673,18 @@ static void cf_check rom_write( rom->addr = val & PCI_ROM_ADDRESS_MASK; } +static int bar_add_rangeset(const struct pci_dev *pdev, struct vpci_bar *bar, + unsigned int i) +{ + char str[32]; + + snprintf(str, sizeof(str), "%pp:BAR%u", &pdev->sbdf, i); + + bar->mem = rangeset_new(pdev->domain, str, RANGESETF_no_print); + + return !bar->mem ? -ENOMEM : 0; +} + static int cf_check init_bars(struct pci_dev *pdev) { uint16_t cmd; @@ -678,6 +766,10 @@ static int cf_check init_bars(struct pci_dev *pdev) else bars[i].type = VPCI_BAR_MEM32; + rc = bar_add_rangeset(pdev, &bars[i], i); + if ( rc ) + goto fail; + rc = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size, (i == num_bars - 1) ? PCI_BAR_LAST : 0); if ( rc < 0 ) @@ -728,6 +820,12 @@ static int cf_check init_bars(struct pci_dev *pdev) rom_reg, 4, rom); if ( rc ) rom->type = VPCI_BAR_EMPTY; + else + { + rc = bar_add_rangeset(pdev, rom, i); + if ( rc ) + goto fail; + } } } else diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c index b20bee2b0b..5e34d0092a 100644 --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -38,6 +38,8 @@ extern vpci_register_init_t *const __end_vpci_array[]; void vpci_deassign_device(struct pci_dev *pdev) { + unsigned int i; + ASSERT(rw_is_write_locked(&pdev->domain->pci_lock)); if ( !has_vpci(pdev->domain) || !pdev->vpci ) @@ -63,6 +65,10 @@ void vpci_deassign_device(struct pci_dev *pdev) if ( pdev->vpci->msix->table[i] ) iounmap(pdev->vpci->msix->table[i]); } + + for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ ) + rangeset_destroy(pdev->vpci->header.bars[i].mem); + xfree(pdev->vpci->msix); xfree(pdev->vpci->msi); xfree(pdev->vpci); diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h index 2028f2151f..18a0eca3da 100644 --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -72,6 +72,7 @@ struct vpci { /* Guest address. */ uint64_t guest_addr; uint64_t size; + struct rangeset *mem; enum { VPCI_BAR_EMPTY, VPCI_BAR_IO, @@ -156,7 +157,6 @@ struct vpci { struct vpci_vcpu { /* Per-vcpu structure to store state while {un}mapping of PCI BARs. */ - struct rangeset *mem; struct pci_dev *pdev; uint16_t cmd; bool rom_only : 1;