Message ID | 20230829231912.4091958-10-volodymyr_babchuk@epam.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | PCI devices passthrough on Arm, part 3 | expand |
On Tue, Aug 29, 2023 at 11:19:44PM +0000, Volodymyr Babchuk wrote: > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > > Take into account guest's BAR view and program its p2m accordingly: > gfn is guest's view of the BAR and mfn is the physical BAR value. > This way hardware domain sees physical BAR values and guest sees > emulated ones. > > Hardware domain continues getting the BARs identity mapped, while for > domUs the BARs are mapped at the requested guest address without > modifying the BAR address in the device PCI config space. > > Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> > --- > Since v9: > - Extended the commit message > - Use bar->guest_addr in modify_bars > - Extended printk error message in map_range > - Moved map_data initialization so .bar can be initialized during declaration > Since v5: > - remove debug print in map_range callback > - remove "identity" from the debug print > Since v4: > - moved start_{gfn|mfn} calculation into map_range > - pass vpci_bar in the map_data instead of start_{gfn|mfn} > - s/guest_addr/guest_reg > Since v3: > - updated comment (Roger) > - removed gfn_add(map->start_gfn, rc); which is wrong > - use v->domain instead of v->vpci.pdev->domain > - removed odd e.g. in comment > - s/d%d/%pd in altered code > - use gdprintk for map/unmap logs > Since v2: > - improve readability for data.start_gfn and restructure ?: construct > Since v1: > - s/MSI/MSI-X in comments > --- > xen/drivers/vpci/header.c | 52 ++++++++++++++++++++++++++++----------- > 1 file changed, 38 insertions(+), 14 deletions(-) > > diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c > index 3cc6a96849..1e82217200 100644 > --- a/xen/drivers/vpci/header.c > +++ b/xen/drivers/vpci/header.c > @@ -33,6 +33,7 @@ > > struct map_data { > struct domain *d; > + const struct vpci_bar *bar; > bool map; > }; > > @@ -44,6 +45,12 @@ static int cf_check map_range( > > for ( ; ; ) > { > + /* Start address of the BAR as seen by the guest. */ > + gfn_t start_gfn = _gfn(PFN_DOWN(is_hardware_domain(map->d) > + ? map->bar->addr > + : map->bar->guest_addr)); > + /* Physical start address of the BAR. */ > + mfn_t start_mfn = _mfn(PFN_DOWN(map->bar->addr)); Both of those should be declared outside of the loop, as there's no need to (re)calculate them at each iteration. Also start_gfn likely wants to be unsigned long? All the usages of it in the patch convert it to integer by using gfn_x(). > unsigned long size = e - s + 1; > > if ( !iomem_access_permitted(map->d, s, e) ) > @@ -63,6 +70,13 @@ static int cf_check map_range( > return rc; > } > > + /* > + * Ranges to be mapped don't always start at the BAR start address, as > + * there can be holes or partially consumed ranges. Account for the > + * offset of the current address from the BAR start. > + */ > + start_mfn = mfn_add(start_mfn, s - gfn_x(start_gfn)); This should then be a local loop variable with a different name. > + > /* > * ARM TODOs: > * - On ARM whether the memory is prefetchable or not should be passed > @@ -72,8 +86,8 @@ static int cf_check map_range( > * - {un}map_mmio_regions doesn't support preemption. > */ > > - rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s)) > - : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s)); > + rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, start_mfn) > + : unmap_mmio_regions(map->d, _gfn(s), size, start_mfn); > if ( rc == 0 ) > { > *c += size; > @@ -82,8 +96,9 @@ static int cf_check map_range( > if ( rc < 0 ) > { > printk(XENLOG_G_WARNING > - "Failed to identity %smap [%lx, %lx] for d%d: %d\n", > - map->map ? "" : "un", s, e, map->d->domain_id, rc); > + "Failed to %smap [%lx (%lx), %lx (%lx)] for %pd: %d\n", I think we would usually write such mapping messages as: [start gfn, end gfn] -> [start mfn, end mfn] So: "Failed to %smap [%lx, %lx] -> [%lx, %lx] for %pd: %d\n" > + map->map ? "" : "un", s, mfn_x(start_mfn), e, > + mfn_x(start_mfn) + size, map->d, rc); > break; > } > ASSERT(rc < size); > @@ -162,10 +177,6 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd, > bool vpci_process_pending(struct vcpu *v) > { > struct pci_dev *pdev = v->vpci.pdev; > - struct map_data data = { > - .d = v->domain, > - .map = v->vpci.cmd & PCI_COMMAND_MEMORY, > - }; > struct vpci_header *header = NULL; > unsigned int i; > > @@ -177,6 +188,11 @@ bool vpci_process_pending(struct vcpu *v) > for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) > { > struct vpci_bar *bar = &header->bars[i]; > + struct map_data data = { > + .d = v->domain, > + .map = v->vpci.cmd & PCI_COMMAND_MEMORY, > + .bar = bar, > + }; > int rc; > > if ( rangeset_is_empty(bar->mem) ) > @@ -229,7 +245,6 @@ bool vpci_process_pending(struct vcpu *v) > static int __init apply_map(struct domain *d, const struct pci_dev *pdev, > uint16_t cmd) > { > - struct map_data data = { .d = d, .map = true }; > struct vpci_header *header = &pdev->vpci->header; > int rc = 0; > unsigned int i; > @@ -239,6 +254,7 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev, > for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) > { > struct vpci_bar *bar = &header->bars[i]; > + struct map_data data = { .d = d, .map = true, .bar = bar }; > > if ( rangeset_is_empty(bar->mem) ) > continue; > @@ -306,12 +322,18 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > * First fill the rangesets with the BAR of this device or with the ROM > * BAR only, depending on whether the guest is toggling the memory decode > * bit of the command register, or the enable bit of the ROM BAR register. > + * > + * For non-hardware domain we use guest physical addresses. > */ > for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) > { > struct vpci_bar *bar = &header->bars[i]; > unsigned long start = PFN_DOWN(bar->addr); > unsigned long end = PFN_DOWN(bar->addr + bar->size - 1); > + unsigned long start_guest = PFN_DOWN(is_hardware_domain(pdev->domain) ? > + bar->addr : bar->guest_addr); > + unsigned long end_guest = PFN_DOWN((is_hardware_domain(pdev->domain) ? > + bar->addr : bar->guest_addr) + bar->size - 1); > > if ( !bar->mem ) > continue; > @@ -331,11 +353,11 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > continue; > } > > - rc = rangeset_add_range(bar->mem, start, end); > + rc = rangeset_add_range(bar->mem, start_guest, end_guest); > if ( rc ) > { > printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n", > - start, end, rc); > + start_guest, end_guest, rc); > return rc; > } > > @@ -352,7 +374,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > { > gprintk(XENLOG_WARNING, > "%pp: failed to remove overlapping range [%lx, %lx]: %d\n", > - &pdev->sbdf, start, end, rc); > + &pdev->sbdf, start_guest, end_guest, rc); > return rc; > } > } I think you are missing a change to adjust vmsix_table_base() to also return the MSI-X table position in guest address space for domUs, or else the MSI-X overlapping range checks for domUs are wrong. Thanks, Roger.
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c index 3cc6a96849..1e82217200 100644 --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -33,6 +33,7 @@ struct map_data { struct domain *d; + const struct vpci_bar *bar; bool map; }; @@ -44,6 +45,12 @@ static int cf_check map_range( for ( ; ; ) { + /* Start address of the BAR as seen by the guest. */ + gfn_t start_gfn = _gfn(PFN_DOWN(is_hardware_domain(map->d) + ? map->bar->addr + : map->bar->guest_addr)); + /* Physical start address of the BAR. */ + mfn_t start_mfn = _mfn(PFN_DOWN(map->bar->addr)); unsigned long size = e - s + 1; if ( !iomem_access_permitted(map->d, s, e) ) @@ -63,6 +70,13 @@ static int cf_check map_range( return rc; } + /* + * Ranges to be mapped don't always start at the BAR start address, as + * there can be holes or partially consumed ranges. Account for the + * offset of the current address from the BAR start. + */ + start_mfn = mfn_add(start_mfn, s - gfn_x(start_gfn)); + /* * ARM TODOs: * - On ARM whether the memory is prefetchable or not should be passed @@ -72,8 +86,8 @@ static int cf_check map_range( * - {un}map_mmio_regions doesn't support preemption. */ - rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s)) - : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s)); + rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, start_mfn) + : unmap_mmio_regions(map->d, _gfn(s), size, start_mfn); if ( rc == 0 ) { *c += size; @@ -82,8 +96,9 @@ static int cf_check map_range( if ( rc < 0 ) { printk(XENLOG_G_WARNING - "Failed to identity %smap [%lx, %lx] for d%d: %d\n", - map->map ? "" : "un", s, e, map->d->domain_id, rc); + "Failed to %smap [%lx (%lx), %lx (%lx)] for %pd: %d\n", + map->map ? "" : "un", s, mfn_x(start_mfn), e, + mfn_x(start_mfn) + size, map->d, rc); break; } ASSERT(rc < size); @@ -162,10 +177,6 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd, bool vpci_process_pending(struct vcpu *v) { struct pci_dev *pdev = v->vpci.pdev; - struct map_data data = { - .d = v->domain, - .map = v->vpci.cmd & PCI_COMMAND_MEMORY, - }; struct vpci_header *header = NULL; unsigned int i; @@ -177,6 +188,11 @@ bool vpci_process_pending(struct vcpu *v) for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) { struct vpci_bar *bar = &header->bars[i]; + struct map_data data = { + .d = v->domain, + .map = v->vpci.cmd & PCI_COMMAND_MEMORY, + .bar = bar, + }; int rc; if ( rangeset_is_empty(bar->mem) ) @@ -229,7 +245,6 @@ bool vpci_process_pending(struct vcpu *v) static int __init apply_map(struct domain *d, const struct pci_dev *pdev, uint16_t cmd) { - struct map_data data = { .d = d, .map = true }; struct vpci_header *header = &pdev->vpci->header; int rc = 0; unsigned int i; @@ -239,6 +254,7 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev, for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) { struct vpci_bar *bar = &header->bars[i]; + struct map_data data = { .d = d, .map = true, .bar = bar }; if ( rangeset_is_empty(bar->mem) ) continue; @@ -306,12 +322,18 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) * First fill the rangesets with the BAR of this device or with the ROM * BAR only, depending on whether the guest is toggling the memory decode * bit of the command register, or the enable bit of the ROM BAR register. + * + * For non-hardware domain we use guest physical addresses. */ for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) { struct vpci_bar *bar = &header->bars[i]; unsigned long start = PFN_DOWN(bar->addr); unsigned long end = PFN_DOWN(bar->addr + bar->size - 1); + unsigned long start_guest = PFN_DOWN(is_hardware_domain(pdev->domain) ? + bar->addr : bar->guest_addr); + unsigned long end_guest = PFN_DOWN((is_hardware_domain(pdev->domain) ? + bar->addr : bar->guest_addr) + bar->size - 1); if ( !bar->mem ) continue; @@ -331,11 +353,11 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) continue; } - rc = rangeset_add_range(bar->mem, start, end); + rc = rangeset_add_range(bar->mem, start_guest, end_guest); if ( rc ) { printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n", - start, end, rc); + start_guest, end_guest, rc); return rc; } @@ -352,7 +374,7 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) { gprintk(XENLOG_WARNING, "%pp: failed to remove overlapping range [%lx, %lx]: %d\n", - &pdev->sbdf, start, end, rc); + &pdev->sbdf, start_guest, end_guest, rc); return rc; } } @@ -420,8 +442,10 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ ) { const struct vpci_bar *remote_bar = &tmp->vpci->header.bars[i]; - unsigned long start = PFN_DOWN(remote_bar->addr); - unsigned long end = PFN_DOWN(remote_bar->addr + + unsigned long start = PFN_DOWN(is_hardware_domain(pdev->domain) ? + remote_bar->addr : remote_bar->guest_addr); + unsigned long end = PFN_DOWN(is_hardware_domain(pdev->domain) ? + remote_bar->addr : remote_bar->guest_addr + remote_bar->size - 1); if ( !remote_bar->enabled )