Message ID | 20240109215145.430207-10-stewart.hildebrand@amd.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | PCI devices passthrough on Arm, part 3 | expand |
On Tue, Jan 09, 2024 at 04:51:24PM -0500, Stewart Hildebrand wrote: > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > > Take into account guest's BAR view and program its p2m accordingly: > gfn is guest's view of the BAR and mfn is the physical BAR value. > This way hardware domain sees physical BAR values and guest sees > emulated ones. > > Hardware domain continues getting the BARs identity mapped, while for > domUs the BARs are mapped at the requested guest address without > modifying the BAR address in the device PCI config space. > > Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> > Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com> Some nits and a request to add an extra assert. If you agree: Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> > --- > In v12: > - Update guest_addr in rom_write() > - Use unsigned long for start_mfn and map_mfn to reduce mfn_x() calls > - Use existing vmsix_table_*() functions > - Change vmsix_table_base() to use .guest_addr > In v11: > - Add vmsix_guest_table_addr() and vmsix_guest_table_base() functions > to access guest's view of the VMSIx tables. > - Use MFN (not GFN) to check access permissions > - Move page offset check to this patch > - Call rangeset_remove_range() with correct parameters > In v10: > - Moved GFN variable definition outside the loop in map_range() > - Updated printk error message in map_range() > - Now BAR address is always stored in bar->guest_addr, even for > HW dom, this removes bunch of ugly is_hwdom() checks in modify_bars() > - vmsix_table_base() now uses .guest_addr instead of .addr > In v9: > - Extended the commit message > - Use bar->guest_addr in modify_bars > - Extended printk error message in map_range > - Moved map_data initialization so .bar can be initialized during declaration > Since v5: > - remove debug print in map_range callback > - remove "identity" from the debug print > Since v4: > - moved start_{gfn|mfn} calculation into map_range > - pass vpci_bar in the map_data instead of start_{gfn|mfn} > - s/guest_addr/guest_reg > Since v3: > - updated comment (Roger) > - removed gfn_add(map->start_gfn, rc); which is wrong > - use v->domain instead of v->vpci.pdev->domain > - removed odd e.g. in comment > - s/d%d/%pd in altered code > - use gdprintk for map/unmap logs > Since v2: > - improve readability for data.start_gfn and restructure ?: construct > Since v1: > - s/MSI/MSI-X in comments > --- > xen/drivers/vpci/header.c | 81 +++++++++++++++++++++++++++++++-------- > xen/include/xen/vpci.h | 3 +- > 2 files changed, 66 insertions(+), 18 deletions(-) > > diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c > index feccd070ddd0..f0b0b64b0929 100644 > --- a/xen/drivers/vpci/header.c > +++ b/xen/drivers/vpci/header.c > @@ -34,6 +34,7 @@ > > struct map_data { > struct domain *d; > + const struct vpci_bar *bar; > bool map; > }; > > @@ -41,13 +42,24 @@ static int cf_check map_range( > unsigned long s, unsigned long e, void *data, unsigned long *c) > { > const struct map_data *map = data; > + /* Start address of the BAR as seen by the guest. */ > + unsigned long start_gfn = PFN_DOWN(map->bar->guest_addr); > + /* Physical start address of the BAR. */ > + unsigned long start_mfn = PFN_DOWN(map->bar->addr); > int rc; > > for ( ; ; ) > { > unsigned long size = e - s + 1; > + /* > + * Ranges to be mapped don't always start at the BAR start address, as > + * there can be holes or partially consumed ranges. Account for the > + * offset of the current address from the BAR start. > + */ > + unsigned long map_mfn = start_mfn + s - start_gfn; > + unsigned long m_end = map_mfn + size - 1; > > - if ( !iomem_access_permitted(map->d, s, e) ) > + if ( !iomem_access_permitted(map->d, map_mfn, m_end) ) > { > printk(XENLOG_G_WARNING > "%pd denied access to MMIO range [%#lx, %#lx]\n", > @@ -55,7 +67,8 @@ static int cf_check map_range( > return -EPERM; > } > > - rc = xsm_iomem_mapping(XSM_HOOK, map->d, s, e, map->map); > + rc = xsm_iomem_mapping(XSM_HOOK, map->d, map_mfn, m_end, > + map->map); > if ( rc ) > { > printk(XENLOG_G_WARNING > @@ -73,8 +86,8 @@ static int cf_check map_range( > * - {un}map_mmio_regions doesn't support preemption. > */ > > - rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s)) > - : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s)); > + rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(map_mfn)) > + : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(map_mfn)); > if ( rc == 0 ) > { > *c += size; > @@ -83,8 +96,9 @@ static int cf_check map_range( > if ( rc < 0 ) > { > printk(XENLOG_G_WARNING > - "Failed to identity %smap [%lx, %lx] for d%d: %d\n", > - map->map ? "" : "un", s, e, map->d->domain_id, rc); > + "Failed to %smap [%lx %lx] -> [%lx %lx] for %pd: %d\n", > + map->map ? "" : "un", s, e, map_mfn, > + map_mfn + size, map->d, rc); > break; > } > ASSERT(rc < size); > @@ -163,10 +177,6 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd, > bool vpci_process_pending(struct vcpu *v) > { > struct pci_dev *pdev = v->vpci.pdev; > - struct map_data data = { > - .d = v->domain, > - .map = v->vpci.cmd & PCI_COMMAND_MEMORY, > - }; > struct vpci_header *header = NULL; > unsigned int i; > > @@ -186,6 +196,11 @@ bool vpci_process_pending(struct vcpu *v) > for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) > { > struct vpci_bar *bar = &header->bars[i]; > + struct map_data data = { > + .d = v->domain, > + .map = v->vpci.cmd & PCI_COMMAND_MEMORY, > + .bar = bar, > + }; > int rc; > > if ( rangeset_is_empty(bar->mem) ) > @@ -236,7 +251,6 @@ bool vpci_process_pending(struct vcpu *v) > static int __init apply_map(struct domain *d, const struct pci_dev *pdev, > uint16_t cmd) > { > - struct map_data data = { .d = d, .map = true }; > struct vpci_header *header = &pdev->vpci->header; > int rc = 0; > unsigned int i; > @@ -246,6 +260,7 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev, > for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) > { > struct vpci_bar *bar = &header->bars[i]; > + struct map_data data = { .d = d, .map = true, .bar = bar }; > > if ( rangeset_is_empty(bar->mem) ) > continue; > @@ -311,12 +326,16 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > * First fill the rangesets with the BAR of this device or with the ROM > * BAR only, depending on whether the guest is toggling the memory decode > * bit of the command register, or the enable bit of the ROM BAR register. > + * > + * For non-hardware domain we use guest physical addresses. > */ > for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) > { > struct vpci_bar *bar = &header->bars[i]; > unsigned long start = PFN_DOWN(bar->addr); > unsigned long end = PFN_DOWN(bar->addr + bar->size - 1); > + unsigned long start_guest = PFN_DOWN(bar->guest_addr); > + unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1); > > if ( !bar->mem ) > continue; > @@ -336,11 +355,25 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > continue; > } Should we assert that the BAR rangeset is empty here? To stay on the safe side. > > - rc = rangeset_add_range(bar->mem, start, end); > + /* > + * Make sure that the guest set address has the same page offset > + * as the physical address on the host or otherwise things won't work as > + * expected. > + */ > + if ( PAGE_OFFSET(bar->guest_addr) != PAGE_OFFSET(bar->addr) ) > + { > + gprintk(XENLOG_G_WARNING, > + "%pp: Can't map BAR%d because of page offset mismatch: %lx vs %lx\n", ^u Also when using the x modifier it's better to also use # to print the 0x prefix. You can also reduce the length of the message using s/because of/due to/ IMO: %pp: Can't map BAR%u due to offset mismatch: %lx vs %lx > + &pdev->sbdf, i, PAGE_OFFSET(bar->guest_addr), > + PAGE_OFFSET(bar->addr)); Maybe worth printing the whole address? > + return -EINVAL; > + } > + > + rc = rangeset_add_range(bar->mem, start_guest, end_guest); > if ( rc ) > { > printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n", > - start, end, rc); > + start_guest, end_guest, rc); > return rc; > } > > @@ -352,12 +385,12 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > if ( rangeset_is_empty(prev_bar->mem) ) > continue; > > - rc = rangeset_remove_range(prev_bar->mem, start, end); > + rc = rangeset_remove_range(prev_bar->mem, start_guest, end_guest); > if ( rc ) > { > gprintk(XENLOG_WARNING, > "%pp: failed to remove overlapping range [%lx, %lx]: %d\n", > - &pdev->sbdf, start, end, rc); > + &pdev->sbdf, start_guest, end_guest, rc); > return rc; > } > } > @@ -425,8 +458,8 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) > for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ ) > { > const struct vpci_bar *remote_bar = &tmp->vpci->header.bars[i]; > - unsigned long start = PFN_DOWN(remote_bar->addr); > - unsigned long end = PFN_DOWN(remote_bar->addr + > + unsigned long start = PFN_DOWN(remote_bar->guest_addr); > + unsigned long end = PFN_DOWN(remote_bar->guest_addr + > remote_bar->size - 1); > > if ( !remote_bar->enabled ) > @@ -513,6 +546,8 @@ static void cf_check bar_write( > struct vpci_bar *bar = data; > bool hi = false; > > + ASSERT(is_hardware_domain(pdev->domain)); > + > if ( bar->type == VPCI_BAR_MEM64_HI ) > { > ASSERT(reg > PCI_BASE_ADDRESS_0); > @@ -543,6 +578,10 @@ static void cf_check bar_write( > */ > bar->addr &= ~(0xffffffffULL << (hi ? 32 : 0)); > bar->addr |= (uint64_t)val << (hi ? 32 : 0); > + /* > + * Update guest address as well, so hardware domain sees BAR identity mapped > + */ Can you drop the 'as well' and make this a single line comment? Otherwise maybe reword to: Update guest address, so hardware domain BAR is identity mapped. Sorry, I find it wasteful to have the opening and closing comment delimiters in separate lines for single line comments. Thanks, Roger.
On 1/12/24 10:06, Roger Pau Monné wrote: > On Tue, Jan 09, 2024 at 04:51:24PM -0500, Stewart Hildebrand wrote: >> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >> >> Take into account guest's BAR view and program its p2m accordingly: >> gfn is guest's view of the BAR and mfn is the physical BAR value. >> This way hardware domain sees physical BAR values and guest sees >> emulated ones. >> >> Hardware domain continues getting the BARs identity mapped, while for >> domUs the BARs are mapped at the requested guest address without >> modifying the BAR address in the device PCI config space. >> >> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >> Signed-off-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com> >> Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com> > > Some nits and a request to add an extra assert. If you agree: > > Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> Thanks! I agree. I'll reply to this with a v12.1 patch. > >> --- >> In v12: >> - Update guest_addr in rom_write() >> - Use unsigned long for start_mfn and map_mfn to reduce mfn_x() calls >> - Use existing vmsix_table_*() functions >> - Change vmsix_table_base() to use .guest_addr >> In v11: >> - Add vmsix_guest_table_addr() and vmsix_guest_table_base() functions >> to access guest's view of the VMSIx tables. >> - Use MFN (not GFN) to check access permissions >> - Move page offset check to this patch >> - Call rangeset_remove_range() with correct parameters >> In v10: >> - Moved GFN variable definition outside the loop in map_range() >> - Updated printk error message in map_range() >> - Now BAR address is always stored in bar->guest_addr, even for >> HW dom, this removes bunch of ugly is_hwdom() checks in modify_bars() >> - vmsix_table_base() now uses .guest_addr instead of .addr >> In v9: >> - Extended the commit message >> - Use bar->guest_addr in modify_bars >> - Extended printk error message in map_range >> - Moved map_data initialization so .bar can be initialized during declaration >> Since v5: >> - remove debug print in map_range callback >> - remove "identity" from the debug print >> Since v4: >> - moved start_{gfn|mfn} calculation into map_range >> - pass vpci_bar in the map_data instead of start_{gfn|mfn} >> - s/guest_addr/guest_reg >> Since v3: >> - updated comment (Roger) >> - removed gfn_add(map->start_gfn, rc); which is wrong >> - use v->domain instead of v->vpci.pdev->domain >> - removed odd e.g. in comment >> - s/d%d/%pd in altered code >> - use gdprintk for map/unmap logs >> Since v2: >> - improve readability for data.start_gfn and restructure ?: construct >> Since v1: >> - s/MSI/MSI-X in comments >> --- >> xen/drivers/vpci/header.c | 81 +++++++++++++++++++++++++++++++-------- >> xen/include/xen/vpci.h | 3 +- >> 2 files changed, 66 insertions(+), 18 deletions(-) >> >> diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c >> index feccd070ddd0..f0b0b64b0929 100644 >> --- a/xen/drivers/vpci/header.c >> +++ b/xen/drivers/vpci/header.c >> @@ -34,6 +34,7 @@ >> >> struct map_data { >> struct domain *d; >> + const struct vpci_bar *bar; >> bool map; >> }; >> >> @@ -41,13 +42,24 @@ static int cf_check map_range( >> unsigned long s, unsigned long e, void *data, unsigned long *c) >> { >> const struct map_data *map = data; >> + /* Start address of the BAR as seen by the guest. */ >> + unsigned long start_gfn = PFN_DOWN(map->bar->guest_addr); >> + /* Physical start address of the BAR. */ >> + unsigned long start_mfn = PFN_DOWN(map->bar->addr); >> int rc; >> >> for ( ; ; ) >> { >> unsigned long size = e - s + 1; >> + /* >> + * Ranges to be mapped don't always start at the BAR start address, as >> + * there can be holes or partially consumed ranges. Account for the >> + * offset of the current address from the BAR start. >> + */ >> + unsigned long map_mfn = start_mfn + s - start_gfn; >> + unsigned long m_end = map_mfn + size - 1; >> >> - if ( !iomem_access_permitted(map->d, s, e) ) >> + if ( !iomem_access_permitted(map->d, map_mfn, m_end) ) >> { >> printk(XENLOG_G_WARNING >> "%pd denied access to MMIO range [%#lx, %#lx]\n", >> @@ -55,7 +67,8 @@ static int cf_check map_range( >> return -EPERM; >> } >> >> - rc = xsm_iomem_mapping(XSM_HOOK, map->d, s, e, map->map); >> + rc = xsm_iomem_mapping(XSM_HOOK, map->d, map_mfn, m_end, >> + map->map); >> if ( rc ) >> { >> printk(XENLOG_G_WARNING >> @@ -73,8 +86,8 @@ static int cf_check map_range( >> * - {un}map_mmio_regions doesn't support preemption. >> */ >> >> - rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s)) >> - : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s)); >> + rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(map_mfn)) >> + : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(map_mfn)); >> if ( rc == 0 ) >> { >> *c += size; >> @@ -83,8 +96,9 @@ static int cf_check map_range( >> if ( rc < 0 ) >> { >> printk(XENLOG_G_WARNING >> - "Failed to identity %smap [%lx, %lx] for d%d: %d\n", >> - map->map ? "" : "un", s, e, map->d->domain_id, rc); >> + "Failed to %smap [%lx %lx] -> [%lx %lx] for %pd: %d\n", >> + map->map ? "" : "un", s, e, map_mfn, >> + map_mfn + size, map->d, rc); >> break; >> } >> ASSERT(rc < size); >> @@ -163,10 +177,6 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd, >> bool vpci_process_pending(struct vcpu *v) >> { >> struct pci_dev *pdev = v->vpci.pdev; >> - struct map_data data = { >> - .d = v->domain, >> - .map = v->vpci.cmd & PCI_COMMAND_MEMORY, >> - }; >> struct vpci_header *header = NULL; >> unsigned int i; >> >> @@ -186,6 +196,11 @@ bool vpci_process_pending(struct vcpu *v) >> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) >> { >> struct vpci_bar *bar = &header->bars[i]; >> + struct map_data data = { >> + .d = v->domain, >> + .map = v->vpci.cmd & PCI_COMMAND_MEMORY, >> + .bar = bar, >> + }; >> int rc; >> >> if ( rangeset_is_empty(bar->mem) ) >> @@ -236,7 +251,6 @@ bool vpci_process_pending(struct vcpu *v) >> static int __init apply_map(struct domain *d, const struct pci_dev *pdev, >> uint16_t cmd) >> { >> - struct map_data data = { .d = d, .map = true }; >> struct vpci_header *header = &pdev->vpci->header; >> int rc = 0; >> unsigned int i; >> @@ -246,6 +260,7 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev, >> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) >> { >> struct vpci_bar *bar = &header->bars[i]; >> + struct map_data data = { .d = d, .map = true, .bar = bar }; >> >> if ( rangeset_is_empty(bar->mem) ) >> continue; >> @@ -311,12 +326,16 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) >> * First fill the rangesets with the BAR of this device or with the ROM >> * BAR only, depending on whether the guest is toggling the memory decode >> * bit of the command register, or the enable bit of the ROM BAR register. >> + * >> + * For non-hardware domain we use guest physical addresses. >> */ >> for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) >> { >> struct vpci_bar *bar = &header->bars[i]; >> unsigned long start = PFN_DOWN(bar->addr); >> unsigned long end = PFN_DOWN(bar->addr + bar->size - 1); >> + unsigned long start_guest = PFN_DOWN(bar->guest_addr); >> + unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1); >> >> if ( !bar->mem ) >> continue; >> @@ -336,11 +355,25 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) >> continue; >> } > > Should we assert that the BAR rangeset is empty here? To stay on the > safe side. Yes, I'll add an ASSERT. > >> >> - rc = rangeset_add_range(bar->mem, start, end); >> + /* >> + * Make sure that the guest set address has the same page offset >> + * as the physical address on the host or otherwise things won't work as >> + * expected. >> + */ >> + if ( PAGE_OFFSET(bar->guest_addr) != PAGE_OFFSET(bar->addr) ) >> + { >> + gprintk(XENLOG_G_WARNING, >> + "%pp: Can't map BAR%d because of page offset mismatch: %lx vs %lx\n", > ^u > > Also when using the x modifier it's better to also use # to print the > 0x prefix. You can also reduce the length of the message using > s/because of/due to/ IMO: > > %pp: Can't map BAR%u due to offset mismatch: %lx vs %lx Will do > >> + &pdev->sbdf, i, PAGE_OFFSET(bar->guest_addr), >> + PAGE_OFFSET(bar->addr)); > > Maybe worth printing the whole address? Agreed, will do > >> + return -EINVAL; >> + } >> + >> + rc = rangeset_add_range(bar->mem, start_guest, end_guest); >> if ( rc ) >> { >> printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n", >> - start, end, rc); >> + start_guest, end_guest, rc); >> return rc; >> } >> >> @@ -352,12 +385,12 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) >> if ( rangeset_is_empty(prev_bar->mem) ) >> continue; >> >> - rc = rangeset_remove_range(prev_bar->mem, start, end); >> + rc = rangeset_remove_range(prev_bar->mem, start_guest, end_guest); >> if ( rc ) >> { >> gprintk(XENLOG_WARNING, >> "%pp: failed to remove overlapping range [%lx, %lx]: %d\n", >> - &pdev->sbdf, start, end, rc); >> + &pdev->sbdf, start_guest, end_guest, rc); >> return rc; >> } >> } >> @@ -425,8 +458,8 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) >> for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ ) >> { >> const struct vpci_bar *remote_bar = &tmp->vpci->header.bars[i]; >> - unsigned long start = PFN_DOWN(remote_bar->addr); >> - unsigned long end = PFN_DOWN(remote_bar->addr + >> + unsigned long start = PFN_DOWN(remote_bar->guest_addr); >> + unsigned long end = PFN_DOWN(remote_bar->guest_addr + >> remote_bar->size - 1); >> >> if ( !remote_bar->enabled ) >> @@ -513,6 +546,8 @@ static void cf_check bar_write( >> struct vpci_bar *bar = data; >> bool hi = false; >> >> + ASSERT(is_hardware_domain(pdev->domain)); >> + >> if ( bar->type == VPCI_BAR_MEM64_HI ) >> { >> ASSERT(reg > PCI_BASE_ADDRESS_0); >> @@ -543,6 +578,10 @@ static void cf_check bar_write( >> */ >> bar->addr &= ~(0xffffffffULL << (hi ? 32 : 0)); >> bar->addr |= (uint64_t)val << (hi ? 32 : 0); >> + /* >> + * Update guest address as well, so hardware domain sees BAR identity mapped >> + */ > > Can you drop the 'as well' and make this a single line comment? > > Otherwise maybe reword to: > > Update guest address, so hardware domain BAR is identity mapped. > > Sorry, I find it wasteful to have the opening and closing comment > delimiters in separate lines for single line comments. Yep, I'll make it single line
On 12.01.2024 16:06, Roger Pau Monné wrote: > On Tue, Jan 09, 2024 at 04:51:24PM -0500, Stewart Hildebrand wrote: >> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >> + /* >> + * Make sure that the guest set address has the same page offset >> + * as the physical address on the host or otherwise things won't work as >> + * expected. >> + */ >> + if ( PAGE_OFFSET(bar->guest_addr) != PAGE_OFFSET(bar->addr) ) >> + { >> + gprintk(XENLOG_G_WARNING, >> + "%pp: Can't map BAR%d because of page offset mismatch: %lx vs %lx\n", > ^u > > Also when using the x modifier it's better to also use # to print the > 0x prefix. You can also reduce the length of the message using > s/because of/due to/ IMO: > > %pp: Can't map BAR%u due to offset mismatch: %lx vs %lx Or even %pp: can't map BAR%u - offset mismatch: %lx vs %lx ? Note also my use of lower-case 'c', which brings this log message in line with all pre-existing (prior to the whole series) vPCI log messages starting with "%pp: " (when not limiting to thus-prefixed there are a couple of "Failed to ..." outliers). Jan
On 1/15/24 04:07, Jan Beulich wrote: > On 12.01.2024 16:06, Roger Pau Monné wrote: >> On Tue, Jan 09, 2024 at 04:51:24PM -0500, Stewart Hildebrand wrote: >>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >>> + /* >>> + * Make sure that the guest set address has the same page offset >>> + * as the physical address on the host or otherwise things won't work as >>> + * expected. >>> + */ >>> + if ( PAGE_OFFSET(bar->guest_addr) != PAGE_OFFSET(bar->addr) ) >>> + { >>> + gprintk(XENLOG_G_WARNING, >>> + "%pp: Can't map BAR%d because of page offset mismatch: %lx vs %lx\n", >> ^u >> >> Also when using the x modifier it's better to also use # to print the >> 0x prefix. You can also reduce the length of the message using >> s/because of/due to/ IMO: >> >> %pp: Can't map BAR%u due to offset mismatch: %lx vs %lx > > Or even > > %pp: can't map BAR%u - offset mismatch: %lx vs %lx > > ? Using # that becomes: "%pp: can't map BAR%u - offset mismatch: %#lx vs %#lx\n" I'll send v12.2. > Note also my use of lower-case 'c', which brings this log message in > line with all pre-existing (prior to the whole series) vPCI log messages > starting with "%pp: " (when not limiting to thus-prefixed there are a > couple of "Failed to ..." outliers). > > Jan
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c index feccd070ddd0..f0b0b64b0929 100644 --- a/xen/drivers/vpci/header.c +++ b/xen/drivers/vpci/header.c @@ -34,6 +34,7 @@ struct map_data { struct domain *d; + const struct vpci_bar *bar; bool map; }; @@ -41,13 +42,24 @@ static int cf_check map_range( unsigned long s, unsigned long e, void *data, unsigned long *c) { const struct map_data *map = data; + /* Start address of the BAR as seen by the guest. */ + unsigned long start_gfn = PFN_DOWN(map->bar->guest_addr); + /* Physical start address of the BAR. */ + unsigned long start_mfn = PFN_DOWN(map->bar->addr); int rc; for ( ; ; ) { unsigned long size = e - s + 1; + /* + * Ranges to be mapped don't always start at the BAR start address, as + * there can be holes or partially consumed ranges. Account for the + * offset of the current address from the BAR start. + */ + unsigned long map_mfn = start_mfn + s - start_gfn; + unsigned long m_end = map_mfn + size - 1; - if ( !iomem_access_permitted(map->d, s, e) ) + if ( !iomem_access_permitted(map->d, map_mfn, m_end) ) { printk(XENLOG_G_WARNING "%pd denied access to MMIO range [%#lx, %#lx]\n", @@ -55,7 +67,8 @@ static int cf_check map_range( return -EPERM; } - rc = xsm_iomem_mapping(XSM_HOOK, map->d, s, e, map->map); + rc = xsm_iomem_mapping(XSM_HOOK, map->d, map_mfn, m_end, + map->map); if ( rc ) { printk(XENLOG_G_WARNING @@ -73,8 +86,8 @@ static int cf_check map_range( * - {un}map_mmio_regions doesn't support preemption. */ - rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(s)) - : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(s)); + rc = map->map ? map_mmio_regions(map->d, _gfn(s), size, _mfn(map_mfn)) + : unmap_mmio_regions(map->d, _gfn(s), size, _mfn(map_mfn)); if ( rc == 0 ) { *c += size; @@ -83,8 +96,9 @@ static int cf_check map_range( if ( rc < 0 ) { printk(XENLOG_G_WARNING - "Failed to identity %smap [%lx, %lx] for d%d: %d\n", - map->map ? "" : "un", s, e, map->d->domain_id, rc); + "Failed to %smap [%lx %lx] -> [%lx %lx] for %pd: %d\n", + map->map ? "" : "un", s, e, map_mfn, + map_mfn + size, map->d, rc); break; } ASSERT(rc < size); @@ -163,10 +177,6 @@ static void modify_decoding(const struct pci_dev *pdev, uint16_t cmd, bool vpci_process_pending(struct vcpu *v) { struct pci_dev *pdev = v->vpci.pdev; - struct map_data data = { - .d = v->domain, - .map = v->vpci.cmd & PCI_COMMAND_MEMORY, - }; struct vpci_header *header = NULL; unsigned int i; @@ -186,6 +196,11 @@ bool vpci_process_pending(struct vcpu *v) for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) { struct vpci_bar *bar = &header->bars[i]; + struct map_data data = { + .d = v->domain, + .map = v->vpci.cmd & PCI_COMMAND_MEMORY, + .bar = bar, + }; int rc; if ( rangeset_is_empty(bar->mem) ) @@ -236,7 +251,6 @@ bool vpci_process_pending(struct vcpu *v) static int __init apply_map(struct domain *d, const struct pci_dev *pdev, uint16_t cmd) { - struct map_data data = { .d = d, .map = true }; struct vpci_header *header = &pdev->vpci->header; int rc = 0; unsigned int i; @@ -246,6 +260,7 @@ static int __init apply_map(struct domain *d, const struct pci_dev *pdev, for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) { struct vpci_bar *bar = &header->bars[i]; + struct map_data data = { .d = d, .map = true, .bar = bar }; if ( rangeset_is_empty(bar->mem) ) continue; @@ -311,12 +326,16 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) * First fill the rangesets with the BAR of this device or with the ROM * BAR only, depending on whether the guest is toggling the memory decode * bit of the command register, or the enable bit of the ROM BAR register. + * + * For non-hardware domain we use guest physical addresses. */ for ( i = 0; i < ARRAY_SIZE(header->bars); i++ ) { struct vpci_bar *bar = &header->bars[i]; unsigned long start = PFN_DOWN(bar->addr); unsigned long end = PFN_DOWN(bar->addr + bar->size - 1); + unsigned long start_guest = PFN_DOWN(bar->guest_addr); + unsigned long end_guest = PFN_DOWN(bar->guest_addr + bar->size - 1); if ( !bar->mem ) continue; @@ -336,11 +355,25 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) continue; } - rc = rangeset_add_range(bar->mem, start, end); + /* + * Make sure that the guest set address has the same page offset + * as the physical address on the host or otherwise things won't work as + * expected. + */ + if ( PAGE_OFFSET(bar->guest_addr) != PAGE_OFFSET(bar->addr) ) + { + gprintk(XENLOG_G_WARNING, + "%pp: Can't map BAR%d because of page offset mismatch: %lx vs %lx\n", + &pdev->sbdf, i, PAGE_OFFSET(bar->guest_addr), + PAGE_OFFSET(bar->addr)); + return -EINVAL; + } + + rc = rangeset_add_range(bar->mem, start_guest, end_guest); if ( rc ) { printk(XENLOG_G_WARNING "Failed to add [%lx, %lx]: %d\n", - start, end, rc); + start_guest, end_guest, rc); return rc; } @@ -352,12 +385,12 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) if ( rangeset_is_empty(prev_bar->mem) ) continue; - rc = rangeset_remove_range(prev_bar->mem, start, end); + rc = rangeset_remove_range(prev_bar->mem, start_guest, end_guest); if ( rc ) { gprintk(XENLOG_WARNING, "%pp: failed to remove overlapping range [%lx, %lx]: %d\n", - &pdev->sbdf, start, end, rc); + &pdev->sbdf, start_guest, end_guest, rc); return rc; } } @@ -425,8 +458,8 @@ static int modify_bars(const struct pci_dev *pdev, uint16_t cmd, bool rom_only) for ( i = 0; i < ARRAY_SIZE(tmp->vpci->header.bars); i++ ) { const struct vpci_bar *remote_bar = &tmp->vpci->header.bars[i]; - unsigned long start = PFN_DOWN(remote_bar->addr); - unsigned long end = PFN_DOWN(remote_bar->addr + + unsigned long start = PFN_DOWN(remote_bar->guest_addr); + unsigned long end = PFN_DOWN(remote_bar->guest_addr + remote_bar->size - 1); if ( !remote_bar->enabled ) @@ -513,6 +546,8 @@ static void cf_check bar_write( struct vpci_bar *bar = data; bool hi = false; + ASSERT(is_hardware_domain(pdev->domain)); + if ( bar->type == VPCI_BAR_MEM64_HI ) { ASSERT(reg > PCI_BASE_ADDRESS_0); @@ -543,6 +578,10 @@ static void cf_check bar_write( */ bar->addr &= ~(0xffffffffULL << (hi ? 32 : 0)); bar->addr |= (uint64_t)val << (hi ? 32 : 0); + /* + * Update guest address as well, so hardware domain sees BAR identity mapped + */ + bar->guest_addr = bar->addr; /* Make sure Xen writes back the same value for the BAR RO bits. */ if ( !hi ) @@ -639,11 +678,14 @@ static void cf_check rom_write( } if ( !rom->enabled ) + { /* * If the ROM BAR is not mapped update the address field so the * correct address is mapped into the p2m. */ rom->addr = val & PCI_ROM_ADDRESS_MASK; + rom->guest_addr = rom->addr; + } if ( !header->bars_mapped || rom->enabled == new_enabled ) { @@ -667,7 +709,10 @@ static void cf_check rom_write( return; if ( !new_enabled ) + { rom->addr = val & PCI_ROM_ADDRESS_MASK; + rom->guest_addr = rom->addr; + } } static int bar_add_rangeset(const struct pci_dev *pdev, struct vpci_bar *bar, @@ -862,6 +907,7 @@ static int cf_check init_header(struct pci_dev *pdev) } bars[i].addr = addr; + bars[i].guest_addr = addr; bars[i].size = size; bars[i].prefetchable = val & PCI_BASE_ADDRESS_MEM_PREFETCH; @@ -884,6 +930,7 @@ static int cf_check init_header(struct pci_dev *pdev) rom->type = VPCI_BAR_ROM; rom->size = size; rom->addr = addr; + rom->guest_addr = addr; header->rom_enabled = pci_conf_read32(pdev->sbdf, rom_reg) & PCI_ROM_ADDRESS_ENABLE; diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h index 817ee9ee7300..e89c571890b2 100644 --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -216,7 +216,8 @@ int vpci_msix_arch_print(const struct vpci_msix *msix); */ static inline paddr_t vmsix_table_base(const struct vpci *vpci, unsigned int nr) { - return vpci->header.bars[vpci->msix->tables[nr] & PCI_MSIX_BIRMASK].addr; + return vpci->header.bars[vpci->msix->tables[nr] & + PCI_MSIX_BIRMASK].guest_addr; } static inline paddr_t vmsix_table_addr(const struct vpci *vpci, unsigned int nr)