Message ID | 20220719174253.541965-10-olekstysh@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | PCI devices passthrough on Arm, part 3 | expand |
On 19.07.2022 19:42, Oleksandr Tyshchenko wrote: > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > > Assign SBDF to the PCI devices being passed through with bus 0. > The resulting topology is where PCIe devices reside on the bus 0 of the > root complex itself (embedded endpoints). > This implementation is limited to 32 devices which are allowed on > a single PCI bus. > > Please note, that at the moment only function 0 of a multifunction > device can be passed through. I've not been able to spot where this restriction is being enforced - can you please point me at the respective code? > @@ -99,6 +102,62 @@ int vpci_add_handlers(struct pci_dev *pdev) > } > > #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT > +static int add_virtual_device(struct pci_dev *pdev) > +{ > + struct domain *d = pdev->domain; > + pci_sbdf_t sbdf = { 0 }; > + unsigned long new_dev_number; > + > + if ( is_hardware_domain(d) ) > + return 0; > + > + ASSERT(pcidevs_write_locked()); > + > + /* > + * Each PCI bus supports 32 devices/slots at max or up to 256 when > + * there are multi-function ones which are not yet supported. > + */ > + if ( pdev->info.is_extfn ) > + { > + gdprintk(XENLOG_ERR, "%pp: only function 0 passthrough supported\n", > + &pdev->sbdf); > + return -EOPNOTSUPP; > + } > + > + new_dev_number = find_first_zero_bit(d->vpci_dev_assigned_map, > + VPCI_MAX_VIRT_DEV); > + if ( new_dev_number >= VPCI_MAX_VIRT_DEV ) > + return -ENOSPC; > + > + __set_bit(new_dev_number, &d->vpci_dev_assigned_map); > + > + /* > + * Both segment and bus number are 0: > + * - we emulate a single host bridge for the guest, e.g. segment 0 > + * - with bus 0 the virtual devices are seen as embedded > + * endpoints behind the root complex > + * > + * TODO: add support for multi-function devices. 
> + */ > + sbdf.devfn = PCI_DEVFN(new_dev_number, 0); > + pdev->vpci->guest_sbdf = sbdf; > + > + return 0; > + > +} > + > +static void vpci_remove_virtual_device(const struct pci_dev *pdev) > +{ > + ASSERT(pcidevs_write_locked()); > + > + if ( pdev->vpci ) > + { > + __clear_bit(pdev->vpci->guest_sbdf.dev, > + &pdev->domain->vpci_dev_assigned_map); > + pdev->vpci->guest_sbdf.sbdf = ~0; > + } > +} Feels like I did comment on this before: When ... > @@ -111,8 +170,16 @@ int vpci_assign_device(struct pci_dev *pdev) > > rc = vpci_add_handlers(pdev); > if ( rc ) > - vpci_deassign_device(pdev); > + goto fail; ... this path is taken and hence ... > + rc = add_virtual_device(pdev); ... this is bypassed, ... > + if ( rc ) > + goto fail; > + > + return 0; > > + fail: > + vpci_deassign_device(pdev); ... the function here will see guest_sbdf still as ~0, while pdev->vpci is non-NULL. Therefore mistakenly bit 31 of vpci_dev_assigned_map will be cleared. > @@ -124,6 +191,7 @@ void vpci_deassign_device(struct pci_dev *pdev) > if ( !has_vpci(pdev->domain) ) > return; > > + vpci_remove_virtual_device(pdev); > vpci_remove_device(pdev); > } And other call sites of vpci_remove_device() do not have a need of cleaning up guest_sbdf / vpci_dev_assigned_map? IOW I wonder if it wouldn't be better to have vpci_remove_device() do this as well (retaining - see my comment on the earlier patch) the simple aliasing of vpci_deassign_device() to vpci_remove_device()). > --- a/xen/include/xen/sched.h > +++ b/xen/include/xen/sched.h > @@ -457,6 +457,14 @@ struct domain > > #ifdef CONFIG_HAS_PCI > struct list_head pdev_list; > +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT > + /* > + * The bitmap which shows which device numbers are already used by the > + * virtual PCI bus topology and is used to assign a unique SBDF to the > + * next passed through virtual PCI device. 
> + */ > + DECLARE_BITMAP(vpci_dev_assigned_map, VPCI_MAX_VIRT_DEV); > +#endif > #endif Hmm, yet another reason to keep sched.h including vpci.h, which imo would better be dropped - sched.h already has way too many dependencies. (Just a remark, not strictly a request to change anything.) Jan
On 27.07.22 13:32, Jan Beulich wrote: Hello Jan > On 19.07.2022 19:42, Oleksandr Tyshchenko wrote: >> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >> >> Assign SBDF to the PCI devices being passed through with bus 0. >> The resulting topology is where PCIe devices reside on the bus 0 of the >> root complex itself (embedded endpoints). >> This implementation is limited to 32 devices which are allowed on >> a single PCI bus. >> >> Please note, that at the moment only function 0 of a multifunction >> device can be passed through. > I've not been able to spot where this restriction is being enforced - > can you please point me at the respective code? Nor have I found the respective code. Could you please suggest a place where to put such enforcement (I guess, this should be present in the toolstack)? > >> @@ -99,6 +102,62 @@ int vpci_add_handlers(struct pci_dev *pdev) >> } >> >> #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >> +static int add_virtual_device(struct pci_dev *pdev) >> +{ >> + struct domain *d = pdev->domain; >> + pci_sbdf_t sbdf = { 0 }; >> + unsigned long new_dev_number; >> + >> + if ( is_hardware_domain(d) ) >> + return 0; >> + >> + ASSERT(pcidevs_write_locked()); >> + >> + /* >> + * Each PCI bus supports 32 devices/slots at max or up to 256 when >> + * there are multi-function ones which are not yet supported. >> + */ >> + if ( pdev->info.is_extfn ) >> + { >> + gdprintk(XENLOG_ERR, "%pp: only function 0 passthrough supported\n", >> + &pdev->sbdf); >> + return -EOPNOTSUPP; >> + } >> + >> + new_dev_number = find_first_zero_bit(d->vpci_dev_assigned_map, >> + VPCI_MAX_VIRT_DEV); >> + if ( new_dev_number >= VPCI_MAX_VIRT_DEV ) >> + return -ENOSPC; >> + >> + __set_bit(new_dev_number, &d->vpci_dev_assigned_map); >> + >> + /* >> + * Both segment and bus number are 0: >> + * - we emulate a single host bridge for the guest, e.g. 
segment 0 >> + * - with bus 0 the virtual devices are seen as embedded >> + * endpoints behind the root complex >> + * >> + * TODO: add support for multi-function devices. >> + */ >> + sbdf.devfn = PCI_DEVFN(new_dev_number, 0); >> + pdev->vpci->guest_sbdf = sbdf; >> + >> + return 0; >> + >> +} >> + >> +static void vpci_remove_virtual_device(const struct pci_dev *pdev) >> +{ >> + ASSERT(pcidevs_write_locked()); >> + >> + if ( pdev->vpci ) >> + { >> + __clear_bit(pdev->vpci->guest_sbdf.dev, >> + &pdev->domain->vpci_dev_assigned_map); >> + pdev->vpci->guest_sbdf.sbdf = ~0; >> + } >> +} > Feels like I did comment on this before: When ... > >> @@ -111,8 +170,16 @@ int vpci_assign_device(struct pci_dev *pdev) >> >> rc = vpci_add_handlers(pdev); >> if ( rc ) >> - vpci_deassign_device(pdev); >> + goto fail; > ... this path is taken and hence ... > >> + rc = add_virtual_device(pdev); > ... this is bypassed, ... > >> + if ( rc ) >> + goto fail; >> + >> + return 0; >> >> + fail: >> + vpci_deassign_device(pdev); > ... the function here will see guest_sbdf still as ~0, while pdev->vpci > is non-NULL. Therefore mistakenly bit 31 of vpci_dev_assigned_map will > be cleared. Indeed, good catch, thanks! I assume this can be fixed simply by extending the check in vpci_remove_virtual_device(): if ( pdev->vpci && (pdev->vpci->guest_sbdf.sbdf != ~0) ) > >> @@ -124,6 +191,7 @@ void vpci_deassign_device(struct pci_dev *pdev) >> if ( !has_vpci(pdev->domain) ) >> return; >> >> + vpci_remove_virtual_device(pdev); >> vpci_remove_device(pdev); >> } > And other call sites of vpci_remove_device() do not have a need of > cleaning up guest_sbdf / vpci_dev_assigned_map? I am not 100% sure, but it looks like they don't need it. On the other hand, even if they don't need that, doing the cleaning won't be an issue at all: there is a check before cleaning (which will be extended as I proposed above), so ... 
> IOW I wonder if it > wouldn't be better to have vpci_remove_device() do this as well > (retaining - see my comment on the earlier patch) the simple aliasing > of vpci_deassign_device() to vpci_remove_device()). ... maybe yes. Shall I do that change? > >> --- a/xen/include/xen/sched.h >> +++ b/xen/include/xen/sched.h >> @@ -457,6 +457,14 @@ struct domain >> >> #ifdef CONFIG_HAS_PCI >> struct list_head pdev_list; >> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >> + /* >> + * The bitmap which shows which device numbers are already used by the >> + * virtual PCI bus topology and is used to assign a unique SBDF to the >> + * next passed through virtual PCI device. >> + */ >> + DECLARE_BITMAP(vpci_dev_assigned_map, VPCI_MAX_VIRT_DEV); >> +#endif >> #endif > Hmm, yet another reason to keep sched.h including vpci.h, which > imo would better be dropped - sched.h already has way too many > dependencies. (Just a remark, not strictly a request to change > anything.) I see. > > Jan
On 28.07.2022 16:16, Oleksandr wrote: > On 27.07.22 13:32, Jan Beulich wrote: >> On 19.07.2022 19:42, Oleksandr Tyshchenko wrote: >>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >>> >>> Assign SBDF to the PCI devices being passed through with bus 0. >>> The resulting topology is where PCIe devices reside on the bus 0 of the >>> root complex itself (embedded endpoints). >>> This implementation is limited to 32 devices which are allowed on >>> a single PCI bus. >>> >>> Please note, that at the moment only function 0 of a multifunction >>> device can be passed through. >> I've not been able to spot where this restriction is being enforced - >> can you please point me at the respective code? > > Nor have I found the respective code. > > Could you please suggest a place where to put such enforcement (I guess, > this should be present in the toolstack)? Such a check should be in the tool stack, primarily to give a sensible error message to the user. Yet the hypervisor nevertheless needs to perform the check itself. You know the code you're adding much better than I do, so I guess I'm a little puzzled by you asking me to suggest a place. (And for the tool stack I guess asking tool stack folks would get you better mileage.) >>> @@ -124,6 +191,7 @@ void vpci_deassign_device(struct pci_dev *pdev) >>> if ( !has_vpci(pdev->domain) ) >>> return; >>> >>> + vpci_remove_virtual_device(pdev); >>> vpci_remove_device(pdev); >>> } >> And other call sites of vpci_remove_device() do not have a need of >> cleaning up guest_sbdf / vpci_dev_assigned_map? > > I am not 100% sure, but it looks like they don't need. On the other > hand, even if they don't need that, doing the cleaning won't be an issue > at all, > > there is a check before cleaning (which will be extended as I proposed > above), so ... 
> > >> IOW I wonder if it >> wouldn't be better to have vpci_remove_device() do this as well >> (retaining - see my comment on the earlier patch) the simple aliasing >> of vpci_deassign_device() to vpci_remove_device()). > > > ... maybe yes. Shall I do that change? Well - yes please, afaic. Jan
On 28.07.22 17:26, Jan Beulich wrote: Hello Jan > On 28.07.2022 16:16, Oleksandr wrote: >> On 27.07.22 13:32, Jan Beulich wrote: >>> On 19.07.2022 19:42, Oleksandr Tyshchenko wrote: >>>> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >>>> >>>> Assign SBDF to the PCI devices being passed through with bus 0. >>>> The resulting topology is where PCIe devices reside on the bus 0 of the >>>> root complex itself (embedded endpoints). >>>> This implementation is limited to 32 devices which are allowed on >>>> a single PCI bus. >>>> >>>> Please note, that at the moment only function 0 of a multifunction >>>> device can be passed through. >>> I've not been able to spot where this restriction is being enforced - >>> can you please point me at the respective code? >> Nor have I found the respective code. >> >> Could you please suggest a place where to put such enforcement (I guess, >> this should be present in the toolstack)? > Such check should be in the tool stack primarily to give a sensible > error message to the user. Yet the hypervisor needs to check itself > nevertheless. You know the code you're adding much better than I do, > so I guess I'm a little puzzled by you asking me to suggest a place. > (And for the tool stack I guess asking tool stack folks would get > you better mileage.) Thanks for the clarification. I am still getting used to the changes which that patch series makes (I didn't write that code). In asking for a suggestion, I didn't mean for you to point to an exact place in the code, but rather to a subsystem/software layer; sorry if I was unclear. > >>>> @@ -124,6 +191,7 @@ void vpci_deassign_device(struct pci_dev *pdev) >>>> if ( !has_vpci(pdev->domain) ) >>>> return; >>>> >>>> + vpci_remove_virtual_device(pdev); >>>> vpci_remove_device(pdev); >>>> } >>> And other call sites of vpci_remove_device() do not have a need of >>> cleaning up guest_sbdf / vpci_dev_assigned_map? >> I am not 100% sure, but it looks like they don't need. 
On the other >> hand, even if they don't need that, doing the cleaning won't be an issue >> at all, >> >> there is a check before cleaning (which will be extended as I proposed >> above), so ... >> >> >>> IOW I wonder if it >>> wouldn't be better to have vpci_remove_device() do this as well >>> (retaining - see my comment on the earlier patch) the simple aliasing >>> of vpci_deassign_device() to vpci_remove_device()). >> >> ... maybe yes. Shall I do that change? > Well - yes please, afaic. ok, will do > > Jan
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c index f683346285..d4601ecf9b 100644 --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -84,6 +84,9 @@ int vpci_add_handlers(struct pci_dev *pdev) INIT_LIST_HEAD(&pdev->vpci->handlers); spin_lock_init(&pdev->vpci->lock); +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT + pdev->vpci->guest_sbdf.sbdf = ~0; +#endif for ( i = 0; i < NUM_VPCI_INIT; i++ ) { @@ -99,6 +102,62 @@ int vpci_add_handlers(struct pci_dev *pdev) } #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT +static int add_virtual_device(struct pci_dev *pdev) +{ + struct domain *d = pdev->domain; + pci_sbdf_t sbdf = { 0 }; + unsigned long new_dev_number; + + if ( is_hardware_domain(d) ) + return 0; + + ASSERT(pcidevs_write_locked()); + + /* + * Each PCI bus supports 32 devices/slots at max or up to 256 when + * there are multi-function ones which are not yet supported. + */ + if ( pdev->info.is_extfn ) + { + gdprintk(XENLOG_ERR, "%pp: only function 0 passthrough supported\n", + &pdev->sbdf); + return -EOPNOTSUPP; + } + + new_dev_number = find_first_zero_bit(d->vpci_dev_assigned_map, + VPCI_MAX_VIRT_DEV); + if ( new_dev_number >= VPCI_MAX_VIRT_DEV ) + return -ENOSPC; + + __set_bit(new_dev_number, &d->vpci_dev_assigned_map); + + /* + * Both segment and bus number are 0: + * - we emulate a single host bridge for the guest, e.g. segment 0 + * - with bus 0 the virtual devices are seen as embedded + * endpoints behind the root complex + * + * TODO: add support for multi-function devices. + */ + sbdf.devfn = PCI_DEVFN(new_dev_number, 0); + pdev->vpci->guest_sbdf = sbdf; + + return 0; + +} + +static void vpci_remove_virtual_device(const struct pci_dev *pdev) +{ + ASSERT(pcidevs_write_locked()); + + if ( pdev->vpci ) + { + __clear_bit(pdev->vpci->guest_sbdf.dev, + &pdev->domain->vpci_dev_assigned_map); + pdev->vpci->guest_sbdf.sbdf = ~0; + } +} + /* Notify vPCI that device is assigned to guest. 
*/ int vpci_assign_device(struct pci_dev *pdev) { @@ -111,8 +170,16 @@ int vpci_assign_device(struct pci_dev *pdev) rc = vpci_add_handlers(pdev); if ( rc ) - vpci_deassign_device(pdev); + goto fail; + + rc = add_virtual_device(pdev); + if ( rc ) + goto fail; + + return 0; + fail: + vpci_deassign_device(pdev); return rc; } @@ -124,6 +191,7 @@ void vpci_deassign_device(struct pci_dev *pdev) if ( !has_vpci(pdev->domain) ) return; + vpci_remove_virtual_device(pdev); vpci_remove_device(pdev); } #endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index b9515eb497..a2848a5740 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -457,6 +457,14 @@ struct domain #ifdef CONFIG_HAS_PCI struct list_head pdev_list; +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT + /* + * The bitmap which shows which device numbers are already used by the + * virtual PCI bus topology and is used to assign a unique SBDF to the + * next passed through virtual PCI device. + */ + DECLARE_BITMAP(vpci_dev_assigned_map, VPCI_MAX_VIRT_DEV); +#endif #endif #ifdef CONFIG_HAS_PASSTHROUGH diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h index 1010f68c28..cc14b0086d 100644 --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -21,6 +21,13 @@ typedef int vpci_register_init_t(struct pci_dev *dev); #define VPCI_ECAM_BDF(addr) (((addr) & 0x0ffff000) >> 12) +/* + * Maximum number of devices supported by the virtual bus topology: + * each PCI bus supports 32 devices/slots at max or up to 256 when + * there are multi-function ones which are not yet supported. + */ +#define VPCI_MAX_VIRT_DEV (PCI_SLOT(~0) + 1) + #define REGISTER_VPCI_INIT(x, p) \ static vpci_register_init_t *const x##_entry \ __used_section(".data.vpci." p) = x @@ -145,6 +152,10 @@ struct vpci { struct vpci_arch_msix_entry arch; } entries[]; } *msix; +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT + /* Guest SBDF of the device. */ + pci_sbdf_t guest_sbdf; +#endif #endif };