Message ID | 20230720003205.1828537-5-volodymyr_babchuk@epam.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | PCI devices passthrough on Arm, part 3 | expand |
On Thu, Jul 20, 2023 at 12:32:31AM +0000, Volodymyr Babchuk wrote: > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > > When a PCI device gets assigned/de-assigned some work on vPCI side needs > to be done for that device. Introduce a pair of hooks so vPCI can handle > that. > > Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > --- > Since v8: > - removed vpci_deassign_device > Since v6: > - do not pass struct domain to vpci_{assign|deassign}_device as > pdev->domain can be used > - do not leave the device assigned (pdev->domain == new domain) in case > vpci_assign_device fails: try to de-assign and if this also fails, then > crash the domain > Since v5: > - do not split code into run_vpci_init > - do not check for is_system_domain in vpci_{de}assign_device > - do not use vpci_remove_device_handlers_locked and re-allocate > pdev->vpci completely > - make vpci_deassign_device void > Since v4: > - de-assign vPCI from the previous domain on device assignment > - do not remove handlers in vpci_assign_device as those must not > exist at that point > Since v3: > - remove toolstack roll-back description from the commit message > as error are to be handled with proper cleanup in Xen itself > - remove __must_check > - remove redundant rc check while assigning devices > - fix redundant CONFIG_HAS_VPCI check for CONFIG_HAS_VPCI_GUEST_SUPPORT > - use REGISTER_VPCI_INIT machinery to run required steps on device > init/assign: add run_vpci_init helper > Since v2: > - define CONFIG_HAS_VPCI_GUEST_SUPPORT so dead code is not compiled > for x86 > Since v1: > - constify struct pci_dev where possible > - do not open code is_system_domain() > - extended the commit message > --- > xen/drivers/Kconfig | 4 ++++ > xen/drivers/passthrough/pci.c | 21 +++++++++++++++++++++ > xen/drivers/vpci/vpci.c | 18 ++++++++++++++++++ > xen/include/xen/vpci.h | 15 +++++++++++++++ > 4 files changed, 58 insertions(+) > > diff --git a/xen/drivers/Kconfig b/xen/drivers/Kconfig > index db94393f47..780490cf8e 100644 > --- a/xen/drivers/Kconfig > +++ b/xen/drivers/Kconfig > @@ -15,4 +15,8 @@ source "drivers/video/Kconfig" > config HAS_VPCI > bool > > +config HAS_VPCI_GUEST_SUPPORT > + bool > + depends on HAS_VPCI > + > endmenu > diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c > index 6f8692cd9c..265d359704 100644 > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -885,6 +885,10 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus, > if ( ret ) > goto out; > > + write_lock(&pdev->domain->pci_lock); > + vpci_deassign_device(pdev); > + write_unlock(&pdev->domain->pci_lock); > + > if ( pdev->domain == hardware_domain ) > pdev->quarantine = false; > > @@ -1484,6 +1488,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) > if ( pdev->broken && d != hardware_domain && d != dom_io ) > goto done; > > + write_lock(&pdev->domain->pci_lock); > + vpci_deassign_device(pdev); > + write_unlock(&pdev->domain->pci_lock); > + > rc = pdev_msix_assign(d, pdev); > if ( rc ) > goto done; > @@ -1509,6 +1517,19 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) > rc = iommu_call(hd->platform_ops, assign_device, d, devfn, > pci_to_dev(pdev), flag); > } > + if ( rc ) > + goto done; > + > + devfn = pdev->devfn; > + write_lock(&pdev->domain->pci_lock); > + rc = vpci_assign_device(pdev); > + write_unlock(&pdev->domain->pci_lock); > + if ( rc && deassign_device(d, seg, bus, devfn) ) > + { > + printk(XENLOG_ERR "%pd: %pp was left partially assigned\n", > + d, &PCI_SBDF(seg, bus, devfn)); &pdev->sbdf? Then you can get of the devfn usage above. > + domain_crash(d); This seems like a bit different from the other error paths in the function, isn't it fine to return an error and let the caller handle the deassign? Also, if we really need to call deassign_device() we must do so for all possible phantom devices, see the above loop around iommu_call(..., assing_device, ...); > + } > > done: > if ( rc ) > diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c > index a6d2cf8660..a97710a806 100644 > --- a/xen/drivers/vpci/vpci.c > +++ b/xen/drivers/vpci/vpci.c > @@ -107,6 +107,24 @@ int vpci_add_handlers(struct pci_dev *pdev) > > return rc; > } > + > +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT > +/* Notify vPCI that device is assigned to guest. */ > +int vpci_assign_device(struct pci_dev *pdev) > +{ > + int rc; > + > + if ( !has_vpci(pdev->domain) ) > + return 0; > + > + rc = vpci_add_handlers(pdev); > + if ( rc ) > + vpci_deassign_device(pdev); Why do you need this handler, vpci_add_handlers() when failing will already call vpci_remove_device(), which is what vpci_deassign_device() does. > + > + return rc; > +} > +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ > + > #endif /* __XEN__ */ > > static int vpci_register_cmp(const struct vpci_register *r1, > diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h > index 0b8a2a3c74..44296623e1 100644 > --- a/xen/include/xen/vpci.h > +++ b/xen/include/xen/vpci.h > @@ -264,6 +264,21 @@ static inline bool __must_check vpci_process_pending(struct vcpu *v) > } > #endif > > +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT > +/* Notify vPCI that device is assigned/de-assigned to/from guest. */ > +int vpci_assign_device(struct pci_dev *pdev); > +#define vpci_deassign_device vpci_remove_device > +#else > +static inline int vpci_assign_device(struct pci_dev *pdev) > +{ > + return 0; > +}; > + > +static inline void vpci_deassign_device(struct pci_dev *pdev) > +{ > +}; > +#endif I don't think there's much point in introducing new functions, see above. I'm fine if the current ones want to be renamed to vpci_{,de}assign_device(), but adding defines like the above just makes the code harder to follow. Thanks, Roger.
On 20.07.2023 02:32, Volodymyr Babchuk wrote: > From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > > When a PCI device gets assigned/de-assigned some work on vPCI side needs > to be done for that device. Introduce a pair of hooks so vPCI can handle > that. > > Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> A couple more mechanical comments in addition to what Roger said: > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -885,6 +885,10 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus, > if ( ret ) > goto out; > > + write_lock(&pdev->domain->pci_lock); > + vpci_deassign_device(pdev); > + write_unlock(&pdev->domain->pci_lock); Can't it be just d here? > @@ -1484,6 +1488,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) > if ( pdev->broken && d != hardware_domain && d != dom_io ) > goto done; > > + write_lock(&pdev->domain->pci_lock); > + vpci_deassign_device(pdev); > + write_unlock(&pdev->domain->pci_lock); Is this meaningful (and okay to call at all) when pdev->domain == dom_io? > @@ -1509,6 +1517,19 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) > rc = iommu_call(hd->platform_ops, assign_device, d, devfn, > pci_to_dev(pdev), flag); > } > + if ( rc ) > + goto done; > + > + devfn = pdev->devfn; > + write_lock(&pdev->domain->pci_lock); > + rc = vpci_assign_device(pdev); > + write_unlock(&pdev->domain->pci_lock); Just d again here? Jan
Hi Roger, Roger Pau Monné <roger.pau@citrix.com> writes: > On Thu, Jul 20, 2023 at 12:32:31AM +0000, Volodymyr Babchuk wrote: >> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >> >> When a PCI device gets assigned/de-assigned some work on vPCI side needs >> to be done for that device. Introduce a pair of hooks so vPCI can handle >> that. >> >> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> >> --- >> Since v8: >> - removed vpci_deassign_device >> Since v6: >> - do not pass struct domain to vpci_{assign|deassign}_device as >> pdev->domain can be used >> - do not leave the device assigned (pdev->domain == new domain) in case >> vpci_assign_device fails: try to de-assign and if this also fails, then >> crash the domain >> Since v5: >> - do not split code into run_vpci_init >> - do not check for is_system_domain in vpci_{de}assign_device >> - do not use vpci_remove_device_handlers_locked and re-allocate >> pdev->vpci completely >> - make vpci_deassign_device void >> Since v4: >> - de-assign vPCI from the previous domain on device assignment >> - do not remove handlers in vpci_assign_device as those must not >> exist at that point >> Since v3: >> - remove toolstack roll-back description from the commit message >> as error are to be handled with proper cleanup in Xen itself >> - remove __must_check >> - remove redundant rc check while assigning devices >> - fix redundant CONFIG_HAS_VPCI check for CONFIG_HAS_VPCI_GUEST_SUPPORT >> - use REGISTER_VPCI_INIT machinery to run required steps on device >> init/assign: add run_vpci_init helper >> Since v2: >> - define CONFIG_HAS_VPCI_GUEST_SUPPORT so dead code is not compiled >> for x86 >> Since v1: >> - constify struct pci_dev where possible >> - do not open code is_system_domain() >> - extended the commit message >> --- >> xen/drivers/Kconfig | 4 ++++ >> xen/drivers/passthrough/pci.c | 21 +++++++++++++++++++++ >> xen/drivers/vpci/vpci.c | 18 ++++++++++++++++++ >> xen/include/xen/vpci.h | 15 +++++++++++++++ >> 4 files changed, 58 insertions(+) >> >> diff --git a/xen/drivers/Kconfig b/xen/drivers/Kconfig >> index db94393f47..780490cf8e 100644 >> --- a/xen/drivers/Kconfig >> +++ b/xen/drivers/Kconfig >> @@ -15,4 +15,8 @@ source "drivers/video/Kconfig" >> config HAS_VPCI >> bool >> >> +config HAS_VPCI_GUEST_SUPPORT >> + bool >> + depends on HAS_VPCI >> + >> endmenu >> diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c >> index 6f8692cd9c..265d359704 100644 >> --- a/xen/drivers/passthrough/pci.c >> +++ b/xen/drivers/passthrough/pci.c >> @@ -885,6 +885,10 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus, >> if ( ret ) >> goto out; >> >> + write_lock(&pdev->domain->pci_lock); >> + vpci_deassign_device(pdev); >> + write_unlock(&pdev->domain->pci_lock); >> + >> if ( pdev->domain == hardware_domain ) >> pdev->quarantine = false; >> >> @@ -1484,6 +1488,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) >> if ( pdev->broken && d != hardware_domain && d != dom_io ) >> goto done; >> >> + write_lock(&pdev->domain->pci_lock); >> + vpci_deassign_device(pdev); >> + write_unlock(&pdev->domain->pci_lock); >> + >> rc = pdev_msix_assign(d, pdev); >> if ( rc ) >> goto done; >> @@ -1509,6 +1517,19 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) >> rc = iommu_call(hd->platform_ops, assign_device, d, devfn, >> pci_to_dev(pdev), flag); >> } >> + if ( rc ) >> + goto done; >> + >> + devfn = pdev->devfn; >> + write_lock(&pdev->domain->pci_lock); >> + rc = vpci_assign_device(pdev); >> + write_unlock(&pdev->domain->pci_lock); >> + if ( rc && deassign_device(d, seg, bus, devfn) ) >> + { >> + printk(XENLOG_ERR "%pd: %pp was left partially assigned\n", >> + d, &PCI_SBDF(seg, bus, devfn)); > > &pdev->sbdf? Then you can get of the devfn usage above. Yes, thanks. >> + domain_crash(d); > > This seems like a bit different from the other error paths in the > function, isn't it fine to return an error and let the caller handle > the deassign? I believe, intention was to not leave device in an unknown state: we failed both assign_device() and deassign_device() call, so what to do now? But yes, I think you are right and it is better to let caller to decide what to do next. > > Also, if we really need to call deassign_device() we must do so for > all possible phantom devices, see the above loop around > iommu_call(..., assing_device, ...); But deassign_device() has the loop for all phantom devices that already does all the work. Unless I miss something, of course. >> + } >> >> done: >> if ( rc ) >> diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c >> index a6d2cf8660..a97710a806 100644 >> --- a/xen/drivers/vpci/vpci.c >> +++ b/xen/drivers/vpci/vpci.c >> @@ -107,6 +107,24 @@ int vpci_add_handlers(struct pci_dev *pdev) >> >> return rc; >> } >> + >> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >> +/* Notify vPCI that device is assigned to guest. */ >> +int vpci_assign_device(struct pci_dev *pdev) >> +{ >> + int rc; >> + >> + if ( !has_vpci(pdev->domain) ) >> + return 0; >> + >> + rc = vpci_add_handlers(pdev); >> + if ( rc ) >> + vpci_deassign_device(pdev); > > Why do you need this handler, vpci_add_handlers() when failing will > already call vpci_remove_device(), which is what > vpci_deassign_device() does. > >> + >> + return rc; >> +} >> +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ >> + >> #endif /* __XEN__ */ >> >> static int vpci_register_cmp(const struct vpci_register *r1, >> diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h >> index 0b8a2a3c74..44296623e1 100644 >> --- a/xen/include/xen/vpci.h >> +++ b/xen/include/xen/vpci.h >> @@ -264,6 +264,21 @@ static inline bool __must_check vpci_process_pending(struct vcpu *v) >> } >> #endif >> >> +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT >> +/* Notify vPCI that device is assigned/de-assigned to/from guest. */ >> +int vpci_assign_device(struct pci_dev *pdev); >> +#define vpci_deassign_device vpci_remove_device >> +#else >> +static inline int vpci_assign_device(struct pci_dev *pdev) >> +{ >> + return 0; >> +}; >> + >> +static inline void vpci_deassign_device(struct pci_dev *pdev) >> +{ >> +}; >> +#endif > > I don't think there's much point in introducing new functions, see > above. I'm fine if the current ones want to be renamed to > vpci_{,de}assign_device(), but adding defines like the above just > makes the code harder to follow. Good idea, thanks, I'll just rename the original functions.
On Wed, Jul 26, 2023 at 01:38:30AM +0000, Volodymyr Babchuk wrote: > > Hi Roger, > > Roger Pau Monné <roger.pau@citrix.com> writes: > > > On Thu, Jul 20, 2023 at 12:32:31AM +0000, Volodymyr Babchuk wrote: > >> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> > >> @@ -1509,6 +1517,19 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) > >> rc = iommu_call(hd->platform_ops, assign_device, d, devfn, > >> pci_to_dev(pdev), flag); > >> } > >> + if ( rc ) > >> + goto done; > >> + > >> + devfn = pdev->devfn; > >> + write_lock(&pdev->domain->pci_lock); > >> + rc = vpci_assign_device(pdev); > >> + write_unlock(&pdev->domain->pci_lock); > >> + if ( rc && deassign_device(d, seg, bus, devfn) ) > >> + { > >> + printk(XENLOG_ERR "%pd: %pp was left partially assigned\n", > >> + d, &PCI_SBDF(seg, bus, devfn)); > > > > &pdev->sbdf? Then you can get of the devfn usage above. > > Yes, thanks. > > >> + domain_crash(d); > > > > This seems like a bit different from the other error paths in the > > function, isn't it fine to return an error and let the caller handle > > the deassign? > > I believe, intention was to not leave device in an unknown state: we > failed both assign_device() and deassign_device() call, so what to do > now? But yes, I think you are right and it is better to let caller to > decide what to do next. I don't think it would be a security risk to leave the device in that state. For domUs the guest won't get access to the device registers anyway as we use an allow list approach. Also deassign_device() is not called when we fail to assign one of the phantom functions. We don't seem to undo any of the work in assign_device() on error so it should be fine to not do the call to deassign_device() on error to initialize vPCI. > > > > Also, if we really need to call deassign_device() we must do so for > > all possible phantom devices, see the above loop around > > iommu_call(..., assing_device, ...); > > But deassign_device() has the loop for all phantom devices that already > does all the work. Unless I miss something, of course. No, you are right, a single call is fine. Thanks, Roger.
diff --git a/xen/drivers/Kconfig b/xen/drivers/Kconfig index db94393f47..780490cf8e 100644 --- a/xen/drivers/Kconfig +++ b/xen/drivers/Kconfig @@ -15,4 +15,8 @@ source "drivers/video/Kconfig" config HAS_VPCI bool +config HAS_VPCI_GUEST_SUPPORT + bool + depends on HAS_VPCI + endmenu diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 6f8692cd9c..265d359704 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -885,6 +885,10 @@ static int deassign_device(struct domain *d, uint16_t seg, uint8_t bus, if ( ret ) goto out; + write_lock(&pdev->domain->pci_lock); + vpci_deassign_device(pdev); + write_unlock(&pdev->domain->pci_lock); + if ( pdev->domain == hardware_domain ) pdev->quarantine = false; @@ -1484,6 +1488,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) if ( pdev->broken && d != hardware_domain && d != dom_io ) goto done; + write_lock(&pdev->domain->pci_lock); + vpci_deassign_device(pdev); + write_unlock(&pdev->domain->pci_lock); + rc = pdev_msix_assign(d, pdev); if ( rc ) goto done; @@ -1509,6 +1517,19 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) rc = iommu_call(hd->platform_ops, assign_device, d, devfn, pci_to_dev(pdev), flag); } + if ( rc ) + goto done; + + devfn = pdev->devfn; + write_lock(&pdev->domain->pci_lock); + rc = vpci_assign_device(pdev); + write_unlock(&pdev->domain->pci_lock); + if ( rc && deassign_device(d, seg, bus, devfn) ) + { + printk(XENLOG_ERR "%pd: %pp was left partially assigned\n", + d, &PCI_SBDF(seg, bus, devfn)); + domain_crash(d); + } done: if ( rc ) diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c index a6d2cf8660..a97710a806 100644 --- a/xen/drivers/vpci/vpci.c +++ b/xen/drivers/vpci/vpci.c @@ -107,6 +107,24 @@ int vpci_add_handlers(struct pci_dev *pdev) return rc; } + +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT +/* Notify vPCI that device is assigned to guest. */ +int vpci_assign_device(struct pci_dev *pdev) +{ + int rc; + + if ( !has_vpci(pdev->domain) ) + return 0; + + rc = vpci_add_handlers(pdev); + if ( rc ) + vpci_deassign_device(pdev); + + return rc; +} +#endif /* CONFIG_HAS_VPCI_GUEST_SUPPORT */ + #endif /* __XEN__ */ static int vpci_register_cmp(const struct vpci_register *r1, diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h index 0b8a2a3c74..44296623e1 100644 --- a/xen/include/xen/vpci.h +++ b/xen/include/xen/vpci.h @@ -264,6 +264,21 @@ static inline bool __must_check vpci_process_pending(struct vcpu *v) } #endif +#ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT +/* Notify vPCI that device is assigned/de-assigned to/from guest. */ +int vpci_assign_device(struct pci_dev *pdev); +#define vpci_deassign_device vpci_remove_device +#else +static inline int vpci_assign_device(struct pci_dev *pdev) +{ + return 0; +}; + +static inline void vpci_deassign_device(struct pci_dev *pdev) +{ +}; +#endif + #endif /*