Message ID | 20240115101313.131139-3-zhenzhong.duan@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Check and sync host IOMMU cap/ecap with vIOMMU | expand |
Hi Zhenzhong, On 1/15/24 11:13, Zhenzhong Duan wrote: > From: Yi Liu <yi.l.liu@intel.com> > > This adds pci_device_set/unset_iommu_device() to set/unset > IOMMUFDDevice for a given PCIe device. Caller of set > should fail if set operation fails. > > Extract out pci_device_get_iommu_bus_devfn() to facilitate > implementation of pci_device_set/unset_iommu_device(). > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++- > hw/pci/pci.c | 49 +++++++++++++++++++++++++++++++++++++++++++- > 2 files changed, 86 insertions(+), 2 deletions(-) > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h > index fa6313aabc..a810c0ec74 100644 > --- a/include/hw/pci/pci.h > +++ b/include/hw/pci/pci.h > @@ -7,6 +7,8 @@ > /* PCI includes legacy ISA access. */ > #include "hw/isa/isa.h" > > +#include "sysemu/iommufd_device.h" > + > extern bool pci_available; > > /* PCI bus */ > @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps { > * > * @devfn: device and function number > */ > - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); > + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); > + /** > + * @set_iommu_device: set iommufd device for a PCI device to vIOMMU > + * > + * Optional callback, if not implemented in vIOMMU, then vIOMMU can't > + * utilize iommufd specific features. > + * > + * Return true if iommufd device is accepted, or else return false with > + * errp set. > + * > + * @bus: the #PCIBus of the PCI device. > + * > + * @opaque: the data passed to pci_setup_iommu(). > + * > + * @devfn: device and function number of the PCI device. > + * > + * @idev: the data structure representing iommufd device. > + * > + */ > + int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn, > + IOMMUFDDevice *idev, Error **errp); > + /** > + * @unset_iommu_device: unset iommufd device for a PCI device from vIOMMU > + * > + * Optional callback. > + * > + * @bus: the #PCIBus of the PCI device. > + * > + * @opaque: the data passed to pci_setup_iommu(). > + * > + * @devfn: device and function number of the PCI device. > + */ > + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn); > } PCIIOMMUOps; > > AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); > +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev, > + Error **errp); > +void pci_device_unset_iommu_device(PCIDevice *dev); > > /** > * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus > diff --git a/hw/pci/pci.c b/hw/pci/pci.c > index 76080af580..3848662f95 100644 > --- a/hw/pci/pci.c > +++ b/hw/pci/pci.c > @@ -2672,7 +2672,10 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data) > } > } > > -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) > +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, > + PCIBus **aliased_pbus, > + PCIBus **piommu_bus, > + uint8_t *aliased_pdevfn) nit: I would drop the p in aliased_pbus andaliased_pdevfn. Maybe you should allow the caller to pass NUL for aliased_pbus and aliased_pdevfn as it is the case for pci_device_set_iommu_device() I may resue that helper in [RFC 2/7] hw/pci: Introduce pci_device_iommu_bus > { > PCIBus *bus = pci_get_bus(dev); > PCIBus *iommu_bus = bus; > @@ -2717,6 +2720,18 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) > > iommu_bus = parent_bus; > } > + *aliased_pbus = bus; > + *piommu_bus = iommu_bus; > + *aliased_pdevfn = devfn; > +} > + > +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) > +{ > + PCIBus *bus; > + PCIBus *iommu_bus; > + uint8_t devfn; > + > + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); > if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { > return iommu_bus->iommu_ops->get_address_space(bus, > iommu_bus->iommu_opaque, devfn); > @@ -2724,6 +2739,38 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) > return &address_space_memory; > } > > +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev, > + Error **errp) > +{ > + PCIBus *bus; > + PCIBus *iommu_bus; > + uint8_t devfn; > + > + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); > + if (!pci_bus_bypass_iommu(bus) && iommu_bus && > + iommu_bus->iommu_ops && iommu_bus->iommu_ops->set_iommu_device) { > + return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev), > + iommu_bus->iommu_opaque, > + dev->devfn, idev, errp); > + } > + return 0; > +} > + > +void pci_device_unset_iommu_device(PCIDevice *dev) > +{ > + PCIBus *bus; > + PCIBus *iommu_bus; > + uint8_t devfn; > + > + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); > + if (!pci_bus_bypass_iommu(bus) && iommu_bus && > + iommu_bus->iommu_ops && iommu_bus->iommu_ops->unset_iommu_device) { > + return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev), > + iommu_bus->iommu_opaque, > + dev->devfn); > + } > +} > + > void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) > { > /* Thanks Eric
>-----Original Message----- >From: Eric Auger <eric.auger@redhat.com> >Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce >pci_device_set/unset_iommu_device() > >Hi Zhenzhong, > >On 1/15/24 11:13, Zhenzhong Duan wrote: >> From: Yi Liu <yi.l.liu@intel.com> >> >> This adds pci_device_set/unset_iommu_device() to set/unset >> IOMMUFDDevice for a given PCIe device. Caller of set >> should fail if set operation fails. >> >> Extract out pci_device_get_iommu_bus_devfn() to facilitate >> implementation of pci_device_set/unset_iommu_device(). >> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >> --- >> include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++- >> hw/pci/pci.c | 49 >+++++++++++++++++++++++++++++++++++++++++++- >> 2 files changed, 86 insertions(+), 2 deletions(-) >> >> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h >> index fa6313aabc..a810c0ec74 100644 >> --- a/include/hw/pci/pci.h >> +++ b/include/hw/pci/pci.h >> @@ -7,6 +7,8 @@ >> /* PCI includes legacy ISA access. */ >> #include "hw/isa/isa.h" >> >> +#include "sysemu/iommufd_device.h" >> + >> extern bool pci_available; >> >> /* PCI bus */ >> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps { >> * >> * @devfn: device and function number >> */ >> - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int >devfn); >> + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int >devfn); >> + /** >> + * @set_iommu_device: set iommufd device for a PCI device to >vIOMMU >> + * >> + * Optional callback, if not implemented in vIOMMU, then vIOMMU >can't >> + * utilize iommufd specific features. >> + * >> + * Return true if iommufd device is accepted, or else return false with >> + * errp set. >> + * >> + * @bus: the #PCIBus of the PCI device. >> + * >> + * @opaque: the data passed to pci_setup_iommu(). >> + * >> + * @devfn: device and function number of the PCI device. >> + * >> + * @idev: the data structure representing iommufd device. >> + * >> + */ >> + int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn, >> + IOMMUFDDevice *idev, Error **errp); >> + /** >> + * @unset_iommu_device: unset iommufd device for a PCI device from >vIOMMU >> + * >> + * Optional callback. >> + * >> + * @bus: the #PCIBus of the PCI device. >> + * >> + * @opaque: the data passed to pci_setup_iommu(). >> + * >> + * @devfn: device and function number of the PCI device. >> + */ >> + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t >devfn); >> } PCIIOMMUOps; >> >> AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); >> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >*idev, >> + Error **errp); >> +void pci_device_unset_iommu_device(PCIDevice *dev); >> >> /** >> * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c >> index 76080af580..3848662f95 100644 >> --- a/hw/pci/pci.c >> +++ b/hw/pci/pci.c >> @@ -2672,7 +2672,10 @@ static void >pci_device_class_base_init(ObjectClass *klass, void *data) >> } >> } >> >> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, >> + PCIBus **aliased_pbus, >> + PCIBus **piommu_bus, >> + uint8_t *aliased_pdevfn) >nit: I would drop the p in aliased_pbus andaliased_pdevfn. Maybe you >should allow the caller to pass NUL for aliased_pbus and aliased_pdevfn >as it is the case for pci_device_set_iommu_device() I may resue that >helper in [RFC 2/7] hw/pci: Introduce pci_device_iommu_bus Good suggestion, will do. Thanks Zhenzhong >> { >> PCIBus *bus = pci_get_bus(dev); >> PCIBus *iommu_bus = bus; >> @@ -2717,6 +2720,18 @@ AddressSpace >*pci_device_iommu_address_space(PCIDevice *dev) >> >> iommu_bus = parent_bus; >> } >> + *aliased_pbus = bus; >> + *piommu_bus = iommu_bus; >> + *aliased_pdevfn = devfn; >> +} >> + >> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >> +{ >> + PCIBus *bus; >> + PCIBus *iommu_bus; >> + uint8_t devfn; >> + >> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); >> if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { >> return iommu_bus->iommu_ops->get_address_space(bus, >> iommu_bus->iommu_opaque, devfn); >> @@ -2724,6 +2739,38 @@ AddressSpace >*pci_device_iommu_address_space(PCIDevice *dev) >> return &address_space_memory; >> } >> >> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >*idev, >> + Error **errp) >> +{ >> + PCIBus *bus; >> + PCIBus *iommu_bus; >> + uint8_t devfn; >> + >> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); >> + if (!pci_bus_bypass_iommu(bus) && iommu_bus && >> + iommu_bus->iommu_ops && iommu_bus->iommu_ops- >>set_iommu_device) { >> + return iommu_bus->iommu_ops- >>set_iommu_device(pci_get_bus(dev), >> + iommu_bus->iommu_opaque, >> + dev->devfn, idev, errp); >> + } >> + return 0; >> +} >> + >> +void pci_device_unset_iommu_device(PCIDevice *dev) >> +{ >> + PCIBus *bus; >> + PCIBus *iommu_bus; >> + uint8_t devfn; >> + >> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); >> + if (!pci_bus_bypass_iommu(bus) && iommu_bus && >> + iommu_bus->iommu_ops && iommu_bus->iommu_ops- >>unset_iommu_device) { >> + return iommu_bus->iommu_ops- >>unset_iommu_device(pci_get_bus(dev), >> + iommu_bus->iommu_opaque, >> + dev->devfn); >> + } >> +} >> + >> void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void >*opaque) >> { >> /* >Thanks > >Eric
On 1/15/24 11:13, Zhenzhong Duan wrote: > From: Yi Liu <yi.l.liu@intel.com> > > This adds pci_device_set/unset_iommu_device() to set/unset > IOMMUFDDevice for a given PCIe device. Caller of set > should fail if set operation fails. > > Extract out pci_device_get_iommu_bus_devfn() to facilitate > implementation of pci_device_set/unset_iommu_device(). > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++- > hw/pci/pci.c | 49 +++++++++++++++++++++++++++++++++++++++++++- > 2 files changed, 86 insertions(+), 2 deletions(-) > > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h > index fa6313aabc..a810c0ec74 100644 > --- a/include/hw/pci/pci.h > +++ b/include/hw/pci/pci.h > @@ -7,6 +7,8 @@ > /* PCI includes legacy ISA access. */ > #include "hw/isa/isa.h" > > +#include "sysemu/iommufd_device.h" > + > extern bool pci_available; > > /* PCI bus */ > @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps { > * > * @devfn: device and function number > */ > - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); > + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); > + /** > + * @set_iommu_device: set iommufd device for a PCI device to vIOMMU > + * > + * Optional callback, if not implemented in vIOMMU, then vIOMMU can't > + * utilize iommufd specific features. > + * > + * Return true if iommufd device is accepted, or else return false with > + * errp set. > + * > + * @bus: the #PCIBus of the PCI device. > + * > + * @opaque: the data passed to pci_setup_iommu(). > + * > + * @devfn: device and function number of the PCI device. > + * > + * @idev: the data structure representing iommufd device. > + * > + */ > + int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn, > + IOMMUFDDevice *idev, Error **errp); > + /** > + * @unset_iommu_device: unset iommufd device for a PCI device from vIOMMU > + * > + * Optional callback. > + * > + * @bus: the #PCIBus of the PCI device. > + * > + * @opaque: the data passed to pci_setup_iommu(). > + * > + * @devfn: device and function number of the PCI device. > + */ > + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn); > } PCIIOMMUOps; > > AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); > +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev, > + Error **errp); > +void pci_device_unset_iommu_device(PCIDevice *dev); > > /** > * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus > diff --git a/hw/pci/pci.c b/hw/pci/pci.c > index 76080af580..3848662f95 100644 > --- a/hw/pci/pci.c > +++ b/hw/pci/pci.c > @@ -2672,7 +2672,10 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data) > } > } > > -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) > +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, > + PCIBus **aliased_pbus, > + PCIBus **piommu_bus, > + uint8_t *aliased_pdevfn) > { > PCIBus *bus = pci_get_bus(dev); > PCIBus *iommu_bus = bus; > @@ -2717,6 +2720,18 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) > > iommu_bus = parent_bus; > } > + *aliased_pbus = bus; > + *piommu_bus = iommu_bus; > + *aliased_pdevfn = devfn; > +} > + > +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) > +{ > + PCIBus *bus; > + PCIBus *iommu_bus; > + uint8_t devfn; > + > + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); > if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { > return iommu_bus->iommu_ops->get_address_space(bus, > iommu_bus->iommu_opaque, devfn); > @@ -2724,6 +2739,38 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) > return &address_space_memory; > } > > +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev, > + Error **errp) > +{ > + PCIBus *bus; > + PCIBus *iommu_bus; > + uint8_t devfn; > + > + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); > + if (!pci_bus_bypass_iommu(bus) && iommu_bus && Why do we test iommu_bus in pci_device_un/set_iommu_device routines and not in pci_device_iommu_address_space() ? Thanks, C. > + iommu_bus->iommu_ops && iommu_bus->iommu_ops->set_iommu_device) { > + return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev), > + iommu_bus->iommu_opaque, > + dev->devfn, idev, errp); > + } > + return 0; > +} > + > +void pci_device_unset_iommu_device(PCIDevice *dev) > +{ > + PCIBus *bus; > + PCIBus *iommu_bus; > + uint8_t devfn; > + > + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); > + if (!pci_bus_bypass_iommu(bus) && iommu_bus && > + iommu_bus->iommu_ops && iommu_bus->iommu_ops->unset_iommu_device) { > + return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev), > + iommu_bus->iommu_opaque, > + dev->devfn); > + } > +} > + > void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) > { > /*
>-----Original Message----- >From: Cédric Le Goater <clg@redhat.com> >Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce >pci_device_set/unset_iommu_device() > >On 1/15/24 11:13, Zhenzhong Duan wrote: >> From: Yi Liu <yi.l.liu@intel.com> >> >> This adds pci_device_set/unset_iommu_device() to set/unset >> IOMMUFDDevice for a given PCIe device. Caller of set >> should fail if set operation fails. >> >> Extract out pci_device_get_iommu_bus_devfn() to facilitate >> implementation of pci_device_set/unset_iommu_device(). >> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >> --- >> include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++- >> hw/pci/pci.c | 49 >+++++++++++++++++++++++++++++++++++++++++++- >> 2 files changed, 86 insertions(+), 2 deletions(-) >> >> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h >> index fa6313aabc..a810c0ec74 100644 >> --- a/include/hw/pci/pci.h >> +++ b/include/hw/pci/pci.h >> @@ -7,6 +7,8 @@ >> /* PCI includes legacy ISA access. */ >> #include "hw/isa/isa.h" >> >> +#include "sysemu/iommufd_device.h" >> + >> extern bool pci_available; >> >> /* PCI bus */ >> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps { >> * >> * @devfn: device and function number >> */ >> - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int >devfn); >> + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int >devfn); >> + /** >> + * @set_iommu_device: set iommufd device for a PCI device to >vIOMMU >> + * >> + * Optional callback, if not implemented in vIOMMU, then vIOMMU >can't >> + * utilize iommufd specific features. >> + * >> + * Return true if iommufd device is accepted, or else return false with >> + * errp set. >> + * >> + * @bus: the #PCIBus of the PCI device. >> + * >> + * @opaque: the data passed to pci_setup_iommu(). >> + * >> + * @devfn: device and function number of the PCI device. >> + * >> + * @idev: the data structure representing iommufd device. >> + * >> + */ >> + int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn, >> + IOMMUFDDevice *idev, Error **errp); >> + /** >> + * @unset_iommu_device: unset iommufd device for a PCI device from >vIOMMU >> + * >> + * Optional callback. >> + * >> + * @bus: the #PCIBus of the PCI device. >> + * >> + * @opaque: the data passed to pci_setup_iommu(). >> + * >> + * @devfn: device and function number of the PCI device. >> + */ >> + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t >devfn); >> } PCIIOMMUOps; >> >> AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); >> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >*idev, >> + Error **errp); >> +void pci_device_unset_iommu_device(PCIDevice *dev); >> >> /** >> * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus >> diff --git a/hw/pci/pci.c b/hw/pci/pci.c >> index 76080af580..3848662f95 100644 >> --- a/hw/pci/pci.c >> +++ b/hw/pci/pci.c >> @@ -2672,7 +2672,10 @@ static void >pci_device_class_base_init(ObjectClass *klass, void *data) >> } >> } >> >> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, >> + PCIBus **aliased_pbus, >> + PCIBus **piommu_bus, >> + uint8_t *aliased_pdevfn) >> { >> PCIBus *bus = pci_get_bus(dev); >> PCIBus *iommu_bus = bus; >> @@ -2717,6 +2720,18 @@ AddressSpace >*pci_device_iommu_address_space(PCIDevice *dev) >> >> iommu_bus = parent_bus; >> } >> + *aliased_pbus = bus; >> + *piommu_bus = iommu_bus; >> + *aliased_pdevfn = devfn; >> +} >> + >> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >> +{ >> + PCIBus *bus; >> + PCIBus *iommu_bus; >> + uint8_t devfn; >> + >> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); >> if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { >> return iommu_bus->iommu_ops->get_address_space(bus, >> iommu_bus->iommu_opaque, devfn); >> @@ -2724,6 +2739,38 @@ AddressSpace >*pci_device_iommu_address_space(PCIDevice *dev) >> return &address_space_memory; >> } >> >> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >*idev, >> + Error **errp) >> +{ >> + PCIBus *bus; >> + PCIBus *iommu_bus; >> + uint8_t devfn; >> + >> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); >> + if (!pci_bus_bypass_iommu(bus) && iommu_bus && > >Why do we test iommu_bus in pci_device_un/set_iommu_device routines >and >not in pci_device_iommu_address_space() ? iommu_bus check in pci_device_iommu_address_space() is dropped in below commit, I didn't find related discussion in mail history, maybe by accident? I can add it back if it's not intentional. ba7d12eb8c hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps Thanks Zhenzhong
On 1/23/24 07:37, Duan, Zhenzhong wrote: > > >> -----Original Message----- >> From: Cédric Le Goater <clg@redhat.com> >> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce >> pci_device_set/unset_iommu_device() >> >> On 1/15/24 11:13, Zhenzhong Duan wrote: >>> From: Yi Liu <yi.l.liu@intel.com> >>> >>> This adds pci_device_set/unset_iommu_device() to set/unset >>> IOMMUFDDevice for a given PCIe device. Caller of set >>> should fail if set operation fails. >>> >>> Extract out pci_device_get_iommu_bus_devfn() to facilitate >>> implementation of pci_device_set/unset_iommu_device(). >>> >>> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> >>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >>> --- >>> include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++- >>> hw/pci/pci.c | 49 >> +++++++++++++++++++++++++++++++++++++++++++- >>> 2 files changed, 86 insertions(+), 2 deletions(-) >>> >>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h >>> index fa6313aabc..a810c0ec74 100644 >>> --- a/include/hw/pci/pci.h >>> +++ b/include/hw/pci/pci.h >>> @@ -7,6 +7,8 @@ >>> /* PCI includes legacy ISA access. */ >>> #include "hw/isa/isa.h" >>> >>> +#include "sysemu/iommufd_device.h" >>> + >>> extern bool pci_available; >>> >>> /* PCI bus */ >>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps { >>> * >>> * @devfn: device and function number >>> */ >>> - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int >> devfn); >>> + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int >> devfn); >>> + /** >>> + * @set_iommu_device: set iommufd device for a PCI device to >> vIOMMU >>> + * >>> + * Optional callback, if not implemented in vIOMMU, then vIOMMU >> can't >>> + * utilize iommufd specific features. >>> + * >>> + * Return true if iommufd device is accepted, or else return false with >>> + * errp set. >>> + * >>> + * @bus: the #PCIBus of the PCI device. >>> + * >>> + * @opaque: the data passed to pci_setup_iommu(). >>> + * >>> + * @devfn: device and function number of the PCI device. >>> + * >>> + * @idev: the data structure representing iommufd device. >>> + * >>> + */ >>> + int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn, >>> + IOMMUFDDevice *idev, Error **errp); >>> + /** >>> + * @unset_iommu_device: unset iommufd device for a PCI device from >> vIOMMU >>> + * >>> + * Optional callback. >>> + * >>> + * @bus: the #PCIBus of the PCI device. >>> + * >>> + * @opaque: the data passed to pci_setup_iommu(). >>> + * >>> + * @devfn: device and function number of the PCI device. >>> + */ >>> + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t >> devfn); >>> } PCIIOMMUOps; >>> >>> AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); >>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >> *idev, >>> + Error **errp); >>> +void pci_device_unset_iommu_device(PCIDevice *dev); >>> >>> /** >>> * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus >>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c >>> index 76080af580..3848662f95 100644 >>> --- a/hw/pci/pci.c >>> +++ b/hw/pci/pci.c >>> @@ -2672,7 +2672,10 @@ static void >> pci_device_class_base_init(ObjectClass *klass, void *data) >>> } >>> } >>> >>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, >>> + PCIBus **aliased_pbus, >>> + PCIBus **piommu_bus, >>> + uint8_t *aliased_pdevfn) >>> { >>> PCIBus *bus = pci_get_bus(dev); >>> PCIBus *iommu_bus = bus; >>> @@ -2717,6 +2720,18 @@ AddressSpace >> *pci_device_iommu_address_space(PCIDevice *dev) >>> >>> iommu_bus = parent_bus; >>> } >>> + *aliased_pbus = bus; >>> + *piommu_bus = iommu_bus; >>> + *aliased_pdevfn = devfn; >>> +} >>> + >>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >>> +{ >>> + PCIBus *bus; >>> + PCIBus *iommu_bus; >>> + uint8_t devfn; >>> + >>> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); >>> if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { >>> return iommu_bus->iommu_ops->get_address_space(bus, >>> iommu_bus->iommu_opaque, devfn); >>> @@ -2724,6 +2739,38 @@ AddressSpace >> *pci_device_iommu_address_space(PCIDevice *dev) >>> return &address_space_memory; >>> } >>> >>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >> *idev, >>> + Error **errp) >>> +{ >>> + PCIBus *bus; >>> + PCIBus *iommu_bus; >>> + uint8_t devfn; >>> + >>> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); >>> + if (!pci_bus_bypass_iommu(bus) && iommu_bus && >> >> Why do we test iommu_bus in pci_device_un/set_iommu_device routines >> and >> not in pci_device_iommu_address_space() ? > > iommu_bus check in pci_device_iommu_address_space() is dropped in > below commit, I didn't find related discussion in mail history, maybe > by accident? I can add it back if it's not intentional. Can iommu_bus be NULL or should we add an assert ? C. > > ba7d12eb8c hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps > > Thanks > Zhenzhong
>-----Original Message----- >From: Cédric Le Goater <clg@redhat.com> >Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce >pci_device_set/unset_iommu_device() > >On 1/23/24 07:37, Duan, Zhenzhong wrote: >> >> >>> -----Original Message----- >>> From: Cédric Le Goater <clg@redhat.com> >>> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce >>> pci_device_set/unset_iommu_device() >>> >>> On 1/15/24 11:13, Zhenzhong Duan wrote: >>>> From: Yi Liu <yi.l.liu@intel.com> >>>> >>>> This adds pci_device_set/unset_iommu_device() to set/unset >>>> IOMMUFDDevice for a given PCIe device. Caller of set >>>> should fail if set operation fails. >>>> >>>> Extract out pci_device_get_iommu_bus_devfn() to facilitate >>>> implementation of pci_device_set/unset_iommu_device(). >>>> >>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >>>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> >>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >>>> --- >>>> include/hw/pci/pci.h | 39 >++++++++++++++++++++++++++++++++++- >>>> hw/pci/pci.c | 49 >>> +++++++++++++++++++++++++++++++++++++++++++- >>>> 2 files changed, 86 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h >>>> index fa6313aabc..a810c0ec74 100644 >>>> --- a/include/hw/pci/pci.h >>>> +++ b/include/hw/pci/pci.h >>>> @@ -7,6 +7,8 @@ >>>> /* PCI includes legacy ISA access. */ >>>> #include "hw/isa/isa.h" >>>> >>>> +#include "sysemu/iommufd_device.h" >>>> + >>>> extern bool pci_available; >>>> >>>> /* PCI bus */ >>>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps { >>>> * >>>> * @devfn: device and function number >>>> */ >>>> - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int >>> devfn); >>>> + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, >int >>> devfn); >>>> + /** >>>> + * @set_iommu_device: set iommufd device for a PCI device to >>> vIOMMU >>>> + * >>>> + * Optional callback, if not implemented in vIOMMU, then vIOMMU >>> can't >>>> + * utilize iommufd specific features. >>>> + * >>>> + * Return true if iommufd device is accepted, or else return false with >>>> + * errp set. >>>> + * >>>> + * @bus: the #PCIBus of the PCI device. >>>> + * >>>> + * @opaque: the data passed to pci_setup_iommu(). >>>> + * >>>> + * @devfn: device and function number of the PCI device. >>>> + * >>>> + * @idev: the data structure representing iommufd device. >>>> + * >>>> + */ >>>> + int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn, >>>> + IOMMUFDDevice *idev, Error **errp); >>>> + /** >>>> + * @unset_iommu_device: unset iommufd device for a PCI device >from >>> vIOMMU >>>> + * >>>> + * Optional callback. >>>> + * >>>> + * @bus: the #PCIBus of the PCI device. >>>> + * >>>> + * @opaque: the data passed to pci_setup_iommu(). >>>> + * >>>> + * @devfn: device and function number of the PCI device. >>>> + */ >>>> + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t >>> devfn); >>>> } PCIIOMMUOps; >>>> >>>> AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); >>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >>> *idev, >>>> + Error **errp); >>>> +void pci_device_unset_iommu_device(PCIDevice *dev); >>>> >>>> /** >>>> * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus >>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c >>>> index 76080af580..3848662f95 100644 >>>> --- a/hw/pci/pci.c >>>> +++ b/hw/pci/pci.c >>>> @@ -2672,7 +2672,10 @@ static void >>> pci_device_class_base_init(ObjectClass *klass, void *data) >>>> } >>>> } >>>> >>>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >>>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, >>>> + PCIBus **aliased_pbus, >>>> + PCIBus **piommu_bus, >>>> + uint8_t *aliased_pdevfn) >>>> { >>>> PCIBus *bus = pci_get_bus(dev); >>>> PCIBus *iommu_bus = bus; >>>> @@ -2717,6 +2720,18 @@ AddressSpace >>> *pci_device_iommu_address_space(PCIDevice *dev) >>>> >>>> iommu_bus = parent_bus; >>>> } >>>> + *aliased_pbus = bus; >>>> + *piommu_bus = iommu_bus; >>>> + *aliased_pdevfn = devfn; >>>> +} >>>> + >>>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >>>> +{ >>>> + PCIBus *bus; >>>> + PCIBus *iommu_bus; >>>> + uint8_t devfn; >>>> + >>>> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, >&devfn); >>>> if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { >>>> return iommu_bus->iommu_ops->get_address_space(bus, >>>> iommu_bus->iommu_opaque, devfn); >>>> @@ -2724,6 +2739,38 @@ AddressSpace >>> *pci_device_iommu_address_space(PCIDevice *dev) >>>> return &address_space_memory; >>>> } >>>> >>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >>> *idev, >>>> + Error **errp) >>>> +{ >>>> + PCIBus *bus; >>>> + PCIBus *iommu_bus; >>>> + uint8_t devfn; >>>> + >>>> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, >&devfn); >>>> + if (!pci_bus_bypass_iommu(bus) && iommu_bus && >>> >>> Why do we test iommu_bus in pci_device_un/set_iommu_device >routines >>> and >>> not in pci_device_iommu_address_space() ? >> >> iommu_bus check in pci_device_iommu_address_space() is dropped in >> below commit, I didn't find related discussion in mail history, maybe >> by accident? I can add it back if it's not intentional. > >Can iommu_bus be NULL or should we add an assert ? I dig into the history changes of pci_device_iommu_address_space() and below commit added iommu_bus check. 5af2ae230514 pci: Fix pci_device_iommu_address_space() bus propagation In theory, !iommu_bus->parent_dev take precedency over !iommu_bus, So we never see iommu_bus NULL, assert may be better. Thanks Zhenzhong
On 1/23/24 10:25, Duan, Zhenzhong wrote: > >> -----Original Message----- >> From: Cédric Le Goater <clg@redhat.com> >> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce >> pci_device_set/unset_iommu_device() >> >> On 1/23/24 07:37, Duan, Zhenzhong wrote: >>> >>>> -----Original Message----- >>>> From: Cédric Le Goater <clg@redhat.com> >>>> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce >>>> pci_device_set/unset_iommu_device() >>>> >>>> On 1/15/24 11:13, Zhenzhong Duan wrote: >>>>> From: Yi Liu <yi.l.liu@intel.com> >>>>> >>>>> This adds pci_device_set/unset_iommu_device() to set/unset >>>>> IOMMUFDDevice for a given PCIe device. Caller of set >>>>> should fail if set operation fails. >>>>> >>>>> Extract out pci_device_get_iommu_bus_devfn() to facilitate >>>>> implementation of pci_device_set/unset_iommu_device(). >>>>> >>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >>>>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> >>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >>>>> --- >>>>> include/hw/pci/pci.h | 39 >> ++++++++++++++++++++++++++++++++++- >>>>> hw/pci/pci.c | 49 >>>> +++++++++++++++++++++++++++++++++++++++++++- >>>>> 2 files changed, 86 insertions(+), 2 deletions(-) >>>>> >>>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h >>>>> index fa6313aabc..a810c0ec74 100644 >>>>> --- a/include/hw/pci/pci.h >>>>> +++ b/include/hw/pci/pci.h >>>>> @@ -7,6 +7,8 @@ >>>>> /* PCI includes legacy ISA access. */ >>>>> #include "hw/isa/isa.h" >>>>> >>>>> +#include "sysemu/iommufd_device.h" >>>>> + >>>>> extern bool pci_available; >>>>> >>>>> /* PCI bus */ >>>>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps { >>>>> * >>>>> * @devfn: device and function number >>>>> */ >>>>> - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int >>>> devfn); >>>>> + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, >> int >>>> devfn); >>>>> + /** >>>>> + * @set_iommu_device: set iommufd device for a PCI device to >>>> vIOMMU >>>>> + * >>>>> + * Optional callback, if not implemented in vIOMMU, then vIOMMU >>>> can't >>>>> + * utilize iommufd specific features. >>>>> + * >>>>> + * Return true if iommufd device is accepted, or else return false with >>>>> + * errp set. >>>>> + * >>>>> + * @bus: the #PCIBus of the PCI device. >>>>> + * >>>>> + * @opaque: the data passed to pci_setup_iommu(). >>>>> + * >>>>> + * @devfn: device and function number of the PCI device. >>>>> + * >>>>> + * @idev: the data structure representing iommufd device. >>>>> + * >>>>> + */ >>>>> + int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn, >>>>> + IOMMUFDDevice *idev, Error **errp); >>>>> + /** >>>>> + * @unset_iommu_device: unset iommufd device for a PCI device >> from >>>> vIOMMU >>>>> + * >>>>> + * Optional callback. >>>>> + * >>>>> + * @bus: the #PCIBus of the PCI device. >>>>> + * >>>>> + * @opaque: the data passed to pci_setup_iommu(). >>>>> + * >>>>> + * @devfn: device and function number of the PCI device. >>>>> + */ >>>>> + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t >>>> devfn); >>>>> } PCIIOMMUOps; >>>>> >>>>> AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); >>>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >>>> *idev, >>>>> + Error **errp); >>>>> +void pci_device_unset_iommu_device(PCIDevice *dev); >>>>> >>>>> /** >>>>> * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus >>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c >>>>> index 76080af580..3848662f95 100644 >>>>> --- a/hw/pci/pci.c >>>>> +++ b/hw/pci/pci.c >>>>> @@ -2672,7 +2672,10 @@ static void >>>> pci_device_class_base_init(ObjectClass *klass, void *data) >>>>> } >>>>> } >>>>> >>>>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >>>>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, >>>>> + PCIBus **aliased_pbus, >>>>> + PCIBus **piommu_bus, >>>>> + uint8_t *aliased_pdevfn) >>>>> { >>>>> PCIBus *bus = pci_get_bus(dev); >>>>> PCIBus *iommu_bus = bus; >>>>> @@ -2717,6 +2720,18 @@ AddressSpace >>>> *pci_device_iommu_address_space(PCIDevice *dev) >>>>> iommu_bus = parent_bus; >>>>> } >>>>> + *aliased_pbus = bus; >>>>> + *piommu_bus = iommu_bus; >>>>> + *aliased_pdevfn = devfn; >>>>> +} >>>>> + >>>>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >>>>> +{ >>>>> + PCIBus *bus; >>>>> + PCIBus *iommu_bus; >>>>> + uint8_t devfn; >>>>> + >>>>> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, >> &devfn); >>>>> if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { >>>>> return iommu_bus->iommu_ops->get_address_space(bus, >>>>> iommu_bus->iommu_opaque, devfn); >>>>> @@ -2724,6 +2739,38 @@ AddressSpace >>>> *pci_device_iommu_address_space(PCIDevice *dev) >>>>> return &address_space_memory; >>>>> } >>>>> >>>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >>>> *idev, >>>>> + Error **errp) >>>>> +{ >>>>> + PCIBus *bus; >>>>> + PCIBus *iommu_bus; >>>>> + uint8_t devfn; >>>>> + >>>>> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, >> &devfn); >>>>> + if (!pci_bus_bypass_iommu(bus) && iommu_bus && >>>> Why do we test iommu_bus in pci_device_un/set_iommu_device >> routines >>>> and >>>> not in pci_device_iommu_address_space() ? >>> iommu_bus check in pci_device_iommu_address_space() is dropped in >>> below commit, I didn't find related discussion in mail history, maybe >>> by accident? I can add it back if it's not intentional. >> Can iommu_bus be NULL or should we add an assert ? > I dig into the history changes of pci_device_iommu_address_space() and > below commit added iommu_bus check. > > 5af2ae230514 pci: Fix pci_device_iommu_address_space() bus propagation > > In theory, !iommu_bus->parent_dev take precedency over !iommu_bus, > So we never see iommu_bus NULL, assert may be better. I think we had such a discussion in https://www.mail-archive.com/qemu-devel@nongnu.org/msg994766.html But maybe this was related to a different call place. I remember I challenged the check at some point Eric > > Thanks > Zhenzhong >
>-----Original Message----- >From: Eric Auger <eric.auger@redhat.com> >Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce >pci_device_set/unset_iommu_device() > > > >On 1/23/24 10:25, Duan, Zhenzhong wrote: >> >>> -----Original Message----- >>> From: Cédric Le Goater <clg@redhat.com> >>> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce >>> pci_device_set/unset_iommu_device() >>> >>> On 1/23/24 07:37, Duan, Zhenzhong wrote: >>>> >>>>> -----Original Message----- >>>>> From: Cédric Le Goater <clg@redhat.com> >>>>> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce >>>>> pci_device_set/unset_iommu_device() >>>>> >>>>> On 1/15/24 11:13, Zhenzhong Duan wrote: >>>>>> From: Yi Liu <yi.l.liu@intel.com> >>>>>> >>>>>> This adds pci_device_set/unset_iommu_device() to set/unset >>>>>> IOMMUFDDevice for a given PCIe device. Caller of set >>>>>> should fail if set operation fails. >>>>>> >>>>>> Extract out pci_device_get_iommu_bus_devfn() to facilitate >>>>>> implementation of pci_device_set/unset_iommu_device(). >>>>>> >>>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >>>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >>>>>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> >>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >>>>>> --- >>>>>> include/hw/pci/pci.h | 39 >>> ++++++++++++++++++++++++++++++++++- >>>>>> hw/pci/pci.c | 49 >>>>> +++++++++++++++++++++++++++++++++++++++++++- >>>>>> 2 files changed, 86 insertions(+), 2 deletions(-) >>>>>> >>>>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h >>>>>> index fa6313aabc..a810c0ec74 100644 >>>>>> --- a/include/hw/pci/pci.h >>>>>> +++ b/include/hw/pci/pci.h >>>>>> @@ -7,6 +7,8 @@ >>>>>> /* PCI includes legacy ISA access. */ >>>>>> #include "hw/isa/isa.h" >>>>>> >>>>>> +#include "sysemu/iommufd_device.h" >>>>>> + >>>>>> extern bool pci_available; >>>>>> >>>>>> /* PCI bus */ >>>>>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps { >>>>>> * >>>>>> * @devfn: device and function number >>>>>> */ >>>>>> - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, >int >>>>> devfn); >>>>>> + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, >>> int >>>>> devfn); >>>>>> + /** >>>>>> + * @set_iommu_device: set iommufd device for a PCI device to >>>>> vIOMMU >>>>>> + * >>>>>> + * Optional callback, if not implemented in vIOMMU, then >vIOMMU >>>>> can't >>>>>> + * utilize iommufd specific features. >>>>>> + * >>>>>> + * Return true if iommufd device is accepted, or else return false >with >>>>>> + * errp set. >>>>>> + * >>>>>> + * @bus: the #PCIBus of the PCI device. >>>>>> + * >>>>>> + * @opaque: the data passed to pci_setup_iommu(). >>>>>> + * >>>>>> + * @devfn: device and function number of the PCI device. >>>>>> + * >>>>>> + * @idev: the data structure representing iommufd device. >>>>>> + * >>>>>> + */ >>>>>> + int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t >devfn, >>>>>> + IOMMUFDDevice *idev, Error **errp); >>>>>> + /** >>>>>> + * @unset_iommu_device: unset iommufd device for a PCI device >>> from >>>>> vIOMMU >>>>>> + * >>>>>> + * Optional callback. >>>>>> + * >>>>>> + * @bus: the #PCIBus of the PCI device. >>>>>> + * >>>>>> + * @opaque: the data passed to pci_setup_iommu(). >>>>>> + * >>>>>> + * @devfn: device and function number of the PCI device. >>>>>> + */ >>>>>> + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t >>>>> devfn); >>>>>> } PCIIOMMUOps; >>>>>> >>>>>> AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); >>>>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >>>>> *idev, >>>>>> + Error **errp); >>>>>> +void pci_device_unset_iommu_device(PCIDevice *dev); >>>>>> >>>>>> /** >>>>>> * pci_setup_iommu: Initialize specific IOMMU handlers for a >PCIBus >>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c >>>>>> index 76080af580..3848662f95 100644 >>>>>> --- a/hw/pci/pci.c >>>>>> +++ b/hw/pci/pci.c >>>>>> @@ -2672,7 +2672,10 @@ static void >>>>> pci_device_class_base_init(ObjectClass *klass, void *data) >>>>>> } >>>>>> } >>>>>> >>>>>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >>>>>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, >>>>>> + PCIBus **aliased_pbus, >>>>>> + PCIBus **piommu_bus, >>>>>> + uint8_t *aliased_pdevfn) >>>>>> { >>>>>> PCIBus *bus = pci_get_bus(dev); >>>>>> PCIBus *iommu_bus = bus; >>>>>> @@ -2717,6 +2720,18 @@ AddressSpace >>>>> *pci_device_iommu_address_space(PCIDevice *dev) >>>>>> iommu_bus = parent_bus; >>>>>> } >>>>>> + *aliased_pbus = bus; >>>>>> + *piommu_bus = iommu_bus; >>>>>> + *aliased_pdevfn = devfn; >>>>>> +} >>>>>> + >>>>>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) >>>>>> +{ >>>>>> + PCIBus *bus; >>>>>> + PCIBus *iommu_bus; >>>>>> + uint8_t devfn; >>>>>> + >>>>>> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, >>> &devfn); >>>>>> if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { >>>>>> return iommu_bus->iommu_ops->get_address_space(bus, >>>>>> iommu_bus->iommu_opaque, devfn); >>>>>> @@ -2724,6 +2739,38 @@ AddressSpace >>>>> *pci_device_iommu_address_space(PCIDevice *dev) >>>>>> return &address_space_memory; >>>>>> } >>>>>> >>>>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice >>>>> *idev, >>>>>> + Error **errp) >>>>>> +{ >>>>>> + PCIBus *bus; >>>>>> + PCIBus *iommu_bus; >>>>>> + uint8_t devfn; >>>>>> + >>>>>> + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, >>> &devfn); >>>>>> + if (!pci_bus_bypass_iommu(bus) && iommu_bus && >>>>> Why do we test iommu_bus in pci_device_un/set_iommu_device >>> routines >>>>> and >>>>> not in pci_device_iommu_address_space() ? >>>> iommu_bus check in pci_device_iommu_address_space() is dropped in >>>> below commit, I didn't find related discussion in mail history, maybe >>>> by accident? I can add it back if it's not intentional. >>> Can iommu_bus be NULL or should we add an assert ? >> I dig into the history changes of pci_device_iommu_address_space() and >> below commit added iommu_bus check. >> >> 5af2ae230514 pci: Fix pci_device_iommu_address_space() bus >propagation >> >> In theory, !iommu_bus->parent_dev take precedency over !iommu_bus, >> So we never see iommu_bus NULL, assert may be better. > >I think we had such a discussion in >https://www.mail-archive.com/qemu-devel@nongnu.org/msg994766.html >But maybe this was related to a different call place. I remember I >challenged the check at some point It seems this question is not discussed further in that thread. Per my code inspection, PCI root bus's parent_dev should be NULL, so we get either root bus or sub bus, neither a NULL. Also tested with PXB bridge which is suspicious scenarios, same. Thanks Zhenzhong
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index fa6313aabc..a810c0ec74 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -7,6 +7,8 @@ /* PCI includes legacy ISA access. */ #include "hw/isa/isa.h" +#include "sysemu/iommufd_device.h" + extern bool pci_available; /* PCI bus */ @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps { * * @devfn: device and function number */ - AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); + AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn); + /** + * @set_iommu_device: set iommufd device for a PCI device to vIOMMU + * + * Optional callback, if not implemented in vIOMMU, then vIOMMU can't + * utilize iommufd specific features. + * + * Return true if iommufd device is accepted, or else return false with + * errp set. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + * + * @idev: the data structure representing iommufd device. + * + */ + int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn, + IOMMUFDDevice *idev, Error **errp); + /** + * @unset_iommu_device: unset iommufd device for a PCI device from vIOMMU + * + * Optional callback. + * + * @bus: the #PCIBus of the PCI device. + * + * @opaque: the data passed to pci_setup_iommu(). + * + * @devfn: device and function number of the PCI device. + */ + void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn); } PCIIOMMUOps; AddressSpace *pci_device_iommu_address_space(PCIDevice *dev); +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev, + Error **errp); +void pci_device_unset_iommu_device(PCIDevice *dev); /** * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 76080af580..3848662f95 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2672,7 +2672,10 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data) } } -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev, + PCIBus **aliased_pbus, + PCIBus **piommu_bus, + uint8_t *aliased_pdevfn) { PCIBus *bus = pci_get_bus(dev); PCIBus *iommu_bus = bus; @@ -2717,6 +2720,18 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) iommu_bus = parent_bus; } + *aliased_pbus = bus; + *piommu_bus = iommu_bus; + *aliased_pdevfn = devfn; +} + +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) +{ + PCIBus *bus; + PCIBus *iommu_bus; + uint8_t devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) { return iommu_bus->iommu_ops->get_address_space(bus, iommu_bus->iommu_opaque, devfn); @@ -2724,6 +2739,38 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev) return &address_space_memory; } +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev, + Error **errp) +{ + PCIBus *bus; + PCIBus *iommu_bus; + uint8_t devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (!pci_bus_bypass_iommu(bus) && iommu_bus && + iommu_bus->iommu_ops && iommu_bus->iommu_ops->set_iommu_device) { + return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev), + iommu_bus->iommu_opaque, + dev->devfn, idev, errp); + } + return 0; +} + +void pci_device_unset_iommu_device(PCIDevice *dev) +{ + PCIBus *bus; + PCIBus *iommu_bus; + uint8_t devfn; + + pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn); + if (!pci_bus_bypass_iommu(bus) && iommu_bus && + iommu_bus->iommu_ops && iommu_bus->iommu_ops->unset_iommu_device) { + return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev), + iommu_bus->iommu_opaque, + dev->devfn); + } +} + void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque) { /*