diff mbox series

[rfcv1,2/6] hw/pci: introduce pci_device_set/unset_iommu_device()

Message ID 20240115101313.131139-3-zhenzhong.duan@intel.com (mailing list archive)
State New, archived
Headers show
Series Check and sync host IOMMU cap/ecap with vIOMMU | expand

Commit Message

Duan, Zhenzhong Jan. 15, 2024, 10:13 a.m. UTC
From: Yi Liu <yi.l.liu@intel.com>

This adds pci_device_set/unset_iommu_device() to set/unset
IOMMUFDDevice for a given PCIe device. Caller of set
should fail if set operation fails.

Extract out pci_device_get_iommu_bus_devfn() to facilitate
implementation of pci_device_set/unset_iommu_device().

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++-
 hw/pci/pci.c         | 49 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 86 insertions(+), 2 deletions(-)

Comments

Eric Auger Jan. 17, 2024, 2:11 p.m. UTC | #1
Hi Zhenzhong,

On 1/15/24 11:13, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l.liu@intel.com>
>
> This adds pci_device_set/unset_iommu_device() to set/unset
> IOMMUFDDevice for a given PCIe device. Caller of set
> should fail if set operation fails.
>
> Extract out pci_device_get_iommu_bus_devfn() to facilitate
> implementation of pci_device_set/unset_iommu_device().
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++-
>  hw/pci/pci.c         | 49 +++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 86 insertions(+), 2 deletions(-)
>
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index fa6313aabc..a810c0ec74 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -7,6 +7,8 @@
>  /* PCI includes legacy ISA access.  */
>  #include "hw/isa/isa.h"
>  
> +#include "sysemu/iommufd_device.h"
> +
>  extern bool pci_available;
>  
>  /* PCI bus */
> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps {
>       *
>       * @devfn: device and function number
>       */
> -   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
> +    AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
> +    /**
> +     * @set_iommu_device: set iommufd device for a PCI device to vIOMMU
> +     *
> +     * Optional callback, if not implemented in vIOMMU, then vIOMMU can't
> +     * utilize iommufd specific features.
> +     *
> +     * Return true if iommufd device is accepted, or else return false with
> +     * errp set.
> +     *
> +     * @bus: the #PCIBus of the PCI device.
> +     *
> +     * @opaque: the data passed to pci_setup_iommu().
> +     *
> +     * @devfn: device and function number of the PCI device.
> +     *
> +     * @idev: the data structure representing iommufd device.
> +     *
> +     */
> +    int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn,
> +                            IOMMUFDDevice *idev, Error **errp);
> +    /**
> +     * @unset_iommu_device: unset iommufd device for a PCI device from vIOMMU
> +     *
> +     * Optional callback.
> +     *
> +     * @bus: the #PCIBus of the PCI device.
> +     *
> +     * @opaque: the data passed to pci_setup_iommu().
> +     *
> +     * @devfn: device and function number of the PCI device.
> +     */
> +    void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn);
>  } PCIIOMMUOps;
>  
>  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev,
> +                                Error **errp);
> +void pci_device_unset_iommu_device(PCIDevice *dev);
>  
>  /**
>   * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 76080af580..3848662f95 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -2672,7 +2672,10 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data)
>      }
>  }
>  
> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
> +                                           PCIBus **aliased_pbus,
> +                                           PCIBus **piommu_bus,
> +                                           uint8_t *aliased_pdevfn)
nit: I would drop the p in aliased_pbus and aliased_pdevfn. Maybe you
should allow the caller to pass NULL for aliased_pbus and aliased_pdevfn,
as is the case for pci_device_set_iommu_device(). I may reuse that
helper in [RFC 2/7] hw/pci: Introduce pci_device_iommu_bus
>  {
>      PCIBus *bus = pci_get_bus(dev);
>      PCIBus *iommu_bus = bus;
> @@ -2717,6 +2720,18 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>  
>          iommu_bus = parent_bus;
>      }
> +    *aliased_pbus = bus;
> +    *piommu_bus = iommu_bus;
> +    *aliased_pdevfn = devfn;
> +}
> +
> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
> +{
> +    PCIBus *bus;
> +    PCIBus *iommu_bus;
> +    uint8_t devfn;
> +
> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
>      if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
>          return iommu_bus->iommu_ops->get_address_space(bus,
>                                   iommu_bus->iommu_opaque, devfn);
> @@ -2724,6 +2739,38 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>      return &address_space_memory;
>  }
>  
> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev,
> +                                Error **errp)
> +{
> +    PCIBus *bus;
> +    PCIBus *iommu_bus;
> +    uint8_t devfn;
> +
> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
> +        iommu_bus->iommu_ops && iommu_bus->iommu_ops->set_iommu_device) {
> +        return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev),
> +                                                      iommu_bus->iommu_opaque,
> +                                                      dev->devfn, idev, errp);
> +    }
> +    return 0;
> +}
> +
> +void pci_device_unset_iommu_device(PCIDevice *dev)
> +{
> +    PCIBus *bus;
> +    PCIBus *iommu_bus;
> +    uint8_t devfn;
> +
> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
> +        iommu_bus->iommu_ops && iommu_bus->iommu_ops->unset_iommu_device) {
> +        return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev),
> +                                                        iommu_bus->iommu_opaque,
> +                                                        dev->devfn);
> +    }
> +}
> +
>  void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
>  {
>      /*
Thanks

Eric
Duan, Zhenzhong Jan. 18, 2024, 7:58 a.m. UTC | #2
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>pci_device_set/unset_iommu_device()
>
>Hi Zhenzhong,
>
>On 1/15/24 11:13, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> This adds pci_device_set/unset_iommu_device() to set/unset
>> IOMMUFDDevice for a given PCIe device. Caller of set
>> should fail if set operation fails.
>>
>> Extract out pci_device_get_iommu_bus_devfn() to facilitate
>> implementation of pci_device_set/unset_iommu_device().
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++-
>>  hw/pci/pci.c         | 49
>+++++++++++++++++++++++++++++++++++++++++++-
>>  2 files changed, 86 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index fa6313aabc..a810c0ec74 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -7,6 +7,8 @@
>>  /* PCI includes legacy ISA access.  */
>>  #include "hw/isa/isa.h"
>>
>> +#include "sysemu/iommufd_device.h"
>> +
>>  extern bool pci_available;
>>
>>  /* PCI bus */
>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps {
>>       *
>>       * @devfn: device and function number
>>       */
>> -   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int
>devfn);
>> +    AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int
>devfn);
>> +    /**
>> +     * @set_iommu_device: set iommufd device for a PCI device to
>vIOMMU
>> +     *
>> +     * Optional callback, if not implemented in vIOMMU, then vIOMMU
>can't
>> +     * utilize iommufd specific features.
>> +     *
>> +     * Return true if iommufd device is accepted, or else return false with
>> +     * errp set.
>> +     *
>> +     * @bus: the #PCIBus of the PCI device.
>> +     *
>> +     * @opaque: the data passed to pci_setup_iommu().
>> +     *
>> +     * @devfn: device and function number of the PCI device.
>> +     *
>> +     * @idev: the data structure representing iommufd device.
>> +     *
>> +     */
>> +    int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn,
>> +                            IOMMUFDDevice *idev, Error **errp);
>> +    /**
>> +     * @unset_iommu_device: unset iommufd device for a PCI device from
>vIOMMU
>> +     *
>> +     * Optional callback.
>> +     *
>> +     * @bus: the #PCIBus of the PCI device.
>> +     *
>> +     * @opaque: the data passed to pci_setup_iommu().
>> +     *
>> +     * @devfn: device and function number of the PCI device.
>> +     */
>> +    void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t
>devfn);
>>  } PCIIOMMUOps;
>>
>>  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>*idev,
>> +                                Error **errp);
>> +void pci_device_unset_iommu_device(PCIDevice *dev);
>>
>>  /**
>>   * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 76080af580..3848662f95 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -2672,7 +2672,10 @@ static void
>pci_device_class_base_init(ObjectClass *klass, void *data)
>>      }
>>  }
>>
>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
>> +                                           PCIBus **aliased_pbus,
>> +                                           PCIBus **piommu_bus,
>> +                                           uint8_t *aliased_pdevfn)
>nit: I would drop the p in aliased_pbus and aliased_pdevfn. Maybe you
>should allow the caller to pass NULL for aliased_pbus and aliased_pdevfn,
>as is the case for pci_device_set_iommu_device(). I may reuse that
>helper in [RFC 2/7] hw/pci: Introduce pci_device_iommu_bus

Good suggestion, will do.

Thanks
Zhenzhong

>>  {
>>      PCIBus *bus = pci_get_bus(dev);
>>      PCIBus *iommu_bus = bus;
>> @@ -2717,6 +2720,18 @@ AddressSpace
>*pci_device_iommu_address_space(PCIDevice *dev)
>>
>>          iommu_bus = parent_bus;
>>      }
>> +    *aliased_pbus = bus;
>> +    *piommu_bus = iommu_bus;
>> +    *aliased_pdevfn = devfn;
>> +}
>> +
>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>> +{
>> +    PCIBus *bus;
>> +    PCIBus *iommu_bus;
>> +    uint8_t devfn;
>> +
>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
>>      if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
>>          return iommu_bus->iommu_ops->get_address_space(bus,
>>                                   iommu_bus->iommu_opaque, devfn);
>> @@ -2724,6 +2739,38 @@ AddressSpace
>*pci_device_iommu_address_space(PCIDevice *dev)
>>      return &address_space_memory;
>>  }
>>
>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>*idev,
>> +                                Error **errp)
>> +{
>> +    PCIBus *bus;
>> +    PCIBus *iommu_bus;
>> +    uint8_t devfn;
>> +
>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
>> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
>> +        iommu_bus->iommu_ops && iommu_bus->iommu_ops-
>>set_iommu_device) {
>> +        return iommu_bus->iommu_ops-
>>set_iommu_device(pci_get_bus(dev),
>> +                                                      iommu_bus->iommu_opaque,
>> +                                                      dev->devfn, idev, errp);
>> +    }
>> +    return 0;
>> +}
>> +
>> +void pci_device_unset_iommu_device(PCIDevice *dev)
>> +{
>> +    PCIBus *bus;
>> +    PCIBus *iommu_bus;
>> +    uint8_t devfn;
>> +
>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
>> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
>> +        iommu_bus->iommu_ops && iommu_bus->iommu_ops-
>>unset_iommu_device) {
>> +        return iommu_bus->iommu_ops-
>>unset_iommu_device(pci_get_bus(dev),
>> +                                                        iommu_bus->iommu_opaque,
>> +                                                        dev->devfn);
>> +    }
>> +}
>> +
>>  void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void
>*opaque)
>>  {
>>      /*
>Thanks
>
>Eric
Cédric Le Goater Jan. 22, 2024, 4:55 p.m. UTC | #3
On 1/15/24 11:13, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l.liu@intel.com>
> 
> This adds pci_device_set/unset_iommu_device() to set/unset
> IOMMUFDDevice for a given PCIe device. Caller of set
> should fail if set operation fails.
> 
> Extract out pci_device_get_iommu_bus_devfn() to facilitate
> implementation of pci_device_set/unset_iommu_device().
> 
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++-
>   hw/pci/pci.c         | 49 +++++++++++++++++++++++++++++++++++++++++++-
>   2 files changed, 86 insertions(+), 2 deletions(-)
> 
> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
> index fa6313aabc..a810c0ec74 100644
> --- a/include/hw/pci/pci.h
> +++ b/include/hw/pci/pci.h
> @@ -7,6 +7,8 @@
>   /* PCI includes legacy ISA access.  */
>   #include "hw/isa/isa.h"
>   
> +#include "sysemu/iommufd_device.h"
> +
>   extern bool pci_available;
>   
>   /* PCI bus */
> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps {
>        *
>        * @devfn: device and function number
>        */
> -   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
> +    AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
> +    /**
> +     * @set_iommu_device: set iommufd device for a PCI device to vIOMMU
> +     *
> +     * Optional callback, if not implemented in vIOMMU, then vIOMMU can't
> +     * utilize iommufd specific features.
> +     *
> +     * Return true if iommufd device is accepted, or else return false with
> +     * errp set.
> +     *
> +     * @bus: the #PCIBus of the PCI device.
> +     *
> +     * @opaque: the data passed to pci_setup_iommu().
> +     *
> +     * @devfn: device and function number of the PCI device.
> +     *
> +     * @idev: the data structure representing iommufd device.
> +     *
> +     */
> +    int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn,
> +                            IOMMUFDDevice *idev, Error **errp);
> +    /**
> +     * @unset_iommu_device: unset iommufd device for a PCI device from vIOMMU
> +     *
> +     * Optional callback.
> +     *
> +     * @bus: the #PCIBus of the PCI device.
> +     *
> +     * @opaque: the data passed to pci_setup_iommu().
> +     *
> +     * @devfn: device and function number of the PCI device.
> +     */
> +    void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn);
>   } PCIIOMMUOps;
>   
>   AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev,
> +                                Error **errp);
> +void pci_device_unset_iommu_device(PCIDevice *dev);
>   
>   /**
>    * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index 76080af580..3848662f95 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -2672,7 +2672,10 @@ static void pci_device_class_base_init(ObjectClass *klass, void *data)
>       }
>   }
>   
> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
> +                                           PCIBus **aliased_pbus,
> +                                           PCIBus **piommu_bus,
> +                                           uint8_t *aliased_pdevfn)
>   {
>       PCIBus *bus = pci_get_bus(dev);
>       PCIBus *iommu_bus = bus;
> @@ -2717,6 +2720,18 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>   
>           iommu_bus = parent_bus;
>       }
> +    *aliased_pbus = bus;
> +    *piommu_bus = iommu_bus;
> +    *aliased_pdevfn = devfn;
> +}
> +
> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
> +{
> +    PCIBus *bus;
> +    PCIBus *iommu_bus;
> +    uint8_t devfn;
> +
> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
>       if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
>           return iommu_bus->iommu_ops->get_address_space(bus,
>                                    iommu_bus->iommu_opaque, devfn);
> @@ -2724,6 +2739,38 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>       return &address_space_memory;
>   }
>   
> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev,
> +                                Error **errp)
> +{
> +    PCIBus *bus;
> +    PCIBus *iommu_bus;
> +    uint8_t devfn;
> +
> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&

Why do we test iommu_bus in the pci_device_set/unset_iommu_device() routines
but not in pci_device_iommu_address_space()?


Thanks,

C.


> +        iommu_bus->iommu_ops && iommu_bus->iommu_ops->set_iommu_device) {
> +        return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev),
> +                                                      iommu_bus->iommu_opaque,
> +                                                      dev->devfn, idev, errp);
> +    }
> +    return 0;
> +}
> +
> +void pci_device_unset_iommu_device(PCIDevice *dev)
> +{
> +    PCIBus *bus;
> +    PCIBus *iommu_bus;
> +    uint8_t devfn;
> +
> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
> +        iommu_bus->iommu_ops && iommu_bus->iommu_ops->unset_iommu_device) {
> +        return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev),
> +                                                        iommu_bus->iommu_opaque,
> +                                                        dev->devfn);
> +    }
> +}
> +
>   void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
>   {
>       /*
Duan, Zhenzhong Jan. 23, 2024, 6:37 a.m. UTC | #4
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>pci_device_set/unset_iommu_device()
>
>On 1/15/24 11:13, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> This adds pci_device_set/unset_iommu_device() to set/unset
>> IOMMUFDDevice for a given PCIe device. Caller of set
>> should fail if set operation fails.
>>
>> Extract out pci_device_get_iommu_bus_devfn() to facilitate
>> implementation of pci_device_set/unset_iommu_device().
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++-
>>   hw/pci/pci.c         | 49
>+++++++++++++++++++++++++++++++++++++++++++-
>>   2 files changed, 86 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>> index fa6313aabc..a810c0ec74 100644
>> --- a/include/hw/pci/pci.h
>> +++ b/include/hw/pci/pci.h
>> @@ -7,6 +7,8 @@
>>   /* PCI includes legacy ISA access.  */
>>   #include "hw/isa/isa.h"
>>
>> +#include "sysemu/iommufd_device.h"
>> +
>>   extern bool pci_available;
>>
>>   /* PCI bus */
>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps {
>>        *
>>        * @devfn: device and function number
>>        */
>> -   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int
>devfn);
>> +    AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int
>devfn);
>> +    /**
>> +     * @set_iommu_device: set iommufd device for a PCI device to
>vIOMMU
>> +     *
>> +     * Optional callback, if not implemented in vIOMMU, then vIOMMU
>can't
>> +     * utilize iommufd specific features.
>> +     *
>> +     * Return true if iommufd device is accepted, or else return false with
>> +     * errp set.
>> +     *
>> +     * @bus: the #PCIBus of the PCI device.
>> +     *
>> +     * @opaque: the data passed to pci_setup_iommu().
>> +     *
>> +     * @devfn: device and function number of the PCI device.
>> +     *
>> +     * @idev: the data structure representing iommufd device.
>> +     *
>> +     */
>> +    int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn,
>> +                            IOMMUFDDevice *idev, Error **errp);
>> +    /**
>> +     * @unset_iommu_device: unset iommufd device for a PCI device from
>vIOMMU
>> +     *
>> +     * Optional callback.
>> +     *
>> +     * @bus: the #PCIBus of the PCI device.
>> +     *
>> +     * @opaque: the data passed to pci_setup_iommu().
>> +     *
>> +     * @devfn: device and function number of the PCI device.
>> +     */
>> +    void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t
>devfn);
>>   } PCIIOMMUOps;
>>
>>   AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>*idev,
>> +                                Error **errp);
>> +void pci_device_unset_iommu_device(PCIDevice *dev);
>>
>>   /**
>>    * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index 76080af580..3848662f95 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -2672,7 +2672,10 @@ static void
>pci_device_class_base_init(ObjectClass *klass, void *data)
>>       }
>>   }
>>
>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
>> +                                           PCIBus **aliased_pbus,
>> +                                           PCIBus **piommu_bus,
>> +                                           uint8_t *aliased_pdevfn)
>>   {
>>       PCIBus *bus = pci_get_bus(dev);
>>       PCIBus *iommu_bus = bus;
>> @@ -2717,6 +2720,18 @@ AddressSpace
>*pci_device_iommu_address_space(PCIDevice *dev)
>>
>>           iommu_bus = parent_bus;
>>       }
>> +    *aliased_pbus = bus;
>> +    *piommu_bus = iommu_bus;
>> +    *aliased_pdevfn = devfn;
>> +}
>> +
>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>> +{
>> +    PCIBus *bus;
>> +    PCIBus *iommu_bus;
>> +    uint8_t devfn;
>> +
>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
>>       if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
>>           return iommu_bus->iommu_ops->get_address_space(bus,
>>                                    iommu_bus->iommu_opaque, devfn);
>> @@ -2724,6 +2739,38 @@ AddressSpace
>*pci_device_iommu_address_space(PCIDevice *dev)
>>       return &address_space_memory;
>>   }
>>
>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>*idev,
>> +                                Error **errp)
>> +{
>> +    PCIBus *bus;
>> +    PCIBus *iommu_bus;
>> +    uint8_t devfn;
>> +
>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
>> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
>
>Why do we test iommu_bus in pci_device_un/set_iommu_device routines
>and
>not in pci_device_iommu_address_space() ?

The iommu_bus check in pci_device_iommu_address_space() was dropped in the
commit below. I didn't find any related discussion in the mail history, so
maybe it was dropped by accident? I can add it back if that wasn't intentional.

ba7d12eb8c  hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps

Thanks
Zhenzhong
Cédric Le Goater Jan. 23, 2024, 7:40 a.m. UTC | #5
On 1/23/24 07:37, Duan, Zhenzhong wrote:
> 
> 
>> -----Original Message-----
>> From: Cédric Le Goater <clg@redhat.com>
>> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>> pci_device_set/unset_iommu_device()
>>
>> On 1/15/24 11:13, Zhenzhong Duan wrote:
>>> From: Yi Liu <yi.l.liu@intel.com>
>>>
>>> This adds pci_device_set/unset_iommu_device() to set/unset
>>> IOMMUFDDevice for a given PCIe device. Caller of set
>>> should fail if set operation fails.
>>>
>>> Extract out pci_device_get_iommu_bus_devfn() to facilitate
>>> implementation of pci_device_set/unset_iommu_device().
>>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>    include/hw/pci/pci.h | 39 ++++++++++++++++++++++++++++++++++-
>>>    hw/pci/pci.c         | 49
>> +++++++++++++++++++++++++++++++++++++++++++-
>>>    2 files changed, 86 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>> index fa6313aabc..a810c0ec74 100644
>>> --- a/include/hw/pci/pci.h
>>> +++ b/include/hw/pci/pci.h
>>> @@ -7,6 +7,8 @@
>>>    /* PCI includes legacy ISA access.  */
>>>    #include "hw/isa/isa.h"
>>>
>>> +#include "sysemu/iommufd_device.h"
>>> +
>>>    extern bool pci_available;
>>>
>>>    /* PCI bus */
>>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps {
>>>         *
>>>         * @devfn: device and function number
>>>         */
>>> -   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int
>> devfn);
>>> +    AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int
>> devfn);
>>> +    /**
>>> +     * @set_iommu_device: set iommufd device for a PCI device to
>> vIOMMU
>>> +     *
>>> +     * Optional callback, if not implemented in vIOMMU, then vIOMMU
>> can't
>>> +     * utilize iommufd specific features.
>>> +     *
>>> +     * Return true if iommufd device is accepted, or else return false with
>>> +     * errp set.
>>> +     *
>>> +     * @bus: the #PCIBus of the PCI device.
>>> +     *
>>> +     * @opaque: the data passed to pci_setup_iommu().
>>> +     *
>>> +     * @devfn: device and function number of the PCI device.
>>> +     *
>>> +     * @idev: the data structure representing iommufd device.
>>> +     *
>>> +     */
>>> +    int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn,
>>> +                            IOMMUFDDevice *idev, Error **errp);
>>> +    /**
>>> +     * @unset_iommu_device: unset iommufd device for a PCI device from
>> vIOMMU
>>> +     *
>>> +     * Optional callback.
>>> +     *
>>> +     * @bus: the #PCIBus of the PCI device.
>>> +     *
>>> +     * @opaque: the data passed to pci_setup_iommu().
>>> +     *
>>> +     * @devfn: device and function number of the PCI device.
>>> +     */
>>> +    void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t
>> devfn);
>>>    } PCIIOMMUOps;
>>>
>>>    AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>> *idev,
>>> +                                Error **errp);
>>> +void pci_device_unset_iommu_device(PCIDevice *dev);
>>>
>>>    /**
>>>     * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>> index 76080af580..3848662f95 100644
>>> --- a/hw/pci/pci.c
>>> +++ b/hw/pci/pci.c
>>> @@ -2672,7 +2672,10 @@ static void
>> pci_device_class_base_init(ObjectClass *klass, void *data)
>>>        }
>>>    }
>>>
>>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
>>> +                                           PCIBus **aliased_pbus,
>>> +                                           PCIBus **piommu_bus,
>>> +                                           uint8_t *aliased_pdevfn)
>>>    {
>>>        PCIBus *bus = pci_get_bus(dev);
>>>        PCIBus *iommu_bus = bus;
>>> @@ -2717,6 +2720,18 @@ AddressSpace
>> *pci_device_iommu_address_space(PCIDevice *dev)
>>>
>>>            iommu_bus = parent_bus;
>>>        }
>>> +    *aliased_pbus = bus;
>>> +    *piommu_bus = iommu_bus;
>>> +    *aliased_pdevfn = devfn;
>>> +}
>>> +
>>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>>> +{
>>> +    PCIBus *bus;
>>> +    PCIBus *iommu_bus;
>>> +    uint8_t devfn;
>>> +
>>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
>>>        if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
>>>            return iommu_bus->iommu_ops->get_address_space(bus,
>>>                                     iommu_bus->iommu_opaque, devfn);
>>> @@ -2724,6 +2739,38 @@ AddressSpace
>> *pci_device_iommu_address_space(PCIDevice *dev)
>>>        return &address_space_memory;
>>>    }
>>>
>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>> *idev,
>>> +                                Error **errp)
>>> +{
>>> +    PCIBus *bus;
>>> +    PCIBus *iommu_bus;
>>> +    uint8_t devfn;
>>> +
>>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
>>> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
>>
>> Why do we test iommu_bus in pci_device_un/set_iommu_device routines
>> and
>> not in pci_device_iommu_address_space() ?
> 
> iommu_bus check in pci_device_iommu_address_space() is dropped in
> below commit, I didn't find related discussion in mail history, maybe
> by accident? I can add it back if it's not intentional.

Can iommu_bus be NULL, or should we add an assert?

C.

> 
> ba7d12eb8c  hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps
> 
> Thanks
> Zhenzhong
Duan, Zhenzhong Jan. 23, 2024, 9:25 a.m. UTC | #6
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>pci_device_set/unset_iommu_device()
>
>On 1/23/24 07:37, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Cédric Le Goater <clg@redhat.com>
>>> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>>> pci_device_set/unset_iommu_device()
>>>
>>> On 1/15/24 11:13, Zhenzhong Duan wrote:
>>>> From: Yi Liu <yi.l.liu@intel.com>
>>>>
>>>> This adds pci_device_set/unset_iommu_device() to set/unset
>>>> IOMMUFDDevice for a given PCIe device. Caller of set
>>>> should fail if set operation fails.
>>>>
>>>> Extract out pci_device_get_iommu_bus_devfn() to facilitate
>>>> implementation of pci_device_set/unset_iommu_device().
>>>>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>>    include/hw/pci/pci.h | 39
>++++++++++++++++++++++++++++++++++-
>>>>    hw/pci/pci.c         | 49
>>> +++++++++++++++++++++++++++++++++++++++++++-
>>>>    2 files changed, 86 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>>> index fa6313aabc..a810c0ec74 100644
>>>> --- a/include/hw/pci/pci.h
>>>> +++ b/include/hw/pci/pci.h
>>>> @@ -7,6 +7,8 @@
>>>>    /* PCI includes legacy ISA access.  */
>>>>    #include "hw/isa/isa.h"
>>>>
>>>> +#include "sysemu/iommufd_device.h"
>>>> +
>>>>    extern bool pci_available;
>>>>
>>>>    /* PCI bus */
>>>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps {
>>>>         *
>>>>         * @devfn: device and function number
>>>>         */
>>>> -   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int
>>> devfn);
>>>> +    AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque,
>int
>>> devfn);
>>>> +    /**
>>>> +     * @set_iommu_device: set iommufd device for a PCI device to
>>> vIOMMU
>>>> +     *
>>>> +     * Optional callback, if not implemented in vIOMMU, then vIOMMU
>>> can't
>>>> +     * utilize iommufd specific features.
>>>> +     *
>>>> +     * Return true if iommufd device is accepted, or else return false with
>>>> +     * errp set.
>>>> +     *
>>>> +     * @bus: the #PCIBus of the PCI device.
>>>> +     *
>>>> +     * @opaque: the data passed to pci_setup_iommu().
>>>> +     *
>>>> +     * @devfn: device and function number of the PCI device.
>>>> +     *
>>>> +     * @idev: the data structure representing iommufd device.
>>>> +     *
>>>> +     */
>>>> +    int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn,
>>>> +                            IOMMUFDDevice *idev, Error **errp);
>>>> +    /**
>>>> +     * @unset_iommu_device: unset iommufd device for a PCI device
>from
>>> vIOMMU
>>>> +     *
>>>> +     * Optional callback.
>>>> +     *
>>>> +     * @bus: the #PCIBus of the PCI device.
>>>> +     *
>>>> +     * @opaque: the data passed to pci_setup_iommu().
>>>> +     *
>>>> +     * @devfn: device and function number of the PCI device.
>>>> +     */
>>>> +    void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t
>>> devfn);
>>>>    } PCIIOMMUOps;
>>>>
>>>>    AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>>> *idev,
>>>> +                                Error **errp);
>>>> +void pci_device_unset_iommu_device(PCIDevice *dev);
>>>>
>>>>    /**
>>>>     * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>> index 76080af580..3848662f95 100644
>>>> --- a/hw/pci/pci.c
>>>> +++ b/hw/pci/pci.c
>>>> @@ -2672,7 +2672,10 @@ static void
>>> pci_device_class_base_init(ObjectClass *klass, void *data)
>>>>        }
>>>>    }
>>>>
>>>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>>>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
>>>> +                                           PCIBus **aliased_pbus,
>>>> +                                           PCIBus **piommu_bus,
>>>> +                                           uint8_t *aliased_pdevfn)
>>>>    {
>>>>        PCIBus *bus = pci_get_bus(dev);
>>>>        PCIBus *iommu_bus = bus;
>>>> @@ -2717,6 +2720,18 @@ AddressSpace
>>> *pci_device_iommu_address_space(PCIDevice *dev)
>>>>
>>>>            iommu_bus = parent_bus;
>>>>        }
>>>> +    *aliased_pbus = bus;
>>>> +    *piommu_bus = iommu_bus;
>>>> +    *aliased_pdevfn = devfn;
>>>> +}
>>>> +
>>>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>>>> +{
>>>> +    PCIBus *bus;
>>>> +    PCIBus *iommu_bus;
>>>> +    uint8_t devfn;
>>>> +
>>>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus,
>&devfn);
>>>>        if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
>>>>            return iommu_bus->iommu_ops->get_address_space(bus,
>>>>                                     iommu_bus->iommu_opaque, devfn);
>>>> @@ -2724,6 +2739,38 @@ AddressSpace
>>> *pci_device_iommu_address_space(PCIDevice *dev)
>>>>        return &address_space_memory;
>>>>    }
>>>>
>>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>>> *idev,
>>>> +                                Error **errp)
>>>> +{
>>>> +    PCIBus *bus;
>>>> +    PCIBus *iommu_bus;
>>>> +    uint8_t devfn;
>>>> +
>>>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus,
>&devfn);
>>>> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
>>>
>>> Why do we test iommu_bus in pci_device_un/set_iommu_device
>routines
>>> and
>>> not in pci_device_iommu_address_space() ?
>>
>> iommu_bus check in pci_device_iommu_address_space() is dropped in
>> below commit, I didn't find related discussion in mail history, maybe
>> by accident? I can add it back if it's not intentional.
>
>Can iommu_bus be NULL or should we add an assert ?

I dug into the change history of pci_device_iommu_address_space(), and
the commit below added the iommu_bus check.

5af2ae230514  pci: Fix pci_device_iommu_address_space() bus propagation

In theory, the !iommu_bus->parent_dev check takes precedence over !iommu_bus,
so we never see a NULL iommu_bus; an assert may be better.

Thanks
Zhenzhong
Eric Auger Jan. 23, 2024, 10:18 a.m. UTC | #7
On 1/23/24 10:25, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Cédric Le Goater <clg@redhat.com>
>> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>> pci_device_set/unset_iommu_device()
>>
>> On 1/23/24 07:37, Duan, Zhenzhong wrote:
>>>
>>>> -----Original Message-----
>>>> From: Cédric Le Goater <clg@redhat.com>
>>>> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>>>> pci_device_set/unset_iommu_device()
>>>>
>>>> On 1/15/24 11:13, Zhenzhong Duan wrote:
>>>>> From: Yi Liu <yi.l.liu@intel.com>
>>>>>
>>>>> This adds pci_device_set/unset_iommu_device() to set/unset
>>>>> IOMMUFDDevice for a given PCIe device. Caller of set
>>>>> should fail if set operation fails.
>>>>>
>>>>> Extract out pci_device_get_iommu_bus_devfn() to facilitate
>>>>> implementation of pci_device_set/unset_iommu_device().
>>>>>
>>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>> ---
>>>>>    include/hw/pci/pci.h | 39
>> ++++++++++++++++++++++++++++++++++-
>>>>>    hw/pci/pci.c         | 49
>>>> +++++++++++++++++++++++++++++++++++++++++++-
>>>>>    2 files changed, 86 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>>>> index fa6313aabc..a810c0ec74 100644
>>>>> --- a/include/hw/pci/pci.h
>>>>> +++ b/include/hw/pci/pci.h
>>>>> @@ -7,6 +7,8 @@
>>>>>    /* PCI includes legacy ISA access.  */
>>>>>    #include "hw/isa/isa.h"
>>>>>
>>>>> +#include "sysemu/iommufd_device.h"
>>>>> +
>>>>>    extern bool pci_available;
>>>>>
>>>>>    /* PCI bus */
>>>>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps {
>>>>>         *
>>>>>         * @devfn: device and function number
>>>>>         */
>>>>> -   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int
>>>> devfn);
>>>>> +    AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque,
>> int
>>>> devfn);
>>>>> +    /**
>>>>> +     * @set_iommu_device: set iommufd device for a PCI device to
>>>> vIOMMU
>>>>> +     *
>>>>> +     * Optional callback, if not implemented in vIOMMU, then vIOMMU
>>>> can't
>>>>> +     * utilize iommufd specific features.
>>>>> +     *
>>>>> +     * Return true if iommufd device is accepted, or else return false with
>>>>> +     * errp set.
>>>>> +     *
>>>>> +     * @bus: the #PCIBus of the PCI device.
>>>>> +     *
>>>>> +     * @opaque: the data passed to pci_setup_iommu().
>>>>> +     *
>>>>> +     * @devfn: device and function number of the PCI device.
>>>>> +     *
>>>>> +     * @idev: the data structure representing iommufd device.
>>>>> +     *
>>>>> +     */
>>>>> +    int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn,
>>>>> +                            IOMMUFDDevice *idev, Error **errp);
>>>>> +    /**
>>>>> +     * @unset_iommu_device: unset iommufd device for a PCI device
>> from
>>>> vIOMMU
>>>>> +     *
>>>>> +     * Optional callback.
>>>>> +     *
>>>>> +     * @bus: the #PCIBus of the PCI device.
>>>>> +     *
>>>>> +     * @opaque: the data passed to pci_setup_iommu().
>>>>> +     *
>>>>> +     * @devfn: device and function number of the PCI device.
>>>>> +     */
>>>>> +    void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t
>>>> devfn);
>>>>>    } PCIIOMMUOps;
>>>>>
>>>>>    AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>>>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>>>> *idev,
>>>>> +                                Error **errp);
>>>>> +void pci_device_unset_iommu_device(PCIDevice *dev);
>>>>>
>>>>>    /**
>>>>>     * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>> index 76080af580..3848662f95 100644
>>>>> --- a/hw/pci/pci.c
>>>>> +++ b/hw/pci/pci.c
>>>>> @@ -2672,7 +2672,10 @@ static void
>>>> pci_device_class_base_init(ObjectClass *klass, void *data)
>>>>>        }
>>>>>    }
>>>>>
>>>>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>>>>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
>>>>> +                                           PCIBus **aliased_pbus,
>>>>> +                                           PCIBus **piommu_bus,
>>>>> +                                           uint8_t *aliased_pdevfn)
>>>>>    {
>>>>>        PCIBus *bus = pci_get_bus(dev);
>>>>>        PCIBus *iommu_bus = bus;
>>>>> @@ -2717,6 +2720,18 @@ AddressSpace
>>>> *pci_device_iommu_address_space(PCIDevice *dev)
>>>>>            iommu_bus = parent_bus;
>>>>>        }
>>>>> +    *aliased_pbus = bus;
>>>>> +    *piommu_bus = iommu_bus;
>>>>> +    *aliased_pdevfn = devfn;
>>>>> +}
>>>>> +
>>>>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>>>>> +{
>>>>> +    PCIBus *bus;
>>>>> +    PCIBus *iommu_bus;
>>>>> +    uint8_t devfn;
>>>>> +
>>>>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus,
>> &devfn);
>>>>>        if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
>>>>>            return iommu_bus->iommu_ops->get_address_space(bus,
>>>>>                                     iommu_bus->iommu_opaque, devfn);
>>>>> @@ -2724,6 +2739,38 @@ AddressSpace
>>>> *pci_device_iommu_address_space(PCIDevice *dev)
>>>>>        return &address_space_memory;
>>>>>    }
>>>>>
>>>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>>>> *idev,
>>>>> +                                Error **errp)
>>>>> +{
>>>>> +    PCIBus *bus;
>>>>> +    PCIBus *iommu_bus;
>>>>> +    uint8_t devfn;
>>>>> +
>>>>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus,
>> &devfn);
>>>>> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
>>>> Why do we test iommu_bus in pci_device_un/set_iommu_device
>> routines
>>>> and
>>>> not in pci_device_iommu_address_space() ?
>>> iommu_bus check in pci_device_iommu_address_space() is dropped in
>>> below commit, I didn't find related discussion in mail history, maybe
>>> by accident? I can add it back if it's not intentional.
>> Can iommu_bus be NULL or should we add an assert ?
> I dig into the history changes of pci_device_iommu_address_space() and
> below commit added iommu_bus check.
>
> 5af2ae230514  pci: Fix pci_device_iommu_address_space() bus propagation
>
> In theory, !iommu_bus->parent_dev take precedency over !iommu_bus,
> So we never see iommu_bus NULL, assert may be better.

I think we had such a discussion in
https://www.mail-archive.com/qemu-devel@nongnu.org/msg994766.html
But maybe this was related to a different call site. I remember I
challenged the check at some point.

Eric
>
> Thanks
> Zhenzhong
>
Duan, Zhenzhong Jan. 24, 2024, 9:23 a.m. UTC | #8
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>pci_device_set/unset_iommu_device()
>
>
>
>On 1/23/24 10:25, Duan, Zhenzhong wrote:
>>
>>> -----Original Message-----
>>> From: Cédric Le Goater <clg@redhat.com>
>>> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>>> pci_device_set/unset_iommu_device()
>>>
>>> On 1/23/24 07:37, Duan, Zhenzhong wrote:
>>>>
>>>>> -----Original Message-----
>>>>> From: Cédric Le Goater <clg@redhat.com>
>>>>> Subject: Re: [PATCH rfcv1 2/6] hw/pci: introduce
>>>>> pci_device_set/unset_iommu_device()
>>>>>
>>>>> On 1/15/24 11:13, Zhenzhong Duan wrote:
>>>>>> From: Yi Liu <yi.l.liu@intel.com>
>>>>>>
>>>>>> This adds pci_device_set/unset_iommu_device() to set/unset
>>>>>> IOMMUFDDevice for a given PCIe device. Caller of set
>>>>>> should fail if set operation fails.
>>>>>>
>>>>>> Extract out pci_device_get_iommu_bus_devfn() to facilitate
>>>>>> implementation of pci_device_set/unset_iommu_device().
>>>>>>
>>>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>>>> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
>>>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>>>> ---
>>>>>>    include/hw/pci/pci.h | 39
>>> ++++++++++++++++++++++++++++++++++-
>>>>>>    hw/pci/pci.c         | 49
>>>>> +++++++++++++++++++++++++++++++++++++++++++-
>>>>>>    2 files changed, 86 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
>>>>>> index fa6313aabc..a810c0ec74 100644
>>>>>> --- a/include/hw/pci/pci.h
>>>>>> +++ b/include/hw/pci/pci.h
>>>>>> @@ -7,6 +7,8 @@
>>>>>>    /* PCI includes legacy ISA access.  */
>>>>>>    #include "hw/isa/isa.h"
>>>>>>
>>>>>> +#include "sysemu/iommufd_device.h"
>>>>>> +
>>>>>>    extern bool pci_available;
>>>>>>
>>>>>>    /* PCI bus */
>>>>>> @@ -384,10 +386,45 @@ typedef struct PCIIOMMUOps {
>>>>>>         *
>>>>>>         * @devfn: device and function number
>>>>>>         */
>>>>>> -   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque,
>int
>>>>> devfn);
>>>>>> +    AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque,
>>> int
>>>>> devfn);
>>>>>> +    /**
>>>>>> +     * @set_iommu_device: set iommufd device for a PCI device to
>>>>> vIOMMU
>>>>>> +     *
>>>>>> +     * Optional callback, if not implemented in vIOMMU, then
>vIOMMU
>>>>> can't
>>>>>> +     * utilize iommufd specific features.
>>>>>> +     *
>>>>>> +     * Return true if iommufd device is accepted, or else return false
>with
>>>>>> +     * errp set.
>>>>>> +     *
>>>>>> +     * @bus: the #PCIBus of the PCI device.
>>>>>> +     *
>>>>>> +     * @opaque: the data passed to pci_setup_iommu().
>>>>>> +     *
>>>>>> +     * @devfn: device and function number of the PCI device.
>>>>>> +     *
>>>>>> +     * @idev: the data structure representing iommufd device.
>>>>>> +     *
>>>>>> +     */
>>>>>> +    int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t
>devfn,
>>>>>> +                            IOMMUFDDevice *idev, Error **errp);
>>>>>> +    /**
>>>>>> +     * @unset_iommu_device: unset iommufd device for a PCI device
>>> from
>>>>> vIOMMU
>>>>>> +     *
>>>>>> +     * Optional callback.
>>>>>> +     *
>>>>>> +     * @bus: the #PCIBus of the PCI device.
>>>>>> +     *
>>>>>> +     * @opaque: the data passed to pci_setup_iommu().
>>>>>> +     *
>>>>>> +     * @devfn: device and function number of the PCI device.
>>>>>> +     */
>>>>>> +    void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t
>>>>> devfn);
>>>>>>    } PCIIOMMUOps;
>>>>>>
>>>>>>    AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
>>>>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>>>>> *idev,
>>>>>> +                                Error **errp);
>>>>>> +void pci_device_unset_iommu_device(PCIDevice *dev);
>>>>>>
>>>>>>    /**
>>>>>>     * pci_setup_iommu: Initialize specific IOMMU handlers for a
>PCIBus
>>>>>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>>>>>> index 76080af580..3848662f95 100644
>>>>>> --- a/hw/pci/pci.c
>>>>>> +++ b/hw/pci/pci.c
>>>>>> @@ -2672,7 +2672,10 @@ static void
>>>>> pci_device_class_base_init(ObjectClass *klass, void *data)
>>>>>>        }
>>>>>>    }
>>>>>>
>>>>>> -AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>>>>>> +static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
>>>>>> +                                           PCIBus **aliased_pbus,
>>>>>> +                                           PCIBus **piommu_bus,
>>>>>> +                                           uint8_t *aliased_pdevfn)
>>>>>>    {
>>>>>>        PCIBus *bus = pci_get_bus(dev);
>>>>>>        PCIBus *iommu_bus = bus;
>>>>>> @@ -2717,6 +2720,18 @@ AddressSpace
>>>>> *pci_device_iommu_address_space(PCIDevice *dev)
>>>>>>            iommu_bus = parent_bus;
>>>>>>        }
>>>>>> +    *aliased_pbus = bus;
>>>>>> +    *piommu_bus = iommu_bus;
>>>>>> +    *aliased_pdevfn = devfn;
>>>>>> +}
>>>>>> +
>>>>>> +AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
>>>>>> +{
>>>>>> +    PCIBus *bus;
>>>>>> +    PCIBus *iommu_bus;
>>>>>> +    uint8_t devfn;
>>>>>> +
>>>>>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus,
>>> &devfn);
>>>>>>        if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
>>>>>>            return iommu_bus->iommu_ops->get_address_space(bus,
>>>>>>                                     iommu_bus->iommu_opaque, devfn);
>>>>>> @@ -2724,6 +2739,38 @@ AddressSpace
>>>>> *pci_device_iommu_address_space(PCIDevice *dev)
>>>>>>        return &address_space_memory;
>>>>>>    }
>>>>>>
>>>>>> +int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice
>>>>> *idev,
>>>>>> +                                Error **errp)
>>>>>> +{
>>>>>> +    PCIBus *bus;
>>>>>> +    PCIBus *iommu_bus;
>>>>>> +    uint8_t devfn;
>>>>>> +
>>>>>> +    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus,
>>> &devfn);
>>>>>> +    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
>>>>> Why do we test iommu_bus in pci_device_un/set_iommu_device
>>> routines
>>>>> and
>>>>> not in pci_device_iommu_address_space() ?
>>>> iommu_bus check in pci_device_iommu_address_space() is dropped in
>>>> below commit, I didn't find related discussion in mail history, maybe
>>>> by accident? I can add it back if it's not intentional.
>>> Can iommu_bus be NULL or should we add an assert ?
>> I dig into the history changes of pci_device_iommu_address_space() and
>> below commit added iommu_bus check.
>>
>> 5af2ae230514  pci: Fix pci_device_iommu_address_space() bus
>propagation
>>
>> In theory, !iommu_bus->parent_dev take precedency over !iommu_bus,
>> So we never see iommu_bus NULL, assert may be better.
>
>I think we had such a discussion in
>https://www.mail-archive.com/qemu-devel@nongnu.org/msg994766.html
>But maybe this was related to a different call place. I remember I
>challenged the check at some point

It seems this question was not discussed further in that thread.
Per my code inspection, the PCI root bus's parent_dev should be NULL, so we get
either the root bus or a sub bus, never NULL.
I also tested with a PXB bridge, which is the suspicious scenario, with the same result.

Thanks
Zhenzhong
diff mbox series

Patch

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index fa6313aabc..a810c0ec74 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -7,6 +7,8 @@ 
 /* PCI includes legacy ISA access.  */
 #include "hw/isa/isa.h"
 
+#include "sysemu/iommufd_device.h"
+
 extern bool pci_available;
 
 /* PCI bus */
@@ -384,10 +386,45 @@  typedef struct PCIIOMMUOps {
      *
      * @devfn: device and function number
      */
-   AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+    AddressSpace * (*get_address_space)(PCIBus *bus, void *opaque, int devfn);
+    /**
+     * @set_iommu_device: set iommufd device for a PCI device to vIOMMU
+     *
+     * Optional callback, if not implemented in vIOMMU, then vIOMMU can't
+     * utilize iommufd specific features.
+     *
+     * Return 0 if iommufd device is accepted, or else return a non-zero
+     * value with errp set.
+     *
+     * @bus: the #PCIBus of the PCI device.
+     *
+     * @opaque: the data passed to pci_setup_iommu().
+     *
+     * @devfn: device and function number of the PCI device.
+     *
+     * @idev: the data structure representing iommufd device.
+     *
+     */
+    int (*set_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn,
+                            IOMMUFDDevice *idev, Error **errp);
+    /**
+     * @unset_iommu_device: unset iommufd device for a PCI device from vIOMMU
+     *
+     * Optional callback.
+     *
+     * @bus: the #PCIBus of the PCI device.
+     *
+     * @opaque: the data passed to pci_setup_iommu().
+     *
+     * @devfn: device and function number of the PCI device.
+     */
+    void (*unset_iommu_device)(PCIBus *bus, void *opaque, int32_t devfn);
 } PCIIOMMUOps;
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
+int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev,
+                                Error **errp);
+void pci_device_unset_iommu_device(PCIDevice *dev);
 
 /**
  * pci_setup_iommu: Initialize specific IOMMU handlers for a PCIBus
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 76080af580..3848662f95 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2672,7 +2672,10 @@  static void pci_device_class_base_init(ObjectClass *klass, void *data)
     }
 }
 
-AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+static void pci_device_get_iommu_bus_devfn(PCIDevice *dev,
+                                           PCIBus **aliased_pbus,
+                                           PCIBus **piommu_bus,
+                                           uint8_t *aliased_pdevfn)
 {
     PCIBus *bus = pci_get_bus(dev);
     PCIBus *iommu_bus = bus;
@@ -2717,6 +2720,18 @@  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
 
         iommu_bus = parent_bus;
     }
+    *aliased_pbus = bus;
+    *piommu_bus = iommu_bus;
+    *aliased_pdevfn = devfn;
+}
+
+AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
+{
+    PCIBus *bus;
+    PCIBus *iommu_bus;
+    uint8_t devfn;
+
+    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
     if (!pci_bus_bypass_iommu(bus) && iommu_bus->iommu_ops) {
         return iommu_bus->iommu_ops->get_address_space(bus,
                                  iommu_bus->iommu_opaque, devfn);
@@ -2724,6 +2739,38 @@  AddressSpace *pci_device_iommu_address_space(PCIDevice *dev)
     return &address_space_memory;
 }
 
+int pci_device_set_iommu_device(PCIDevice *dev, IOMMUFDDevice *idev,
+                                Error **errp)
+{
+    PCIBus *bus;
+    PCIBus *iommu_bus;
+    uint8_t devfn;
+
+    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
+        iommu_bus->iommu_ops && iommu_bus->iommu_ops->set_iommu_device) {
+        return iommu_bus->iommu_ops->set_iommu_device(pci_get_bus(dev),
+                                                      iommu_bus->iommu_opaque,
+                                                      dev->devfn, idev, errp);
+    }
+    return 0;
+}
+
+void pci_device_unset_iommu_device(PCIDevice *dev)
+{
+    PCIBus *bus;
+    PCIBus *iommu_bus;
+    uint8_t devfn;
+
+    pci_device_get_iommu_bus_devfn(dev, &bus, &iommu_bus, &devfn);
+    if (!pci_bus_bypass_iommu(bus) && iommu_bus &&
+        iommu_bus->iommu_ops && iommu_bus->iommu_ops->unset_iommu_device) {
+        return iommu_bus->iommu_ops->unset_iommu_device(pci_get_bus(dev),
+                                                        iommu_bus->iommu_opaque,
+                                                        dev->devfn);
+    }
+}
+
 void pci_setup_iommu(PCIBus *bus, const PCIIOMMUOps *ops, void *opaque)
 {
     /*