diff mbox series

[rfcv1,3/6] intel_iommu: add set/unset_iommu_device callback

Message ID 20240115101313.131139-4-zhenzhong.duan@intel.com (mailing list archive)
State New, archived
Headers show
Series Check and sync host IOMMU cap/ecap with vIOMMU | expand

Commit Message

Duan, Zhenzhong Jan. 15, 2024, 10:13 a.m. UTC
From: Yi Liu <yi.l.liu@intel.com>

This adds set/unset_iommu_device() implementation in Intel vIOMMU.
In set call, IOMMUFDDevice is recorded in hash table indexed by
PCI BDF.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 include/hw/i386/intel_iommu.h | 10 +++++
 hw/i386/intel_iommu.c         | 79 +++++++++++++++++++++++++++++++++++
 2 files changed, 89 insertions(+)

Comments

Eric Auger Jan. 17, 2024, 3:44 p.m. UTC | #1
Hi Zhenzhong,

On 1/15/24 11:13, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l.liu@intel.com>
>
> This adds set/unset_iommu_device() implementation in Intel vIOMMU.
> In set call, IOMMUFDDevice is recorded in hash table indexed by
> PCI BDF.
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  include/hw/i386/intel_iommu.h | 10 +++++
>  hw/i386/intel_iommu.c         | 79 +++++++++++++++++++++++++++++++++++
>  2 files changed, 89 insertions(+)
>
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 7fa0a695c8..c65fdde56f 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
>  typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
>  typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
>  typedef struct VTDPASIDEntry VTDPASIDEntry;
> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice;
>  
>  /* Context-Entry */
>  struct VTDContextEntry {
> @@ -148,6 +149,13 @@ struct VTDAddressSpace {
>      IOVATree *iova_tree;
>  };
>  
> +struct VTDIOMMUFDDevice {
> +    PCIBus *bus;
> +    uint8_t devfn;
> +    IOMMUFDDevice *idev;
> +    IntelIOMMUState *iommu_state;
> +};
> +
Just wondering whether we shouldn't reuse the VTDAddressSpace to store
the idev, if any. How have you made your choice. What will it become
when PASID gets added?
>  struct VTDIOTLBEntry {
>      uint64_t gfn;
>      uint16_t domain_id;
> @@ -292,6 +300,8 @@ struct IntelIOMMUState {
>      /* list of registered notifiers */
>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>  
> +    GHashTable *vtd_iommufd_dev;             /* VTDIOMMUFDDevice */
> +
>      /* interrupt remapping */
>      bool intr_enabled;              /* Whether guest enabled IR */
>      dma_addr_t intr_root;           /* Interrupt remapping table pointer */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index ed5677c0ae..95faf697eb 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
>             (key1->pasid == key2->pasid);
>  }
>  
> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
> +{
> +    const struct vtd_as_key *key1 = v1;
> +    const struct vtd_as_key *key2 = v2;
> +
> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
> +}
>  /*
>   * Note that we use pointer to PCIBus as the key, so hashing/shifting
>   * based on the pointer value is intended. Note that we deal with
> @@ -3812,6 +3819,74 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>      return vtd_dev_as;
>  }
>  
> +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn,
> +                                    IOMMUFDDevice *idev, Error **errp)
> +{
> +    IntelIOMMUState *s = opaque;
> +    VTDIOMMUFDDevice *vtd_idev;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
> +    struct vtd_as_key *new_key;
> +
> +    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
> +
> +    /* None IOMMUFD case */
> +    if (!idev) {
> +        return 0;
> +    }
> +
> +    vtd_iommu_lock(s);
> +
> +    vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
> +
> +    if (vtd_idev) {
> +        error_setg(errp, "IOMMUFD device already exist");
> +        return -1;
> +    }
> +
> +    new_key = g_malloc(sizeof(*new_key));
> +    new_key->bus = bus;
> +    new_key->devfn = devfn;
> +
> +    vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice));
> +    vtd_idev->bus = bus;
> +    vtd_idev->devfn = (uint8_t)devfn;
> +    vtd_idev->iommu_state = s;
> +    vtd_idev->idev = idev;
> +
> +    g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev);
> +
> +    vtd_iommu_unlock(s);
> +
> +    return 0;
> +}
> +
> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int32_t devfn)
> +{
> +    IntelIOMMUState *s = opaque;
> +    VTDIOMMUFDDevice *vtd_idev;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
> +
> +    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
> +
> +    vtd_iommu_lock(s);
> +
> +    vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
> +    if (!vtd_idev) {
> +        vtd_iommu_unlock(s);
> +        return;
> +    }
> +
> +    g_hash_table_remove(s->vtd_iommufd_dev, &key);
> +
> +    vtd_iommu_unlock(s);
> +}
> +
>  /* Unmap the whole range in the notifier's scope. */
>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
>  {
> @@ -4107,6 +4182,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>  
>  static PCIIOMMUOps vtd_iommu_ops = {
>      .get_address_space = vtd_host_dma_iommu,
> +    .set_iommu_device = vtd_dev_set_iommu_device,
> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>  };
>  
>  static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>                                       g_free, g_free);
>      s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
>                                        g_free, g_free);
> +    s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash, vtd_as_idev_equal,
> +                                               g_free, g_free);
>      vtd_init(s);
>      pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>      /* Pseudo address space under root PCI bus. */
Thanks

Eric
Duan, Zhenzhong Jan. 18, 2024, 8:43 a.m. UTC | #2
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device
>callback
>
>Hi Zhenzhong,
>
>On 1/15/24 11:13, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> This adds set/unset_iommu_device() implementation in Intel vIOMMU.
>> In set call, IOMMUFDDevice is recorded in hash table indexed by
>> PCI BDF.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  include/hw/i386/intel_iommu.h | 10 +++++
>>  hw/i386/intel_iommu.c         | 79
>+++++++++++++++++++++++++++++++++++
>>  2 files changed, 89 insertions(+)
>>
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index 7fa0a695c8..c65fdde56f 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
>>  typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
>>  typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
>>  typedef struct VTDPASIDEntry VTDPASIDEntry;
>> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice;
>>
>>  /* Context-Entry */
>>  struct VTDContextEntry {
>> @@ -148,6 +149,13 @@ struct VTDAddressSpace {
>>      IOVATree *iova_tree;
>>  };
>>
>> +struct VTDIOMMUFDDevice {
>> +    PCIBus *bus;
>> +    uint8_t devfn;
>> +    IOMMUFDDevice *idev;
>> +    IntelIOMMUState *iommu_state;
>> +};
>> +
>Just wondering whether we shouldn't reuse the VTDAddressSpace to store
>the idev, if any. How have you made your choice. What will it become
>when PASID gets added?

VTDAddressSpace is indexed by aliased BDF, but VTDIOMMUFDDevice is indexed
by device's BDF. So we can't just store VTDIOMMUFDDevice as a pointer in
VTDAddressSpace, may need a list in case more than one device in same address
space. Then a global VTDIOMMUFDDevice list is better for lookup.

For PASID in modern mode which support stage-1 page table, we have
VTDPASIDAddressSpace indexed by device's BDF+PASID, We didn't use
VTDAddressSpace which is for stage-2 page table.

Thanks
Zhenzhong

>>  struct VTDIOTLBEntry {
>>      uint64_t gfn;
>>      uint16_t domain_id;
>> @@ -292,6 +300,8 @@ struct IntelIOMMUState {
>>      /* list of registered notifiers */
>>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>
>> +    GHashTable *vtd_iommufd_dev;             /* VTDIOMMUFDDevice */
>> +
>>      /* interrupt remapping */
>>      bool intr_enabled;              /* Whether guest enabled IR */
>>      dma_addr_t intr_root;           /* Interrupt remapping table pointer */
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index ed5677c0ae..95faf697eb 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>gconstpointer v2)
>>             (key1->pasid == key2->pasid);
>>  }
>>
>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>> +{
>> +    const struct vtd_as_key *key1 = v1;
>> +    const struct vtd_as_key *key2 = v2;
>> +
>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>> +}
>>  /*
>>   * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>   * based on the pointer value is intended. Note that we deal with
>> @@ -3812,6 +3819,74 @@ VTDAddressSpace
>*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>      return vtd_dev_as;
>>  }
>>
>> +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque,
>int32_t devfn,
>> +                                    IOMMUFDDevice *idev, Error **errp)
>> +{
>> +    IntelIOMMUState *s = opaque;
>> +    VTDIOMMUFDDevice *vtd_idev;
>> +    struct vtd_as_key key = {
>> +        .bus = bus,
>> +        .devfn = devfn,
>> +    };
>> +    struct vtd_as_key *new_key;
>> +
>> +    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
>> +
>> +    /* None IOMMUFD case */
>> +    if (!idev) {
>> +        return 0;
>> +    }
>> +
>> +    vtd_iommu_lock(s);
>> +
>> +    vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
>> +
>> +    if (vtd_idev) {
>> +        error_setg(errp, "IOMMUFD device already exist");
>> +        return -1;
>> +    }
>> +
>> +    new_key = g_malloc(sizeof(*new_key));
>> +    new_key->bus = bus;
>> +    new_key->devfn = devfn;
>> +
>> +    vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice));
>> +    vtd_idev->bus = bus;
>> +    vtd_idev->devfn = (uint8_t)devfn;
>> +    vtd_idev->iommu_state = s;
>> +    vtd_idev->idev = idev;
>> +
>> +    g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev);
>> +
>> +    vtd_iommu_unlock(s);
>> +
>> +    return 0;
>> +}
>> +
>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque,
>int32_t devfn)
>> +{
>> +    IntelIOMMUState *s = opaque;
>> +    VTDIOMMUFDDevice *vtd_idev;
>> +    struct vtd_as_key key = {
>> +        .bus = bus,
>> +        .devfn = devfn,
>> +    };
>> +
>> +    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
>> +
>> +    vtd_iommu_lock(s);
>> +
>> +    vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
>> +    if (!vtd_idev) {
>> +        vtd_iommu_unlock(s);
>> +        return;
>> +    }
>> +
>> +    g_hash_table_remove(s->vtd_iommufd_dev, &key);
>> +
>> +    vtd_iommu_unlock(s);
>> +}
>> +
>>  /* Unmap the whole range in the notifier's scope. */
>>  static void vtd_address_space_unmap(VTDAddressSpace *as,
>IOMMUNotifier *n)
>>  {
>> @@ -4107,6 +4182,8 @@ static AddressSpace
>*vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>
>>  static PCIIOMMUOps vtd_iommu_ops = {
>>      .get_address_space = vtd_host_dma_iommu,
>> +    .set_iommu_device = vtd_dev_set_iommu_device,
>> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>>  };
>>
>>  static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>> @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error
>**errp)
>>                                       g_free, g_free);
>>      s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash,
>vtd_as_equal,
>>                                        g_free, g_free);
>> +    s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash,
>vtd_as_idev_equal,
>> +                                               g_free, g_free);
>>      vtd_init(s);
>>      pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>>      /* Pseudo address space under root PCI bus. */
>Thanks
>
>Eric
Eric Auger Jan. 18, 2024, 12:34 p.m. UTC | #3
On 1/18/24 09:43, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device
>> callback
>>
>> Hi Zhenzhong,
>>
>> On 1/15/24 11:13, Zhenzhong Duan wrote:
>>> From: Yi Liu <yi.l.liu@intel.com>
>>>
>>> This adds set/unset_iommu_device() implementation in Intel vIOMMU.
>>> In set call, IOMMUFDDevice is recorded in hash table indexed by
>>> PCI BDF.
>>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>  include/hw/i386/intel_iommu.h | 10 +++++
>>>  hw/i386/intel_iommu.c         | 79
>> +++++++++++++++++++++++++++++++++++
>>>  2 files changed, 89 insertions(+)
>>>
>>> diff --git a/include/hw/i386/intel_iommu.h
>> b/include/hw/i386/intel_iommu.h
>>> index 7fa0a695c8..c65fdde56f 100644
>>> --- a/include/hw/i386/intel_iommu.h
>>> +++ b/include/hw/i386/intel_iommu.h
>>> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
>>>  typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
>>>  typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
>>>  typedef struct VTDPASIDEntry VTDPASIDEntry;
>>> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice;
>>>
>>>  /* Context-Entry */
>>>  struct VTDContextEntry {
>>> @@ -148,6 +149,13 @@ struct VTDAddressSpace {
>>>      IOVATree *iova_tree;
>>>  };
>>>
>>> +struct VTDIOMMUFDDevice {
>>> +    PCIBus *bus;
>>> +    uint8_t devfn;
>>> +    IOMMUFDDevice *idev;
>>> +    IntelIOMMUState *iommu_state;
>>> +};
>>> +
>> Just wondering whether we shouldn't reuse the VTDAddressSpace to store
>> the idev, if any. How have you made your choice. What will it become
>> when PASID gets added?
> VTDAddressSpace is indexed by aliased BDF, but VTDIOMMUFDDevice is indexed
> by device's BDF. So we can't just store VTDIOMMUFDDevice as a pointer in
> VTDAddressSpace, may need a list in case more than one device in same address
> space. Then a global VTDIOMMUFDDevice list is better for lookup.

OK but if several devices are hidden under an aliased BDF, can't they
share the host properties (DMAR ecap/cap)?
>
> For PASID in modern mode which support stage-1 page table, we have
> VTDPASIDAddressSpace indexed by device's BDF+PASID, We didn't use
> VTDAddressSpace which is for stage-2 page table.

OK

Thanks

Eric
>
> Thanks
> Zhenzhong
>
>>>  struct VTDIOTLBEntry {
>>>      uint64_t gfn;
>>>      uint16_t domain_id;
>>> @@ -292,6 +300,8 @@ struct IntelIOMMUState {
>>>      /* list of registered notifiers */
>>>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>>
>>> +    GHashTable *vtd_iommufd_dev;             /* VTDIOMMUFDDevice */
>>> +
>>>      /* interrupt remapping */
>>>      bool intr_enabled;              /* Whether guest enabled IR */
>>>      dma_addr_t intr_root;           /* Interrupt remapping table pointer */
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index ed5677c0ae..95faf697eb 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>> gconstpointer v2)
>>>             (key1->pasid == key2->pasid);
>>>  }
>>>
>>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>>> +{
>>> +    const struct vtd_as_key *key1 = v1;
>>> +    const struct vtd_as_key *key2 = v2;
>>> +
>>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>>> +}
>>>  /*
>>>   * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>>   * based on the pointer value is intended. Note that we deal with
>>> @@ -3812,6 +3819,74 @@ VTDAddressSpace
>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>>      return vtd_dev_as;
>>>  }
>>>
>>> +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque,
>> int32_t devfn,
>>> +                                    IOMMUFDDevice *idev, Error **errp)
>>> +{
>>> +    IntelIOMMUState *s = opaque;
>>> +    VTDIOMMUFDDevice *vtd_idev;
>>> +    struct vtd_as_key key = {
>>> +        .bus = bus,
>>> +        .devfn = devfn,
>>> +    };
>>> +    struct vtd_as_key *new_key;
>>> +
>>> +    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
>>> +
>>> +    /* None IOMMUFD case */
>>> +    if (!idev) {
>>> +        return 0;
>>> +    }
>>> +
>>> +    vtd_iommu_lock(s);
>>> +
>>> +    vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
>>> +
>>> +    if (vtd_idev) {
>>> +        error_setg(errp, "IOMMUFD device already exist");
>>> +        return -1;
>>> +    }
>>> +
>>> +    new_key = g_malloc(sizeof(*new_key));
>>> +    new_key->bus = bus;
>>> +    new_key->devfn = devfn;
>>> +
>>> +    vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice));
>>> +    vtd_idev->bus = bus;
>>> +    vtd_idev->devfn = (uint8_t)devfn;
>>> +    vtd_idev->iommu_state = s;
>>> +    vtd_idev->idev = idev;
>>> +
>>> +    g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev);
>>> +
>>> +    vtd_iommu_unlock(s);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque,
>> int32_t devfn)
>>> +{
>>> +    IntelIOMMUState *s = opaque;
>>> +    VTDIOMMUFDDevice *vtd_idev;
>>> +    struct vtd_as_key key = {
>>> +        .bus = bus,
>>> +        .devfn = devfn,
>>> +    };
>>> +
>>> +    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
>>> +
>>> +    vtd_iommu_lock(s);
>>> +
>>> +    vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
>>> +    if (!vtd_idev) {
>>> +        vtd_iommu_unlock(s);
>>> +        return;
>>> +    }
>>> +
>>> +    g_hash_table_remove(s->vtd_iommufd_dev, &key);
>>> +
>>> +    vtd_iommu_unlock(s);
>>> +}
>>> +
>>>  /* Unmap the whole range in the notifier's scope. */
>>>  static void vtd_address_space_unmap(VTDAddressSpace *as,
>> IOMMUNotifier *n)
>>>  {
>>> @@ -4107,6 +4182,8 @@ static AddressSpace
>> *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>>  static PCIIOMMUOps vtd_iommu_ops = {
>>>      .get_address_space = vtd_host_dma_iommu,
>>> +    .set_iommu_device = vtd_dev_set_iommu_device,
>>> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>>>  };
>>>
>>>  static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>>> @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error
>> **errp)
>>>                                       g_free, g_free);
>>>      s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash,
>> vtd_as_equal,
>>>                                        g_free, g_free);
>>> +    s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash,
>> vtd_as_idev_equal,
>>> +                                               g_free, g_free);
>>>      vtd_init(s);
>>>      pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>>>      /* Pseudo address space under root PCI bus. */
>> Thanks
>>
>> Eric
Duan, Zhenzhong Jan. 19, 2024, 7:27 a.m. UTC | #4
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device
>callback
>
>
>
>On 1/18/24 09:43, Duan, Zhenzhong wrote:
>>
>>> -----Original Message-----
>>> From: Eric Auger <eric.auger@redhat.com>
>>> Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add
>set/unset_iommu_device
>>> callback
>>>
>>> Hi Zhenzhong,
>>>
>>> On 1/15/24 11:13, Zhenzhong Duan wrote:
>>>> From: Yi Liu <yi.l.liu@intel.com>
>>>>
>>>> This adds set/unset_iommu_device() implementation in Intel vIOMMU.
>>>> In set call, IOMMUFDDevice is recorded in hash table indexed by
>>>> PCI BDF.
>>>>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>>  include/hw/i386/intel_iommu.h | 10 +++++
>>>>  hw/i386/intel_iommu.c         | 79
>>> +++++++++++++++++++++++++++++++++++
>>>>  2 files changed, 89 insertions(+)
>>>>
>>>> diff --git a/include/hw/i386/intel_iommu.h
>>> b/include/hw/i386/intel_iommu.h
>>>> index 7fa0a695c8..c65fdde56f 100644
>>>> --- a/include/hw/i386/intel_iommu.h
>>>> +++ b/include/hw/i386/intel_iommu.h
>>>> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry
>VTD_IR_TableEntry;
>>>>  typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
>>>>  typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
>>>>  typedef struct VTDPASIDEntry VTDPASIDEntry;
>>>> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice;
>>>>
>>>>  /* Context-Entry */
>>>>  struct VTDContextEntry {
>>>> @@ -148,6 +149,13 @@ struct VTDAddressSpace {
>>>>      IOVATree *iova_tree;
>>>>  };
>>>>
>>>> +struct VTDIOMMUFDDevice {
>>>> +    PCIBus *bus;
>>>> +    uint8_t devfn;
>>>> +    IOMMUFDDevice *idev;
>>>> +    IntelIOMMUState *iommu_state;
>>>> +};
>>>> +
>>> Just wondering whether we shouldn't reuse the VTDAddressSpace to
>store
>>> the idev, if any. How have you made your choice. What will it become
>>> when PASID gets added?
>> VTDAddressSpace is indexed by aliased BDF, but VTDIOMMUFDDevice is
>indexed
>> by device's BDF. So we can't just store VTDIOMMUFDDevice as a pointer in
>> VTDAddressSpace, may need a list in case more than one device in same
>address
>> space. Then a global VTDIOMMUFDDevice list is better for lookup.
>
>OK but if several devices are hidden under an aliased BDF, can't they
>share the host properties (DMAR ecap/cap)?

It depends on that if the vfio devices are under same aliased BDF in host.
If vfio devices are configured under same aliased BDF in qemu but they are
not in same aliased BDF in host, their host cap/ecap may be different.

Thanks
Zhenzhong
Cédric Le Goater Jan. 22, 2024, 5:09 p.m. UTC | #5
On 1/15/24 11:13, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l.liu@intel.com>
> 
> This adds set/unset_iommu_device() implementation in Intel vIOMMU.
> In set call, IOMMUFDDevice is recorded in hash table indexed by
> PCI BDF.
> 
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   include/hw/i386/intel_iommu.h | 10 +++++
>   hw/i386/intel_iommu.c         | 79 +++++++++++++++++++++++++++++++++++
>   2 files changed, 89 insertions(+)
> 
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 7fa0a695c8..c65fdde56f 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
>   typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
>   typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
>   typedef struct VTDPASIDEntry VTDPASIDEntry;
> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice;
>   
>   /* Context-Entry */
>   struct VTDContextEntry {
> @@ -148,6 +149,13 @@ struct VTDAddressSpace {
>       IOVATree *iova_tree;
>   };
>   
> +struct VTDIOMMUFDDevice {
> +    PCIBus *bus;
> +    uint8_t devfn;
> +    IOMMUFDDevice *idev;
> +    IntelIOMMUState *iommu_state;
> +};

Does the VTDIOMMUFDDevice definition need to be public ?

>   struct VTDIOTLBEntry {
>       uint64_t gfn;
>       uint16_t domain_id;
> @@ -292,6 +300,8 @@ struct IntelIOMMUState {
>       /* list of registered notifiers */
>       QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>   
> +    GHashTable *vtd_iommufd_dev;             /* VTDIOMMUFDDevice */
> +
>       /* interrupt remapping */
>       bool intr_enabled;              /* Whether guest enabled IR */
>       dma_addr_t intr_root;           /* Interrupt remapping table pointer */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index ed5677c0ae..95faf697eb 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
>              (key1->pasid == key2->pasid);
>   }
>   
> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
> +{
> +    const struct vtd_as_key *key1 = v1;
> +    const struct vtd_as_key *key2 = v2;
> +
> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
> +}
>   /*
>    * Note that we use pointer to PCIBus as the key, so hashing/shifting
>    * based on the pointer value is intended. Note that we deal with
> @@ -3812,6 +3819,74 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>       return vtd_dev_as;
>   }
>   
> +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn,
> +                                    IOMMUFDDevice *idev, Error **errp)
> +{
> +    IntelIOMMUState *s = opaque;
> +    VTDIOMMUFDDevice *vtd_idev;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
> +    struct vtd_as_key *new_key;
> +
> +    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);

Can we move the assert earlier in the call stack ?
pci_device_get_iommu_bus_devfn() looks like a good place.

> +
> +    /* None IOMMUFD case */
> +    if (!idev) {
> +        return 0;
> +    }

Can we move this test in the helper ? (Looks like an error to me).


Thanks,

C.


> +
> +    vtd_iommu_lock(s);
> +
> +    vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
> +
> +    if (vtd_idev) {
> +        error_setg(errp, "IOMMUFD device already exist");
> +        return -1;
> +    }
> +
> +    new_key = g_malloc(sizeof(*new_key));
> +    new_key->bus = bus;
> +    new_key->devfn = devfn;
> +
> +    vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice));
> +    vtd_idev->bus = bus;
> +    vtd_idev->devfn = (uint8_t)devfn;
> +    vtd_idev->iommu_state = s;
> +    vtd_idev->idev = idev;
> +
> +    g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev);
> +
> +    vtd_iommu_unlock(s);
> +
> +    return 0;
> +}
> +
> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int32_t devfn)
> +{
> +    IntelIOMMUState *s = opaque;
> +    VTDIOMMUFDDevice *vtd_idev;
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
> +
> +    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
> +
> +    vtd_iommu_lock(s);
> +
> +    vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
> +    if (!vtd_idev) {
> +        vtd_iommu_unlock(s);
> +        return;
> +    }
> +
> +    g_hash_table_remove(s->vtd_iommufd_dev, &key);
> +
> +    vtd_iommu_unlock(s);
> +}
> +
>   /* Unmap the whole range in the notifier's scope. */
>   static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
>   {
> @@ -4107,6 +4182,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>   
>   static PCIIOMMUOps vtd_iommu_ops = {
>       .get_address_space = vtd_host_dma_iommu,
> +    .set_iommu_device = vtd_dev_set_iommu_device,
> +    .unset_iommu_device = vtd_dev_unset_iommu_device,
>   };
>   
>   static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
> @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>                                        g_free, g_free);
>       s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
>                                         g_free, g_free);
> +    s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash, vtd_as_idev_equal,
> +                                               g_free, g_free);
>       vtd_init(s);
>       pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>       /* Pseudo address space under root PCI bus. */
Duan, Zhenzhong Jan. 23, 2024, 9:46 a.m. UTC | #6
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device
>callback
>
>On 1/15/24 11:13, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> This adds set/unset_iommu_device() implementation in Intel vIOMMU.
>> In set call, IOMMUFDDevice is recorded in hash table indexed by
>> PCI BDF.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   include/hw/i386/intel_iommu.h | 10 +++++
>>   hw/i386/intel_iommu.c         | 79
>+++++++++++++++++++++++++++++++++++
>>   2 files changed, 89 insertions(+)
>>
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index 7fa0a695c8..c65fdde56f 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
>>   typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
>>   typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
>>   typedef struct VTDPASIDEntry VTDPASIDEntry;
>> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice;
>>
>>   /* Context-Entry */
>>   struct VTDContextEntry {
>> @@ -148,6 +149,13 @@ struct VTDAddressSpace {
>>       IOVATree *iova_tree;
>>   };
>>
>> +struct VTDIOMMUFDDevice {
>> +    PCIBus *bus;
>> +    uint8_t devfn;
>> +    IOMMUFDDevice *idev;
>> +    IntelIOMMUState *iommu_state;
>> +};
>
>Does the VTDIOMMUFDDevice definition need to be public ?

No need, will move it to hw/i386/intel_iommu_internal.h
It looks I need to do the same for other definitions in nesting series.

>
>>   struct VTDIOTLBEntry {
>>       uint64_t gfn;
>>       uint16_t domain_id;
>> @@ -292,6 +300,8 @@ struct IntelIOMMUState {
>>       /* list of registered notifiers */
>>       QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>
>> +    GHashTable *vtd_iommufd_dev;             /* VTDIOMMUFDDevice */
>> +
>>       /* interrupt remapping */
>>       bool intr_enabled;              /* Whether guest enabled IR */
>>       dma_addr_t intr_root;           /* Interrupt remapping table pointer */
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index ed5677c0ae..95faf697eb 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>gconstpointer v2)
>>              (key1->pasid == key2->pasid);
>>   }
>>
>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>> +{
>> +    const struct vtd_as_key *key1 = v1;
>> +    const struct vtd_as_key *key2 = v2;
>> +
>> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>> +}
>>   /*
>>    * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>    * based on the pointer value is intended. Note that we deal with
>> @@ -3812,6 +3819,74 @@ VTDAddressSpace
>*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>       return vtd_dev_as;
>>   }
>>
>> +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque,
>int32_t devfn,
>> +                                    IOMMUFDDevice *idev, Error **errp)
>> +{
>> +    IntelIOMMUState *s = opaque;
>> +    VTDIOMMUFDDevice *vtd_idev;
>> +    struct vtd_as_key key = {
>> +        .bus = bus,
>> +        .devfn = devfn,
>> +    };
>> +    struct vtd_as_key *new_key;
>> +
>> +    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
>
>Can we move the assert earlier in the call stack ?
>pci_device_get_iommu_bus_devfn() looks like a good place.

Sure.

>
>> +
>> +    /* None IOMMUFD case */
>> +    if (!idev) {
>> +        return 0;
>> +    }
>
>Can we move this test in the helper ? (Looks like an error to me).

We need to pass in NULL idev to do further check in nesting series.
See https://github.com/yiliu1765/qemu/commit/7f0bb59575bb5cf38618ae950f68a8661676e881

Thanks
Zhenzhong
diff mbox series

Patch

diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 7fa0a695c8..c65fdde56f 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -62,6 +62,7 @@  typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
 typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
 typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
 typedef struct VTDPASIDEntry VTDPASIDEntry;
+typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice;
 
 /* Context-Entry */
 struct VTDContextEntry {
@@ -148,6 +149,13 @@  struct VTDAddressSpace {
     IOVATree *iova_tree;
 };
 
+struct VTDIOMMUFDDevice {
+    PCIBus *bus;
+    uint8_t devfn;
+    IOMMUFDDevice *idev;
+    IntelIOMMUState *iommu_state;
+};
+
 struct VTDIOTLBEntry {
     uint64_t gfn;
     uint16_t domain_id;
@@ -292,6 +300,8 @@  struct IntelIOMMUState {
     /* list of registered notifiers */
     QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
 
+    GHashTable *vtd_iommufd_dev;             /* VTDIOMMUFDDevice */
+
     /* interrupt remapping */
     bool intr_enabled;              /* Whether guest enabled IR */
     dma_addr_t intr_root;           /* Interrupt remapping table pointer */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index ed5677c0ae..95faf697eb 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -237,6 +237,13 @@  static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
            (key1->pasid == key2->pasid);
 }
 
+static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
+{
+    const struct vtd_as_key *key1 = v1;
+    const struct vtd_as_key *key2 = v2;
+
+    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
+}
 /*
  * Note that we use pointer to PCIBus as the key, so hashing/shifting
  * based on the pointer value is intended. Note that we deal with
@@ -3812,6 +3819,74 @@  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
     return vtd_dev_as;
 }
 
+static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn,
+                                    IOMMUFDDevice *idev, Error **errp)
+{
+    IntelIOMMUState *s = opaque;
+    VTDIOMMUFDDevice *vtd_idev;
+    struct vtd_as_key key = {
+        .bus = bus,
+        .devfn = devfn,
+    };
+    struct vtd_as_key *new_key;
+
+    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+
+    /* None IOMMUFD case */
+    if (!idev) {
+        return 0;
+    }
+
+    vtd_iommu_lock(s);
+
+    vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
+
+    if (vtd_idev) {
+        error_setg(errp, "IOMMUFD device already exist");
+        return -1;
+    }
+
+    new_key = g_malloc(sizeof(*new_key));
+    new_key->bus = bus;
+    new_key->devfn = devfn;
+
+    vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice));
+    vtd_idev->bus = bus;
+    vtd_idev->devfn = (uint8_t)devfn;
+    vtd_idev->iommu_state = s;
+    vtd_idev->idev = idev;
+
+    g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev);
+
+    vtd_iommu_unlock(s);
+
+    return 0;
+}
+
+static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int32_t devfn)
+{
+    IntelIOMMUState *s = opaque;
+    VTDIOMMUFDDevice *vtd_idev;
+    struct vtd_as_key key = {
+        .bus = bus,
+        .devfn = devfn,
+    };
+
+    assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
+
+    vtd_iommu_lock(s);
+
+    vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
+    if (!vtd_idev) {
+        vtd_iommu_unlock(s);
+        return;
+    }
+
+    g_hash_table_remove(s->vtd_iommufd_dev, &key);
+
+    vtd_iommu_unlock(s);
+}
+
 /* Unmap the whole range in the notifier's scope. */
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
 {
@@ -4107,6 +4182,8 @@  static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 
 static PCIIOMMUOps vtd_iommu_ops = {
     .get_address_space = vtd_host_dma_iommu,
+    .set_iommu_device = vtd_dev_set_iommu_device,
+    .unset_iommu_device = vtd_dev_unset_iommu_device,
 };
 
 static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
@@ -4230,6 +4307,8 @@  static void vtd_realize(DeviceState *dev, Error **errp)
                                      g_free, g_free);
     s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
                                       g_free, g_free);
+    s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash, vtd_as_idev_equal,
+                                               g_free, g_free);
     vtd_init(s);
     pci_setup_iommu(bus, &vtd_iommu_ops, dev);
     /* Pseudo address space under root PCI bus. */