Message ID | 20240115101313.131139-4-zhenzhong.duan@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Check and sync host IOMMU cap/ecap with vIOMMU | expand |
Hi Zhenzhong, On 1/15/24 11:13, Zhenzhong Duan wrote: > From: Yi Liu <yi.l.liu@intel.com> > > This adds set/unset_iommu_device() implementation in Intel vIOMMU. > In set call, IOMMUFDDevice is recorded in hash table indexed by > PCI BDF. > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > include/hw/i386/intel_iommu.h | 10 +++++ > hw/i386/intel_iommu.c | 79 +++++++++++++++++++++++++++++++++++ > 2 files changed, 89 insertions(+) > > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h > index 7fa0a695c8..c65fdde56f 100644 > --- a/include/hw/i386/intel_iommu.h > +++ b/include/hw/i386/intel_iommu.h > @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry; > typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; > typedef struct VTDPASIDDirEntry VTDPASIDDirEntry; > typedef struct VTDPASIDEntry VTDPASIDEntry; > +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice; > > /* Context-Entry */ > struct VTDContextEntry { > @@ -148,6 +149,13 @@ struct VTDAddressSpace { > IOVATree *iova_tree; > }; > > +struct VTDIOMMUFDDevice { > + PCIBus *bus; > + uint8_t devfn; > + IOMMUFDDevice *idev; > + IntelIOMMUState *iommu_state; > +}; > + Just wondering whether we shouldn't reuse the VTDAddressSpace to store the idev, if any. How have you made your choice. What will it become when PASID gets added? > struct VTDIOTLBEntry { > uint64_t gfn; > uint16_t domain_id; > @@ -292,6 +300,8 @@ struct IntelIOMMUState { > /* list of registered notifiers */ > QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; > > + GHashTable *vtd_iommufd_dev; /* VTDIOMMUFDDevice */ > + > /* interrupt remapping */ > bool intr_enabled; /* Whether guest enabled IR */ > dma_addr_t intr_root; /* Interrupt remapping table pointer */ > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index ed5677c0ae..95faf697eb 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2) > (key1->pasid == key2->pasid); > } > > +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2) > +{ > + const struct vtd_as_key *key1 = v1; > + const struct vtd_as_key *key2 = v2; > + > + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); > +} > /* > * Note that we use pointer to PCIBus as the key, so hashing/shifting > * based on the pointer value is intended. Note that we deal with > @@ -3812,6 +3819,74 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, > return vtd_dev_as; > } > > +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn, > + IOMMUFDDevice *idev, Error **errp) > +{ > + IntelIOMMUState *s = opaque; > + VTDIOMMUFDDevice *vtd_idev; > + struct vtd_as_key key = { > + .bus = bus, > + .devfn = devfn, > + }; > + struct vtd_as_key *new_key; > + > + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); > + > + /* None IOMMUFD case */ > + if (!idev) { > + return 0; > + } > + > + vtd_iommu_lock(s); > + > + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); > + > + if (vtd_idev) { > + error_setg(errp, "IOMMUFD device already exist"); > + return -1; > + } > + > + new_key = g_malloc(sizeof(*new_key)); > + new_key->bus = bus; > + new_key->devfn = devfn; > + > + vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice)); > + vtd_idev->bus = bus; > + vtd_idev->devfn = (uint8_t)devfn; > + vtd_idev->iommu_state = s; > + vtd_idev->idev = idev; > + > + g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev); > + > + vtd_iommu_unlock(s); > + > + return 0; > +} > + > +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int32_t devfn) > +{ > + IntelIOMMUState *s = opaque; > + VTDIOMMUFDDevice *vtd_idev; > + struct vtd_as_key key = { > + .bus = bus, > + .devfn = devfn, > + }; > + > + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); > + > + vtd_iommu_lock(s); > + > + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); > + if (!vtd_idev) { > + vtd_iommu_unlock(s); > + return; > + } > + > + g_hash_table_remove(s->vtd_iommufd_dev, &key); > + > + vtd_iommu_unlock(s); > +} > + > /* Unmap the whole range in the notifier's scope. */ > static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) > { > @@ -4107,6 +4182,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) > > static PCIIOMMUOps vtd_iommu_ops = { > .get_address_space = vtd_host_dma_iommu, > + .set_iommu_device = vtd_dev_set_iommu_device, > + .unset_iommu_device = vtd_dev_unset_iommu_device, > }; > > static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) > @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error **errp) > g_free, g_free); > s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, > g_free, g_free); > + s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash, vtd_as_idev_equal, > + g_free, g_free); > vtd_init(s); > pci_setup_iommu(bus, &vtd_iommu_ops, dev); > /* Pseudo address space under root PCI bus. */ Thanks Eric
>-----Original Message----- >From: Eric Auger <eric.auger@redhat.com> >Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device >callback > >Hi Zhenzhong, > >On 1/15/24 11:13, Zhenzhong Duan wrote: >> From: Yi Liu <yi.l.liu@intel.com> >> >> This adds set/unset_iommu_device() implementation in Intel vIOMMU. >> In set call, IOMMUFDDevice is recorded in hash table indexed by >> PCI BDF. >> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >> --- >> include/hw/i386/intel_iommu.h | 10 +++++ >> hw/i386/intel_iommu.c | 79 >+++++++++++++++++++++++++++++++++++ >> 2 files changed, 89 insertions(+) >> >> diff --git a/include/hw/i386/intel_iommu.h >b/include/hw/i386/intel_iommu.h >> index 7fa0a695c8..c65fdde56f 100644 >> --- a/include/hw/i386/intel_iommu.h >> +++ b/include/hw/i386/intel_iommu.h >> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry; >> typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; >> typedef struct VTDPASIDDirEntry VTDPASIDDirEntry; >> typedef struct VTDPASIDEntry VTDPASIDEntry; >> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice; >> >> /* Context-Entry */ >> struct VTDContextEntry { >> @@ -148,6 +149,13 @@ struct VTDAddressSpace { >> IOVATree *iova_tree; >> }; >> >> +struct VTDIOMMUFDDevice { >> + PCIBus *bus; >> + uint8_t devfn; >> + IOMMUFDDevice *idev; >> + IntelIOMMUState *iommu_state; >> +}; >> + >Just wondering whether we shouldn't reuse the VTDAddressSpace to store >the idev, if any. How have you made your choice. What will it become >when PASID gets added? VTDAddressSpace is indexed by aliased BDF, but VTDIOMMUFDDevice is indexed by device's BDF. So we can't just store VTDIOMMUFDDevice as a pointer in VTDAddressSpace, may need a list in case more than one device in same address space. Then a global VTDIOMMUFDDevice list is better for lookup. For PASID in modern mode which support stage-1 page table, we have VTDPASIDAddressSpace indexed by device's BDF+PASID, We didn't use VTDAddressSpace which is for stage-2 page table. Thanks Zhenzhong >> struct VTDIOTLBEntry { >> uint64_t gfn; >> uint16_t domain_id; >> @@ -292,6 +300,8 @@ struct IntelIOMMUState { >> /* list of registered notifiers */ >> QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; >> >> + GHashTable *vtd_iommufd_dev; /* VTDIOMMUFDDevice */ >> + >> /* interrupt remapping */ >> bool intr_enabled; /* Whether guest enabled IR */ >> dma_addr_t intr_root; /* Interrupt remapping table pointer */ >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index ed5677c0ae..95faf697eb 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, >gconstpointer v2) >> (key1->pasid == key2->pasid); >> } >> >> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2) >> +{ >> + const struct vtd_as_key *key1 = v1; >> + const struct vtd_as_key *key2 = v2; >> + >> + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); >> +} >> /* >> * Note that we use pointer to PCIBus as the key, so hashing/shifting >> * based on the pointer value is intended. Note that we deal with >> @@ -3812,6 +3819,74 @@ VTDAddressSpace >*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, >> return vtd_dev_as; >> } >> >> +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, >int32_t devfn, >> + IOMMUFDDevice *idev, Error **errp) >> +{ >> + IntelIOMMUState *s = opaque; >> + VTDIOMMUFDDevice *vtd_idev; >> + struct vtd_as_key key = { >> + .bus = bus, >> + .devfn = devfn, >> + }; >> + struct vtd_as_key *new_key; >> + >> + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); >> + >> + /* None IOMMUFD case */ >> + if (!idev) { >> + return 0; >> + } >> + >> + vtd_iommu_lock(s); >> + >> + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); >> + >> + if (vtd_idev) { >> + error_setg(errp, "IOMMUFD device already exist"); >> + return -1; >> + } >> + >> + new_key = g_malloc(sizeof(*new_key)); >> + new_key->bus = bus; >> + new_key->devfn = devfn; >> + >> + vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice)); >> + vtd_idev->bus = bus; >> + vtd_idev->devfn = (uint8_t)devfn; >> + vtd_idev->iommu_state = s; >> + vtd_idev->idev = idev; >> + >> + g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev); >> + >> + vtd_iommu_unlock(s); >> + >> + return 0; >> +} >> + >> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, >int32_t devfn) >> +{ >> + IntelIOMMUState *s = opaque; >> + VTDIOMMUFDDevice *vtd_idev; >> + struct vtd_as_key key = { >> + .bus = bus, >> + .devfn = devfn, >> + }; >> + >> + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); >> + >> + vtd_iommu_lock(s); >> + >> + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); >> + if (!vtd_idev) { >> + vtd_iommu_unlock(s); >> + return; >> + } >> + >> + g_hash_table_remove(s->vtd_iommufd_dev, &key); >> + >> + vtd_iommu_unlock(s); >> +} >> + >> /* Unmap the whole range in the notifier's scope. */ >> static void vtd_address_space_unmap(VTDAddressSpace *as, >IOMMUNotifier *n) >> { >> @@ -4107,6 +4182,8 @@ static AddressSpace >*vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) >> >> static PCIIOMMUOps vtd_iommu_ops = { >> .get_address_space = vtd_host_dma_iommu, >> + .set_iommu_device = vtd_dev_set_iommu_device, >> + .unset_iommu_device = vtd_dev_unset_iommu_device, >> }; >> >> static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) >> @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error >**errp) >> g_free, g_free); >> s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, >vtd_as_equal, >> g_free, g_free); >> + s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash, >vtd_as_idev_equal, >> + g_free, g_free); >> vtd_init(s); >> pci_setup_iommu(bus, &vtd_iommu_ops, dev); >> /* Pseudo address space under root PCI bus. */ >Thanks > >Eric
On 1/18/24 09:43, Duan, Zhenzhong wrote: > >> -----Original Message----- >> From: Eric Auger <eric.auger@redhat.com> >> Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device >> callback >> >> Hi Zhenzhong, >> >> On 1/15/24 11:13, Zhenzhong Duan wrote: >>> From: Yi Liu <yi.l.liu@intel.com> >>> >>> This adds set/unset_iommu_device() implementation in Intel vIOMMU. >>> In set call, IOMMUFDDevice is recorded in hash table indexed by >>> PCI BDF. >>> >>> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >>> --- >>> include/hw/i386/intel_iommu.h | 10 +++++ >>> hw/i386/intel_iommu.c | 79 >> +++++++++++++++++++++++++++++++++++ >>> 2 files changed, 89 insertions(+) >>> >>> diff --git a/include/hw/i386/intel_iommu.h >> b/include/hw/i386/intel_iommu.h >>> index 7fa0a695c8..c65fdde56f 100644 >>> --- a/include/hw/i386/intel_iommu.h >>> +++ b/include/hw/i386/intel_iommu.h >>> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry; >>> typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; >>> typedef struct VTDPASIDDirEntry VTDPASIDDirEntry; >>> typedef struct VTDPASIDEntry VTDPASIDEntry; >>> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice; >>> >>> /* Context-Entry */ >>> struct VTDContextEntry { >>> @@ -148,6 +149,13 @@ struct VTDAddressSpace { >>> IOVATree *iova_tree; >>> }; >>> >>> +struct VTDIOMMUFDDevice { >>> + PCIBus *bus; >>> + uint8_t devfn; >>> + IOMMUFDDevice *idev; >>> + IntelIOMMUState *iommu_state; >>> +}; >>> + >> Just wondering whether we shouldn't reuse the VTDAddressSpace to store >> the idev, if any. How have you made your choice. What will it become >> when PASID gets added? > VTDAddressSpace is indexed by aliased BDF, but VTDIOMMUFDDevice is indexed > by device's BDF. So we can't just store VTDIOMMUFDDevice as a pointer in > VTDAddressSpace, may need a list in case more than one device in same address > space. Then a global VTDIOMMUFDDevice list is better for lookup. OK but if several devices are hidden under an aliased BDF, can't they share the host properties (DMAR ecap/cap)? > > For PASID in modern mode which support stage-1 page table, we have > VTDPASIDAddressSpace indexed by device's BDF+PASID, We didn't use > VTDAddressSpace which is for stage-2 page table. OK Thanks Eric > > Thanks > Zhenzhong > >>> struct VTDIOTLBEntry { >>> uint64_t gfn; >>> uint16_t domain_id; >>> @@ -292,6 +300,8 @@ struct IntelIOMMUState { >>> /* list of registered notifiers */ >>> QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; >>> >>> + GHashTable *vtd_iommufd_dev; /* VTDIOMMUFDDevice */ >>> + >>> /* interrupt remapping */ >>> bool intr_enabled; /* Whether guest enabled IR */ >>> dma_addr_t intr_root; /* Interrupt remapping table pointer */ >>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >>> index ed5677c0ae..95faf697eb 100644 >>> --- a/hw/i386/intel_iommu.c >>> +++ b/hw/i386/intel_iommu.c >>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, >> gconstpointer v2) >>> (key1->pasid == key2->pasid); >>> } >>> >>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2) >>> +{ >>> + const struct vtd_as_key *key1 = v1; >>> + const struct vtd_as_key *key2 = v2; >>> + >>> + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); >>> +} >>> /* >>> * Note that we use pointer to PCIBus as the key, so hashing/shifting >>> * based on the pointer value is intended. Note that we deal with >>> @@ -3812,6 +3819,74 @@ VTDAddressSpace >> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, >>> return vtd_dev_as; >>> } >>> >>> +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, >> int32_t devfn, >>> + IOMMUFDDevice *idev, Error **errp) >>> +{ >>> + IntelIOMMUState *s = opaque; >>> + VTDIOMMUFDDevice *vtd_idev; >>> + struct vtd_as_key key = { >>> + .bus = bus, >>> + .devfn = devfn, >>> + }; >>> + struct vtd_as_key *new_key; >>> + >>> + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); >>> + >>> + /* None IOMMUFD case */ >>> + if (!idev) { >>> + return 0; >>> + } >>> + >>> + vtd_iommu_lock(s); >>> + >>> + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); >>> + >>> + if (vtd_idev) { >>> + error_setg(errp, "IOMMUFD device already exist"); >>> + return -1; >>> + } >>> + >>> + new_key = g_malloc(sizeof(*new_key)); >>> + new_key->bus = bus; >>> + new_key->devfn = devfn; >>> + >>> + vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice)); >>> + vtd_idev->bus = bus; >>> + vtd_idev->devfn = (uint8_t)devfn; >>> + vtd_idev->iommu_state = s; >>> + vtd_idev->idev = idev; >>> + >>> + g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev); >>> + >>> + vtd_iommu_unlock(s); >>> + >>> + return 0; >>> +} >>> + >>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, >> int32_t devfn) >>> +{ >>> + IntelIOMMUState *s = opaque; >>> + VTDIOMMUFDDevice *vtd_idev; >>> + struct vtd_as_key key = { >>> + .bus = bus, >>> + .devfn = devfn, >>> + }; >>> + >>> + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); >>> + >>> + vtd_iommu_lock(s); >>> + >>> + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); >>> + if (!vtd_idev) { >>> + vtd_iommu_unlock(s); >>> + return; >>> + } >>> + >>> + g_hash_table_remove(s->vtd_iommufd_dev, &key); >>> + >>> + vtd_iommu_unlock(s); >>> +} >>> + >>> /* Unmap the whole range in the notifier's scope. */ >>> static void vtd_address_space_unmap(VTDAddressSpace *as, >> IOMMUNotifier *n) >>> { >>> @@ -4107,6 +4182,8 @@ static AddressSpace >> *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) >>> static PCIIOMMUOps vtd_iommu_ops = { >>> .get_address_space = vtd_host_dma_iommu, >>> + .set_iommu_device = vtd_dev_set_iommu_device, >>> + .unset_iommu_device = vtd_dev_unset_iommu_device, >>> }; >>> >>> static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) >>> @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error >> **errp) >>> g_free, g_free); >>> s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, >> vtd_as_equal, >>> g_free, g_free); >>> + s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash, >> vtd_as_idev_equal, >>> + g_free, g_free); >>> vtd_init(s); >>> pci_setup_iommu(bus, &vtd_iommu_ops, dev); >>> /* Pseudo address space under root PCI bus. */ >> Thanks >> >> Eric
>-----Original Message----- >From: Eric Auger <eric.auger@redhat.com> >Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device >callback > > > >On 1/18/24 09:43, Duan, Zhenzhong wrote: >> >>> -----Original Message----- >>> From: Eric Auger <eric.auger@redhat.com> >>> Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add >set/unset_iommu_device >>> callback >>> >>> Hi Zhenzhong, >>> >>> On 1/15/24 11:13, Zhenzhong Duan wrote: >>>> From: Yi Liu <yi.l.liu@intel.com> >>>> >>>> This adds set/unset_iommu_device() implementation in Intel vIOMMU. >>>> In set call, IOMMUFDDevice is recorded in hash table indexed by >>>> PCI BDF. >>>> >>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >>>> --- >>>> include/hw/i386/intel_iommu.h | 10 +++++ >>>> hw/i386/intel_iommu.c | 79 >>> +++++++++++++++++++++++++++++++++++ >>>> 2 files changed, 89 insertions(+) >>>> >>>> diff --git a/include/hw/i386/intel_iommu.h >>> b/include/hw/i386/intel_iommu.h >>>> index 7fa0a695c8..c65fdde56f 100644 >>>> --- a/include/hw/i386/intel_iommu.h >>>> +++ b/include/hw/i386/intel_iommu.h >>>> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry >VTD_IR_TableEntry; >>>> typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; >>>> typedef struct VTDPASIDDirEntry VTDPASIDDirEntry; >>>> typedef struct VTDPASIDEntry VTDPASIDEntry; >>>> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice; >>>> >>>> /* Context-Entry */ >>>> struct VTDContextEntry { >>>> @@ -148,6 +149,13 @@ struct VTDAddressSpace { >>>> IOVATree *iova_tree; >>>> }; >>>> >>>> +struct VTDIOMMUFDDevice { >>>> + PCIBus *bus; >>>> + uint8_t devfn; >>>> + IOMMUFDDevice *idev; >>>> + IntelIOMMUState *iommu_state; >>>> +}; >>>> + >>> Just wondering whether we shouldn't reuse the VTDAddressSpace to >store >>> the idev, if any. How have you made your choice. What will it become >>> when PASID gets added? >> VTDAddressSpace is indexed by aliased BDF, but VTDIOMMUFDDevice is >indexed >> by device's BDF. So we can't just store VTDIOMMUFDDevice as a pointer in >> VTDAddressSpace, may need a list in case more than one device in same >address >> space. Then a global VTDIOMMUFDDevice list is better for lookup. > >OK but if several devices are hidden under an aliased BDF, can't they >share the host properties (DMAR ecap/cap)? It depends on that if the vfio devices are under same aliased BDF in host. If vfio devices are configured under same aliased BDF in qemu but they are not in same aliased BDF in host, their host cap/ecap may be different. Thanks Zhenzhong
On 1/15/24 11:13, Zhenzhong Duan wrote: > From: Yi Liu <yi.l.liu@intel.com> > > This adds set/unset_iommu_device() implementation in Intel vIOMMU. > In set call, IOMMUFDDevice is recorded in hash table indexed by > PCI BDF. > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > include/hw/i386/intel_iommu.h | 10 +++++ > hw/i386/intel_iommu.c | 79 +++++++++++++++++++++++++++++++++++ > 2 files changed, 89 insertions(+) > > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h > index 7fa0a695c8..c65fdde56f 100644 > --- a/include/hw/i386/intel_iommu.h > +++ b/include/hw/i386/intel_iommu.h > @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry; > typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; > typedef struct VTDPASIDDirEntry VTDPASIDDirEntry; > typedef struct VTDPASIDEntry VTDPASIDEntry; > +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice; > > /* Context-Entry */ > struct VTDContextEntry { > @@ -148,6 +149,13 @@ struct VTDAddressSpace { > IOVATree *iova_tree; > }; > > +struct VTDIOMMUFDDevice { > + PCIBus *bus; > + uint8_t devfn; > + IOMMUFDDevice *idev; > + IntelIOMMUState *iommu_state; > +}; Does the VTDIOMMUFDDevice definition need to be public ? > struct VTDIOTLBEntry { > uint64_t gfn; > uint16_t domain_id; > @@ -292,6 +300,8 @@ struct IntelIOMMUState { > /* list of registered notifiers */ > QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; > > + GHashTable *vtd_iommufd_dev; /* VTDIOMMUFDDevice */ > + > /* interrupt remapping */ > bool intr_enabled; /* Whether guest enabled IR */ > dma_addr_t intr_root; /* Interrupt remapping table pointer */ > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index ed5677c0ae..95faf697eb 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2) > (key1->pasid == key2->pasid); > } > > +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2) > +{ > + const struct vtd_as_key *key1 = v1; > + const struct vtd_as_key *key2 = v2; > + > + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); > +} > /* > * Note that we use pointer to PCIBus as the key, so hashing/shifting > * based on the pointer value is intended. Note that we deal with > @@ -3812,6 +3819,74 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, > return vtd_dev_as; > } > > +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn, > + IOMMUFDDevice *idev, Error **errp) > +{ > + IntelIOMMUState *s = opaque; > + VTDIOMMUFDDevice *vtd_idev; > + struct vtd_as_key key = { > + .bus = bus, > + .devfn = devfn, > + }; > + struct vtd_as_key *new_key; > + > + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); Can we move the assert earlier in the call stack ? pci_device_get_iommu_bus_devfn() looks like a good place. > + > + /* None IOMMUFD case */ > + if (!idev) { > + return 0; > + } Can we move this test in the helper ? (Looks like an error to me). Thanks, C. > + > + vtd_iommu_lock(s); > + > + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); > + > + if (vtd_idev) { > + error_setg(errp, "IOMMUFD device already exist"); > + return -1; > + } > + > + new_key = g_malloc(sizeof(*new_key)); > + new_key->bus = bus; > + new_key->devfn = devfn; > + > + vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice)); > + vtd_idev->bus = bus; > + vtd_idev->devfn = (uint8_t)devfn; > + vtd_idev->iommu_state = s; > + vtd_idev->idev = idev; > + > + g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev); > + > + vtd_iommu_unlock(s); > + > + return 0; > +} > + > +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int32_t devfn) > +{ > + IntelIOMMUState *s = opaque; > + VTDIOMMUFDDevice *vtd_idev; > + struct vtd_as_key key = { > + .bus = bus, > + .devfn = devfn, > + }; > + > + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); > + > + vtd_iommu_lock(s); > + > + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); > + if (!vtd_idev) { > + vtd_iommu_unlock(s); > + return; > + } > + > + g_hash_table_remove(s->vtd_iommufd_dev, &key); > + > + vtd_iommu_unlock(s); > +} > + > /* Unmap the whole range in the notifier's scope. */ > static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) > { > @@ -4107,6 +4182,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) > > static PCIIOMMUOps vtd_iommu_ops = { > .get_address_space = vtd_host_dma_iommu, > + .set_iommu_device = vtd_dev_set_iommu_device, > + .unset_iommu_device = vtd_dev_unset_iommu_device, > }; > > static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) > @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error **errp) > g_free, g_free); > s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, > g_free, g_free); > + s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash, vtd_as_idev_equal, > + g_free, g_free); > vtd_init(s); > pci_setup_iommu(bus, &vtd_iommu_ops, dev); > /* Pseudo address space under root PCI bus. */
>-----Original Message----- >From: Cédric Le Goater <clg@redhat.com> >Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device >callback > >On 1/15/24 11:13, Zhenzhong Duan wrote: >> From: Yi Liu <yi.l.liu@intel.com> >> >> This adds set/unset_iommu_device() implementation in Intel vIOMMU. >> In set call, IOMMUFDDevice is recorded in hash table indexed by >> PCI BDF. >> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >> --- >> include/hw/i386/intel_iommu.h | 10 +++++ >> hw/i386/intel_iommu.c | 79 >+++++++++++++++++++++++++++++++++++ >> 2 files changed, 89 insertions(+) >> >> diff --git a/include/hw/i386/intel_iommu.h >b/include/hw/i386/intel_iommu.h >> index 7fa0a695c8..c65fdde56f 100644 >> --- a/include/hw/i386/intel_iommu.h >> +++ b/include/hw/i386/intel_iommu.h >> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry; >> typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; >> typedef struct VTDPASIDDirEntry VTDPASIDDirEntry; >> typedef struct VTDPASIDEntry VTDPASIDEntry; >> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice; >> >> /* Context-Entry */ >> struct VTDContextEntry { >> @@ -148,6 +149,13 @@ struct VTDAddressSpace { >> IOVATree *iova_tree; >> }; >> >> +struct VTDIOMMUFDDevice { >> + PCIBus *bus; >> + uint8_t devfn; >> + IOMMUFDDevice *idev; >> + IntelIOMMUState *iommu_state; >> +}; > >Does the VTDIOMMUFDDevice definition need to be public ? No need, will move it to hw/i386/intel_iommu_internal.h It looks I need to do the same for other definitions in nesting series. > >> struct VTDIOTLBEntry { >> uint64_t gfn; >> uint16_t domain_id; >> @@ -292,6 +300,8 @@ struct IntelIOMMUState { >> /* list of registered notifiers */ >> QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; >> >> + GHashTable *vtd_iommufd_dev; /* VTDIOMMUFDDevice */ >> + >> /* interrupt remapping */ >> bool intr_enabled; /* Whether guest enabled IR */ >> dma_addr_t intr_root; /* Interrupt remapping table pointer */ >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index ed5677c0ae..95faf697eb 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, >gconstpointer v2) >> (key1->pasid == key2->pasid); >> } >> >> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2) >> +{ >> + const struct vtd_as_key *key1 = v1; >> + const struct vtd_as_key *key2 = v2; >> + >> + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); >> +} >> /* >> * Note that we use pointer to PCIBus as the key, so hashing/shifting >> * based on the pointer value is intended. Note that we deal with >> @@ -3812,6 +3819,74 @@ VTDAddressSpace >*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, >> return vtd_dev_as; >> } >> >> +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, >int32_t devfn, >> + IOMMUFDDevice *idev, Error **errp) >> +{ >> + IntelIOMMUState *s = opaque; >> + VTDIOMMUFDDevice *vtd_idev; >> + struct vtd_as_key key = { >> + .bus = bus, >> + .devfn = devfn, >> + }; >> + struct vtd_as_key *new_key; >> + >> + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); > >Can we move the assert earlier in the call stack ? >pci_device_get_iommu_bus_devfn() looks like a good place. Sure. > >> + >> + /* None IOMMUFD case */ >> + if (!idev) { >> + return 0; >> + } > >Can we move this test in the helper ? (Looks like an error to me). We need to pass in NULL idev to do further check in nesting series. See https://github.com/yiliu1765/qemu/commit/7f0bb59575bb5cf38618ae950f68a8661676e881 Thanks Zhenzhong
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 7fa0a695c8..c65fdde56f 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry; typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress; typedef struct VTDPASIDDirEntry VTDPASIDDirEntry; typedef struct VTDPASIDEntry VTDPASIDEntry; +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice; /* Context-Entry */ struct VTDContextEntry { @@ -148,6 +149,13 @@ struct VTDAddressSpace { IOVATree *iova_tree; }; +struct VTDIOMMUFDDevice { + PCIBus *bus; + uint8_t devfn; + IOMMUFDDevice *idev; + IntelIOMMUState *iommu_state; +}; + struct VTDIOTLBEntry { uint64_t gfn; uint16_t domain_id; @@ -292,6 +300,8 @@ struct IntelIOMMUState { /* list of registered notifiers */ QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers; + GHashTable *vtd_iommufd_dev; /* VTDIOMMUFDDevice */ + /* interrupt remapping */ bool intr_enabled; /* Whether guest enabled IR */ dma_addr_t intr_root; /* Interrupt remapping table pointer */ diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index ed5677c0ae..95faf697eb 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2) (key1->pasid == key2->pasid); } +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2) +{ + const struct vtd_as_key *key1 = v1; + const struct vtd_as_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); +} /* * Note that we use pointer to PCIBus as the key, so hashing/shifting * based on the pointer value is intended. Note that we deal with @@ -3812,6 +3819,74 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, return vtd_dev_as; } +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn, + IOMMUFDDevice *idev, Error **errp) +{ + IntelIOMMUState *s = opaque; + VTDIOMMUFDDevice *vtd_idev; + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + }; + struct vtd_as_key *new_key; + + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); + + /* None IOMMUFD case */ + if (!idev) { + return 0; + } + + vtd_iommu_lock(s); + + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); + + if (vtd_idev) { + error_setg(errp, "IOMMUFD device already exist"); + return -1; + } + + new_key = g_malloc(sizeof(*new_key)); + new_key->bus = bus; + new_key->devfn = devfn; + + vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice)); + vtd_idev->bus = bus; + vtd_idev->devfn = (uint8_t)devfn; + vtd_idev->iommu_state = s; + vtd_idev->idev = idev; + + g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev); + + vtd_iommu_unlock(s); + + return 0; +} + +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque, int32_t devfn) +{ + IntelIOMMUState *s = opaque; + VTDIOMMUFDDevice *vtd_idev; + struct vtd_as_key key = { + .bus = bus, + .devfn = devfn, + }; + + assert(0 <= devfn && devfn < PCI_DEVFN_MAX); + + vtd_iommu_lock(s); + + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); + if (!vtd_idev) { + vtd_iommu_unlock(s); + return; + } + + g_hash_table_remove(s->vtd_iommufd_dev, &key); + + vtd_iommu_unlock(s); +} + /* Unmap the whole range in the notifier's scope. */ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) { @@ -4107,6 +4182,8 @@ static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) static PCIIOMMUOps vtd_iommu_ops = { .get_address_space = vtd_host_dma_iommu, + .set_iommu_device = vtd_dev_set_iommu_device, + .unset_iommu_device = vtd_dev_unset_iommu_device, }; static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error **errp) g_free, g_free); s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash, vtd_as_equal, g_free, g_free); + s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash, vtd_as_idev_equal, + g_free, g_free); vtd_init(s); pci_setup_iommu(bus, &vtd_iommu_ops, dev); /* Pseudo address space under root PCI bus. */