Message ID | 20240115101313.131139-7-zhenzhong.duan@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Check and sync host IOMMU cap/ecap with vIOMMU | expand |
Hi Zhenzhong, On 1/15/24 11:13, Zhenzhong Duan wrote: > From: Yi Liu <yi.l.liu@intel.com> > > Add a framework to check and synchronize host IOMMU cap/ecap with > vIOMMU cap/ecap. > > Currently only stage-2 translation is supported which is backed by > shadow page table on host side. So we don't need exact matching of > each bit of cap/ecap between vIOMMU and host. However, we can still > utilize this framework to ensure compatibility of host and vIOMMU's > address width at least, i.e., vIOMMU's aw_bits <= host aw_bits, > which is missed before. > > When stage-1 translation is supported in future, a.k.a. scalable > modern mode, we need to ensure compatibility of each bits. Some > bits are user controllable, they should be checked with host side > to ensure compatibility. Other bits are not, they should be synced > into vIOMMU cap/ecap for compatibility. > > The sequence will be: > > vtd_cap_init() initializes iommu->cap/ecap. ---- vtd_cap_init() > iommu->host_cap/ecap is initialized as iommu->cap/ecap. ---- vtd_init() > iommu->host_cap/ecap is checked and updated some bits with host cap/ecap. ---- vtd_sync_hw_info() > iommu->cap/ecap is finalized as iommu->host_cap/ecap. ---- vtd_machine_done_hook() > > iommu->host_cap/ecap is a temporary storage to hold intermediate value > when synthesize host cap/ecap and vIOMMU's initial configured cap/ecap. > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > include/hw/i386/intel_iommu.h | 4 ++ > hw/i386/intel_iommu.c | 78 +++++++++++++++++++++++++++++++---- > 2 files changed, 75 insertions(+), 7 deletions(-) > > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h > index c65fdde56f..b8abbcce12 100644 > --- a/include/hw/i386/intel_iommu.h > +++ b/include/hw/i386/intel_iommu.h > @@ -292,6 +292,9 @@ struct IntelIOMMUState { > uint64_t cap; /* The value of capability reg */ > uint64_t ecap; /* The value of extended capability reg */ > > + uint64_t host_cap; /* The value of host capability reg */ > + uint64_t host_ecap; /* The value of host ext-capability reg */ > + > uint32_t context_cache_gen; /* Should be in [1,MAX] */ > GHashTable *iotlb; /* IOTLB */ > > @@ -314,6 +317,7 @@ struct IntelIOMMUState { > bool dma_translation; /* Whether DMA translation supported */ > bool pasid; /* Whether to support PASID */ > > + bool cap_finalized; /* Whether VTD capability finalized */ > /* > * Protects IOMMU states in general. Currently it protects the > * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace. > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 4c1d058ebd..be03fcbf52 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -3819,6 +3819,47 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, > return vtd_dev_as; > } > > +static bool vtd_sync_hw_info(IntelIOMMUState *s, struct iommu_hw_info_vtd *vtd, > + Error **errp) > +{ > + uint64_t addr_width; > + > + addr_width = (vtd->cap_reg >> 16) & 0x3fULL; > + if (s->aw_bits > addr_width) { > + error_setg(errp, "User aw-bits: %u > host address width: %lu", > + s->aw_bits, addr_width); > + return false; > + } > + > + /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */ > + > + return true; > +} > + > +/* > + * virtual VT-d which wants nested needs to check the host IOMMU > + * nesting cap info behind the assigned devices. Thus that vIOMMU > + * could bind guest page table to host. > + */ > +static bool vtd_check_idev(IntelIOMMUState *s, IOMMUFDDevice *idev, > + Error **errp) > +{ > + struct iommu_hw_info_vtd vtd; > + enum iommu_hw_info_type type = IOMMU_HW_INFO_TYPE_INTEL_VTD; > + > + if (iommufd_device_get_info(idev, &type, sizeof(vtd), &vtd)) { > + error_setg(errp, "Failed to get IOMMU capability!!!"); > + return false; > + } > + > + if (type != IOMMU_HW_INFO_TYPE_INTEL_VTD) { > + error_setg(errp, "IOMMU hardware is not compatible!!!"); > + return false; > + } > + > + return vtd_sync_hw_info(s, &vtd, errp); > +} > + > static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn, > IOMMUFDDevice *idev, Error **errp) > { > @@ -3837,6 +3878,10 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn, > return 0; > } > > + if (!vtd_check_idev(s, idev, errp)) {In In [RFC 0/7] VIRTIO-IOMMU/VFIO: Fix host iommu geometry handling for hotplugged devices https://lore.kernel.org/all/20240117080414.316890-1-eric.auger@redhat.com/ I also attempt to pass host iommu info to the virtio-iommu but with legacy BE. In my case I want to pass the reserved memory regions which also model the aw. So this is a pretty similar use case. Why don't we pass the pointer to an opaque iommu_hw_info instead, through the PCIIOMMUOps? > + return -1; > + } > + > vtd_iommu_lock(s); > > vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); > @@ -4071,10 +4116,11 @@ static void vtd_init(IntelIOMMUState *s) > { > X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); > > - memset(s->csr, 0, DMAR_REG_SIZE); > - memset(s->wmask, 0, DMAR_REG_SIZE); > - memset(s->w1cmask, 0, DMAR_REG_SIZE); > - memset(s->womask, 0, DMAR_REG_SIZE); > + /* CAP/ECAP are initialized in machine create done stage */ > + memset(s->csr + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); > + memset(s->wmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); > + memset(s->w1cmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); > + memset(s->womask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); This change is not documented in the commit msg. Sorry I don't get why this is needed? > > s->root = 0; > s->root_scalable = false; > @@ -4110,13 +4156,16 @@ static void vtd_init(IntelIOMMUState *s) > vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; > } > > - vtd_cap_init(s); > + if (!s->cap_finalized) { > + vtd_cap_init(s); > + s->host_cap = s->cap; > + s->host_ecap = s->ecap; > + } > + > vtd_reset_caches(s); > > /* Define registers with default values and bit semantics */ > vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0); > - vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); > - vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); > vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0); > vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL); > vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0); > @@ -4241,6 +4290,12 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) > return true; > } > > +static void vtd_setup_capability_reg(IntelIOMMUState *s) > +{ > + vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); > + vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); > +} > + > static int vtd_machine_done_notify_one(Object *child, void *unused) > { > IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); > @@ -4259,6 +4314,14 @@ static int vtd_machine_done_notify_one(Object *child, void *unused) > > static void vtd_machine_done_hook(Notifier *notifier, void *unused) > { > + IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); > + > + iommu->cap = iommu->host_cap; > + iommu->ecap = iommu->host_ecap; > + iommu->cap_finalized = true; I don't think you can change the defaults like this without taking care of compats (migration). Thanks Eric > + > + vtd_setup_capability_reg(iommu); > + > object_child_foreach_recursive(object_get_root(), > vtd_machine_done_notify_one, NULL); > } > @@ -4292,6 +4355,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) > > QLIST_INIT(&s->vtd_as_with_notifiers); > qemu_mutex_init(&s->iommu_lock); > + s->cap_finalized = false; > memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, > "intel_iommu", DMAR_REG_SIZE); > memory_region_add_subregion(get_system_memory(),
Hi Eric, >-----Original Message----- >From: Eric Auger <eric.auger@redhat.com> >Subject: Re: [PATCH rfcv1 6/6] intel_iommu: add a framework to check and >sync host IOMMU cap/ecap > >Hi Zhenzhong, > >On 1/15/24 11:13, Zhenzhong Duan wrote: >> From: Yi Liu <yi.l.liu@intel.com> >> >> Add a framework to check and synchronize host IOMMU cap/ecap with >> vIOMMU cap/ecap. >> >> Currently only stage-2 translation is supported which is backed by >> shadow page table on host side. So we don't need exact matching of >> each bit of cap/ecap between vIOMMU and host. However, we can still >> utilize this framework to ensure compatibility of host and vIOMMU's >> address width at least, i.e., vIOMMU's aw_bits <= host aw_bits, >> which is missed before. >> >> When stage-1 translation is supported in future, a.k.a. scalable >> modern mode, we need to ensure compatibility of each bits. Some >> bits are user controllable, they should be checked with host side >> to ensure compatibility. Other bits are not, they should be synced >> into vIOMMU cap/ecap for compatibility. >> >> The sequence will be: >> >> vtd_cap_init() initializes iommu->cap/ecap. ---- vtd_cap_init() >> iommu->host_cap/ecap is initialized as iommu->cap/ecap. ---- vtd_init() >> iommu->host_cap/ecap is checked and updated some bits with host >cap/ecap. ---- vtd_sync_hw_info() >> iommu->cap/ecap is finalized as iommu->host_cap/ecap. ---- >vtd_machine_done_hook() >> >> iommu->host_cap/ecap is a temporary storage to hold intermediate value >> when synthesize host cap/ecap and vIOMMU's initial configured cap/ecap. >> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >> --- >> include/hw/i386/intel_iommu.h | 4 ++ >> hw/i386/intel_iommu.c | 78 >+++++++++++++++++++++++++++++++---- >> 2 files changed, 75 insertions(+), 7 deletions(-) >> >> diff --git a/include/hw/i386/intel_iommu.h >b/include/hw/i386/intel_iommu.h >> index c65fdde56f..b8abbcce12 100644 >> --- a/include/hw/i386/intel_iommu.h >> +++ b/include/hw/i386/intel_iommu.h >> @@ -292,6 +292,9 @@ struct IntelIOMMUState { >> uint64_t cap; /* The value of capability reg */ >> uint64_t ecap; /* The value of extended capability reg */ >> >> + uint64_t host_cap; /* The value of host capability reg */ >> + uint64_t host_ecap; /* The value of host ext-capability reg */ >> + >> uint32_t context_cache_gen; /* Should be in [1,MAX] */ >> GHashTable *iotlb; /* IOTLB */ >> >> @@ -314,6 +317,7 @@ struct IntelIOMMUState { >> bool dma_translation; /* Whether DMA translation supported */ >> bool pasid; /* Whether to support PASID */ >> >> + bool cap_finalized; /* Whether VTD capability finalized */ >> /* >> * Protects IOMMU states in general. Currently it protects the >> * per-IOMMU IOTLB cache, and context entry cache in >VTDAddressSpace. >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index 4c1d058ebd..be03fcbf52 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -3819,6 +3819,47 @@ VTDAddressSpace >*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, >> return vtd_dev_as; >> } >> >> +static bool vtd_sync_hw_info(IntelIOMMUState *s, struct >iommu_hw_info_vtd *vtd, >> + Error **errp) >> +{ >> + uint64_t addr_width; >> + >> + addr_width = (vtd->cap_reg >> 16) & 0x3fULL; >> + if (s->aw_bits > addr_width) { >> + error_setg(errp, "User aw-bits: %u > host address width: %lu", >> + s->aw_bits, addr_width); >> + return false; >> + } >> + >> + /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */ >> + >> + return true; >> +} >> + >> +/* >> + * virtual VT-d which wants nested needs to check the host IOMMU >> + * nesting cap info behind the assigned devices. Thus that vIOMMU >> + * could bind guest page table to host. >> + */ >> +static bool vtd_check_idev(IntelIOMMUState *s, IOMMUFDDevice *idev, >> + Error **errp) >> +{ >> + struct iommu_hw_info_vtd vtd; >> + enum iommu_hw_info_type type = >IOMMU_HW_INFO_TYPE_INTEL_VTD; >> + >> + if (iommufd_device_get_info(idev, &type, sizeof(vtd), &vtd)) { >> + error_setg(errp, "Failed to get IOMMU capability!!!"); >> + return false; >> + } >> + >> + if (type != IOMMU_HW_INFO_TYPE_INTEL_VTD) { >> + error_setg(errp, "IOMMU hardware is not compatible!!!"); >> + return false; >> + } >> + >> + return vtd_sync_hw_info(s, &vtd, errp); >> +} >> + >> static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t >devfn, >> IOMMUFDDevice *idev, Error **errp) >> { >> @@ -3837,6 +3878,10 @@ static int vtd_dev_set_iommu_device(PCIBus >*bus, void *opaque, int32_t devfn, >> return 0; >> } >> >> + if (!vtd_check_idev(s, idev, errp)) {In >In >[RFC 0/7] VIRTIO-IOMMU/VFIO: Fix host iommu geometry handling for >hotplugged devices >https://lore.kernel.org/all/20240117080414.316890-1- >eric.auger@redhat.com/ > >I also attempt to pass host iommu info to the virtio-iommu but with >legacy BE. I think your patch works with iommufd BE too
On 1/18/24 10:30, Duan, Zhenzhong wrote: > Hi Eric, > >> -----Original Message----- >> From: Eric Auger <eric.auger@redhat.com> >> Subject: Re: [PATCH rfcv1 6/6] intel_iommu: add a framework to check and >> sync host IOMMU cap/ecap >> >> Hi Zhenzhong, >> >> On 1/15/24 11:13, Zhenzhong Duan wrote: >>> From: Yi Liu <yi.l.liu@intel.com> >>> >>> Add a framework to check and synchronize host IOMMU cap/ecap with >>> vIOMMU cap/ecap. >>> >>> Currently only stage-2 translation is supported which is backed by >>> shadow page table on host side. So we don't need exact matching of >>> each bit of cap/ecap between vIOMMU and host. However, we can still >>> utilize this framework to ensure compatibility of host and vIOMMU's >>> address width at least, i.e., vIOMMU's aw_bits <= host aw_bits, >>> which is missed before. >>> >>> When stage-1 translation is supported in future, a.k.a. scalable >>> modern mode, we need to ensure compatibility of each bits. Some >>> bits are user controllable, they should be checked with host side >>> to ensure compatibility. Other bits are not, they should be synced >>> into vIOMMU cap/ecap for compatibility. >>> >>> The sequence will be: >>> >>> vtd_cap_init() initializes iommu->cap/ecap. ---- vtd_cap_init() >>> iommu->host_cap/ecap is initialized as iommu->cap/ecap. ---- vtd_init() >>> iommu->host_cap/ecap is checked and updated some bits with host >> cap/ecap. ---- vtd_sync_hw_info() >>> iommu->cap/ecap is finalized as iommu->host_cap/ecap. ---- >> vtd_machine_done_hook() >>> iommu->host_cap/ecap is a temporary storage to hold intermediate value >>> when synthesize host cap/ecap and vIOMMU's initial configured cap/ecap. >>> >>> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >>> --- >>> include/hw/i386/intel_iommu.h | 4 ++ >>> hw/i386/intel_iommu.c | 78 >> +++++++++++++++++++++++++++++++---- >>> 2 files changed, 75 insertions(+), 7 deletions(-) >>> >>> diff --git a/include/hw/i386/intel_iommu.h >> b/include/hw/i386/intel_iommu.h >>> index c65fdde56f..b8abbcce12 100644 >>> --- a/include/hw/i386/intel_iommu.h >>> +++ b/include/hw/i386/intel_iommu.h >>> @@ -292,6 +292,9 @@ struct IntelIOMMUState { >>> uint64_t cap; /* The value of capability reg */ >>> uint64_t ecap; /* The value of extended capability reg */ >>> >>> + uint64_t host_cap; /* The value of host capability reg */ >>> + uint64_t host_ecap; /* The value of host ext-capability reg */ >>> + >>> uint32_t context_cache_gen; /* Should be in [1,MAX] */ >>> GHashTable *iotlb; /* IOTLB */ >>> >>> @@ -314,6 +317,7 @@ struct IntelIOMMUState { >>> bool dma_translation; /* Whether DMA translation supported */ >>> bool pasid; /* Whether to support PASID */ >>> >>> + bool cap_finalized; /* Whether VTD capability finalized */ >>> /* >>> * Protects IOMMU states in general. Currently it protects the >>> * per-IOMMU IOTLB cache, and context entry cache in >> VTDAddressSpace. >>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >>> index 4c1d058ebd..be03fcbf52 100644 >>> --- a/hw/i386/intel_iommu.c >>> +++ b/hw/i386/intel_iommu.c >>> @@ -3819,6 +3819,47 @@ VTDAddressSpace >> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, >>> return vtd_dev_as; >>> } >>> >>> +static bool vtd_sync_hw_info(IntelIOMMUState *s, struct >> iommu_hw_info_vtd *vtd, >>> + Error **errp) >>> +{ >>> + uint64_t addr_width; >>> + >>> + addr_width = (vtd->cap_reg >> 16) & 0x3fULL; >>> + if (s->aw_bits > addr_width) { >>> + error_setg(errp, "User aw-bits: %u > host address width: %lu", >>> + s->aw_bits, addr_width); >>> + return false; >>> + } >>> + >>> + /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */ >>> + >>> + return true; >>> +} >>> + >>> +/* >>> + * virtual VT-d which wants nested needs to check the host IOMMU >>> + * nesting cap info behind the assigned devices. Thus that vIOMMU >>> + * could bind guest page table to host. >>> + */ >>> +static bool vtd_check_idev(IntelIOMMUState *s, IOMMUFDDevice *idev, >>> + Error **errp) >>> +{ >>> + struct iommu_hw_info_vtd vtd; >>> + enum iommu_hw_info_type type = >> IOMMU_HW_INFO_TYPE_INTEL_VTD; >>> + >>> + if (iommufd_device_get_info(idev, &type, sizeof(vtd), &vtd)) { >>> + error_setg(errp, "Failed to get IOMMU capability!!!"); >>> + return false; >>> + } >>> + >>> + if (type != IOMMU_HW_INFO_TYPE_INTEL_VTD) { >>> + error_setg(errp, "IOMMU hardware is not compatible!!!"); >>> + return false; >>> + } >>> + >>> + return vtd_sync_hw_info(s, &vtd, errp); >>> +} >>> + >>> static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t >> devfn, >>> IOMMUFDDevice *idev, Error **errp) >>> { >>> @@ -3837,6 +3878,10 @@ static int vtd_dev_set_iommu_device(PCIBus >> *bus, void *opaque, int32_t devfn, >>> return 0; >>> } >>> >>> + if (!vtd_check_idev(s, idev, errp)) {In >> In >> [RFC 0/7] VIRTIO-IOMMU/VFIO: Fix host iommu geometry handling for >> hotplugged devices >> https://lore.kernel.org/all/20240117080414.316890-1- >> eric.auger@redhat.com/ >> >> I also attempt to pass host iommu info to the virtio-iommu but with >> legacy BE. > I think your patch works with iommufd BE too
>-----Original Message----- >From: Eric Auger <eric.auger@redhat.com> >Subject: Re: [PATCH rfcv1 6/6] intel_iommu: add a framework to check and >sync host IOMMU cap/ecap > > > >On 1/18/24 10:30, Duan, Zhenzhong wrote: >> Hi Eric, >> >>> -----Original Message----- >>> From: Eric Auger <eric.auger@redhat.com> >>> Subject: Re: [PATCH rfcv1 6/6] intel_iommu: add a framework to check >and >>> sync host IOMMU cap/ecap >>> >>> Hi Zhenzhong, >>> >>> On 1/15/24 11:13, Zhenzhong Duan wrote: >>>> From: Yi Liu <yi.l.liu@intel.com> >>>> >>>> Add a framework to check and synchronize host IOMMU cap/ecap with >>>> vIOMMU cap/ecap. >>>> >>>> Currently only stage-2 translation is supported which is backed by >>>> shadow page table on host side. So we don't need exact matching of >>>> each bit of cap/ecap between vIOMMU and host. However, we can still >>>> utilize this framework to ensure compatibility of host and vIOMMU's >>>> address width at least, i.e., vIOMMU's aw_bits <= host aw_bits, >>>> which is missed before. >>>> >>>> When stage-1 translation is supported in future, a.k.a. scalable >>>> modern mode, we need to ensure compatibility of each bits. Some >>>> bits are user controllable, they should be checked with host side >>>> to ensure compatibility. Other bits are not, they should be synced >>>> into vIOMMU cap/ecap for compatibility. >>>> >>>> The sequence will be: >>>> >>>> vtd_cap_init() initializes iommu->cap/ecap. ---- vtd_cap_init() >>>> iommu->host_cap/ecap is initialized as iommu->cap/ecap. ---- vtd_init() >>>> iommu->host_cap/ecap is checked and updated some bits with host >>> cap/ecap. ---- vtd_sync_hw_info() >>>> iommu->cap/ecap is finalized as iommu->host_cap/ecap. ---- >>> vtd_machine_done_hook() >>>> iommu->host_cap/ecap is a temporary storage to hold intermediate >value >>>> when synthesize host cap/ecap and vIOMMU's initial configured >cap/ecap. >>>> >>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >>>> --- >>>> include/hw/i386/intel_iommu.h | 4 ++ >>>> hw/i386/intel_iommu.c | 78 >>> +++++++++++++++++++++++++++++++---- >>>> 2 files changed, 75 insertions(+), 7 deletions(-) >>>> >>>> diff --git a/include/hw/i386/intel_iommu.h >>> b/include/hw/i386/intel_iommu.h >>>> index c65fdde56f..b8abbcce12 100644 >>>> --- a/include/hw/i386/intel_iommu.h >>>> +++ b/include/hw/i386/intel_iommu.h >>>> @@ -292,6 +292,9 @@ struct IntelIOMMUState { >>>> uint64_t cap; /* The value of capability reg */ >>>> uint64_t ecap; /* The value of extended capability reg */ >>>> >>>> + uint64_t host_cap; /* The value of host capability reg */ >>>> + uint64_t host_ecap; /* The value of host ext-capability reg */ >>>> + >>>> uint32_t context_cache_gen; /* Should be in [1,MAX] */ >>>> GHashTable *iotlb; /* IOTLB */ >>>> >>>> @@ -314,6 +317,7 @@ struct IntelIOMMUState { >>>> bool dma_translation; /* Whether DMA translation supported >*/ >>>> bool pasid; /* Whether to support PASID */ >>>> >>>> + bool cap_finalized; /* Whether VTD capability finalized */ >>>> /* >>>> * Protects IOMMU states in general. Currently it protects the >>>> * per-IOMMU IOTLB cache, and context entry cache in >>> VTDAddressSpace. >>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >>>> index 4c1d058ebd..be03fcbf52 100644 >>>> --- a/hw/i386/intel_iommu.c >>>> +++ b/hw/i386/intel_iommu.c >>>> @@ -3819,6 +3819,47 @@ VTDAddressSpace >>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, >>>> return vtd_dev_as; >>>> } >>>> >>>> +static bool vtd_sync_hw_info(IntelIOMMUState *s, struct >>> iommu_hw_info_vtd *vtd, >>>> + Error **errp) >>>> +{ >>>> + uint64_t addr_width; >>>> + >>>> + addr_width = (vtd->cap_reg >> 16) & 0x3fULL; >>>> + if (s->aw_bits > addr_width) { >>>> + error_setg(errp, "User aw-bits: %u > host address width: %lu", >>>> + s->aw_bits, addr_width); >>>> + return false; >>>> + } >>>> + >>>> + /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */ >>>> + >>>> + return true; >>>> +} >>>> + >>>> +/* >>>> + * virtual VT-d which wants nested needs to check the host IOMMU >>>> + * nesting cap info behind the assigned devices. Thus that vIOMMU >>>> + * could bind guest page table to host. >>>> + */ >>>> +static bool vtd_check_idev(IntelIOMMUState *s, IOMMUFDDevice >*idev, >>>> + Error **errp) >>>> +{ >>>> + struct iommu_hw_info_vtd vtd; >>>> + enum iommu_hw_info_type type = >>> IOMMU_HW_INFO_TYPE_INTEL_VTD; >>>> + >>>> + if (iommufd_device_get_info(idev, &type, sizeof(vtd), &vtd)) { >>>> + error_setg(errp, "Failed to get IOMMU capability!!!"); >>>> + return false; >>>> + } >>>> + >>>> + if (type != IOMMU_HW_INFO_TYPE_INTEL_VTD) { >>>> + error_setg(errp, "IOMMU hardware is not compatible!!!"); >>>> + return false; >>>> + } >>>> + >>>> + return vtd_sync_hw_info(s, &vtd, errp); >>>> +} >>>> + >>>> static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, >int32_t >>> devfn, >>>> IOMMUFDDevice *idev, Error **errp) >>>> { >>>> @@ -3837,6 +3878,10 @@ static int >vtd_dev_set_iommu_device(PCIBus >>> *bus, void *opaque, int32_t devfn, >>>> return 0; >>>> } >>>> >>>> + if (!vtd_check_idev(s, idev, errp)) {In >>> In >>> [RFC 0/7] VIRTIO-IOMMU/VFIO: Fix host iommu geometry handling for >>> hotplugged devices >>> https://lore.kernel.org/all/20240117080414.316890-1- >>> eric.auger@redhat.com/ >>> >>> I also attempt to pass host iommu info to the virtio-iommu but with >>> legacy BE. >> I think your patch works with iommufd BE too
On 1/15/24 11:13, Zhenzhong Duan wrote: > From: Yi Liu <yi.l.liu@intel.com> > > Add a framework to check and synchronize host IOMMU cap/ecap with > vIOMMU cap/ecap. > > Currently only stage-2 translation is supported which is backed by > shadow page table on host side. So we don't need exact matching of > each bit of cap/ecap between vIOMMU and host. However, we can still > utilize this framework to ensure compatibility of host and vIOMMU's > address width at least, i.e., vIOMMU's aw_bits <= host aw_bits, > which is missed before. > > When stage-1 translation is supported in future, a.k.a. scalable > modern mode, we need to ensure compatibility of each bits. Some > bits are user controllable, they should be checked with host side > to ensure compatibility. Other bits are not, they should be synced > into vIOMMU cap/ecap for compatibility. > > The sequence will be: > > vtd_cap_init() initializes iommu->cap/ecap. ---- vtd_cap_init() > iommu->host_cap/ecap is initialized as iommu->cap/ecap. ---- vtd_init() > iommu->host_cap/ecap is checked and updated some bits with host cap/ecap. ---- vtd_sync_hw_info() > iommu->cap/ecap is finalized as iommu->host_cap/ecap. ---- vtd_machine_done_hook() > > iommu->host_cap/ecap is a temporary storage to hold intermediate value > when synthesize host cap/ecap and vIOMMU's initial configured cap/ecap. The above "sequence" paragraph is not very clear. The patch may need to be split further. > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > include/hw/i386/intel_iommu.h | 4 ++ > hw/i386/intel_iommu.c | 78 +++++++++++++++++++++++++++++++---- > 2 files changed, 75 insertions(+), 7 deletions(-) > > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h > index c65fdde56f..b8abbcce12 100644 > --- a/include/hw/i386/intel_iommu.h > +++ b/include/hw/i386/intel_iommu.h > @@ -292,6 +292,9 @@ struct IntelIOMMUState { > uint64_t cap; /* The value of capability reg */ > uint64_t ecap; /* The value of extended capability reg */ > > + uint64_t host_cap; /* The value of host capability reg */ > + uint64_t host_ecap; /* The value of host ext-capability reg */ > + > uint32_t context_cache_gen; /* Should be in [1,MAX] */ > GHashTable *iotlb; /* IOTLB */ > > @@ -314,6 +317,7 @@ struct IntelIOMMUState { > bool dma_translation; /* Whether DMA translation supported */ > bool pasid; /* Whether to support PASID */ > > + bool cap_finalized; /* Whether VTD capability finalized */ > /* > * Protects IOMMU states in general. Currently it protects the > * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace. > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 4c1d058ebd..be03fcbf52 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -3819,6 +3819,47 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, > return vtd_dev_as; > } > > +static bool vtd_sync_hw_info(IntelIOMMUState *s, struct iommu_hw_info_vtd *vtd, > + Error **errp) > +{ > + uint64_t addr_width; > + > + addr_width = (vtd->cap_reg >> 16) & 0x3fULL; Virek uses the same kind of macro in : https://lore.kernel.org/qemu-devel/20240118192049.1796763-1-vivek.kasireddy@intel.com/ What about the + 1 ? Looks like it's missing here, according to 11.4.2 Capability Register. Could we introduce a common macro in intel_iommu_internal.h ? > + if (s->aw_bits > addr_width) { > + error_setg(errp, "User aw-bits: %u > host address width: %lu", I think %lu should be PRId64. This is a general comment. You should avoid %llx, %lx, etc. in the code. > + s->aw_bits, addr_width); > + return false; > + } > + > + /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */ > + > + return true; > +} > + > +/* > + * virtual VT-d which wants nested needs to check the host IOMMU > + * nesting cap info behind the assigned devices. Thus that vIOMMU > + * could bind guest page table to host. > + */ > +static bool vtd_check_idev(IntelIOMMUState *s, IOMMUFDDevice *idev, > + Error **errp) > +{ > + struct iommu_hw_info_vtd vtd; > + enum iommu_hw_info_type type = IOMMU_HW_INFO_TYPE_INTEL_VTD; > + > + if (iommufd_device_get_info(idev, &type, sizeof(vtd), &vtd)) { > + error_setg(errp, "Failed to get IOMMU capability!!!"); > + return false; > + } > + > + if (type != IOMMU_HW_INFO_TYPE_INTEL_VTD) { > + error_setg(errp, "IOMMU hardware is not compatible!!!"); > + return false; > + } > + > + return vtd_sync_hw_info(s, &vtd, errp); > +} > + > static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn, > IOMMUFDDevice *idev, Error **errp) > { > @@ -3837,6 +3878,10 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn, > return 0; > } > > + if (!vtd_check_idev(s, idev, errp)) { > + return -1; > + } > + > vtd_iommu_lock(s); > > vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); > @@ -4071,10 +4116,11 @@ static void vtd_init(IntelIOMMUState *s) > { > X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); > > - memset(s->csr, 0, DMAR_REG_SIZE); > - memset(s->wmask, 0, DMAR_REG_SIZE); > - memset(s->w1cmask, 0, DMAR_REG_SIZE); > - memset(s->womask, 0, DMAR_REG_SIZE); > + /* CAP/ECAP are initialized in machine create done stage */ > + memset(s->csr + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); > + memset(s->wmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); > + memset(s->w1cmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); > + memset(s->womask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); > > s->root = 0; > s->root_scalable = false; > @@ -4110,13 +4156,16 @@ static void vtd_init(IntelIOMMUState *s) vtd_init() is called from reset and from realize. This is redundant. reset should be enough. > vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; > } > > - vtd_cap_init(s); > + if (!s->cap_finalized) { ok. so this can only be done in reset. > + vtd_cap_init(s); > + s->host_cap = s->cap; > + s->host_ecap = s->ecap; > + } > + > vtd_reset_caches(s); > > /* Define registers with default values and bit semantics */ > vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0); > - vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); > - vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); > vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0); > vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL); > vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0); > @@ -4241,6 +4290,12 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) > return true; > } > > +static void vtd_setup_capability_reg(IntelIOMMUState *s) > +{ > + vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); > + vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); > +} > + > static int vtd_machine_done_notify_one(Object *child, void *unused) > { > IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); > @@ -4259,6 +4314,14 @@ static int vtd_machine_done_notify_one(Object *child, void *unused) > > static void vtd_machine_done_hook(Notifier *notifier, void *unused) > { > + IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); > + > + iommu->cap = iommu->host_cap; > + iommu->ecap = iommu->host_ecap; > + iommu->cap_finalized = true; > + > + vtd_setup_capability_reg(iommu); > + This is confusing. Please split the patch better to reflect the ordering of the e/cap register settings. Thanks, C. > object_child_foreach_recursive(object_get_root(), > vtd_machine_done_notify_one, NULL); > } > @@ -4292,6 +4355,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) > > QLIST_INIT(&s->vtd_as_with_notifiers); > qemu_mutex_init(&s->iommu_lock); > + s->cap_finalized = false; > memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, > "intel_iommu", DMAR_REG_SIZE); > memory_region_add_subregion(get_system_memory(),
>-----Original Message----- >From: Cédric Le Goater <clg@redhat.com> >Subject: Re: [PATCH rfcv1 6/6] intel_iommu: add a framework to check and >sync host IOMMU cap/ecap > >On 1/15/24 11:13, Zhenzhong Duan wrote: >> From: Yi Liu <yi.l.liu@intel.com> >> >> Add a framework to check and synchronize host IOMMU cap/ecap with >> vIOMMU cap/ecap. >> >> Currently only stage-2 translation is supported which is backed by >> shadow page table on host side. So we don't need exact matching of >> each bit of cap/ecap between vIOMMU and host. However, we can still >> utilize this framework to ensure compatibility of host and vIOMMU's >> address width at least, i.e., vIOMMU's aw_bits <= host aw_bits, >> which is missed before. >> >> When stage-1 translation is supported in future, a.k.a. scalable >> modern mode, we need to ensure compatibility of each bits. Some >> bits are user controllable, they should be checked with host side >> to ensure compatibility. Other bits are not, they should be synced >> into vIOMMU cap/ecap for compatibility. >> >> The sequence will be: >> >> vtd_cap_init() initializes iommu->cap/ecap. ---- vtd_cap_init() >> iommu->host_cap/ecap is initialized as iommu->cap/ecap. ---- vtd_init() >> iommu->host_cap/ecap is checked and updated some bits with host >cap/ecap. ---- vtd_sync_hw_info() >> iommu->cap/ecap is finalized as iommu->host_cap/ecap. ---- >vtd_machine_done_hook() >> >> iommu->host_cap/ecap is a temporary storage to hold intermediate value >> when synthesize host cap/ecap and vIOMMU's initial configured cap/ecap. > > >The above "sequence" paragraph is not very clear. The patch may need to >be split further. OK, will do. > > > >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >> --- >> include/hw/i386/intel_iommu.h | 4 ++ >> hw/i386/intel_iommu.c | 78 >+++++++++++++++++++++++++++++++---- >> 2 files changed, 75 insertions(+), 7 deletions(-) >> >> diff --git a/include/hw/i386/intel_iommu.h >b/include/hw/i386/intel_iommu.h >> index c65fdde56f..b8abbcce12 100644 >> --- a/include/hw/i386/intel_iommu.h >> +++ b/include/hw/i386/intel_iommu.h >> @@ -292,6 +292,9 @@ struct IntelIOMMUState { >> uint64_t cap; /* The value of capability reg */ >> uint64_t ecap; /* The value of extended capability reg */ >> >> + uint64_t host_cap; /* The value of host capability reg */ >> + uint64_t host_ecap; /* The value of host ext-capability reg */ >> + >> uint32_t context_cache_gen; /* Should be in [1,MAX] */ >> GHashTable *iotlb; /* IOTLB */ >> >> @@ -314,6 +317,7 @@ struct IntelIOMMUState { >> bool dma_translation; /* Whether DMA translation supported */ >> bool pasid; /* Whether to support PASID */ >> >> + bool cap_finalized; /* Whether VTD capability finalized */ >> /* >> * Protects IOMMU states in general. Currently it protects the >> * per-IOMMU IOTLB cache, and context entry cache in >VTDAddressSpace. >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index 4c1d058ebd..be03fcbf52 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -3819,6 +3819,47 @@ VTDAddressSpace >*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, >> return vtd_dev_as; >> } >> >> +static bool vtd_sync_hw_info(IntelIOMMUState *s, struct >iommu_hw_info_vtd *vtd, >> + Error **errp) >> +{ >> + uint64_t addr_width; >> + >> + addr_width = (vtd->cap_reg >> 16) & 0x3fULL; > >Virek uses the same kind of macro in : > > https://lore.kernel.org/qemu-devel/20240118192049.1796763-1- >vivek.kasireddy@intel.com/ > >What about the + 1 ? Looks like it's missing here, according to 11.4.2 >Capability Register. > >Could we introduce a common macro in intel_iommu_internal.h ? Sure. > > >> + if (s->aw_bits > addr_width) { >> + error_setg(errp, "User aw-bits: %u > host address width: %lu", > >I think %lu should be PRId64. This is a general comment. You should avoid >%llx, %lx, etc. in the code. Got it. > >> + s->aw_bits, addr_width); >> + return false; >> + } >> + >> + /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */ >> + >> + return true; >> +} >> + >> +/* >> + * virtual VT-d which wants nested needs to check the host IOMMU >> + * nesting cap info behind the assigned devices. Thus that vIOMMU >> + * could bind guest page table to host. >> + */ >> +static bool vtd_check_idev(IntelIOMMUState *s, IOMMUFDDevice *idev, >> + Error **errp) >> +{ >> + struct iommu_hw_info_vtd vtd; >> + enum iommu_hw_info_type type = >IOMMU_HW_INFO_TYPE_INTEL_VTD; >> + >> + if (iommufd_device_get_info(idev, &type, sizeof(vtd), &vtd)) { >> + error_setg(errp, "Failed to get IOMMU capability!!!"); >> + return false; >> + } >> + >> + if (type != IOMMU_HW_INFO_TYPE_INTEL_VTD) { >> + error_setg(errp, "IOMMU hardware is not compatible!!!"); >> + return false; >> + } >> + >> + return vtd_sync_hw_info(s, &vtd, errp); >> +} >> + >> static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, >int32_t devfn, >> IOMMUFDDevice *idev, Error **errp) >> { >> @@ -3837,6 +3878,10 @@ static int vtd_dev_set_iommu_device(PCIBus >*bus, void *opaque, int32_t devfn, >> return 0; >> } >> >> + if (!vtd_check_idev(s, idev, errp)) { >> + return -1; >> + } >> + >> vtd_iommu_lock(s); >> >> vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); >> @@ -4071,10 +4116,11 @@ static void vtd_init(IntelIOMMUState *s) >> { >> X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); >> >> - memset(s->csr, 0, DMAR_REG_SIZE); >> - memset(s->wmask, 0, DMAR_REG_SIZE); >> - memset(s->w1cmask, 0, DMAR_REG_SIZE); >> - memset(s->womask, 0, DMAR_REG_SIZE); >> + /* CAP/ECAP are initialized in machine create done stage */ >> + memset(s->csr + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - >DMAR_GCMD_REG); >> + memset(s->wmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - >DMAR_GCMD_REG); >> + memset(s->w1cmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - >DMAR_GCMD_REG); >> + memset(s->womask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - >DMAR_GCMD_REG); >> >> s->root = 0; >> s->root_scalable = false; >> @@ -4110,13 +4156,16 @@ static void vtd_init(IntelIOMMUState *s) > >vtd_init() is called from reset and from realize. This is redundant. >reset should be enough. Looks so. I'll try to see if it break anything. > > >> vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; >> } >> >> - vtd_cap_init(s); >> + if (!s->cap_finalized) { > >ok. so this can only be done in reset. Not quite get, vtd_init can be called multiple times harmlessly before machine create done. But once is enough, i.e., in reset. The call in realize looks redundant. > >> + vtd_cap_init(s); >> + s->host_cap = s->cap; >> + s->host_ecap = s->ecap; >> + } >> + >> vtd_reset_caches(s); >> >> /* Define registers with default values and bit semantics */ >> vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0); >> - vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); >> - vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); >> vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0); >> vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL); >> vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0); >> @@ -4241,6 +4290,12 @@ static bool >vtd_decide_config(IntelIOMMUState *s, Error **errp) >> return true; >> } >> >> +static void vtd_setup_capability_reg(IntelIOMMUState *s) >> +{ >> + vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); >> + vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); >> +} >> + >> static int vtd_machine_done_notify_one(Object *child, void *unused) >> { >> IntelIOMMUState *iommu = >INTEL_IOMMU_DEVICE(x86_iommu_get_default()); >> @@ -4259,6 +4314,14 @@ static int >vtd_machine_done_notify_one(Object *child, void *unused) >> >> static void vtd_machine_done_hook(Notifier *notifier, void *unused) >> { >> + IntelIOMMUState *iommu = >INTEL_IOMMU_DEVICE(x86_iommu_get_default()); >> + >> + iommu->cap = iommu->host_cap; >> + iommu->ecap = iommu->host_ecap; >> + iommu->cap_finalized = true; >> + >> + vtd_setup_capability_reg(iommu); >> + > >This is confusing. Please split the patch better to reflect the ordering of >the e/cap register settings. Will do. Thanks Zhenzhong
On 1/19/24 12:55, Duan, Zhenzhong wrote: > >> -----Original Message----- >> From: Eric Auger <eric.auger@redhat.com> >> Subject: Re: [PATCH rfcv1 6/6] intel_iommu: add a framework to check and >> sync host IOMMU cap/ecap >> >> >> >> On 1/18/24 10:30, Duan, Zhenzhong wrote: >>> Hi Eric, >>> >>>> -----Original Message----- >>>> From: Eric Auger <eric.auger@redhat.com> >>>> Subject: Re: [PATCH rfcv1 6/6] intel_iommu: add a framework to check >> and >>>> sync host IOMMU cap/ecap >>>> >>>> Hi Zhenzhong, >>>> >>>> On 1/15/24 11:13, Zhenzhong Duan wrote: >>>>> From: Yi Liu <yi.l.liu@intel.com> >>>>> >>>>> Add a framework to check and synchronize host IOMMU cap/ecap with >>>>> vIOMMU cap/ecap. >>>>> >>>>> Currently only stage-2 translation is supported which is backed by >>>>> shadow page table on host side. So we don't need exact matching of >>>>> each bit of cap/ecap between vIOMMU and host. However, we can still >>>>> utilize this framework to ensure compatibility of host and vIOMMU's >>>>> address width at least, i.e., vIOMMU's aw_bits <= host aw_bits, >>>>> which is missed before. >>>>> >>>>> When stage-1 translation is supported in future, a.k.a. scalable >>>>> modern mode, we need to ensure compatibility of each bits. Some >>>>> bits are user controllable, they should be checked with host side >>>>> to ensure compatibility. Other bits are not, they should be synced >>>>> into vIOMMU cap/ecap for compatibility. >>>>> >>>>> The sequence will be: >>>>> >>>>> vtd_cap_init() initializes iommu->cap/ecap. ---- vtd_cap_init() >>>>> iommu->host_cap/ecap is initialized as iommu->cap/ecap. ---- vtd_init() >>>>> iommu->host_cap/ecap is checked and updated some bits with host >>>> cap/ecap. ---- vtd_sync_hw_info() >>>>> iommu->cap/ecap is finalized as iommu->host_cap/ecap. ---- >>>> vtd_machine_done_hook() >>>>> iommu->host_cap/ecap is a temporary storage to hold intermediate >> value >>>>> when synthesize host cap/ecap and vIOMMU's initial configured >> cap/ecap. >>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >>>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >>>>> --- >>>>> include/hw/i386/intel_iommu.h | 4 ++ >>>>> hw/i386/intel_iommu.c | 78 >>>> +++++++++++++++++++++++++++++++---- >>>>> 2 files changed, 75 insertions(+), 7 deletions(-) >>>>> >>>>> diff --git a/include/hw/i386/intel_iommu.h >>>> b/include/hw/i386/intel_iommu.h >>>>> index c65fdde56f..b8abbcce12 100644 >>>>> --- a/include/hw/i386/intel_iommu.h >>>>> +++ b/include/hw/i386/intel_iommu.h >>>>> @@ -292,6 +292,9 @@ struct IntelIOMMUState { >>>>> uint64_t cap; /* The value of capability reg */ >>>>> uint64_t ecap; /* The value of extended capability reg */ >>>>> >>>>> + uint64_t host_cap; /* The value of host capability reg */ >>>>> + uint64_t host_ecap; /* The value of host ext-capability reg */ >>>>> + >>>>> uint32_t context_cache_gen; /* Should be in [1,MAX] */ >>>>> GHashTable *iotlb; /* IOTLB */ >>>>> >>>>> @@ -314,6 +317,7 @@ struct IntelIOMMUState { >>>>> bool dma_translation; /* Whether DMA translation supported >> */ >>>>> bool pasid; /* Whether to support PASID */ >>>>> >>>>> + bool cap_finalized; /* Whether VTD capability finalized */ >>>>> /* >>>>> * Protects IOMMU states in general. Currently it protects the >>>>> * per-IOMMU IOTLB cache, and context entry cache in >>>> VTDAddressSpace. >>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >>>>> index 4c1d058ebd..be03fcbf52 100644 >>>>> --- a/hw/i386/intel_iommu.c >>>>> +++ b/hw/i386/intel_iommu.c >>>>> @@ -3819,6 +3819,47 @@ VTDAddressSpace >>>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, >>>>> return vtd_dev_as; >>>>> } >>>>> >>>>> +static bool vtd_sync_hw_info(IntelIOMMUState *s, struct >>>> iommu_hw_info_vtd *vtd, >>>>> + Error **errp) >>>>> +{ >>>>> + uint64_t addr_width; >>>>> + >>>>> + addr_width = (vtd->cap_reg >> 16) & 0x3fULL; >>>>> + if (s->aw_bits > addr_width) { >>>>> + error_setg(errp, "User aw-bits: %u > host address width: %lu", >>>>> + s->aw_bits, addr_width); >>>>> + return false; >>>>> + } >>>>> + >>>>> + /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */ >>>>> + >>>>> + return true; >>>>> +} >>>>> + >>>>> +/* >>>>> + * virtual VT-d which wants nested needs to check the host IOMMU >>>>> + * nesting cap info behind the assigned devices. Thus that vIOMMU >>>>> + * could bind guest page table to host. >>>>> + */ >>>>> +static bool vtd_check_idev(IntelIOMMUState *s, IOMMUFDDevice >> *idev, >>>>> + Error **errp) >>>>> +{ >>>>> + struct iommu_hw_info_vtd vtd; >>>>> + enum iommu_hw_info_type type = >>>> IOMMU_HW_INFO_TYPE_INTEL_VTD; >>>>> + >>>>> + if (iommufd_device_get_info(idev, &type, sizeof(vtd), &vtd)) { >>>>> + error_setg(errp, "Failed to get IOMMU capability!!!"); >>>>> + return false; >>>>> + } >>>>> + >>>>> + if (type != IOMMU_HW_INFO_TYPE_INTEL_VTD) { >>>>> + error_setg(errp, "IOMMU hardware is not compatible!!!"); >>>>> + return false; >>>>> + } >>>>> + >>>>> + return vtd_sync_hw_info(s, &vtd, errp); >>>>> +} >>>>> + >>>>> static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, >> int32_t >>>> devfn, >>>>> IOMMUFDDevice *idev, Error **errp) >>>>> { >>>>> @@ -3837,6 +3878,10 @@ static int >> vtd_dev_set_iommu_device(PCIBus >>>> *bus, void *opaque, int32_t devfn, >>>>> return 0; >>>>> } >>>>> >>>>> + if (!vtd_check_idev(s, idev, errp)) {In >>>> In >>>> [RFC 0/7] VIRTIO-IOMMU/VFIO: Fix host iommu geometry handling for >>>> hotplugged devices >>>> https://lore.kernel.org/all/20240117080414.316890-1- >>>> eric.auger@redhat.com/ >>>> >>>> I also attempt to pass host iommu info to the virtio-iommu but with >>>> legacy BE. >>> I think your patch works with iommufd BE too
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index c65fdde56f..b8abbcce12 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -292,6 +292,9 @@ struct IntelIOMMUState { uint64_t cap; /* The value of capability reg */ uint64_t ecap; /* The value of extended capability reg */ + uint64_t host_cap; /* The value of host capability reg */ + uint64_t host_ecap; /* The value of host ext-capability reg */ + uint32_t context_cache_gen; /* Should be in [1,MAX] */ GHashTable *iotlb; /* IOTLB */ @@ -314,6 +317,7 @@ struct IntelIOMMUState { bool dma_translation; /* Whether DMA translation supported */ bool pasid; /* Whether to support PASID */ + bool cap_finalized; /* Whether VTD capability finalized */ /* * Protects IOMMU states in general. Currently it protects the * per-IOMMU IOTLB cache, and context entry cache in VTDAddressSpace. diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 4c1d058ebd..be03fcbf52 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3819,6 +3819,47 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, return vtd_dev_as; } +static bool vtd_sync_hw_info(IntelIOMMUState *s, struct iommu_hw_info_vtd *vtd, + Error **errp) +{ + uint64_t addr_width; + + addr_width = (vtd->cap_reg >> 16) & 0x3fULL; + if (s->aw_bits > addr_width) { + error_setg(errp, "User aw-bits: %u > host address width: %lu", + s->aw_bits, addr_width); + return false; + } + + /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */ + + return true; +} + +/* + * virtual VT-d which wants nested needs to check the host IOMMU + * nesting cap info behind the assigned devices. Thus that vIOMMU + * could bind guest page table to host. + */ +static bool vtd_check_idev(IntelIOMMUState *s, IOMMUFDDevice *idev, + Error **errp) +{ + struct iommu_hw_info_vtd vtd; + enum iommu_hw_info_type type = IOMMU_HW_INFO_TYPE_INTEL_VTD; + + if (iommufd_device_get_info(idev, &type, sizeof(vtd), &vtd)) { + error_setg(errp, "Failed to get IOMMU capability!!!"); + return false; + } + + if (type != IOMMU_HW_INFO_TYPE_INTEL_VTD) { + error_setg(errp, "IOMMU hardware is not compatible!!!"); + return false; + } + + return vtd_sync_hw_info(s, &vtd, errp); +} + static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn, IOMMUFDDevice *idev, Error **errp) { @@ -3837,6 +3878,10 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int32_t devfn, return 0; } + if (!vtd_check_idev(s, idev, errp)) { + return -1; + } + vtd_iommu_lock(s); vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); @@ -4071,10 +4116,11 @@ static void vtd_init(IntelIOMMUState *s) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); - memset(s->csr, 0, DMAR_REG_SIZE); - memset(s->wmask, 0, DMAR_REG_SIZE); - memset(s->w1cmask, 0, DMAR_REG_SIZE); - memset(s->womask, 0, DMAR_REG_SIZE); + /* CAP/ECAP are initialized in machine create done stage */ + memset(s->csr + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); + memset(s->wmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); + memset(s->w1cmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); + memset(s->womask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - DMAR_GCMD_REG); s->root = 0; s->root_scalable = false; @@ -4110,13 +4156,16 @@ static void vtd_init(IntelIOMMUState *s) vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; } - vtd_cap_init(s); + if (!s->cap_finalized) { + vtd_cap_init(s); + s->host_cap = s->cap; + s->host_ecap = s->ecap; + } + vtd_reset_caches(s); /* Define registers with default values and bit semantics */ vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0); - vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); - vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0); vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL); vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0); @@ -4241,6 +4290,12 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) return true; } +static void vtd_setup_capability_reg(IntelIOMMUState *s) +{ + vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); + vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); +} + static int vtd_machine_done_notify_one(Object *child, void *unused) { IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); @@ -4259,6 +4314,14 @@ static int vtd_machine_done_notify_one(Object *child, void *unused) static void vtd_machine_done_hook(Notifier *notifier, void *unused) { + IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default()); + + iommu->cap = iommu->host_cap; + iommu->ecap = iommu->host_ecap; + iommu->cap_finalized = true; + + vtd_setup_capability_reg(iommu); + object_child_foreach_recursive(object_get_root(), vtd_machine_done_notify_one, NULL); } @@ -4292,6 +4355,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) QLIST_INIT(&s->vtd_as_with_notifiers); qemu_mutex_init(&s->iommu_lock); + s->cap_finalized = false; memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s, "intel_iommu", DMAR_REG_SIZE); memory_region_add_subregion(get_system_memory(),