Message ID | 20231221153948.119007-10-yi.l.liu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add iommufd nesting (part 2/2) | expand |
On 12/21/2023 11:39 PM, Yi Liu wrote: > From: Lu Baolu <baolu.lu@linux.intel.com> > > This implements the .cache_invalidate_user() callback to support iotlb > flush for nested domain. > > Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> > Co-developed-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > --- > drivers/iommu/intel/nested.c | 116 +++++++++++++++++++++++++++++++++++ > 1 file changed, 116 insertions(+) > > diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c > index b5a5563ab32c..c665e2647045 100644 > --- a/drivers/iommu/intel/nested.c > +++ b/drivers/iommu/intel/nested.c > @@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct iommu_domain *domain) > kfree(to_dmar_domain(domain)); > } > > +static void nested_flush_pasid_iotlb(struct intel_iommu *iommu, > + struct dmar_domain *domain, u64 addr, > + unsigned long npages, bool ih) > +{ > + u16 did = domain_id_iommu(domain, iommu); > + unsigned long flags; > + > + spin_lock_irqsave(&domain->lock, flags); > + if (!list_empty(&domain->devices)) > + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, > + npages, ih, NULL); > + spin_unlock_irqrestore(&domain->lock, flags); > +} > + > +static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, > + unsigned mask, u32 *fault) > +{ > + struct device_domain_info *info; > + unsigned long flags; > + u16 sid, qdep; > + > + spin_lock_irqsave(&domain->lock, flags); > + list_for_each_entry(info, &domain->devices, link) { > + if (!info->ats_enabled) > + continue; > + sid = info->bus << 8 | info->devfn; > + qdep = info->ats_qdep; > + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, > + qdep, addr, mask, fault); > + quirk_extra_dev_tlb_flush(info, addr, mask, > + IOMMU_NO_PASID, qdep); > + } > + spin_unlock_irqrestore(&domain->lock, flags); > +} > + > +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr, > + unsigned long npages, u32 *error) > +{ > + struct iommu_domain_info *info; > + unsigned long i; > + unsigned mask; > + u32 fault = 0; > + > + if (npages == U64_MAX) > + mask = 64 - VTD_PAGE_SHIFT; > + else > + mask = ilog2(__roundup_pow_of_two(npages)); > + > + xa_for_each(&domain->iommu_array, i, info) { > + nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0); > + > + if (domain->has_iotlb_device) > + continue; Shouldn't this be if (!domain->has_iotlb_device)? > + > + nested_flush_dev_iotlb(domain, addr, mask, &fault); > + if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE)) > + break; > + } > + > + if (fault & DMA_FSTS_ICE) > + *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE; > + if (fault & DMA_FSTS_ITE) > + *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE; > +} > + > +static int intel_nested_cache_invalidate_user(struct iommu_domain *domain, > + struct iommu_user_data_array *array) > +{ > + struct dmar_domain *dmar_domain = to_dmar_domain(domain); > + struct iommu_hwpt_vtd_s1_invalidate inv_entry; > + u32 processed = 0; > + int ret = 0; > + u32 index; > + > + if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) { > + ret = -EINVAL; > + goto out; > + } > + > + for (index = 0; index < array->entry_num; index++) { > + ret = iommu_copy_struct_from_user_array(&inv_entry, array, > + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, > + index, inv_error); > + if (ret) > + break; > + > + if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) { > + ret = -EOPNOTSUPP; > + break; > + } > + > + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) || > + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) { > + ret = -EINVAL; > + break; > + } > + > + inv_entry.inv_error = 0; > + intel_nested_flush_cache(dmar_domain, inv_entry.addr, > + inv_entry.npages, &inv_entry.inv_error); > + > + ret = iommu_respond_struct_to_user_array(array, index, > + (void *)&inv_entry, > + sizeof(inv_entry)); > + if (ret) > + break; > + > + processed++; > + } > + > +out: > + array->entry_num = processed; > + return ret; > +} > + > static const struct iommu_domain_ops intel_nested_domain_ops = { > .attach_dev = intel_nested_attach_dev, > .free = intel_nested_domain_free, > + .cache_invalidate_user = intel_nested_cache_invalidate_user, > }; > > struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
> From: Yang, Weijiang <weijiang.yang@intel.com> > Sent: Friday, December 22, 2023 11:56 AM > > + > > + xa_for_each(&domain->iommu_array, i, info) { > > + nested_flush_pasid_iotlb(info->iommu, domain, addr, > npages, 0); > > + > > + if (domain->has_iotlb_device) > > + continue; > > Shouldn't this be if (!domain->has_iotlb_device)? yes that is wrong. actually it's weird to put domain check in a loop of domain->iommu_array. that check along with devtlb flush should be done out of that loop.
> From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Thursday, December 21, 2023 11:40 PM > > + > +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 > addr, > + unsigned long npages, u32 *error) > +{ > + struct iommu_domain_info *info; > + unsigned long i; > + unsigned mask; > + u32 fault = 0; > + > + if (npages == U64_MAX) > + mask = 64 - VTD_PAGE_SHIFT; > + else > + mask = ilog2(__roundup_pow_of_two(npages)); > + > + xa_for_each(&domain->iommu_array, i, info) { > + nested_flush_pasid_iotlb(info->iommu, domain, addr, > npages, 0); so IOMMU_VTD_INV_FLAGS_LEAF is defined but ignored? > + > + if (domain->has_iotlb_device) > + continue; > + > + nested_flush_dev_iotlb(domain, addr, mask, &fault); > + if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE)) > + break; here you may add a note that we don't plan to forward invalidation queue error (i.e. IQE) to the caller as it's caused only by driver internal bug. > + > + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) || > + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) { > + ret = -EINVAL; > + break; > + } > + why is [non-zero-addr, U64_MAX] an error? Is it explicitly stated to be not supported by underlying helpers?
> On Dec 22, 2023, at 11:56, Yang, Weijiang <weijiang.yang@intel.com> wrote: > > On 12/21/2023 11:39 PM, Yi Liu wrote: >> From: Lu Baolu <baolu.lu@linux.intel.com> >> >> This implements the .cache_invalidate_user() callback to support iotlb >> flush for nested domain. >> >> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> >> Co-developed-by: Yi Liu <yi.l.liu@intel.com> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> --- >> drivers/iommu/intel/nested.c | 116 +++++++++++++++++++++++++++++++++++ >> 1 file changed, 116 insertions(+) >> >> diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c >> index b5a5563ab32c..c665e2647045 100644 >> --- a/drivers/iommu/intel/nested.c >> +++ b/drivers/iommu/intel/nested.c >> @@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct iommu_domain *domain) >> kfree(to_dmar_domain(domain)); >> } >> +static void nested_flush_pasid_iotlb(struct intel_iommu *iommu, >> + struct dmar_domain *domain, u64 addr, >> + unsigned long npages, bool ih) >> +{ >> + u16 did = domain_id_iommu(domain, iommu); >> + unsigned long flags; >> + >> + spin_lock_irqsave(&domain->lock, flags); >> + if (!list_empty(&domain->devices)) >> + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, >> + npages, ih, NULL); >> + spin_unlock_irqrestore(&domain->lock, flags); >> +} >> + >> +static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, >> + unsigned mask, u32 *fault) >> +{ >> + struct device_domain_info *info; >> + unsigned long flags; >> + u16 sid, qdep; >> + >> + spin_lock_irqsave(&domain->lock, flags); >> + list_for_each_entry(info, &domain->devices, link) { >> + if (!info->ats_enabled) >> + continue; >> + sid = info->bus << 8 | info->devfn; >> + qdep = info->ats_qdep; >> + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, >> + qdep, addr, mask, fault); >> + quirk_extra_dev_tlb_flush(info, addr, mask, >> + IOMMU_NO_PASID, qdep); >> + } >> + spin_unlock_irqrestore(&domain->lock, flags); >> +} >> + >> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr, >> + unsigned long npages, u32 *error) >> +{ >> + struct iommu_domain_info *info; >> + unsigned long i; >> + unsigned mask; >> + u32 fault = 0; >> + >> + if (npages == U64_MAX) >> + mask = 64 - VTD_PAGE_SHIFT; >> + else >> + mask = ilog2(__roundup_pow_of_two(npages)); >> + >> + xa_for_each(&domain->iommu_array, i, info) { >> + nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0); >> + >> + if (domain->has_iotlb_device) >> + continue; > > Shouldn't this be if (!domain->has_iotlb_device)? oops, yes it is. >> + >> + nested_flush_dev_iotlb(domain, addr, mask, &fault); >> + if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE)) >> + break; >> + } >> + >> + if (fault & DMA_FSTS_ICE) >> + *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE; >> + if (fault & DMA_FSTS_ITE) >> + *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE; >> +} >> + >> +static int intel_nested_cache_invalidate_user(struct iommu_domain *domain, >> + struct iommu_user_data_array *array) >> +{ >> + struct dmar_domain *dmar_domain = to_dmar_domain(domain); >> + struct iommu_hwpt_vtd_s1_invalidate inv_entry; >> + u32 processed = 0; >> + int ret = 0; >> + u32 index; >> + >> + if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) { >> + ret = -EINVAL; >> + goto out; >> + } >> + >> + for (index = 0; index < array->entry_num; index++) { >> + ret = iommu_copy_struct_from_user_array(&inv_entry, array, >> + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, >> + index, inv_error); >> + if (ret) >> + break; >> + >> + if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) { >> + ret = -EOPNOTSUPP; >> + break; >> + } >> + >> + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) || >> + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) { >> + ret = -EINVAL; >> + break; >> + } >> + >> + inv_entry.inv_error = 0; >> + intel_nested_flush_cache(dmar_domain, inv_entry.addr, >> + inv_entry.npages, &inv_entry.inv_error); >> + >> + ret = iommu_respond_struct_to_user_array(array, index, >> + (void *)&inv_entry, >> + sizeof(inv_entry)); >> + if (ret) >> + break; >> + >> + processed++; >> + } >> + >> +out: >> + array->entry_num = processed; >> + return ret; >> +} >> + >> static const struct iommu_domain_ops intel_nested_domain_ops = { >> .attach_dev = intel_nested_attach_dev, >> .free = intel_nested_domain_free, >> + .cache_invalidate_user = intel_nested_cache_invalidate_user, >> }; >> struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, >
> On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote: > > >> >> From: Yang, Weijiang <weijiang.yang@intel.com> >> Sent: Friday, December 22, 2023 11:56 AM >>> + >>> + xa_for_each(&domain->iommu_array, i, info) { >>> + nested_flush_pasid_iotlb(info->iommu, domain, addr, >> npages, 0); >>> + >>> + if (domain->has_iotlb_device) >>> + continue; >> >> Shouldn't this be if (!domain->has_iotlb_device)? > > yes that is wrong. > > actually it's weird to put domain check in a loop of domain->iommu_array. > > that check along with devtlb flush should be done out of that loop. Maybe adding a bool, set it out of the loop, check the bool in the loop.
> From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Friday, December 22, 2023 3:02 PM > > > > On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote: > > > > > >> > >> From: Yang, Weijiang <weijiang.yang@intel.com> > >> Sent: Friday, December 22, 2023 11:56 AM > >>> + > >>> + xa_for_each(&domain->iommu_array, i, info) { > >>> + nested_flush_pasid_iotlb(info->iommu, domain, addr, > >> npages, 0); > >>> + > >>> + if (domain->has_iotlb_device) > >>> + continue; > >> > >> Shouldn't this be if (!domain->has_iotlb_device)? > > > > yes that is wrong. > > > > actually it's weird to put domain check in a loop of domain->iommu_array. > > > > that check along with devtlb flush should be done out of that loop. > > Maybe adding a bool, set it out of the loop, check the bool in the loop. the point is that dev iotlb doesn't rely on info->iommu: nested_flush_dev_iotlb(domain, addr, mask, &fault); then why do it in the loop of info->iommu?
> On Dec 22, 2023, at 15:12, Tian, Kevin <kevin.tian@intel.com> wrote: > > >> >> From: Liu, Yi L <yi.l.liu@intel.com> >> Sent: Friday, December 22, 2023 3:02 PM >> >> >>>> On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote: >>> >>> >>>> >>>> From: Yang, Weijiang <weijiang.yang@intel.com> >>>> Sent: Friday, December 22, 2023 11:56 AM >>>>> + >>>>> + xa_for_each(&domain->iommu_array, i, info) { >>>>> + nested_flush_pasid_iotlb(info->iommu, domain, addr, >>>> npages, 0); >>>>> + >>>>> + if (domain->has_iotlb_device) >>>>> + continue; >>>> >>>> Shouldn't this be if (!domain->has_iotlb_device)? >>> >>> yes that is wrong. >>> >>> actually it's weird to put domain check in a loop of domain->iommu_array. >>> >>> that check along with devtlb flush should be done out of that loop. >> >> Maybe adding a bool, set it out of the loop, check the bool in the loop. > > the point is that dev iotlb doesn't rely on info->iommu: > > nested_flush_dev_iotlb(domain, addr, mask, &fault); > > then why do it in the loop of info->iommu? yes. It should have another device loop instead.
On 2023/12/22 14:57, Tian, Kevin wrote: >> From: Liu, Yi L <yi.l.liu@intel.com> >> Sent: Thursday, December 21, 2023 11:40 PM >> >> + >> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 >> addr, >> + unsigned long npages, u32 *error) >> +{ >> + struct iommu_domain_info *info; >> + unsigned long i; >> + unsigned mask; >> + u32 fault = 0; >> + >> + if (npages == U64_MAX) >> + mask = 64 - VTD_PAGE_SHIFT; >> + else >> + mask = ilog2(__roundup_pow_of_two(npages)); >> + >> + xa_for_each(&domain->iommu_array, i, info) { >> + nested_flush_pasid_iotlb(info->iommu, domain, addr, >> npages, 0); > > so IOMMU_VTD_INV_FLAGS_LEAF is defined but ignored? yeah... it is. It is named as ih in the driver code. But it appears only the below code is set ih. When calling iommu_flush_iotlb_psi(), the 5th parameter (ih) may be true. static int intel_iommu_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { struct memory_notify *mhp = v; unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn); unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); switch (val) { case MEM_GOING_ONLINE: if (iommu_domain_identity_map(si_domain, start_vpfn, last_vpfn)) { pr_warn("Failed to build identity map for [%lx-%lx]\n", start_vpfn, last_vpfn); return NOTIFY_BAD; } break; case MEM_OFFLINE: case MEM_CANCEL_ONLINE: { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; LIST_HEAD(freelist); domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist); rcu_read_lock(); for_each_active_iommu(iommu, drhd) iommu_flush_iotlb_psi(iommu, si_domain, start_vpfn, mhp->nr_pages, list_empty(&freelist), 0); rcu_read_unlock(); put_pages_list(&freelist); } break; } return NOTIFY_OK; } > >> + >> + if (domain->has_iotlb_device) >> + continue; >> + >> + nested_flush_dev_iotlb(domain, addr, mask, &fault); >> + if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE)) >> + break; > > here you may add a note that we don't plan to forward invalidation > queue error (i.e. IQE) to the caller as it's caused only by driver > internal bug. yes. > >> + >> + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) || >> + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) { >> + ret = -EINVAL; >> + break; >> + } >> + > > why is [non-zero-addr, U64_MAX] an error? Is it explicitly stated to > be not supported by underlying helpers? no such limitation by underlying helpers. But in such case, the addr+npages*PAGE_SIZE would exceed U64_MAX, this seems a bit strange. But I'm fine to relax the check since the underlying helper only checks npages when determining paid-selective or not.
> From: Liu, Yi L <yi.l.liu@intel.com> > Sent: Tuesday, December 26, 2023 12:52 PM > >> + > >> + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) || > >> + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) { > >> + ret = -EINVAL; > >> + break; > >> + } > >> + > > > > why is [non-zero-addr, U64_MAX] an error? Is it explicitly stated to > > be not supported by underlying helpers? > > no such limitation by underlying helpers. But in such case, the > addr+npages*PAGE_SIZE would exceed U64_MAX, this seems a bit > strange. But I'm fine to relax the check since the underlying helper > only checks npages when determining paid-selective or not. > I overlooked npages as end. let's keep the check.
On 2023/12/22 19:59, Liu, Yi L wrote: > >> On Dec 22, 2023, at 15:12, Tian, Kevin <kevin.tian@intel.com> wrote: >> >> >>> >>> From: Liu, Yi L <yi.l.liu@intel.com> >>> Sent: Friday, December 22, 2023 3:02 PM >>> >>> >>>>> On Dec 22, 2023, at 14:47, Tian, Kevin <kevin.tian@intel.com> wrote: >>>> >>>> >>>>> >>>>> From: Yang, Weijiang <weijiang.yang@intel.com> >>>>> Sent: Friday, December 22, 2023 11:56 AM >>>>>> + >>>>>> + xa_for_each(&domain->iommu_array, i, info) { >>>>>> + nested_flush_pasid_iotlb(info->iommu, domain, addr, >>>>> npages, 0); >>>>>> + >>>>>> + if (domain->has_iotlb_device) >>>>>> + continue; >>>>> >>>>> Shouldn't this be if (!domain->has_iotlb_device)? >>>> >>>> yes that is wrong. >>>> >>>> actually it's weird to put domain check in a loop of domain->iommu_array. >>>> >>>> that check along with devtlb flush should be done out of that loop. >>> >>> Maybe adding a bool, set it out of the loop, check the bool in the loop. >> >> the point is that dev iotlb doesn't rely on info->iommu: >> >> nested_flush_dev_iotlb(domain, addr, mask, &fault); >> >> then why do it in the loop of info->iommu? > > yes. It should have another device loop instead. let me move the device tlb related code out of the info->iommu loop.
On 2023/12/26 12:51, Yi Liu wrote: > On 2023/12/22 14:57, Tian, Kevin wrote: >>> From: Liu, Yi L <yi.l.liu@intel.com> >>> Sent: Thursday, December 21, 2023 11:40 PM >>> >>> + >>> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 >>> addr, >>> + unsigned long npages, u32 *error) >>> +{ >>> + struct iommu_domain_info *info; >>> + unsigned long i; >>> + unsigned mask; >>> + u32 fault = 0; >>> + >>> + if (npages == U64_MAX) >>> + mask = 64 - VTD_PAGE_SHIFT; >>> + else >>> + mask = ilog2(__roundup_pow_of_two(npages)); >>> + >>> + xa_for_each(&domain->iommu_array, i, info) { >>> + nested_flush_pasid_iotlb(info->iommu, domain, addr, >>> npages, 0); >> >> so IOMMU_VTD_INV_FLAGS_LEAF is defined but ignored? > > yeah... it is. It is named as ih in the driver code. But it appears only > the below code is set ih. When calling iommu_flush_iotlb_psi(), the 5th > parameter (ih) may be true. > > static int intel_iommu_memory_notifier(struct notifier_block *nb, > unsigned long val, void *v) > { > struct memory_notify *mhp = v; > unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn); > unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn + > mhp->nr_pages - 1); > > switch (val) { > case MEM_GOING_ONLINE: > if (iommu_domain_identity_map(si_domain, > start_vpfn, last_vpfn)) { > pr_warn("Failed to build identity map for [%lx-%lx]\n", > start_vpfn, last_vpfn); > return NOTIFY_BAD; > } > break; > > case MEM_OFFLINE: > case MEM_CANCEL_ONLINE: > { > struct dmar_drhd_unit *drhd; > struct intel_iommu *iommu; > LIST_HEAD(freelist); > > domain_unmap(si_domain, start_vpfn, last_vpfn, &freelist); > > rcu_read_lock(); > for_each_active_iommu(iommu, drhd) > iommu_flush_iotlb_psi(iommu, si_domain, > start_vpfn, mhp->nr_pages, > list_empty(&freelist), 0); > rcu_read_unlock(); > put_pages_list(&freelist); > } > break; > } > > return NOTIFY_OK; > } I passed this flag to the intel_nested_flush_cache() now as the helper accepts an ih parameter.
>-----Original Message----- >From: Liu, Yi L <yi.l.liu@intel.com> >Subject: [PATCH v7 9/9] iommu/vt-d: Add iotlb flush for nested domain > >From: Lu Baolu <baolu.lu@linux.intel.com> > >This implements the .cache_invalidate_user() callback to support iotlb >flush for nested domain. > >Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> >Co-developed-by: Yi Liu <yi.l.liu@intel.com> >Signed-off-by: Yi Liu <yi.l.liu@intel.com> >--- > drivers/iommu/intel/nested.c | 116 >+++++++++++++++++++++++++++++++++++ > 1 file changed, 116 insertions(+) > >diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c >index b5a5563ab32c..c665e2647045 100644 >--- a/drivers/iommu/intel/nested.c >+++ b/drivers/iommu/intel/nested.c >@@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct >iommu_domain *domain) > kfree(to_dmar_domain(domain)); > } > >+static void nested_flush_pasid_iotlb(struct intel_iommu *iommu, >+ struct dmar_domain *domain, u64 addr, >+ unsigned long npages, bool ih) >+{ >+ u16 did = domain_id_iommu(domain, iommu); >+ unsigned long flags; >+ >+ spin_lock_irqsave(&domain->lock, flags); >+ if (!list_empty(&domain->devices)) >+ qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, >+ npages, ih, NULL); Is it optimal to check if domain attached to iommu before trigger flush? Or the check is redundant if intel_nested_flush_cache() is the only call site. Thanks Zhenzhong >+ spin_unlock_irqrestore(&domain->lock, flags); >+} >+ >+static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, >+ unsigned mask, u32 *fault) >+{ >+ struct device_domain_info *info; >+ unsigned long flags; >+ u16 sid, qdep; >+ >+ spin_lock_irqsave(&domain->lock, flags); >+ list_for_each_entry(info, &domain->devices, link) { >+ if (!info->ats_enabled) >+ continue; >+ sid = info->bus << 8 | info->devfn; >+ qdep = info->ats_qdep; >+ qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, >+ qdep, addr, mask, fault); >+ quirk_extra_dev_tlb_flush(info, addr, mask, >+ IOMMU_NO_PASID, qdep); >+ } >+ spin_unlock_irqrestore(&domain->lock, flags); >+} >+ >+static void intel_nested_flush_cache(struct dmar_domain *domain, u64 >addr, >+ unsigned long npages, u32 *error) >+{ >+ struct iommu_domain_info *info; >+ unsigned long i; >+ unsigned mask; >+ u32 fault = 0; >+ >+ if (npages == U64_MAX) >+ mask = 64 - VTD_PAGE_SHIFT; >+ else >+ mask = ilog2(__roundup_pow_of_two(npages)); >+ >+ xa_for_each(&domain->iommu_array, i, info) { >+ nested_flush_pasid_iotlb(info->iommu, domain, addr, >npages, 0); >+ >+ if (domain->has_iotlb_device) >+ continue; >+ >+ nested_flush_dev_iotlb(domain, addr, mask, &fault); >+ if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE)) >+ break; >+ } >+ >+ if (fault & DMA_FSTS_ICE) >+ *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE; >+ if (fault & DMA_FSTS_ITE) >+ *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE; >+} >+ >+static int intel_nested_cache_invalidate_user(struct iommu_domain >*domain, >+ struct iommu_user_data_array >*array) >+{ >+ struct dmar_domain *dmar_domain = to_dmar_domain(domain); >+ struct iommu_hwpt_vtd_s1_invalidate inv_entry; >+ u32 processed = 0; >+ int ret = 0; >+ u32 index; >+ >+ if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) { >+ ret = -EINVAL; >+ goto out; >+ } >+ >+ for (index = 0; index < array->entry_num; index++) { >+ ret = iommu_copy_struct_from_user_array(&inv_entry, >array, >+ > IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, >+ index, inv_error); >+ if (ret) >+ break; >+ >+ if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) { >+ ret = -EOPNOTSUPP; >+ break; >+ } >+ >+ if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) || >+ ((inv_entry.npages == U64_MAX) && inv_entry.addr)) { >+ ret = -EINVAL; >+ break; >+ } >+ >+ inv_entry.inv_error = 0; >+ intel_nested_flush_cache(dmar_domain, inv_entry.addr, >+ inv_entry.npages, >&inv_entry.inv_error); >+ >+ ret = iommu_respond_struct_to_user_array(array, index, >+ (void *)&inv_entry, >+ sizeof(inv_entry)); >+ if (ret) >+ break; >+ >+ processed++; >+ } >+ >+out: >+ array->entry_num = processed; >+ return ret; >+} >+ > static const struct iommu_domain_ops intel_nested_domain_ops = { > .attach_dev = intel_nested_attach_dev, > .free = intel_nested_domain_free, >+ .cache_invalidate_user = intel_nested_cache_invalidate_user, > }; > > struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain >*parent, >-- >2.34.1
On 2023/12/27 17:27, Duan, Zhenzhong wrote: > > >> -----Original Message----- >> From: Liu, Yi L <yi.l.liu@intel.com> >> Subject: [PATCH v7 9/9] iommu/vt-d: Add iotlb flush for nested domain >> >> From: Lu Baolu <baolu.lu@linux.intel.com> >> >> This implements the .cache_invalidate_user() callback to support iotlb >> flush for nested domain. >> >> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> >> Co-developed-by: Yi Liu <yi.l.liu@intel.com> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> --- >> drivers/iommu/intel/nested.c | 116 >> +++++++++++++++++++++++++++++++++++ >> 1 file changed, 116 insertions(+) >> >> diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c >> index b5a5563ab32c..c665e2647045 100644 >> --- a/drivers/iommu/intel/nested.c >> +++ b/drivers/iommu/intel/nested.c >> @@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct >> iommu_domain *domain) >> kfree(to_dmar_domain(domain)); >> } >> >> +static void nested_flush_pasid_iotlb(struct intel_iommu *iommu, >> + struct dmar_domain *domain, u64 addr, >> + unsigned long npages, bool ih) >> +{ >> + u16 did = domain_id_iommu(domain, iommu); >> + unsigned long flags; >> + >> + spin_lock_irqsave(&domain->lock, flags); >> + if (!list_empty(&domain->devices)) >> + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, >> + npages, ih, NULL); > > Is it optimal to check if domain attached to iommu before trigger flush? > Or the check is redundant if intel_nested_flush_cache() is the only call site. I think it is possible that userspace issue an invalidation on a hwpt which does not have any device attached.. Though this is something stupid. So checking if any device attached before flushing still makes sense. > Thanks > Zhenzhong > >> + spin_unlock_irqrestore(&domain->lock, flags); >> +} >> + >> +static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, >> + unsigned mask, u32 *fault) >> +{ >> + struct device_domain_info *info; >> + unsigned long flags; >> + u16 sid, qdep; >> + >> + spin_lock_irqsave(&domain->lock, flags); >> + list_for_each_entry(info, &domain->devices, link) { >> + if (!info->ats_enabled) >> + continue; >> + sid = info->bus << 8 | info->devfn; >> + qdep = info->ats_qdep; >> + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, >> + qdep, addr, mask, fault); >> + quirk_extra_dev_tlb_flush(info, addr, mask, >> + IOMMU_NO_PASID, qdep); >> + } >> + spin_unlock_irqrestore(&domain->lock, flags); >> +} >> + >> +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 >> addr, >> + unsigned long npages, u32 *error) >> +{ >> + struct iommu_domain_info *info; >> + unsigned long i; >> + unsigned mask; >> + u32 fault = 0; >> + >> + if (npages == U64_MAX) >> + mask = 64 - VTD_PAGE_SHIFT; >> + else >> + mask = ilog2(__roundup_pow_of_two(npages)); >> + >> + xa_for_each(&domain->iommu_array, i, info) { >> + nested_flush_pasid_iotlb(info->iommu, domain, addr, >> npages, 0); >> + >> + if (domain->has_iotlb_device) >> + continue; >> + >> + nested_flush_dev_iotlb(domain, addr, mask, &fault); >> + if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE)) >> + break; >> + } >> + >> + if (fault & DMA_FSTS_ICE) >> + *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE; >> + if (fault & DMA_FSTS_ITE) >> + *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE; >> +} >> + >> +static int intel_nested_cache_invalidate_user(struct iommu_domain >> *domain, >> + struct iommu_user_data_array >> *array) >> +{ >> + struct dmar_domain *dmar_domain = to_dmar_domain(domain); >> + struct iommu_hwpt_vtd_s1_invalidate inv_entry; >> + u32 processed = 0; >> + int ret = 0; >> + u32 index; >> + >> + if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) { >> + ret = -EINVAL; >> + goto out; >> + } >> + >> + for (index = 0; index < array->entry_num; index++) { >> + ret = iommu_copy_struct_from_user_array(&inv_entry, >> array, >> + >> IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, >> + index, inv_error); >> + if (ret) >> + break; >> + >> + if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) { >> + ret = -EOPNOTSUPP; >> + break; >> + } >> + >> + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) || >> + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) { >> + ret = -EINVAL; >> + break; >> + } >> + >> + inv_entry.inv_error = 0; >> + intel_nested_flush_cache(dmar_domain, inv_entry.addr, >> + inv_entry.npages, >> &inv_entry.inv_error); >> + >> + ret = iommu_respond_struct_to_user_array(array, index, >> + (void *)&inv_entry, >> + sizeof(inv_entry)); >> + if (ret) >> + break; >> + >> + processed++; >> + } >> + >> +out: >> + array->entry_num = processed; >> + return ret; >> +} >> + >> static const struct iommu_domain_ops intel_nested_domain_ops = { >> .attach_dev = intel_nested_attach_dev, >> .free = intel_nested_domain_free, >> + .cache_invalidate_user = intel_nested_cache_invalidate_user, >> }; >> >> struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain >> *parent, >> -- >> 2.34.1 >
diff --git a/drivers/iommu/intel/nested.c b/drivers/iommu/intel/nested.c index b5a5563ab32c..c665e2647045 100644 --- a/drivers/iommu/intel/nested.c +++ b/drivers/iommu/intel/nested.c @@ -73,9 +73,125 @@ static void intel_nested_domain_free(struct iommu_domain *domain) kfree(to_dmar_domain(domain)); } +static void nested_flush_pasid_iotlb(struct intel_iommu *iommu, + struct dmar_domain *domain, u64 addr, + unsigned long npages, bool ih) +{ + u16 did = domain_id_iommu(domain, iommu); + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); + if (!list_empty(&domain->devices)) + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, + npages, ih, NULL); + spin_unlock_irqrestore(&domain->lock, flags); +} + +static void nested_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, + unsigned mask, u32 *fault) +{ + struct device_domain_info *info; + unsigned long flags; + u16 sid, qdep; + + spin_lock_irqsave(&domain->lock, flags); + list_for_each_entry(info, &domain->devices, link) { + if (!info->ats_enabled) + continue; + sid = info->bus << 8 | info->devfn; + qdep = info->ats_qdep; + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, addr, mask, fault); + quirk_extra_dev_tlb_flush(info, addr, mask, + IOMMU_NO_PASID, qdep); + } + spin_unlock_irqrestore(&domain->lock, flags); +} + +static void intel_nested_flush_cache(struct dmar_domain *domain, u64 addr, + unsigned long npages, u32 *error) +{ + struct iommu_domain_info *info; + unsigned long i; + unsigned mask; + u32 fault = 0; + + if (npages == U64_MAX) + mask = 64 - VTD_PAGE_SHIFT; + else + mask = ilog2(__roundup_pow_of_two(npages)); + + xa_for_each(&domain->iommu_array, i, info) { + nested_flush_pasid_iotlb(info->iommu, domain, addr, npages, 0); + + if (domain->has_iotlb_device) + continue; + + nested_flush_dev_iotlb(domain, addr, mask, &fault); + if (fault & (DMA_FSTS_ITE | DMA_FSTS_ICE)) + break; + } + + if (fault & DMA_FSTS_ICE) + *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ICE; + if (fault & DMA_FSTS_ITE) + *error |= IOMMU_HWPT_INVALIDATE_VTD_S1_ITE; +} + +static int intel_nested_cache_invalidate_user(struct iommu_domain *domain, + struct iommu_user_data_array *array) +{ + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct iommu_hwpt_vtd_s1_invalidate inv_entry; + u32 processed = 0; + int ret = 0; + u32 index; + + if (array->type != IOMMU_HWPT_INVALIDATE_DATA_VTD_S1) { + ret = -EINVAL; + goto out; + } + + for (index = 0; index < array->entry_num; index++) { + ret = iommu_copy_struct_from_user_array(&inv_entry, array, + IOMMU_HWPT_INVALIDATE_DATA_VTD_S1, + index, inv_error); + if (ret) + break; + + if (inv_entry.flags & ~IOMMU_VTD_INV_FLAGS_LEAF) { + ret = -EOPNOTSUPP; + break; + } + + if (!IS_ALIGNED(inv_entry.addr, VTD_PAGE_SIZE) || + ((inv_entry.npages == U64_MAX) && inv_entry.addr)) { + ret = -EINVAL; + break; + } + + inv_entry.inv_error = 0; + intel_nested_flush_cache(dmar_domain, inv_entry.addr, + inv_entry.npages, &inv_entry.inv_error); + + ret = iommu_respond_struct_to_user_array(array, index, + (void *)&inv_entry, + sizeof(inv_entry)); + if (ret) + break; + + processed++; + } + +out: + array->entry_num = processed; + return ret; +} + static const struct iommu_domain_ops intel_nested_domain_ops = { .attach_dev = intel_nested_attach_dev, .free = intel_nested_domain_free, + .cache_invalidate_user = intel_nested_cache_invalidate_user, }; struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,