Message ID | 22-v5-9a37e0c884ce+31e3-smmuv3_newapi_p2_jgg@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Update SMMUv3 to the modern iommu API (part 2/3) | expand |
On Tue, Mar 5, 2024 at 7:44 AM Jason Gunthorpe <jgg@nvidia.com> wrote: > > The SMMUv3 IOTLB is tagged with a VMID/ASID cache tag. Any time the > underlying translation is changed these need to be invalidated. At boot > time the IOTLB starts out empty and all cache tags are available for > allocation. > > When a tag is taken out of the allocator the code assumes the IOTLB > doesn't reference it, and immediately programs it into a STE/CD. If the > cache is referencing the tag then it will have stale data and IOMMU will > become incoherent. > > Thus, whenever an ASID/VMID is freed back to the allocator we need to know > that the IOTLB doesn't have any references to it. The SVA code correctly > had an invalidation here, but the paging code does not. Isn't that....bad? > > Consolidate freeing the VMID/ASID to one place and consistently flush both > ID types before returning to their allocators. > > Tested-by: Nicolin Chen <nicolinc@nvidia.com> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 9 ++--- > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 36 +++++++++++++------ > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + > 3 files changed, 29 insertions(+), 17 deletions(-) > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c > index cb0da4e5a5517a..3a9f4ef47c6b6f 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c > @@ -381,18 +381,13 @@ static void arm_smmu_sva_domain_free(struct iommu_domain *domain) > { > struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); > > - /* > - * Ensure the ASID is empty in the iommu cache before allowing reuse. > - */ > - arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); > - > /* > * Notice that the arm_smmu_mm_arch_invalidate_secondary_tlbs op can > * still be called/running at this point. We allow the ASID to be > * reused, and if there is a race then it just suffers harmless > * unnecessary invalidation. > */ > - xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); > + arm_smmu_domain_free_id(smmu_domain); > > /* > * Actual free is defered to the SRCU callback > @@ -437,7 +432,7 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, > return &smmu_domain->domain; > > err_asid: > - xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); > + arm_smmu_domain_free_id(smmu_domain); > err_free: > kfree(smmu_domain); > return ERR_PTR(ret); > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > index 5642321b2124d9..4f22eb810c8dbd 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > @@ -2296,25 +2296,41 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) > return &smmu_domain->domain; > } > > -static void arm_smmu_domain_free(struct iommu_domain *domain) > +/* > + * Return the domain's ASID or VMID back to the allocator. All IDs in the > + * allocator do not have an IOTLB entries referencing them. > + */ > +void arm_smmu_domain_free_id(struct arm_smmu_domain *smmu_domain) > { > - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); > struct arm_smmu_device *smmu = smmu_domain->smmu; > > - free_io_pgtable_ops(smmu_domain->pgtbl_ops); > + if ((smmu_domain->stage == ARM_SMMU_DOMAIN_S1 || > + smmu_domain->domain.type == IOMMU_DOMAIN_SVA) && > + smmu_domain->cd.asid) { > + arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid); > > - /* Free the ASID or VMID */ > - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { > /* Prevent SVA from touching the CD while we're freeing it */ > mutex_lock(&arm_smmu_asid_lock); > xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); > mutex_unlock(&arm_smmu_asid_lock); > - } else { > - struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; > - if (cfg->vmid) > - ida_free(&smmu->vmid_map, cfg->vmid); > - } > + } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2 && > + smmu_domain->s2_cfg.vmid) { > + struct arm_smmu_cmdq_ent cmd = { > + .opcode = CMDQ_OP_TLBI_S12_VMALL, > + .tlbi.vmid = smmu_domain->s2_cfg.vmid > + }; > > + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); > + ida_free(&smmu->vmid_map, smmu_domain->s2_cfg.vmid); > + } > +} > + There's room to refactor and share the tlb id invalidation logic with arm_smmu_tlb_inv_context by the way, but this works too. > +static void arm_smmu_domain_free(struct iommu_domain *domain) > +{ > + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); > + > + free_io_pgtable_ops(smmu_domain->pgtbl_ops); > + arm_smmu_domain_free_id(smmu_domain); > kfree(smmu_domain); > } > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > index cfae4d69cd810c..4631f0ac396dc3 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h > @@ -774,6 +774,7 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, > void arm_smmu_remove_pasid(struct arm_smmu_master *master, > struct arm_smmu_domain *smmu_domain, ioasid_t pasid); > > +void arm_smmu_domain_free_id(struct arm_smmu_domain *smmu_domain); > void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); > void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, > size_t granule, bool leaf, > -- > 2.43.2 > > Reviewed-by: Michael Shavit <mshavit@google.com>
On Wed, Mar 20, 2024 at 12:44:42AM +0800, Michael Shavit wrote: > On Tue, Mar 5, 2024 at 7:44 AM Jason Gunthorpe <jgg@nvidia.com> wrote: > > > > The SMMUv3 IOTLB is tagged with a VMID/ASID cache tag. Any time the > > underlying translation is changed these need to be invalidated. At boot > > time the IOTLB starts out empty and all cache tags are available for > > allocation. > > > > When a tag is taken out of the allocator the code assumes the IOTLB > > doesn't reference it, and immediately programs it into a STE/CD. If the > > cache is referencing the tag then it will have stale data and IOMMU will > > become incoherent. > > > > Thus, whenever an ASID/VMID is freed back to the allocator we need to know > > that the IOTLB doesn't have any references to it. The SVA code correctly > > had an invalidation here, but the paging code does not. > > Isn't that....bad? Ah, that needed some more work I forgot to do. The paging code hides the invalidation in the free_io_pgtable_ops() called during domain free which calls back to arm_smmu_tlb_inv_context(). free_io_pgtable_ops() is the only thing that calls io_pgtable_tlb_flush_all() and thus arm_smmu_tlb_inv_context(). So we should also delete arm_smmu_tlb_inv_context() and NULL ops.tlb_flush_all. Then arm_smmu_domain_free_id() consistently handles the final invalidation on all code paths instead of having it split up. Thanks, Jason
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index cb0da4e5a5517a..3a9f4ef47c6b6f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -381,18 +381,13 @@ static void arm_smmu_sva_domain_free(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - /* - * Ensure the ASID is empty in the iommu cache before allowing reuse. - */ - arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); - /* * Notice that the arm_smmu_mm_arch_invalidate_secondary_tlbs op can * still be called/running at this point. We allow the ASID to be * reused, and if there is a race then it just suffers harmless * unnecessary invalidation. */ - xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); + arm_smmu_domain_free_id(smmu_domain); /* * Actual free is defered to the SRCU callback @@ -437,7 +432,7 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, return &smmu_domain->domain; err_asid: - xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); + arm_smmu_domain_free_id(smmu_domain); err_free: kfree(smmu_domain); return ERR_PTR(ret); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5642321b2124d9..4f22eb810c8dbd 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2296,25 +2296,41 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) return &smmu_domain->domain; } -static void arm_smmu_domain_free(struct iommu_domain *domain) +/* + * Return the domain's ASID or VMID back to the allocator. All IDs in the + * allocator do not have an IOTLB entries referencing them. + */ +void arm_smmu_domain_free_id(struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; - free_io_pgtable_ops(smmu_domain->pgtbl_ops); + if ((smmu_domain->stage == ARM_SMMU_DOMAIN_S1 || + smmu_domain->domain.type == IOMMU_DOMAIN_SVA) && + smmu_domain->cd.asid) { + arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid); - /* Free the ASID or VMID */ - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { /* Prevent SVA from touching the CD while we're freeing it */ mutex_lock(&arm_smmu_asid_lock); xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); mutex_unlock(&arm_smmu_asid_lock); - } else { - struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; - if (cfg->vmid) - ida_free(&smmu->vmid_map, cfg->vmid); - } + } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2 && + smmu_domain->s2_cfg.vmid) { + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_TLBI_S12_VMALL, + .tlbi.vmid = smmu_domain->s2_cfg.vmid + }; + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); + ida_free(&smmu->vmid_map, smmu_domain->s2_cfg.vmid); + } +} + +static void arm_smmu_domain_free(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + free_io_pgtable_ops(smmu_domain->pgtbl_ops); + arm_smmu_domain_free_id(smmu_domain); kfree(smmu_domain); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index cfae4d69cd810c..4631f0ac396dc3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -774,6 +774,7 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master, void arm_smmu_remove_pasid(struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain, ioasid_t pasid); +void arm_smmu_domain_free_id(struct arm_smmu_domain *smmu_domain); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, size_t granule, bool leaf,