Message ID | 20200618155125.1548969-8-jean-philippe@linaro.org
---|---
State | New, archived |
Series | iommu: Shared Virtual Addressing for SMMUv3 (PT sharing part)
On Thu, Jun 18, 2020 at 05:51:20PM +0200, Jean-Philippe Brucker wrote:
> With Shared Virtual Addressing (SVA), we need to mirror CPU TTBR, TCR,
> MAIR and ASIDs in SMMU contexts. Each SMMU has a single ASID space split
> into two sets, shared and private. Shared ASIDs correspond to those
> obtained from the arch ASID allocator, and private ASIDs are used for
> "classic" map/unmap DMA.
>
> A possible conflict happens when trying to use a shared ASID that has
> already been allocated for private use by the SMMU driver. This will be
> addressed in a later patch by replacing the private ASID. At the
> moment we return -EBUSY.
>
> Each mm_struct shared with the SMMU will have a single context
> descriptor. Add a refcount to keep track of this. It will be protected
> by the global SVA lock.
>
> Acked-by: Suzuki K Poulose <suzuki.poulose@arm.com>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 150 +++++++++++++++++++++++++++++++++++-
>  1 file changed, 146 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index 937aa1af428d5..cabd942e4cbf3 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -22,6 +22,7 @@
> #include <linux/iommu.h>
> #include <linux/iopoll.h>
> #include <linux/module.h>
> +#include <linux/mmu_context.h>
> #include <linux/msi.h>
> #include <linux/of.h>
> #include <linux/of_address.h>
> @@ -33,6 +34,8 @@
>
> #include <linux/amba/bus.h>
>
> +#include "io-pgtable-arm.h"
> +
> /* MMIO registers */
> #define ARM_SMMU_IDR0 0x0
> #define IDR0_ST_LVL GENMASK(28, 27)
> @@ -589,6 +592,9 @@ struct arm_smmu_ctx_desc {
>         u64 ttbr;
>         u64 tcr;
>         u64 mair;
> +
> +       refcount_t refs;
> +       struct mm_struct *mm;
> };
>
> struct arm_smmu_l1_ctx_desc {
> @@ -727,6 +733,7 @@ struct arm_smmu_option_prop {
> };
>
> static DEFINE_XARRAY_ALLOC1(asid_xa);
> +static DEFINE_MUTEX(sva_lock);
>
> static struct arm_smmu_option_prop arm_smmu_options[] = {
>         { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
> @@ -1662,7 +1669,8 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
> #ifdef __BIG_ENDIAN
>                 CTXDESC_CD_0_ENDI |
> #endif
> -               CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
> +               CTXDESC_CD_0_R | CTXDESC_CD_0_A |
> +               (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
>                 CTXDESC_CD_0_AA64 |
>                 FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
>                 CTXDESC_CD_0_V;
> @@ -1766,12 +1774,144 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
>         cdcfg->cdtab = NULL;
> }
>
> -static void arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
> +static void arm_smmu_init_cd(struct arm_smmu_ctx_desc *cd)
> {
> +       refcount_set(&cd->refs, 1);
> +}
> +
> +static bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
> +{
> +       bool free;
> +       struct arm_smmu_ctx_desc *old_cd;
> +
>         if (!cd->asid)
> -               return;
> +               return false;
> +
> +       free = refcount_dec_and_test(&cd->refs);
> +       if (free) {
> +               old_cd = xa_erase(&asid_xa, cd->asid);
> +               WARN_ON(old_cd != cd);
> +       }
> +       return free;
> +}
> +
> +static struct arm_smmu_ctx_desc *arm_smmu_share_asid(u16 asid)
> +{
> +       struct arm_smmu_ctx_desc *cd;
>
> -       xa_erase(&asid_xa, cd->asid);
> +       cd = xa_load(&asid_xa, asid);
> +       if (!cd)
> +               return NULL;
> +
> +       if (cd->mm) {
> +               /* All devices bound to this mm use the same cd struct. */
> +               refcount_inc(&cd->refs);
> +               return cd;
> +       }

How do you handle racing against a concurrent arm_smmu_free_asid() here?

> +__maybe_unused
> +static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
> +{
> +       u16 asid;
> +       int ret = 0;
> +       u64 tcr, par, reg;
> +       struct arm_smmu_ctx_desc *cd;
> +       struct arm_smmu_ctx_desc *old_cd = NULL;
> +
> +       lockdep_assert_held(&sva_lock);

Please don't bother with these for static functions (but I can see the
value in having them for functions with external callers).

> +
> +       asid = mm_context_get(mm);
> +       if (!asid)
> +               return ERR_PTR(-ESRCH);
> +
> +       cd = kzalloc(sizeof(*cd), GFP_KERNEL);
> +       if (!cd) {
> +               ret = -ENOMEM;
> +               goto err_put_context;
> +       }
> +
> +       arm_smmu_init_cd(cd);
> +
> +       old_cd = arm_smmu_share_asid(asid);
> +       if (IS_ERR(old_cd)) {
> +               ret = PTR_ERR(old_cd);
> +               goto err_free_cd;
> +       } else if (old_cd) {

Don't need the 'else'

> +               if (WARN_ON(old_cd->mm != mm)) {
> +                       ret = -EINVAL;
> +                       goto err_free_cd;
> +               }
> +               kfree(cd);
> +               mm_context_put(mm);
> +               return old_cd;

This is a bit messy. Can you consolidate the return path so that ret is a
pointer and you have an 'int err', e.g.:

        return err < 0 ? ERR_PTR(err) : ret;

Will
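
For readers following along, here is a rough sketch of the single-exit shape Will is asking for. It is illustrative only: `struct foo`, `foo_get_or_alloc()` and the xarray usage are made up for this example and are not the driver code; only the `err`/`ret` split and the final ternary mirror the suggestion.

```c
#include <linux/err.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/xarray.h>

struct foo {
	refcount_t refs;
};

/*
 * Assumes callers serialise lookups against teardown with an external lock
 * (see the race discussed elsewhere in this thread).
 */
static struct foo *foo_get_or_alloc(struct xarray *xa, unsigned long id)
{
	int err = 0;
	struct foo *ret = NULL;
	struct foo *new_foo = kzalloc(sizeof(*new_foo), GFP_KERNEL);

	if (!new_foo)
		return ERR_PTR(-ENOMEM);

	refcount_set(&new_foo->refs, 1);

	ret = xa_load(xa, id);			/* existing entry for this id? */
	if (ret) {
		refcount_inc(&ret->refs);	/* reuse it and discard new_foo */
		goto out_free;
	}

	err = xa_insert(xa, id, new_foo, GFP_KERNEL);
	if (err)
		goto out_free;

	return new_foo;

out_free:
	kfree(new_foo);
	/* Single exit: an error pointer on failure, the reused entry otherwise */
	return err < 0 ? ERR_PTR(err) : ret;
}
```

The benefit of this shape is that the reuse path and every error path share one cleanup sequence, so adding a new failure case cannot leak or double-free the temporary allocation.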
On Mon, Jul 13, 2020 at 09:22:37PM +0100, Will Deacon wrote:
> > +static struct arm_smmu_ctx_desc *arm_smmu_share_asid(u16 asid)
> > +{
> > +       struct arm_smmu_ctx_desc *cd;
> >
> > -       xa_erase(&asid_xa, cd->asid);
> > +       cd = xa_load(&asid_xa, asid);
> > +       if (!cd)
> > +               return NULL;
> > +
> > +       if (cd->mm) {
> > +               /* All devices bound to this mm use the same cd struct. */
> > +               refcount_inc(&cd->refs);
> > +               return cd;
> > +       }
>
> How do you handle racing against a concurrent arm_smmu_free_asid() here?

Patch 8 adds an asid_lock to deal with this, but it should be introduced in
this patch. There is a potential use-after-free here, if
arm_smmu_domain_free() runs concurrently.

>
> > +__maybe_unused
> > +static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
> > +{
> > +       u16 asid;
> > +       int ret = 0;
> > +       u64 tcr, par, reg;
> > +       struct arm_smmu_ctx_desc *cd;
> > +       struct arm_smmu_ctx_desc *old_cd = NULL;
> > +
> > +       lockdep_assert_held(&sva_lock);
>
> Please don't bother with these for static functions (but I can see the
> value in having them for functions with external callers).
>
> > +
> > +       asid = mm_context_get(mm);
> > +       if (!asid)
> > +               return ERR_PTR(-ESRCH);
> > +
> > +       cd = kzalloc(sizeof(*cd), GFP_KERNEL);
> > +       if (!cd) {
> > +               ret = -ENOMEM;
> > +               goto err_put_context;
> > +       }
> > +
> > +       arm_smmu_init_cd(cd);
> > +
> > +       old_cd = arm_smmu_share_asid(asid);
> > +       if (IS_ERR(old_cd)) {
> > +               ret = PTR_ERR(old_cd);
> > +               goto err_free_cd;
> > +       } else if (old_cd) {
>
> Don't need the 'else'
>
> > +               if (WARN_ON(old_cd->mm != mm)) {
> > +                       ret = -EINVAL;
> > +                       goto err_free_cd;
> > +               }
> > +               kfree(cd);
> > +               mm_context_put(mm);
> > +               return old_cd;
>
> This is a bit messy. Can you consolidate the return path so that ret is a
> pointer and you have an 'int err', e.g.:
>
>         return err < 0 ? ERR_PTR(err) : ret;

Sure, I think it looks a little nicer this way

Thanks,
Jean
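
As a sketch of the locking Jean-Philippe refers to (the asid_lock introduced later in the series), the two helpers from the patch could take a common mutex so the descriptor cannot be erased and freed between the xa_load() and the refcount_inc(). This is only an illustration of the idea under discussion, reusing the declarations from the patch, not the code that was eventually merged.

```c
static DEFINE_MUTEX(asid_lock);

static struct arm_smmu_ctx_desc *arm_smmu_share_asid(u16 asid)
{
	struct arm_smmu_ctx_desc *cd;

	mutex_lock(&asid_lock);
	cd = xa_load(&asid_xa, asid);
	if (cd && cd->mm) {
		/*
		 * A concurrent arm_smmu_free_asid() also takes asid_lock, so
		 * cd cannot be erased and freed between the lookup above and
		 * this refcount_inc().
		 */
		refcount_inc(&cd->refs);
	} else if (cd) {
		/* ASID is in use by a private context descriptor */
		cd = ERR_PTR(-EBUSY);
	}
	mutex_unlock(&asid_lock);

	return cd;
}

static bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
{
	bool free = false;

	mutex_lock(&asid_lock);
	if (cd->asid) {
		free = refcount_dec_and_test(&cd->refs);
		if (free)
			WARN_ON(xa_erase(&asid_xa, cd->asid) != cd);
	}
	mutex_unlock(&asid_lock);

	return free;
}
```

With both paths serialised on the same lock, arm_smmu_domain_free() dropping the last reference can no longer free the descriptor while a binder is in the middle of sharing its ASID.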
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 937aa1af428d5..cabd942e4cbf3 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -22,6 +22,7 @@
 #include <linux/iommu.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
+#include <linux/mmu_context.h>
 #include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
@@ -33,6 +34,8 @@

 #include <linux/amba/bus.h>

+#include "io-pgtable-arm.h"
+
 /* MMIO registers */
 #define ARM_SMMU_IDR0 0x0
 #define IDR0_ST_LVL GENMASK(28, 27)
@@ -589,6 +592,9 @@ struct arm_smmu_ctx_desc {
        u64 ttbr;
        u64 tcr;
        u64 mair;
+
+       refcount_t refs;
+       struct mm_struct *mm;
 };

 struct arm_smmu_l1_ctx_desc {
@@ -727,6 +733,7 @@ struct arm_smmu_option_prop {
 };

 static DEFINE_XARRAY_ALLOC1(asid_xa);
+static DEFINE_MUTEX(sva_lock);

 static struct arm_smmu_option_prop arm_smmu_options[] = {
        { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
@@ -1662,7 +1669,8 @@ static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain,
 #ifdef __BIG_ENDIAN
                CTXDESC_CD_0_ENDI |
 #endif
-               CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET |
+               CTXDESC_CD_0_R | CTXDESC_CD_0_A |
+               (cd->mm ? 0 : CTXDESC_CD_0_ASET) |
                CTXDESC_CD_0_AA64 |
                FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
                CTXDESC_CD_0_V;
@@ -1766,12 +1774,144 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
        cdcfg->cdtab = NULL;
 }

-static void arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
+static void arm_smmu_init_cd(struct arm_smmu_ctx_desc *cd)
 {
+       refcount_set(&cd->refs, 1);
+}
+
+static bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
+{
+       bool free;
+       struct arm_smmu_ctx_desc *old_cd;
+
        if (!cd->asid)
-               return;
+               return false;
+
+       free = refcount_dec_and_test(&cd->refs);
+       if (free) {
+               old_cd = xa_erase(&asid_xa, cd->asid);
+               WARN_ON(old_cd != cd);
+       }
+       return free;
+}
+
+static struct arm_smmu_ctx_desc *arm_smmu_share_asid(u16 asid)
+{
+       struct arm_smmu_ctx_desc *cd;

-       xa_erase(&asid_xa, cd->asid);
+       cd = xa_load(&asid_xa, asid);
+       if (!cd)
+               return NULL;
+
+       if (cd->mm) {
+               /* All devices bound to this mm use the same cd struct. */
+               refcount_inc(&cd->refs);
+               return cd;
+       }
+
+       /* Ouch, ASID is already in use for a private cd. */
+       return ERR_PTR(-EBUSY);
+}
+
+__maybe_unused
+static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
+{
+       u16 asid;
+       int ret = 0;
+       u64 tcr, par, reg;
+       struct arm_smmu_ctx_desc *cd;
+       struct arm_smmu_ctx_desc *old_cd = NULL;
+
+       lockdep_assert_held(&sva_lock);
+
+       asid = mm_context_get(mm);
+       if (!asid)
+               return ERR_PTR(-ESRCH);
+
+       cd = kzalloc(sizeof(*cd), GFP_KERNEL);
+       if (!cd) {
+               ret = -ENOMEM;
+               goto err_put_context;
+       }
+
+       arm_smmu_init_cd(cd);
+
+       old_cd = arm_smmu_share_asid(asid);
+       if (IS_ERR(old_cd)) {
+               ret = PTR_ERR(old_cd);
+               goto err_free_cd;
+       } else if (old_cd) {
+               if (WARN_ON(old_cd->mm != mm)) {
+                       ret = -EINVAL;
+                       goto err_free_cd;
+               }
+               kfree(cd);
+               mm_context_put(mm);
+               return old_cd;
+       }
+
+       /* Fails if a private ASID has been allocated since we last checked */
+       ret = xa_insert(&asid_xa, asid, cd, GFP_KERNEL);
+       if (ret)
+               goto err_free_cd;
+
+       tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, 64ULL - VA_BITS) |
+             FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, ARM_LPAE_TCR_RGN_WBWA) |
+             FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, ARM_LPAE_TCR_RGN_WBWA) |
+             FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS) |
+             CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
+
+       switch (PAGE_SIZE) {
+       case SZ_4K:
+               tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_4K);
+               break;
+       case SZ_16K:
+               tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_16K);
+               break;
+       case SZ_64K:
+               tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_TG0, ARM_LPAE_TCR_TG0_64K);
+               break;
+       default:
+               WARN_ON(1);
+               ret = -EINVAL;
+               goto err_free_asid;
+       }
+
+       reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+       par = cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR0_PARANGE_SHIFT);
+       tcr |= FIELD_PREP(CTXDESC_CD_0_TCR_IPS, par);
+
+       cd->ttbr = virt_to_phys(mm->pgd);
+       cd->tcr = tcr;
+       /*
+        * MAIR value is pretty much constant and global, so we can just get it
+        * from the current CPU register
+        */
+       cd->mair = read_sysreg(mair_el1);
+       cd->asid = asid;
+       cd->mm = mm;
+
+       return cd;
+
+err_free_asid:
+       arm_smmu_free_asid(cd);
+err_free_cd:
+       kfree(cd);
+err_put_context:
+       mm_context_put(mm);
+       return ERR_PTR(ret);
+}
+
+__maybe_unused
+static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd)
+{
+       lockdep_assert_held(&sva_lock);
+
+       if (arm_smmu_free_asid(cd)) {
+               /* Unpin ASID */
+               mm_context_put(cd->mm);
+               kfree(cd);
+       }
 }

 /* Stream table manipulation functions */
@@ -2481,6 +2621,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
        struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
        typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;

+       arm_smmu_init_cd(&cfg->cd);
+
        ret = xa_alloc(&asid_xa, &asid, &cfg->cd,
                       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
        if (ret)
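
To make the intended calling convention concrete, here is a hypothetical caller of the two new helpers. The actual SVA bind/unbind path only arrives later in the series, so the function names below are illustrative; the sketch only shows that both helpers expect the global sva_lock to be held, as their lockdep assertions indicate.

```c
/* Hypothetical bind: returns a shared context descriptor for this mm. */
static struct arm_smmu_ctx_desc *arm_smmu_sva_bind_mm(struct mm_struct *mm)
{
	struct arm_smmu_ctx_desc *cd;

	mutex_lock(&sva_lock);
	/* Returns the existing descriptor, with a reference, if mm is already bound */
	cd = arm_smmu_alloc_shared_cd(mm);
	mutex_unlock(&sva_lock);

	return cd;
}

/* Hypothetical unbind: drops one reference to the shared descriptor. */
static void arm_smmu_sva_unbind_mm(struct arm_smmu_ctx_desc *cd)
{
	mutex_lock(&sva_lock);
	/* Frees the descriptor and unpins the ASID when the last reference drops */
	arm_smmu_free_shared_cd(cd);
	mutex_unlock(&sva_lock);
}
```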