@@ -14,12 +14,33 @@
static DEFINE_MUTEX(sva_lock);
-static void __maybe_unused
-arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)
+static int arm_smmu_realloc_s1_domain_asid(struct arm_smmu_device *smmu,
+ struct arm_smmu_domain *smmu_domain)
{
struct arm_smmu_master_domain *master_domain;
+ u32 old_asid = smmu_domain->cd.asid;
struct arm_smmu_cd target_cd;
unsigned long flags;
+ int ret;
+
+ lockdep_assert_held(&smmu->asid_lock);
+
+ /*
+ * FIXME: The unmap and invalidation path doesn't take any locks, but
+ * this is not fully safe. Since updating the CD tables is not atomic,
+ * there is always a window where invalidating only one of the two
+ * active ASIDs during unmap will leave the IOTLB stale.
+ *
+ * The approach is to shift the racing CPUs over to the new ASID before
+ * we start programming the HW. This increases the chance that racing
+ * IOPTE changes will pick up an invalidation for the new ASID and we
+ * achieve eventual consistency. For the brief period where the old
+ * ASID is still present in the CD entries the IOTLB may be incoherent.
+ */
+ ret = xa_alloc(&smmu->asid_map, &smmu_domain->cd.asid, smmu_domain,
+ XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
+ if (ret)
+ return ret;
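+
+ /* Switch every attached master's CD over to the newly allocated ASID */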
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) {
@@ -35,6 +56,10 @@ arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)
&target_cd);
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+
+ /* Clean the ASID we are about to assign to a new translation */
+ arm_smmu_tlb_inv_asid(smmu, old_asid);
+ return 0;
}
static u64 page_size_to_cd(void)
@@ -147,12 +172,12 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
}
if (!smmu_domain->btm_invalidation) {
+ ioasid_t asid = READ_ONCE(smmu_domain->cd.asid);
+
if (!size)
- arm_smmu_tlb_inv_asid(smmu_domain->smmu,
- smmu_domain->cd.asid);
+ arm_smmu_tlb_inv_asid(smmu_domain->smmu, asid);
else
- arm_smmu_tlb_inv_range_asid(start, size,
- smmu_domain->cd.asid,
+ arm_smmu_tlb_inv_range_asid(start, size, asid,
PAGE_SIZE, false,
smmu_domain);
}
@@ -181,6 +206,8 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid);
if (WARN_ON(!cdptr))
continue;
+
+ /* An SVA ASID never changes, so no asid_lock is required */
arm_smmu_make_sva_cd(&target, master, NULL,
smmu_domain->cd.asid,
smmu_domain->btm_invalidation);
@@ -374,6 +401,8 @@ static void arm_smmu_sva_domain_free(struct iommu_domain *domain)
* unnecessary invalidation.
*/
arm_smmu_domain_free_id(smmu_domain);
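+ /* Pairs with the arm64_mm_context_get() in arm_smmu_share_asid() */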
+ if (smmu_domain->btm_invalidation)
+ arm64_mm_context_put(domain->mm);
/*
* Actual free is defered to the SRCU callback
@@ -387,13 +416,97 @@ static const struct iommu_domain_ops arm_smmu_sva_domain_ops = {
.free = arm_smmu_sva_domain_free
};
+static int arm_smmu_share_asid(struct arm_smmu_device *smmu,
+ struct arm_smmu_domain *smmu_domain,
+ struct mm_struct *mm)
+{
+ struct arm_smmu_domain *old_s1_domain;
+ int ret;
+
+ /*
+ * Notice that BTM is never currently enabled; this is all dead code.
+ * The specification cautions:
+ *
+ * Note: Arm expects that SMMU stage 2 address spaces are generally
+ * shared with their respective PE virtual machine stage 2
+ * configuration. If broadcast invalidation is required to be avoided
+ * for a particular SMMU stage 2 address space, Arm recommends that a
+ * hypervisor configures the STE with a VMID that is not allocated for
+ * virtual machine use on the PEs
+ *
+ * However, in Linux, both KVM and SMMU think they own the VMID pool.
+ * Unfortunately the ARM design is problematic for Linux as we do not
+ * currently share the S2 table with KVM. This creates a situation where
+ * the S2 needs to have the same VMID as KVM in order to allow the guest
+ * to use BTM; however, we must still invalidate the S2 directly since it
+ * is a different radix tree. What Linux would like is something like
+ * ASET for the STE to disable BTM only for the S2.
+ *
+ * Arguably in a system with BTM the driver should prefer to use an S1
+ * table in all cases except when explicitly asked to create a nesting
+ * parent. Then it should use the VMID of KVM to enable BTM in the
+ * guest. We cannot optimize away the resulting double invalidation of
+ * the S2 :( Or we simply ignore BTM entirely as we are doing now.
+ */
+ if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
+ return xa_alloc(&smmu->asid_map, &smmu_domain->cd.asid,
+ smmu_domain,
+ XA_LIMIT(1, (1 << smmu->asid_bits) - 1),
+ GFP_KERNEL);
+
+ /* At this point the caller ensures we have a mmget() */
+ smmu_domain->cd.asid = arm64_mm_context_get(mm);
+
+ mutex_lock(&smmu->asid_lock);
+ old_s1_domain = xa_store(&smmu->asid_map, smmu_domain->cd.asid,
+ smmu_domain, GFP_KERNEL);
+ if (xa_err(old_s1_domain)) {
+ ret = xa_err(old_s1_domain);
+ goto out_put_asid;
+ }
+
+ /*
+ * In BTM mode the CPU ASID and the IOMMU ASID have to be the same.
+ * Unfortunately we run separate allocators for this, and the IOMMU
+ * ASID may already have been assigned to an S1 domain. SVA domains
+ * always align to their CPU ASIDs, so in that case we change the S1
+ * domain's ASID, update its CD entries and flush the caches.
+ *
+ * This is a bit tricky: all the places writing to an S1 CD, reading the
+ * S1 ASID, or doing xa_erase must hold the asid_lock or xa_lock to
+ * avoid IOTLB incoherence.
+ */
+ if (old_s1_domain) {
+ if (WARN_ON(old_s1_domain->domain.type == IOMMU_DOMAIN_SVA)) {
+ ret = -EINVAL;
+ goto out_restore_s1;
+ }
+ ret = arm_smmu_realloc_s1_domain_asid(smmu, old_s1_domain);
+ if (ret)
+ goto out_restore_s1;
+ }
+
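+ /*
+ * The invalidation notifier will now rely on the CPU's broadcast TLB
+ * maintenance and skip the explicit ASID invalidations.
+ */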
+ smmu_domain->btm_invalidation = true;
+
+ ret = 0;
+ goto out_unlock;
+
+out_restore_s1:
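+ /* Restore the previous owner of the ASID (NULL clears the slot) */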
+ xa_store(&smmu->asid_map, smmu_domain->cd.asid, old_s1_domain,
+ GFP_KERNEL);
+out_put_asid:
+ arm64_mm_context_put(mm);
+out_unlock:
+ mutex_unlock(&smmu->asid_lock);
+ return ret;
+}
+
struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
struct mm_struct *mm)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_domain *smmu_domain;
- u32 asid;
int ret;
smmu_domain = arm_smmu_domain_alloc();
@@ -404,12 +517,10 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
smmu_domain->domain.ops = &arm_smmu_sva_domain_ops;
smmu_domain->smmu = smmu;
- ret = xa_alloc(&smmu->asid_map, &asid, smmu_domain,
- XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
+ ret = arm_smmu_share_asid(smmu, smmu_domain, mm);
if (ret)
goto err_free;
- smmu_domain->cd.asid = asid;
smmu_domain->mmu_notifier.ops = &arm_smmu_mmu_notifier_ops;
ret = mmu_notifier_register(&smmu_domain->mmu_notifier, mm);
if (ret)
@@ -419,6 +530,8 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
err_asid:
arm_smmu_domain_free_id(smmu_domain);
+ if (smmu_domain->btm_invalidation)
+ arm64_mm_context_put(mm);
err_free:
kfree(smmu_domain);
return ERR_PTR(ret);
@@ -1324,6 +1324,8 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
&pgtbl_cfg->arm_lpae_s1_cfg.tcr;
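+ /* The domain's ASID is only stable while the asid_lock is held */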
+ lockdep_assert_held(&master->smmu->asid_lock);
+
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
@@ -2061,7 +2063,7 @@ static void arm_smmu_tlb_inv_context(struct arm_smmu_domain *smmu_domain)
if ((smmu_domain->stage == ARM_SMMU_DOMAIN_S1 ||
smmu_domain->domain.type == IOMMU_DOMAIN_SVA)) {
- arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
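+ /*
+ * Unlocked read: the ASID can be changed concurrently by
+ * arm_smmu_realloc_s1_domain_asid(), see the FIXME there.
+ */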
+ arm_smmu_tlb_inv_asid(smmu, READ_ONCE(smmu_domain->cd.asid));
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2) {
cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
@@ -2305,17 +2307,10 @@ static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
struct arm_smmu_domain *smmu_domain)
{
- int ret;
- u32 asid;
struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
- /* Prevent SVA from modifying the ASID until it is written to the CD */
- mutex_lock(&smmu->asid_lock);
- ret = xa_alloc(&smmu->asid_map, &asid, smmu_domain,
- XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
- cd->asid = (u16)asid;
- mutex_unlock(&smmu->asid_lock);
- return ret;
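+ /*
+ * No asid_lock is needed here: xa_alloc() is serialized by the
+ * xa_lock and the CD itself is only written while holding the
+ * asid_lock (see arm_smmu_make_s1_cd()).
+ */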
+ return xa_alloc(&smmu->asid_map, &cd->asid, smmu_domain,
+ XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
}
static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
@@ -586,7 +586,7 @@ struct arm_smmu_strtab_l1_desc {
};
struct arm_smmu_ctx_desc {
- u16 asid;
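+ /* u32 so xa_alloc() can write the allocated ASID directly */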
+ u32 asid;
};
struct arm_smmu_l1_ctx_desc {