diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += amd_iommu_debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
+obj-$(CONFIG_ARM_SMMU) += lib-arm-smmu.o
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
diff --git a/drivers/iommu/lib-arm-smmu.c b/drivers/iommu/lib-arm-smmu.c
new file mode 100644
--- /dev/null
+++ b/drivers/iommu/lib-arm-smmu.c
@@ -0,0 +1,1671 @@
+/*
+ * Copyright (c) 2018, NVIDIA Corporation
+ * Author: Krishna Reddy <vdumpa@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Library for ARM architected v1 and v2 SMMU implementations.
+ * This library is derived from arm-smmu.c, which was authored by
+ * Will Deacon.
+ */
+
+#define pr_fmt(fmt) "lib-arm-smmu: " fmt
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/dma-iommu.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/fsl/mc.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_iommu.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "io-pgtable.h"
+#include "arm-smmu-regs.h"
+#include "lib-arm-smmu.h"
+
+#define ARM_MMU500_ACTLR_CPRE (1 << 1)
+
+#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
+#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
+#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
+
+#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
+#define TLB_SPIN_COUNT 10
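+
+/*
+ * With the doubling backoff in __arm_smmu_tlb_sync() below, the udelay()
+ * periods of 1, 2, 4, ... us sum to roughly 2^20 us before the loop gives
+ * up, which is where the "1s!" above comes from.
+ */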
+
+/* SMMU global address space */
+#define ARM_SMMU_GR0(smmu) ((smmu)->base)
+#define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift))
+
+/*
+ * SMMU global address space with conditional offset to access secure
+ * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
+ * nsGFSYNR0: 0x450)
+ */
+#define ARM_SMMU_GR0_NS(smmu) \
+ ((smmu)->base + \
+ ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \
+ ? 0x400 : 0))
+
+/*
+ * Some 64-bit registers only make sense to write atomically, but in such
+ * cases all the data relevant to AArch32 formats lies within the lower word,
+ * therefore this actually makes more sense than it might first appear.
+ */
+#ifdef CONFIG_64BIT
+#define smmu_write_atomic_lq writeq_relaxed
+#else
+#define smmu_write_atomic_lq writel_relaxed
+#endif
+
+/* Translation context bank */
+#define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift))
+
+#define MSI_IOVA_BASE 0x8000000
+#define MSI_IOVA_LENGTH 0x100000
+
+#define s2cr_init_val (struct arm_smmu_s2cr){ \
+ .type = smmu->disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS, \
+}
+
+struct arm_smmu_master_cfg {
+ struct arm_smmu_device *smmu;
+ s16 smendx[];
+};
+#define INVALID_SMENDX -1
+#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
+#define fwspec_smmu(fw) (__fwspec_cfg(fw)->smmu)
+#define fwspec_smendx(fw, i) \
+ (i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
+#define for_each_cfg_sme(fw, i, idx) \
+ for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
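+
+/*
+ * Illustrative use of the accessors above -- a sketch of how a caller
+ * typically walks a master's stream map entries (idx reads back as
+ * INVALID_SMENDX until an SME has been allocated for ID i):
+ *
+ *	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ *	int i, idx;
+ *
+ *	for_each_cfg_sme(fwspec, i, idx) {
+ *		if (idx != INVALID_SMENDX)
+ *			arm_smmu_write_sme(fwspec_smmu(fwspec), idx);
+ *	}
+ */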
+
+enum arm_smmu_context_fmt {
+ ARM_SMMU_CTX_FMT_NONE,
+ ARM_SMMU_CTX_FMT_AARCH64,
+ ARM_SMMU_CTX_FMT_AARCH32_L,
+ ARM_SMMU_CTX_FMT_AARCH32_S,
+};
+
+struct arm_smmu_cfg {
+ u8 cbndx;
+ u8 irptndx;
+ union {
+ u16 asid;
+ u16 vmid;
+ };
+ u32 cbar;
+ enum arm_smmu_context_fmt fmt;
+};
+#define INVALID_IRPTNDX 0xff
+
+enum arm_smmu_domain_stage {
+ ARM_SMMU_DOMAIN_S1 = 0,
+ ARM_SMMU_DOMAIN_S2,
+ ARM_SMMU_DOMAIN_NESTED,
+ ARM_SMMU_DOMAIN_BYPASS,
+};
+
+struct arm_smmu_domain {
+ struct arm_smmu_device *smmu;
+ struct io_pgtable_ops *pgtbl_ops;
+ const struct iommu_gather_ops *tlb_ops;
+ struct arm_smmu_cfg cfg;
+ enum arm_smmu_domain_stage stage;
+ bool non_strict;
+ struct mutex init_mutex; /* Protects smmu pointer */
+ /* Serialises ATS1* ops and TLB syncs */
+ spinlock_t cb_lock;
+ struct iommu_domain domain;
+};
+
+static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
+{
+ return container_of(dom, struct arm_smmu_domain, domain);
+}
+
+static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
+{
+ int idx;
+
+ do {
+ idx = find_next_zero_bit(map, end, start);
+ if (idx == end)
+ return -ENOSPC;
+ } while (test_and_set_bit(idx, map));
+
+ return idx;
+}
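+
+/*
+ * Note on the allocator above: find_next_zero_bit() runs with no lock held,
+ * so a racing CPU may claim the returned index first; test_and_set_bit()
+ * then returns non-zero and the loop simply rescans the map.
+ */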
+
+static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
+{
+ clear_bit(idx, map);
+}
+
+/* Wait for any pending TLB invalidations to complete */
+static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
+ void __iomem *sync, void __iomem *status)
+{
+ unsigned int spin_cnt, delay;
+
+ writel_relaxed(0, sync);
+ for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
+ for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
+ if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
+ return;
+ cpu_relax();
+ }
+ udelay(delay);
+ }
+ dev_err_ratelimited(smmu->dev,
+ "TLB sync timed out -- SMMU may be deadlocked\n");
+}
+
+static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
+{
+ void __iomem *base = ARM_SMMU_GR0(smmu);
+ unsigned long flags;
+
+ spin_lock_irqsave(&smmu->global_sync_lock, flags);
+ __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
+ base + ARM_SMMU_GR0_sTLBGSTATUS);
+ spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
+}
+
+static void arm_smmu_tlb_sync_context(void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
+ unsigned long flags;
+
+ spin_lock_irqsave(&smmu_domain->cb_lock, flags);
+ __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
+ base + ARM_SMMU_CB_TLBSTATUS);
+ spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
+}
+
+static void arm_smmu_tlb_sync_vmid(void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+
+ arm_smmu_tlb_sync_global(smmu_domain->smmu);
+}
+
+static void arm_smmu_tlb_inv_context_s1(void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
+
+ /*
+ * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
+ * cleared by the current CPU are visible to the SMMU before the TLBI.
+ */
+ writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
+ arm_smmu_tlb_sync_context(cookie);
+}
+
+static void arm_smmu_tlb_inv_context_s2(void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ void __iomem *base = ARM_SMMU_GR0(smmu);
+
+ /* NOTE: see above */
+ writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+ arm_smmu_tlb_sync_global(smmu);
+}
+
+static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
+ size_t granule, bool leaf, void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
+ void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
+
+ if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ wmb();
+
+ if (stage1) {
+ reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
+
+ if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
+ iova &= ~12UL;
+ iova |= cfg->asid;
+ do {
+ writel_relaxed(iova, reg);
+ iova += granule;
+ } while (size -= granule);
+ } else {
+ iova >>= 12;
+ iova |= (u64)cfg->asid << 48;
+ do {
+ writeq_relaxed(iova, reg);
+ iova += granule >> 12;
+ } while (size -= granule);
+ }
+ } else {
+ reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
+ ARM_SMMU_CB_S2_TLBIIPAS2;
+ iova >>= 12;
+ do {
+ smmu_write_atomic_lq(iova, reg);
+ iova += granule >> 12;
+ } while (size -= granule);
+ }
+}
+
+/*
+ * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
+ * almost negligible, but the benefit of getting the first one in as far ahead
+ * of the sync as possible is significant, hence we don't just make this a
+ * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might
+ * think.
+ */
+static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
+ size_t granule, bool leaf, void *cookie)
+{
+ struct arm_smmu_domain *smmu_domain = cookie;
+ void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
+
+ if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ wmb();
+
+ writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+}
+
+static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s1,
+ .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
+ .tlb_sync = arm_smmu_tlb_sync_context,
+};
+
+static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
+ .tlb_add_flush = arm_smmu_tlb_inv_range_nosync,
+ .tlb_sync = arm_smmu_tlb_sync_context,
+};
+
+static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
+ .tlb_flush_all = arm_smmu_tlb_inv_context_s2,
+ .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync,
+ .tlb_sync = arm_smmu_tlb_sync_vmid,
+};
+
+irqreturn_t arm_smmu_context_fault(int irq, void *dev)
+{
+ u32 fsr, fsynr;
+ unsigned long iova;
+ struct iommu_domain *domain = dev;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ void __iomem *cb_base;
+
+ cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
+ fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);
+
+ if (!(fsr & FSR_FAULT))
+ return IRQ_NONE;
+
+ fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
+ iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
+
+ dev_err_ratelimited(smmu->dev,
+ "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
+ fsr, iova, fsynr, cfg->cbndx);
+
+ writel(fsr, cb_base + ARM_SMMU_CB_FSR);
+ return IRQ_HANDLED;
+}
+
+irqreturn_t arm_smmu_global_fault(int irq, void *dev)
+{
+ u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
+ struct arm_smmu_device *smmu = dev;
+ void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
+
+ gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
+ gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
+ gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
+ gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
+
+ if (!gfsr)
+ return IRQ_NONE;
+
+ dev_err_ratelimited(smmu->dev,
+ "Unexpected global fault, this could be serious\n");
+ dev_err_ratelimited(smmu->dev,
+ "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
+ gfsr, gfsynr0, gfsynr1, gfsynr2);
+
+ writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
+ return IRQ_HANDLED;
+}
+
+static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
+ struct io_pgtable_cfg *pgtbl_cfg)
+{
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
+ bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
+
+ cb->cfg = cfg;
+
+ /* TTBCR */
+ if (stage1) {
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+ cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
+ } else {
+ cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
+ cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
+ cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+ cb->tcr[1] |= TTBCR2_AS;
+ }
+ } else {
+ cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
+ }
+
+ /* TTBRs */
+ if (stage1) {
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+ cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
+ cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
+ } else {
+ cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+ cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+ cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+ cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
+ }
+ } else {
+ cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
+ }
+
+ /* MAIRs (stage-1 only) */
+ if (stage1) {
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+ cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
+ cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
+ } else {
+ cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
+ cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
+ }
+ }
+}
+
+static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
+{
+ u32 reg;
+ bool stage1;
+ struct arm_smmu_cb *cb = &smmu->cbs[idx];
+ struct arm_smmu_cfg *cfg = cb->cfg;
+ void __iomem *cb_base, *gr1_base;
+
+ cb_base = ARM_SMMU_CB(smmu, idx);
+
+ /* Unassigned context banks only need disabling */
+ if (!cfg) {
+ writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
+ return;
+ }
+
+ gr1_base = ARM_SMMU_GR1(smmu);
+ stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
+
+ /* CBA2R */
+ if (smmu->version > ARM_SMMU_V1) {
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+ reg = CBA2R_RW64_64BIT;
+ else
+ reg = CBA2R_RW64_32BIT;
+ /* 16-bit VMIDs live in CBA2R */
+ if (smmu->features & ARM_SMMU_FEAT_VMID16)
+ reg |= cfg->vmid << CBA2R_VMID_SHIFT;
+
+ writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
+ }
+
+ /* CBAR */
+ reg = cfg->cbar;
+ if (smmu->version < ARM_SMMU_V2)
+ reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
+
+ /*
+ * Use the weakest shareability/memory types, so they are
+ * overridden by the ttbcr/pte.
+ */
+ if (stage1) {
+ reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
+ (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
+ } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
+ /* 8-bit VMIDs live in CBAR */
+ reg |= cfg->vmid << CBAR_VMID_SHIFT;
+ }
+ writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
+
+ /*
+ * TTBCR
+ * We must write this before the TTBRs, since it determines the
+ * access behaviour of some fields (in particular, ASID[15:8]).
+ */
+ if (stage1 && smmu->version > ARM_SMMU_V1)
+ writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
+ writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
+
+ /* TTBRs */
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+ writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
+ writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
+ writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
+ } else {
+ writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
+ if (stage1)
+ writeq_relaxed(cb->ttbr[1],
+ cb_base + ARM_SMMU_CB_TTBR1);
+ }
+
+ /* MAIRs (stage-1 only) */
+ if (stage1) {
+ writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
+ writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
+ }
+
+ /* SCTLR */
+ reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
+ if (stage1)
+ reg |= SCTLR_S1_ASIDPNE;
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ reg |= SCTLR_E;
+
+ writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
+}
+
+static int arm_smmu_init_domain_context(struct iommu_domain *domain,
+ struct arm_smmu_device *smmu)
+{
+ int irq, start, ret = 0;
+ unsigned long ias, oas;
+ struct io_pgtable_ops *pgtbl_ops;
+ struct io_pgtable_cfg pgtbl_cfg;
+ enum io_pgtable_fmt fmt;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+
+ mutex_lock(&smmu_domain->init_mutex);
+ if (smmu_domain->smmu)
+ goto out_unlock;
+
+ if (domain->type == IOMMU_DOMAIN_IDENTITY) {
+ smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
+ smmu_domain->smmu = smmu;
+ goto out_unlock;
+ }
+
+ /*
+ * Mapping the requested stage onto what we support is surprisingly
+ * complicated, mainly because the spec allows S1+S2 SMMUs without
+ * support for nested translation. That means we end up with the
+ * following table:
+ *
+ * Requested Supported Actual
+ * S1 N S1
+ * S1 S1+S2 S1
+ * S1 S2 S2
+ * S1 S1 S1
+ * N N N
+ * N S1+S2 S2
+ * N S2 S2
+ * N S1 S1
+ *
+ * Note that you can't actually request stage-2 mappings.
+ */
+ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+
+ /*
+ * Choosing a suitable context format is even more fiddly. Until we
+ * grow some way for the caller to express a preference, and/or move
+ * the decision into the io-pgtable code where it arguably belongs,
+ * just aim for the closest thing to the rest of the system, and hope
+ * that the hardware isn't esoteric enough that we can't assume AArch64
+ * support to be a superset of AArch32 support...
+ */
+ if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
+ cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
+ if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
+ !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
+ (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
+ (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
+ cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
+ if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
+ (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
+ ARM_SMMU_FEAT_FMT_AARCH64_16K |
+ ARM_SMMU_FEAT_FMT_AARCH64_4K)))
+ cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
+
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ switch (smmu_domain->stage) {
+ case ARM_SMMU_DOMAIN_S1:
+ cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
+ start = smmu->num_s2_context_banks;
+ ias = smmu->va_size;
+ oas = smmu->ipa_size;
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
+ fmt = ARM_64_LPAE_S1;
+ } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
+ fmt = ARM_32_LPAE_S1;
+ ias = min(ias, 32UL);
+ oas = min(oas, 40UL);
+ } else {
+ fmt = ARM_V7S;
+ ias = min(ias, 32UL);
+ oas = min(oas, 32UL);
+ }
+ smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
+ break;
+ case ARM_SMMU_DOMAIN_NESTED:
+ /*
+ * We will likely want to change this if/when KVM gets
+ * involved.
+ */
+ case ARM_SMMU_DOMAIN_S2:
+ cfg->cbar = CBAR_TYPE_S2_TRANS;
+ start = 0;
+ ias = smmu->ipa_size;
+ oas = smmu->pa_size;
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
+ fmt = ARM_64_LPAE_S2;
+ } else {
+ fmt = ARM_32_LPAE_S2;
+ ias = min(ias, 40UL);
+ oas = min(oas, 40UL);
+ }
+ if (smmu->version == ARM_SMMU_V2)
+ smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+ else
+ smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+ break;
+ default:
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
+ smmu->num_context_banks);
+ if (ret < 0)
+ goto out_unlock;
+
+ cfg->cbndx = ret;
+ if (smmu->version < ARM_SMMU_V2) {
+ cfg->irptndx = atomic_inc_return(&smmu->irptndx);
+ cfg->irptndx %= smmu->num_context_irqs;
+ } else {
+ cfg->irptndx = cfg->cbndx;
+ }
+
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
+ cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
+ else
+ cfg->asid = cfg->cbndx + smmu->cavium_id_base;
+
+ pgtbl_cfg = (struct io_pgtable_cfg) {
+ .pgsize_bitmap = smmu->pgsize_bitmap,
+ .ias = ias,
+ .oas = oas,
+ .tlb = smmu_domain->tlb_ops,
+ .iommu_dev = smmu->dev,
+ };
+
+ if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+
+ if (smmu_domain->non_strict)
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
+ smmu_domain->smmu = smmu;
+ pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
+ if (!pgtbl_ops) {
+ ret = -ENOMEM;
+ goto out_clear_smmu;
+ }
+
+ /* Update the domain's page sizes to reflect the page table format */
+ domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+ domain->geometry.aperture_end = (1UL << ias) - 1;
+ domain->geometry.force_aperture = true;
+
+ /* Initialise the context bank with our page table cfg */
+ arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
+ arm_smmu_write_context_bank(smmu, cfg->cbndx);
+
+ /*
+ * Request context fault interrupt. Do this last to avoid the
+ * handler seeing a half-initialised domain state.
+ */
+ irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
+ ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
+ IRQF_SHARED, "arm-smmu-context-fault", domain);
+ if (ret < 0) {
+ dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
+ cfg->irptndx, irq);
+ cfg->irptndx = INVALID_IRPTNDX;
+ }
+
+ mutex_unlock(&smmu_domain->init_mutex);
+
+ /* Publish page table ops for map/unmap */
+ smmu_domain->pgtbl_ops = pgtbl_ops;
+ return 0;
+
+out_clear_smmu:
+ smmu_domain->smmu = NULL;
+out_unlock:
+ mutex_unlock(&smmu_domain->init_mutex);
+ return ret;
+}
+
+static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ int irq;
+
+ if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
+ return;
+
+ /*
+ * Disable the context bank and free the page tables before freeing
+ * it.
+ */
+ smmu->cbs[cfg->cbndx].cfg = NULL;
+ arm_smmu_write_context_bank(smmu, cfg->cbndx);
+
+ if (cfg->irptndx != INVALID_IRPTNDX) {
+ irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
+ devm_free_irq(smmu->dev, irq, domain);
+ }
+
+ free_io_pgtable_ops(smmu_domain->pgtbl_ops);
+ __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+}
+
+struct iommu_domain *arm_smmu_domain_alloc_common(unsigned int type,
+ bool using_legacy_binding)
+{
+ struct arm_smmu_domain *smmu_domain;
+
+ if (type != IOMMU_DOMAIN_UNMANAGED &&
+ type != IOMMU_DOMAIN_DMA &&
+ type != IOMMU_DOMAIN_IDENTITY)
+ return NULL;
+ /*
+ * Allocate the domain and initialise some of its data structures.
+ * We can't really do anything meaningful until we've added a
+ * master.
+ */
+ smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
+ if (!smmu_domain)
+ return NULL;
+
+ if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
+ iommu_get_dma_cookie(&smmu_domain->domain))) {
+ kfree(smmu_domain);
+ return NULL;
+ }
+
+ mutex_init(&smmu_domain->init_mutex);
+ spin_lock_init(&smmu_domain->cb_lock);
+
+ return &smmu_domain->domain;
+}
+
+void arm_smmu_domain_free(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ /*
+ * Free the domain resources. We assume that all devices have
+ * already been detached.
+ */
+ iommu_put_dma_cookie(domain);
+ arm_smmu_destroy_domain_context(domain);
+ kfree(smmu_domain);
+}
+
+static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
+{
+ struct arm_smmu_smr *smr = smmu->smrs + idx;
+ u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
+
+ if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
+ reg |= SMR_VALID;
+ writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
+}
+
+static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
+{
+ struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
+ u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
+ (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
+ (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
+
+ if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
+ smmu->smrs[idx].valid)
+ reg |= S2CR_EXIDVALID;
+ writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
+}
+
+static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
+{
+ arm_smmu_write_s2cr(smmu, idx);
+ if (smmu->smrs)
+ arm_smmu_write_smr(smmu, idx);
+}
+
+/*
+ * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
+ * should be called after sCR0 is written.
+ */
+void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
+{
+ void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+ u32 smr;
+
+ if (!smmu->smrs)
+ return;
+
+ /*
+ * SMR.ID bits may not be preserved if the corresponding MASK
+ * bits are set, so check each one separately. We can reject
+ * masters later if they try to claim IDs outside these masks.
+ */
+ smr = smmu->streamid_mask << SMR_ID_SHIFT;
+ writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
+ smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
+ smmu->streamid_mask = smr >> SMR_ID_SHIFT;
+
+ smr = smmu->streamid_mask << SMR_MASK_SHIFT;
+ writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
+ smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
+ smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
+}
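+
+/*
+ * Worked example with hypothetical hardware: if only 10 of the ID bits are
+ * actually implemented, the all-ones pattern written above reads back as
+ * 0x3ff, so streamid_mask becomes 0x3ff and masters claiming stream IDs
+ * above it are rejected later in arm_smmu_add_device_common().
+ */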
+
+static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
+{
+ struct arm_smmu_smr *smrs = smmu->smrs;
+ int i, free_idx = -ENOSPC;
+
+ /* Stream indexing is blissfully easy */
+ if (!smrs)
+ return id;
+
+ /* Validating SMRs is... less so */
+ for (i = 0; i < smmu->num_mapping_groups; ++i) {
+ if (!smrs[i].valid) {
+ /*
+ * Note the first free entry we come across, which
+ * we'll claim in the end if nothing else matches.
+ */
+ if (free_idx < 0)
+ free_idx = i;
+ continue;
+ }
+ /*
+ * If the new entry is _entirely_ matched by an existing entry,
+ * then reuse that, with the guarantee that there also cannot
+ * be any subsequent conflicting entries. In normal use we'd
+ * expect simply identical entries for this case, but there's
+ * no harm in accommodating the generalisation.
+ */
+ if ((mask & smrs[i].mask) == mask &&
+ !((id ^ smrs[i].id) & ~smrs[i].mask))
+ return i;
+ /*
+ * If the new entry has any other overlap with an existing one,
+ * though, then there always exists at least one stream ID
+ * which would cause a conflict, and we can't allow that risk.
+ */
+ if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
+ return -EINVAL;
+ }
+
+ return free_idx;
+}
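+
+/*
+ * Worked example for the checks above, using hypothetical values: an
+ * existing SMR of id=0x100/mask=0x00f matches streams 0x100-0x10f. A new
+ * entry id=0x104/mask=0x003 (streams 0x104-0x107) is entirely contained,
+ * so both tests pass and the existing index is reused. A new entry
+ * id=0x10c/mask=0x013 (streams 0x10c-0x10f plus 0x11c-0x11f) overlaps only
+ * partially, so the final check rejects it with -EINVAL.
+ */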
+
+static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
+{
+ if (--smmu->s2crs[idx].count)
+ return false;
+
+ smmu->s2crs[idx] = s2cr_init_val;
+ if (smmu->smrs)
+ smmu->smrs[idx].valid = false;
+
+ return true;
+}
+
+static int arm_smmu_master_alloc_smes(struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
+ struct arm_smmu_device *smmu = cfg->smmu;
+ struct arm_smmu_smr *smrs = smmu->smrs;
+ struct iommu_group *group;
+ int i, idx, ret;
+
+ mutex_lock(&smmu->stream_map_mutex);
+ /* Figure out a viable stream map entry allocation */
+ for_each_cfg_sme(fwspec, i, idx) {
+ u16 sid = fwspec->ids[i];
+ u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
+
+ if (idx != INVALID_SMENDX) {
+ ret = -EEXIST;
+ goto out_err;
+ }
+
+ ret = arm_smmu_find_sme(smmu, sid, mask);
+ if (ret < 0)
+ goto out_err;
+
+ idx = ret;
+ if (smrs && smmu->s2crs[idx].count == 0) {
+ smrs[idx].id = sid;
+ smrs[idx].mask = mask;
+ smrs[idx].valid = true;
+ }
+ smmu->s2crs[idx].count++;
+ cfg->smendx[i] = (s16)idx;
+ }
+
+ group = iommu_group_get_for_dev(dev);
+ if (!group)
+ group = ERR_PTR(-ENOMEM);
+ if (IS_ERR(group)) {
+ ret = PTR_ERR(group);
+ goto out_err;
+ }
+ iommu_group_put(group);
+
+ /* It worked! Now, poke the actual hardware */
+ for_each_cfg_sme(fwspec, i, idx) {
+ arm_smmu_write_sme(smmu, idx);
+ smmu->s2crs[idx].group = group;
+ }
+
+ mutex_unlock(&smmu->stream_map_mutex);
+ return 0;
+
+out_err:
+ while (i--) {
+ arm_smmu_free_sme(smmu, cfg->smendx[i]);
+ cfg->smendx[i] = INVALID_SMENDX;
+ }
+ mutex_unlock(&smmu->stream_map_mutex);
+ return ret;
+}
+
+static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
+{
+ struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+ struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
+ int i, idx;
+
+ mutex_lock(&smmu->stream_map_mutex);
+ for_each_cfg_sme(fwspec, i, idx) {
+ if (arm_smmu_free_sme(smmu, idx))
+ arm_smmu_write_sme(smmu, idx);
+ cfg->smendx[i] = INVALID_SMENDX;
+ }
+ mutex_unlock(&smmu->stream_map_mutex);
+}
+
+static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
+ struct iommu_fwspec *fwspec)
+{
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_s2cr *s2cr = smmu->s2crs;
+ u8 cbndx = smmu_domain->cfg.cbndx;
+ enum arm_smmu_s2cr_type type;
+ int i, idx;
+
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
+ type = S2CR_TYPE_BYPASS;
+ else
+ type = S2CR_TYPE_TRANS;
+
+ for_each_cfg_sme(fwspec, i, idx) {
+ if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
+ continue;
+
+ s2cr[idx].type = type;
+ s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
+ s2cr[idx].cbndx = cbndx;
+ arm_smmu_write_s2cr(smmu, idx);
+ }
+ return 0;
+}
+
+int arm_smmu_attach_dev_common(struct iommu_domain *domain,
+ struct device *dev, struct iommu_ops *ops)
+{
+ int ret;
+ struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct arm_smmu_device *smmu;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ if (!fwspec || fwspec->ops != ops) {
+ dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
+ return -ENXIO;
+ }
+
+ /*
+ * FIXME: The arch/arm DMA API code tries to attach devices to its own
+ * domains between of_xlate() and add_device() - we have no way to cope
+ * with that, so until ARM gets converted to rely on groups and default
+ * domains, just say no (but more politely than by dereferencing NULL).
+ * This should be at least a WARN_ON once that's sorted.
+ */
+ if (!fwspec->iommu_priv)
+ return -ENODEV;
+
+ smmu = fwspec_smmu(fwspec);
+ /* Ensure that the domain is finalised */
+ ret = arm_smmu_init_domain_context(domain, smmu);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Sanity check the domain. We don't support domains across
+ * different SMMUs.
+ */
+ if (smmu_domain->smmu != smmu) {
+ dev_err(dev,
+ "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
+ dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
+ return -EINVAL;
+ }
+
+ /* Looks ok, so add the device to the domain */
+ return arm_smmu_domain_add_master(smmu_domain, fwspec);
+}
+
+int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot)
+{
+ struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+
+ if (!ops)
+ return -ENODEV;
+
+ return ops->map(ops, iova, paddr, size, prot);
+}
+
+size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
+{
+ struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
+
+ if (!ops)
+ return 0;
+
+ return ops->unmap(ops, iova, size);
+}
+
+void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ if (smmu_domain->tlb_ops)
+ smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+}
+
+void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ if (smmu_domain->tlb_ops)
+ smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+}
+
+static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
+ dma_addr_t iova)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+ struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+ struct device *dev = smmu->dev;
+ void __iomem *cb_base;
+ u32 tmp;
+ u64 phys;
+ unsigned long va, flags;
+
+ cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
+
+ spin_lock_irqsave(&smmu_domain->cb_lock, flags);
+ /* ATS1 registers can only be written atomically */
+ va = iova & ~0xfffUL;
+ if (smmu->version == ARM_SMMU_V2)
+ smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
+ else /* Register is only 32-bit in v1 */
+ writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
+
+ if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
+ !(tmp & ATSR_ACTIVE), 5, 50)) {
+ spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
+ dev_err(dev,
+ "iova to phys timed out on %pad. Falling back to software table walk.\n",
+ &iova);
+ return ops->iova_to_phys(ops, iova);
+ }
+
+ phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
+ spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
+ if (phys & CB_PAR_F) {
+ dev_err(dev, "translation fault!\n");
+ dev_err(dev, "PAR = 0x%llx\n", phys);
+ return 0;
+ }
+
+ return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
+}
+
+phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+
+ if (domain->type == IOMMU_DOMAIN_IDENTITY)
+ return iova;
+
+ if (!ops)
+ return 0;
+
+ if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
+ smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
+ return arm_smmu_iova_to_phys_hard(domain, iova);
+
+ return ops->iova_to_phys(ops, iova);
+}
+
+bool arm_smmu_capable(enum iommu_cap cap)
+{
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ /*
+ * Return true here as the SMMU can always send out coherent
+ * requests.
+ */
+ return true;
+ case IOMMU_CAP_NOEXEC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+int arm_smmu_add_device_common(struct device *dev, struct arm_smmu_device *smmu)
+{
+ struct arm_smmu_master_cfg *cfg;
+ struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ int i, ret = -EINVAL;
+
+ for (i = 0; i < fwspec->num_ids; i++) {
+ u16 sid = fwspec->ids[i];
+ u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
+
+ if (sid & ~smmu->streamid_mask) {
+ dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
+ sid, smmu->streamid_mask);
+ goto out_free;
+ }
+ if (mask & ~smmu->smr_mask_mask) {
+ dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
+ mask, smmu->smr_mask_mask);
+ goto out_free;
+ }
+ }
+
+ ret = -ENOMEM;
+ cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
+ GFP_KERNEL);
+ if (!cfg)
+ goto out_free;
+
+ cfg->smmu = smmu;
+ fwspec->iommu_priv = cfg;
+ while (i--)
+ cfg->smendx[i] = INVALID_SMENDX;
+
+ ret = arm_smmu_master_alloc_smes(dev);
+ if (ret)
+ goto out_cfg_free;
+
+ iommu_device_link(&smmu->iommu, dev);
+
+ return 0;
+
+out_cfg_free:
+ kfree(cfg);
+out_free:
+ iommu_fwspec_free(dev);
+ return ret;
+}
+
+void arm_smmu_remove_device_common(struct device *dev, struct iommu_ops *ops)
+{
+ struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct arm_smmu_master_cfg *cfg;
+ struct arm_smmu_device *smmu;
+
+ if (!fwspec || fwspec->ops != ops)
+ return;
+
+ cfg = fwspec->iommu_priv;
+ smmu = cfg->smmu;
+
+ iommu_device_unlink(&smmu->iommu, dev);
+ arm_smmu_master_free_smes(fwspec);
+ iommu_group_remove_device(dev);
+ kfree(fwspec->iommu_priv);
+ iommu_fwspec_free(dev);
+}
+
+struct iommu_group *arm_smmu_device_group(struct device *dev)
+{
+ struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+ struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+ struct iommu_group *group = NULL;
+ int i, idx;
+
+ for_each_cfg_sme(fwspec, i, idx) {
+ if (group && smmu->s2crs[idx].group &&
+ group != smmu->s2crs[idx].group)
+ return ERR_PTR(-EINVAL);
+
+ group = smmu->s2crs[idx].group;
+ }
+
+ if (group)
+ return iommu_group_ref_get(group);
+
+ if (dev_is_pci(dev))
+ group = pci_device_group(dev);
+ else if (dev_is_fsl_mc(dev))
+ group = fsl_mc_device_group(dev);
+ else
+ group = generic_device_group(dev);
+
+ return group;
+}
+
+int arm_smmu_domain_get_attr(struct iommu_domain *domain,
+ enum iommu_attr attr, void *data)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ switch (domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ *(int *)data = (smmu_domain->stage ==
+ ARM_SMMU_DOMAIN_NESTED);
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ *(int *)data = smmu_domain->non_strict;
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+}
+
+int arm_smmu_domain_set_attr(struct iommu_domain *domain,
+ enum iommu_attr attr, void *data)
+{
+ int ret = 0;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ mutex_lock(&smmu_domain->init_mutex);
+
+ switch (domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ if (smmu_domain->smmu) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (*(int *)data)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ smmu_domain->non_strict = *(int *)data;
+ break;
+ default:
+ ret = -ENODEV;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ }
+out_unlock:
+ mutex_unlock(&smmu_domain->init_mutex);
+ return ret;
+}
+
+int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+ u32 mask, fwid = 0;
+
+ if (args->args_count > 0)
+ fwid |= (u16)args->args[0];
+
+ if (args->args_count > 1)
+ fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
+ else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
+ fwid |= (u16)mask << SMR_MASK_SHIFT;
+
+ return iommu_fwspec_add_ids(dev, &fwid, 1);
+}
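+
+/*
+ * Example with hypothetical DT values: "iommus = <&smmu 0x20 0x7>" arrives
+ * here as args[0] = 0x20 (stream ID) and args[1] = 0x7 (SMR mask), so the
+ * fwid registered above is 0x20 | (0x7 << SMR_MASK_SHIFT) = 0x00070020.
+ */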
+
+void arm_smmu_get_resv_regions(struct device *dev,
+ struct list_head *head)
+{
+ struct iommu_resv_region *region;
+ int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+ region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
+ prot, IOMMU_RESV_SW_MSI);
+ if (!region)
+ return;
+
+	list_add_tail(&region->list, head);
+
+ iommu_dma_get_resv_regions(dev, head);
+}
+
+void arm_smmu_put_resv_regions(struct device *dev,
+ struct list_head *head)
+{
+ struct iommu_resv_region *entry, *next;
+
+ list_for_each_entry_safe(entry, next, head, list)
+ kfree(entry);
+}
+
+void arm_smmu_device_reset(struct arm_smmu_device *smmu)
+{
+ void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+ int i;
+ u32 reg, major;
+
+ /* clear global FSR */
+ reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
+ writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
+
+ /*
+ * Reset stream mapping groups: Initial values mark all SMRn as
+ * invalid and all S2CRn as bypass unless overridden.
+ */
+ for (i = 0; i < smmu->num_mapping_groups; ++i)
+ arm_smmu_write_sme(smmu, i);
+
+ if (smmu->model == ARM_MMU500) {
+ /*
+ * Before clearing ARM_MMU500_ACTLR_CPRE, need to
+ * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
+ * bit is only present in MMU-500r2 onwards.
+ */
+ reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
+ major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
+ reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
+ if (major >= 2)
+ reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
+ /*
+ * Allow unmatched Stream IDs to allocate bypass
+ * TLB entries for reduced latency.
+ */
+ reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
+ writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
+ }
+
+ /* Make sure all context banks are disabled and clear CB_FSR */
+ for (i = 0; i < smmu->num_context_banks; ++i) {
+ void __iomem *cb_base = ARM_SMMU_CB(smmu, i);
+
+ arm_smmu_write_context_bank(smmu, i);
+ writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
+ /*
+ * Disable MMU-500's not-particularly-beneficial next-page
+ * prefetcher for the sake of errata #841119 and #826419.
+ */
+ if (smmu->model == ARM_MMU500) {
+ reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
+ reg &= ~ARM_MMU500_ACTLR_CPRE;
+ writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
+ }
+ }
+
+ /* Invalidate the TLB, just in case */
+ writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
+ writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);
+
+ reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+
+ /* Enable fault reporting */
+ reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
+
+ /* Disable TLB broadcasting. */
+ reg |= (sCR0_VMIDPNE | sCR0_PTM);
+
+ /* Enable client access, handling unmatched streams as appropriate */
+ reg &= ~sCR0_CLIENTPD;
+ if (smmu->disable_bypass)
+ reg |= sCR0_USFCFG;
+ else
+ reg &= ~sCR0_USFCFG;
+
+ /* Disable forced broadcasting */
+ reg &= ~sCR0_FB;
+
+ /* Don't upgrade barriers */
+ reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
+
+ if (smmu->features & ARM_SMMU_FEAT_VMID16)
+ reg |= sCR0_VMID16EN;
+
+ if (smmu->features & ARM_SMMU_FEAT_EXIDS)
+ reg |= sCR0_EXIDENABLE;
+
+ /* Push the button */
+ arm_smmu_tlb_sync_global(smmu);
+ writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+}
+
+static int arm_smmu_id_size_to_bits(int size)
+{
+ switch (size) {
+ case 0:
+ return 32;
+ case 1:
+ return 36;
+ case 2:
+ return 40;
+ case 3:
+ return 42;
+ case 4:
+ return 44;
+ case 5:
+ default:
+ return 48;
+ }
+}
+
+int arm_smmu_device_cfg_probe_common(struct arm_smmu_device *smmu,
+ struct iommu_ops *ops, atomic_t *context_count, int force_stage)
+{
+ unsigned long size;
+ void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+ u32 id;
+ bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
+ int i;
+
+ dev_notice(smmu->dev, "probing hardware configuration...\n");
+ dev_notice(smmu->dev, "SMMUv%d with:\n",
+ smmu->version == ARM_SMMU_V2 ? 2 : 1);
+
+ /* ID0 */
+ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
+
+ /* Restrict available stages based on module parameter */
+ if (force_stage == 1)
+ id &= ~(ID0_S2TS | ID0_NTS);
+ else if (force_stage == 2)
+ id &= ~(ID0_S1TS | ID0_NTS);
+
+ if (id & ID0_S1TS) {
+ smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
+ dev_notice(smmu->dev, "\tstage 1 translation\n");
+ }
+
+ if (id & ID0_S2TS) {
+ smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
+ dev_notice(smmu->dev, "\tstage 2 translation\n");
+ }
+
+ if (id & ID0_NTS) {
+ smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
+ dev_notice(smmu->dev, "\tnested translation\n");
+ }
+
+ if (!(smmu->features &
+ (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
+ dev_err(smmu->dev, "\tno translation support!\n");
+ return -ENODEV;
+ }
+
+ if ((id & ID0_S1TS) &&
+ ((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
+ smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
+ dev_notice(smmu->dev, "\taddress translation ops\n");
+ }
+
+ /*
+ * In order for DMA API calls to work properly, we must defer to what
+ * the FW says about coherency, regardless of what the hardware claims.
+ * Fortunately, this also opens up a workaround for systems where the
+ * ID register value has ended up configured incorrectly.
+ */
+ cttw_reg = !!(id & ID0_CTTW);
+ if (cttw_fw || cttw_reg)
+ dev_notice(smmu->dev, "\t%scoherent table walk\n",
+ cttw_fw ? "" : "non-");
+ if (cttw_fw != cttw_reg)
+ dev_notice(smmu->dev,
+ "\t(IDR0.CTTW overridden by FW configuration)\n");
+
+ /* Max. number of entries we have for stream matching/indexing */
+ if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
+ smmu->features |= ARM_SMMU_FEAT_EXIDS;
+ size = 1 << 16;
+ } else {
+ size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
+ }
+ smmu->streamid_mask = size - 1;
+ if (id & ID0_SMS) {
+ smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
+ size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
+ if (size == 0) {
+ dev_err(smmu->dev,
+ "stream-matching supported, but no SMRs present!\n");
+ return -ENODEV;
+ }
+
+ /* Zero-initialised to mark as invalid */
+ smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
+ GFP_KERNEL);
+ if (!smmu->smrs)
+ return -ENOMEM;
+
+ dev_notice(smmu->dev,
+ "\tstream matching with %lu register groups", size);
+ }
+ /* s2cr->type == 0 means translation, so initialise explicitly */
+ smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
+ GFP_KERNEL);
+ if (!smmu->s2crs)
+ return -ENOMEM;
+ for (i = 0; i < size; i++)
+ smmu->s2crs[i] = s2cr_init_val;
+
+ smmu->num_mapping_groups = size;
+ mutex_init(&smmu->stream_map_mutex);
+ spin_lock_init(&smmu->global_sync_lock);
+
+ if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
+ smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
+ if (!(id & ID0_PTFS_NO_AARCH32S))
+ smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
+ }
+
+ /* ID1 */
+ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
+ smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
+
+ /* Check for size mismatch of SMMU address space from mapped region */
+ size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
+ size <<= smmu->pgshift;
+ if (smmu->cb_base != gr0_base + size)
+ dev_warn(smmu->dev,
+ "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
+ size * 2, (smmu->cb_base - gr0_base) * 2);
+
+ smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
+ smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
+ if (smmu->num_s2_context_banks > smmu->num_context_banks) {
+ dev_err(smmu->dev, "impossible number of S2 context banks!\n");
+ return -ENODEV;
+ }
+ dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
+ smmu->num_context_banks, smmu->num_s2_context_banks);
+ /*
+ * Cavium CN88xx erratum #27704.
+ * Ensure ASID and VMID allocation is unique across all SMMUs in
+ * the system.
+ */
+ if (smmu->model == CAVIUM_SMMUV2) {
+ smmu->cavium_id_base = atomic_add_return(
+ smmu->num_context_banks, context_count);
+ smmu->cavium_id_base -= smmu->num_context_banks;
+ dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
+ }
+ smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
+ sizeof(*smmu->cbs), GFP_KERNEL);
+ if (!smmu->cbs)
+ return -ENOMEM;
+
+ /* ID2 */
+ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
+ size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
+ smmu->ipa_size = size;
+
+ /* The output mask is also applied for bypass */
+ size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
+ smmu->pa_size = size;
+
+ if (id & ID2_VMID16)
+ smmu->features |= ARM_SMMU_FEAT_VMID16;
+
+ /*
+ * What the page table walker can address actually depends on which
+ * descriptor format is in use, but since a) we don't know that yet,
+ * and b) it can vary per context bank, this will have to do...
+ */
+ if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
+ dev_warn(smmu->dev,
+ "failed to set DMA mask for table walker\n");
+
+ if (smmu->version < ARM_SMMU_V2) {
+ smmu->va_size = smmu->ipa_size;
+ if (smmu->version == ARM_SMMU_V1_64K)
+ smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
+ } else {
+ size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
+ smmu->va_size = arm_smmu_id_size_to_bits(size);
+ if (id & ID2_PTFS_4K)
+ smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
+ if (id & ID2_PTFS_16K)
+ smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
+ if (id & ID2_PTFS_64K)
+ smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
+ }
+
+ /* Now we've corralled the various formats, what'll it do? */
+ if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
+ smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
+ if (smmu->features &
+ (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
+ smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
+ if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
+ smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
+ if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
+ smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
+
+ if (ops->pgsize_bitmap == -1UL)
+ ops->pgsize_bitmap = smmu->pgsize_bitmap;
+ else
+ ops->pgsize_bitmap |= smmu->pgsize_bitmap;
+ dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
+ smmu->pgsize_bitmap);
+
+ if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
+ dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
+ smmu->va_size, smmu->ipa_size);
+
+ if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
+ dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
+ smmu->ipa_size, smmu->pa_size);
+
+ return 0;
+}
+
+int arm_smmu_device_remove(struct platform_device *pdev)
+{
+ struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
+
+ if (!smmu)
+ return -ENODEV;
+
+ if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
+ dev_err(&pdev->dev, "removing device with active domains!\n");
+
+ /* Turn the thing off */
+ writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
+ return 0;
+}
diff --git a/drivers/iommu/lib-arm-smmu.h b/drivers/iommu/lib-arm-smmu.h
new file mode 100644
--- /dev/null
+++ b/drivers/iommu/lib-arm-smmu.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2018, NVIDIA Corporation
+ * Author: Krishna Reddy <vdumpa@nvidia.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LIB_ARM_SMMU_H
+#define _LIB_ARM_SMMU_H
+
+#include <linux/iommu.h>
+#include <linux/irqreturn.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+/* Maximum number of context banks per SMMU */
+#define ARM_SMMU_MAX_CBS 128
+
+enum arm_smmu_arch_version {
+ ARM_SMMU_V1,
+ ARM_SMMU_V1_64K,
+ ARM_SMMU_V2,
+};
+
+enum arm_smmu_implementation {
+ GENERIC_SMMU,
+ ARM_MMU500,
+ CAVIUM_SMMUV2,
+};
+
+struct arm_smmu_s2cr {
+ struct iommu_group *group;
+ int count;
+ enum arm_smmu_s2cr_type type;
+ enum arm_smmu_s2cr_privcfg privcfg;
+ u8 cbndx;
+};
+
+struct arm_smmu_smr {
+ u16 mask;
+ u16 id;
+ bool valid;
+};
+
+struct arm_smmu_cb {
+ u64 ttbr[2];
+ u32 tcr[2];
+ u32 mair[2];
+ struct arm_smmu_cfg *cfg;
+};
+
+struct arm_smmu_device {
+ struct device *dev;
+
+ void __iomem *base;
+ void __iomem *cb_base;
+	/*
+	 * Number of ARM SMMUs represented by this arm_smmu_device.
+	 * Tegra194 uses two ARM SMMUs as one SMMU device.
+	 */
+	u32 num_smmus;
+	/* Holds the individual base addresses when num_smmus > 1 */
+	void __iomem **bases;
+ unsigned long pgshift;
+
+#define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0)
+#define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1)
+#define ARM_SMMU_FEAT_TRANS_S1 (1 << 2)
+#define ARM_SMMU_FEAT_TRANS_S2 (1 << 3)
+#define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4)
+#define ARM_SMMU_FEAT_TRANS_OPS (1 << 5)
+#define ARM_SMMU_FEAT_VMID16 (1 << 6)
+#define ARM_SMMU_FEAT_FMT_AARCH64_4K (1 << 7)
+#define ARM_SMMU_FEAT_FMT_AARCH64_16K (1 << 8)
+#define ARM_SMMU_FEAT_FMT_AARCH64_64K (1 << 9)
+#define ARM_SMMU_FEAT_FMT_AARCH32_L (1 << 10)
+#define ARM_SMMU_FEAT_FMT_AARCH32_S (1 << 11)
+#define ARM_SMMU_FEAT_EXIDS (1 << 12)
+ u32 features;
+
+#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
+ u32 options;
+ enum arm_smmu_arch_version version;
+ enum arm_smmu_implementation model;
+
+ u32 num_context_banks;
+ u32 num_s2_context_banks;
+ DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
+ struct arm_smmu_cb *cbs;
+ atomic_t irptndx;
+
+ u32 num_mapping_groups;
+ u16 streamid_mask;
+ u16 smr_mask_mask;
+ struct arm_smmu_smr *smrs;
+ struct arm_smmu_s2cr *s2crs;
+ struct mutex stream_map_mutex;
+
+ unsigned long va_size;
+ unsigned long ipa_size;
+ unsigned long pa_size;
+ unsigned long pgsize_bitmap;
+
+ u32 num_global_irqs;
+ u32 num_context_irqs;
+ unsigned int *irqs;
+
+ u32 cavium_id_base; /* Specific to Cavium */
+
+ spinlock_t global_sync_lock;
+ bool disable_bypass;
+
+ /* IOMMU core code handle */
+ struct iommu_device iommu;
+};
+
+/* Common programming functions */
+int arm_smmu_device_cfg_probe_common(
+ struct arm_smmu_device *smmu, struct iommu_ops *ops,
+ atomic_t *cavium_smmu_context_count, int force_stage);
+irqreturn_t arm_smmu_global_fault(int irq, void *dev);
+irqreturn_t arm_smmu_context_fault(int irq, void *dev);
+int arm_smmu_device_remove(struct platform_device *pdev);
+void arm_smmu_device_reset(struct arm_smmu_device *smmu);
+void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu);
+
+/* For IOMMU ops */
+bool arm_smmu_capable(enum iommu_cap cap);
+struct iommu_domain *arm_smmu_domain_alloc_common(unsigned int type,
+ bool using_legacy_binding);
+void arm_smmu_domain_free(struct iommu_domain *domain);
+int arm_smmu_attach_dev_common(struct iommu_domain *domain, struct device *dev,
+ struct iommu_ops *ops);
+int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot);
+size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
+void arm_smmu_flush_iotlb_all(struct iommu_domain *domain);
+void arm_smmu_iotlb_sync(struct iommu_domain *domain);
+phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+ dma_addr_t iova);
+int arm_smmu_add_device_common(struct device *dev,
+ struct arm_smmu_device *smmu);
+void arm_smmu_remove_device_common(struct device *dev, struct iommu_ops *ops);
+struct iommu_group *arm_smmu_device_group(struct device *dev);
+int arm_smmu_domain_get_attr(struct iommu_domain *domain,
+ enum iommu_attr attr, void *data);
+int arm_smmu_domain_set_attr(struct iommu_domain *domain,
+ enum iommu_attr attr, void *data);
+int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args);
+void arm_smmu_get_resv_regions(struct device *dev,
+ struct list_head *head);
+void arm_smmu_put_resv_regions(struct device *dev,
+ struct list_head *head);
+
+#endif
Create library routines to share the ARM SMMU programming and the common
IOMMU API implementation between ARM SMMU v1 and v2 based implementations.

Signed-off-by: Krishna Reddy <vdumpa@nvidia.com>
---
 drivers/iommu/Makefile       |    1 +
 drivers/iommu/lib-arm-smmu.c | 1671 ++++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/lib-arm-smmu.h |  161 ++++
 3 files changed, 1833 insertions(+)
 create mode 100644 drivers/iommu/lib-arm-smmu.c
 create mode 100644 drivers/iommu/lib-arm-smmu.h
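
As an illustration of how these exports are meant to be consumed (not part
of the patch itself), a v1/v2 based driver would be expected to wire its
iommu_ops to the library along the following lines. The "foo" naming and
the thin wrappers are hypothetical; only the arm_smmu_* helpers come from
lib-arm-smmu.h:

	/* Hypothetical lib-arm-smmu consumer; "foo" names are illustrative. */
	static struct iommu_ops foo_smmu_ops;

	static struct iommu_domain *foo_smmu_domain_alloc(unsigned int type)
	{
		/* Assumes this driver never uses the legacy DT binding */
		return arm_smmu_domain_alloc_common(type, false);
	}

	static int foo_smmu_attach_dev(struct iommu_domain *domain,
				       struct device *dev)
	{
		return arm_smmu_attach_dev_common(domain, dev, &foo_smmu_ops);
	}

	static void foo_smmu_remove_device(struct device *dev)
	{
		arm_smmu_remove_device_common(dev, &foo_smmu_ops);
	}

	static struct iommu_ops foo_smmu_ops = {
		.capable		= arm_smmu_capable,
		.domain_alloc		= foo_smmu_domain_alloc,
		.domain_free		= arm_smmu_domain_free,
		.attach_dev		= foo_smmu_attach_dev,
		.map			= arm_smmu_map,
		.unmap			= arm_smmu_unmap,
		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
		.iotlb_sync		= arm_smmu_iotlb_sync,
		.iova_to_phys		= arm_smmu_iova_to_phys,
		.remove_device		= foo_smmu_remove_device,
		.device_group		= arm_smmu_device_group,
		.domain_get_attr	= arm_smmu_domain_get_attr,
		.domain_set_attr	= arm_smmu_domain_set_attr,
		.of_xlate		= arm_smmu_of_xlate,
		.get_resv_regions	= arm_smmu_get_resv_regions,
		.put_resv_regions	= arm_smmu_put_resv_regions,
		.pgsize_bitmap		= -1UL,
	};

An .add_device implementation would similarly wrap
arm_smmu_add_device_common() once the driver has resolved the
arm_smmu_device instance for the device. Note that .pgsize_bitmap starts
out as -1UL so that arm_smmu_device_cfg_probe_common() can fill it in from
the ID registers.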