diff mbox series

[v2,03/10] iommu/arm-smmu-v3: Add arm_smmu_strtab_l1/2_idx()

Message ID 3-v2-318ed5f6983b+198f-smmuv3_tidy_jgg@nvidia.com (mailing list archive)
State New, archived
Headers show
Series Tidy some minor things in the stream table/cd table area | expand

Commit Message

Jason Gunthorpe June 11, 2024, 12:31 a.m. UTC
Don't open code the calculations of the indexes for each level, provide
two functions to do that math and call them in all the places. Update all
the places computing indexes.

Calculate the L1 table size directly based on the max required index from
the cap. Remove STRTAB_L1_SZ_SHIFT in favour of STRTAB_NUM_L2_STES.

Use STRTAB_NUM_L2_STES to replace remaining open coded 1 << STRTAB_SPLIT.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 53 +++++++++------------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 14 +++++-
 2 files changed, 36 insertions(+), 31 deletions(-)

Comments

Nicolin Chen June 11, 2024, 2 a.m. UTC | #1
On Mon, Jun 10, 2024 at 09:31:12PM -0300, Jason Gunthorpe wrote:
> Don't open code the calculations of the indexes for each level, provide
> two functions to do that math and call them in all the places. Update all
> the places computing indexes.
> 
> Calculate the L1 table size directly based on the max required index from
> the cap. Remove STRTAB_L1_SZ_SHIFT in favour of STRTAB_NUM_L2_STES.
> 
> Use STRTAB_NUM_L2_STES to replace remaining open coded 1 << STRTAB_SPLIT.
> 
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Daniel Mentz June 12, 2024, 12:30 a.m. UTC | #2
On Mon, Jun 10, 2024 at 5:31 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
>                 dev_warn(smmu->dev,
> -                        "2-level strtab only covers %u/%u bits of SID\n",
> -                        size, smmu->sid_bits);
> +                        "2-level strtab only covers %u/%u of SIDs\n",
> +                        cfg->num_l1_ents * STRTAB_NUM_L2_STES,
> +                        1 << smmu->sid_bits);

Does this mean it'll change from printing

"2-level strtab only covers 25/32 bits of SID"

to

"2-level strtab only covers 33554432/4294967296 of SIDs"?

I think that's less helpful. I would prefer printing the values in bits.

> +#define STRTAB_NUM_L2_STES (1 << STRTAB_SPLIT)
> +#define STRTAB_MAX_L1_ENTRIES (1 << 17)

Consider adding some extra white space to nicely align these #defines
with the rest of this .h file.
Daniel Mentz June 12, 2024, 12:37 a.m. UTC | #3
On Mon, Jun 10, 2024 at 5:31 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> +static inline unsigned int arm_smmu_strtab_l1_idx(unsigned int sid)

Existing code appears to be exclusively using the data type u32 to
store StreamIDs.
Jason Gunthorpe June 12, 2024, 11:49 a.m. UTC | #4
On Tue, Jun 11, 2024 at 05:37:26PM -0700, Daniel Mentz wrote:
> On Mon, Jun 10, 2024 at 5:31 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> > +static inline unsigned int arm_smmu_strtab_l1_idx(unsigned int sid)
> 
> Existing code appears to be exclusively using the data type u32 to
> store StreamIDs.

Not entirely exclusively, but often enough, I fixed it.

Thanks,
Jason
Jason Gunthorpe June 12, 2024, 12:09 p.m. UTC | #5
On Tue, Jun 11, 2024 at 05:30:53PM -0700, Daniel Mentz wrote:
> On Mon, Jun 10, 2024 at 5:31 PM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >                 dev_warn(smmu->dev,
> > -                        "2-level strtab only covers %u/%u bits of SID\n",
> > -                        size, smmu->sid_bits);
> > +                        "2-level strtab only covers %u/%u of SIDs\n",
> > +                        cfg->num_l1_ents * STRTAB_NUM_L2_STES,
> > +                        1 << smmu->sid_bits);
> 
> Does this mean it'll change from printing
> 
> "2-level strtab only covers 25/32 bits of SID"
> 
> to
> 
> "2-level strtab only covers 33554432/4294967296 of SIDs"?
> 
> I think that's less helpful. I would prefer printing the values in
> bits.

Sure

> > +#define STRTAB_NUM_L2_STES (1 << STRTAB_SPLIT)
> > +#define STRTAB_MAX_L1_ENTRIES (1 << 17)
> 
> Consider adding some extra white space to nicely align these #defines
> with the rest of this .h file.

I personally dislike the random vertical alignments, but sure

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 95351c134c7c45..07b797ad832801 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1652,20 +1652,17 @@  static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
 
 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
 {
-	size_t size;
-	void *strtab;
 	dma_addr_t l2ptr_dma;
 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
-	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
+	struct arm_smmu_strtab_l1_desc *desc =
+		&cfg->l1_desc[arm_smmu_strtab_l1_idx(sid)];
 
 	if (desc->l2ptr)
 		return 0;
 
-	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
-	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
-
-	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &l2ptr_dma,
-					  GFP_KERNEL);
+	desc->l2ptr = dmam_alloc_coherent(
+		smmu->dev, STRTAB_NUM_L2_STES * sizeof(struct arm_smmu_ste),
+		&l2ptr_dma, GFP_KERNEL);
 	if (!desc->l2ptr) {
 		dev_err(smmu->dev,
 			"failed to allocate l2 stream table for SID %u\n",
@@ -1673,8 +1670,9 @@  static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
 		return -ENOMEM;
 	}
 
-	arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT);
-	arm_smmu_write_strtab_l1_desc(strtab, l2ptr_dma);
+	arm_smmu_init_initial_stes(desc->l2ptr, STRTAB_NUM_L2_STES);
+	arm_smmu_write_strtab_l1_desc(&cfg->strtab[arm_smmu_strtab_l1_idx(sid)],
+				      l2ptr_dma);
 	return 0;
 }
 
@@ -2411,12 +2409,9 @@  arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
 
 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
-		unsigned int idx1, idx2;
-
 		/* Two-level walk */
-		idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
-		idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
-		return &cfg->l1_desc[idx1].l2ptr[idx2];
+		return &cfg->l1_desc[arm_smmu_strtab_l1_idx(sid)]
+				.l2ptr[arm_smmu_strtab_l2_idx(sid)];
 	} else {
 		/* Simple linear lookup */
 		return (struct arm_smmu_ste *)&cfg
@@ -2792,12 +2787,10 @@  struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
 
 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
 {
-	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
-
 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
-		limit *= 1UL << STRTAB_SPLIT;
-
-	return sid < limit;
+		return arm_smmu_strtab_l1_idx(sid) <
+		       smmu->strtab_cfg.num_l1_ents;
+	return sid < smmu->strtab_cfg.num_l1_ents;
 }
 
 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
@@ -3218,19 +3211,18 @@  static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 {
 	void *strtab;
 	u64 reg;
-	u32 size, l1size;
+	u32 l1size;
 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
+	unsigned int last_sid_idx =
+		arm_smmu_strtab_l1_idx((1 << smmu->sid_bits) - 1);
 
 	/* Calculate the L1 size, capped to the SIDSIZE. */
-	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
-	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
-	cfg->num_l1_ents = 1 << size;
-
-	size += STRTAB_SPLIT;
-	if (size < smmu->sid_bits)
+	cfg->num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES);
+	if (cfg->num_l1_ents <= last_sid_idx)
 		dev_warn(smmu->dev,
-			 "2-level strtab only covers %u/%u bits of SID\n",
-			 size, smmu->sid_bits);
+			 "2-level strtab only covers %u/%u of SIDs\n",
+			 cfg->num_l1_ents * STRTAB_NUM_L2_STES,
+			 1 << smmu->sid_bits);
 
 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
@@ -3245,7 +3237,8 @@  static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 
 	/* Configure strtab_base_cfg for 2 levels */
 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
-	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
+	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE,
+			  ilog2(cfg->num_l1_ents) + STRTAB_SPLIT);
 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
 	cfg->strtab_base_cfg = reg;
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 087733797f9087..95c3ac8613da79 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -199,7 +199,6 @@ 
  * 2lvl: 128k L1 entries,
  *       256 lazy entries per table (each table covers a PCI bus)
  */
-#define STRTAB_L1_SZ_SHIFT		20
 #define STRTAB_SPLIT			8
 
 #define STRTAB_L1_DESC_DWORDS		1
@@ -212,6 +211,19 @@  struct arm_smmu_ste {
 	__le64 data[STRTAB_STE_DWORDS];
 };
 
+#define STRTAB_NUM_L2_STES (1 << STRTAB_SPLIT)
+#define STRTAB_MAX_L1_ENTRIES (1 << 17)
+
+static inline unsigned int arm_smmu_strtab_l1_idx(unsigned int sid)
+{
+	return sid / STRTAB_NUM_L2_STES;
+}
+
+static inline unsigned int arm_smmu_strtab_l2_idx(unsigned int sid)
+{
+	return sid % STRTAB_NUM_L2_STES;
+}
+
 #define STRTAB_STE_0_V			(1UL << 0)
 #define STRTAB_STE_0_CFG		GENMASK_ULL(3, 1)
 #define STRTAB_STE_0_CFG_ABORT		0