diff mbox series

[RESEND,v17,3/5] iommu/arm-smmu: add support for PRR bit setup

Message ID 20241114160721.1527934-4-quic_bibekkum@quicinc.com (mailing list archive)
State Not Applicable
Headers show
Series iommu/arm-smmu: introduction of ACTLR implementation for Qualcomm SoCs | expand

Commit Message

Bibek Kumar Patro Nov. 14, 2024, 4:07 p.m. UTC
Add an adreno-smmu-priv interface for drm/msm to call
into arm-smmu-qcom and initiate the PRR bit setup or reset
sequence as per request.

This will be used by GPU to setup the PRR bit and related
configuration registers through adreno-smmu private
interface instead of directly poking the smmu hardware.

Suggested-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
 drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
 include/linux/adreno-smmu-priv.h           | 14 ++++++++
 3 files changed, 53 insertions(+)

--
2.34.1

Comments

Rob Clark Nov. 20, 2024, 5:17 p.m. UTC | #1
On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
> Add an adreno-smmu-priv interface for drm/msm to call
> into arm-smmu-qcom and initiate the PRR bit setup or reset
> sequence as per request.
>
> This will be used by GPU to setup the PRR bit and related
> configuration registers through adreno-smmu private
> interface instead of directly poking the smmu hardware.
>
> Suggested-by: Rob Clark <robdclark@gmail.com>
> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>  drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>  include/linux/adreno-smmu-priv.h           | 14 ++++++++
>  3 files changed, 53 insertions(+)
>
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index d26f5aea248e..0e4f3fbda961 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -16,6 +16,8 @@
>
>  #define QCOM_DUMMY_VAL -1
>
> +#define GFX_ACTLR_PRR          (1 << 5)
> +
>  static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>  {
>         return container_of(smmu, struct qcom_smmu, smmu);
> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>         arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>  }
>
> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> +{
> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> +       u32 reg = 0;
> +
> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> +       reg &= ~GFX_ACTLR_PRR;
> +       if (set)
> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> +}
> +
> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> +{
> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> +
> +       writel_relaxed(lower_32_bits(page_addr),
> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> +
> +       writel_relaxed(upper_32_bits(page_addr),
> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> +}
> +
>  #define QCOM_ADRENO_SMMU_GPU_SID 0
>
>  static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
>  static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>                 struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
>  {
> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
>         struct adreno_smmu_priv *priv;
>
>         smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>         priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>         priv->set_stall = qcom_adreno_smmu_set_stall;
>         priv->resume_translation = qcom_adreno_smmu_resume_translation;
> +       priv->set_prr_bit = NULL;
> +       priv->set_prr_addr = NULL;
> +
> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {

fwiw, it seems like PRR actually works on sc7180, which is _not_
mmu-500, so I guess the support actually goes back further.

I'm curious if we can just rely on this being supported by any hw that
has a6xx or newer?

BR,
-R

> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
> +       }
>
>         return 0;
>  }
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index e2aeb511ae90..2dbf3243b5ad 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>  #define ARM_SMMU_SCTLR_M               BIT(0)
>
>  #define ARM_SMMU_CB_ACTLR              0x4
> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>
>  #define ARM_SMMU_CB_RESUME             0x8
>  #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> index c637e0997f6d..614665153b3e 100644
> --- a/include/linux/adreno-smmu-priv.h
> +++ b/include/linux/adreno-smmu-priv.h
> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
>   *                 the GPU driver must call resume_translation()
>   * @resume_translation: Resume translation after a fault
>   *
> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
> + *             targets without PRR support. Exercise caution and verify target
> + *             capabilities before invoking these callbacks to prevent potential
> + *             runtime errors or unexpected behavior.
> + *
> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> + *                ACTLR register bits, currently used to configure
> + *                Partially-Resident-Region (PRR) bit for feature's
> + *                setup and reset sequence as requested.
> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> + *                physical address of PRR page passed from
> + *                GPU driver.
>   *
>   * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>   * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
>      void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>      void (*set_stall)(const void *cookie, bool enabled);
>      void (*resume_translation)(const void *cookie, bool terminate);
> +    void (*set_prr_bit)(const void *cookie, bool set);
> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>  };
>
>  #endif /* __ADRENO_SMMU_PRIV_H */
> --
> 2.34.1
>
Rob Clark Nov. 20, 2024, 10:10 p.m. UTC | #2
On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@gmail.com> wrote:
>
> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
> >
> > Add an adreno-smmu-priv interface for drm/msm to call
> > into arm-smmu-qcom and initiate the PRR bit setup or reset
> > sequence as per request.
> >
> > This will be used by GPU to setup the PRR bit and related
> > configuration registers through adreno-smmu private
> > interface instead of directly poking the smmu hardware.
> >
> > Suggested-by: Rob Clark <robdclark@gmail.com>
> > Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> > ---
> >  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
> >  drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >  include/linux/adreno-smmu-priv.h           | 14 ++++++++
> >  3 files changed, 53 insertions(+)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > index d26f5aea248e..0e4f3fbda961 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > @@ -16,6 +16,8 @@
> >
> >  #define QCOM_DUMMY_VAL -1
> >
> > +#define GFX_ACTLR_PRR          (1 << 5)
> > +
> >  static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> >  {
> >         return container_of(smmu, struct qcom_smmu, smmu);
> > @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >         arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >  }
> >
> > +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> > +{
> > +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> > +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> > +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> > +       u32 reg = 0;
> > +
> > +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> > +       reg &= ~GFX_ACTLR_PRR;
> > +       if (set)
> > +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> > +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> > +}
> > +
> > +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> > +{
> > +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> > +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> > +
> > +       writel_relaxed(lower_32_bits(page_addr),
> > +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> > +
> > +       writel_relaxed(upper_32_bits(page_addr),
> > +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> > +}
> > +
> >  #define QCOM_ADRENO_SMMU_GPU_SID 0
> >
> >  static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> > @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
> >  static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >                 struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >  {
> > +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
> >         struct adreno_smmu_priv *priv;
> >
> >         smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> > @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >         priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >         priv->set_stall = qcom_adreno_smmu_set_stall;
> >         priv->resume_translation = qcom_adreno_smmu_resume_translation;
> > +       priv->set_prr_bit = NULL;
> > +       priv->set_prr_addr = NULL;
> > +
> > +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> > +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
>
> fwiw, it seems like PRR actually works on sc7180, which is _not_
> mmu-500, so I guess the support actually goes back further.
>
> I'm curious if we can just rely on this being supported by any hw that
> has a6xx or newer?


Also, unrelated, but we can't assume the smmu is powered when drm
driver calls set_prr_bit/addr, could you add in runpm get/put around
the register access?

Otherwise Conner and I have vk sparse residency working now

BR,
-R

>
> > +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> > +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
> > +       }
> >
> >         return 0;
> >  }
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > index e2aeb511ae90..2dbf3243b5ad 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >  #define ARM_SMMU_SCTLR_M               BIT(0)
> >
> >  #define ARM_SMMU_CB_ACTLR              0x4
> > +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> > +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >
> >  #define ARM_SMMU_CB_RESUME             0x8
> >  #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> > diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> > index c637e0997f6d..614665153b3e 100644
> > --- a/include/linux/adreno-smmu-priv.h
> > +++ b/include/linux/adreno-smmu-priv.h
> > @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
> >   *                 the GPU driver must call resume_translation()
> >   * @resume_translation: Resume translation after a fault
> >   *
> > + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
> > + *             targets without PRR support. Exercise caution and verify target
> > + *             capabilities before invoking these callbacks to prevent potential
> > + *             runtime errors or unexpected behavior.
> > + *
> > + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> > + *                ACTLR register bits, currently used to configure
> > + *                Partially-Resident-Region (PRR) bit for feature's
> > + *                setup and reset sequence as requested.
> > + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> > + *                physical address of PRR page passed from
> > + *                GPU driver.
> >   *
> >   * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >   * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> > @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
> >      void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >      void (*set_stall)(const void *cookie, bool enabled);
> >      void (*resume_translation)(const void *cookie, bool terminate);
> > +    void (*set_prr_bit)(const void *cookie, bool set);
> > +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
> >  };
> >
> >  #endif /* __ADRENO_SMMU_PRIV_H */
> > --
> > 2.34.1
> >
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index d26f5aea248e..0e4f3fbda961 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -16,6 +16,8 @@ 

 #define QCOM_DUMMY_VAL	-1

+#define GFX_ACTLR_PRR          (1 << 5)
+
 static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
 {
 	return container_of(smmu, struct qcom_smmu, smmu);
@@ -99,6 +101,32 @@  static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
 	arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
 }

+static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
+{
+	struct arm_smmu_domain *smmu_domain = (void *)cookie;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+	u32 reg = 0;
+
+	reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
+	reg &= ~GFX_ACTLR_PRR;
+	if (set)
+		reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
+	arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
+}
+
+static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
+{
+	struct arm_smmu_domain *smmu_domain = (void *)cookie;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+	writel_relaxed(lower_32_bits(page_addr),
+				smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
+
+	writel_relaxed(upper_32_bits(page_addr),
+				smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
+}
+
 #define QCOM_ADRENO_SMMU_GPU_SID 0

 static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
@@ -210,6 +238,7 @@  static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
 static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
 		struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
 {
+	const struct device_node *np = smmu_domain->smmu->dev->of_node;
 	struct adreno_smmu_priv *priv;

 	smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
@@ -239,6 +268,14 @@  static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
 	priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
 	priv->set_stall = qcom_adreno_smmu_set_stall;
 	priv->resume_translation = qcom_adreno_smmu_resume_translation;
+	priv->set_prr_bit = NULL;
+	priv->set_prr_addr = NULL;
+
+	if (of_device_is_compatible(np, "qcom,smmu-500") &&
+			of_device_is_compatible(np, "qcom,adreno-smmu")) {
+		priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
+		priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
+	}

 	return 0;
 }
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index e2aeb511ae90..2dbf3243b5ad 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -154,6 +154,8 @@  enum arm_smmu_cbar_type {
 #define ARM_SMMU_SCTLR_M		BIT(0)

 #define ARM_SMMU_CB_ACTLR		0x4
+#define ARM_SMMU_GFX_PRR_CFG_LADDR	0x6008
+#define ARM_SMMU_GFX_PRR_CFG_UADDR	0x600C

 #define ARM_SMMU_CB_RESUME		0x8
 #define ARM_SMMU_RESUME_TERMINATE	BIT(0)
diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
index c637e0997f6d..614665153b3e 100644
--- a/include/linux/adreno-smmu-priv.h
+++ b/include/linux/adreno-smmu-priv.h
@@ -50,6 +50,18 @@  struct adreno_smmu_fault_info {
  *                 the GPU driver must call resume_translation()
  * @resume_translation: Resume translation after a fault
  *
+ * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
+ *             targets without PRR support. Exercise caution and verify target
+ *             capabilities before invoking these callbacks to prevent potential
+ *             runtime errors or unexpected behavior.
+ *
+ * @set_prr_bit:   Extendible interface to be used by GPU to modify the
+ *		   ACTLR register bits, currently used to configure
+ *		   Partially-Resident-Region (PRR) bit for feature's
+ *		   setup and reset sequence as requested.
+ * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
+ *		   physical address of PRR page passed from
+ *		   GPU driver.
  *
  * The GPU driver (drm/msm) and adreno-smmu work together for controlling
  * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
@@ -67,6 +79,8 @@  struct adreno_smmu_priv {
     void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
     void (*set_stall)(const void *cookie, bool enabled);
     void (*resume_translation)(const void *cookie, bool terminate);
+    void (*set_prr_bit)(const void *cookie, bool set);
+    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
 };

 #endif /* __ADRENO_SMMU_PRIV_H */