diff mbox series

[RESEND,v17,3/5] iommu/arm-smmu: add support for PRR bit setup

Message ID 20241114160721.1527934-4-quic_bibekkum@quicinc.com (mailing list archive)
State New, archived
Headers show
Series iommu/arm-smmu: introduction of ACTLR implementation for Qualcomm SoCs | expand

Commit Message

Bibek Kumar Patro Nov. 14, 2024, 4:07 p.m. UTC
Add an adreno-smmu-priv interface for drm/msm to call
into arm-smmu-qcom and initiate the PRR bit setup or reset
sequence as per request.

This will be used by the GPU driver to set up the PRR bit and related
configuration registers through the adreno-smmu private
interface instead of directly poking the SMMU hardware.

Suggested-by: Rob Clark <robdclark@gmail.com>
Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
 drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
 include/linux/adreno-smmu-priv.h           | 14 ++++++++
 3 files changed, 53 insertions(+)

--
2.34.1

Comments

Rob Clark Nov. 20, 2024, 5:17 p.m. UTC | #1
On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
> Add an adreno-smmu-priv interface for drm/msm to call
> into arm-smmu-qcom and initiate the PRR bit setup or reset
> sequence as per request.
>
> This will be used by GPU to setup the PRR bit and related
> configuration registers through adreno-smmu private
> interface instead of directly poking the smmu hardware.
>
> Suggested-by: Rob Clark <robdclark@gmail.com>
> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>  drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>  include/linux/adreno-smmu-priv.h           | 14 ++++++++
>  3 files changed, 53 insertions(+)
>
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index d26f5aea248e..0e4f3fbda961 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -16,6 +16,8 @@
>
>  #define QCOM_DUMMY_VAL -1
>
> +#define GFX_ACTLR_PRR          (1 << 5)
> +
>  static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>  {
>         return container_of(smmu, struct qcom_smmu, smmu);
> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>         arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>  }
>
> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> +{
> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> +       u32 reg = 0;
> +
> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> +       reg &= ~GFX_ACTLR_PRR;
> +       if (set)
> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> +}
> +
> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> +{
> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> +
> +       writel_relaxed(lower_32_bits(page_addr),
> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> +
> +       writel_relaxed(upper_32_bits(page_addr),
> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> +}
> +
>  #define QCOM_ADRENO_SMMU_GPU_SID 0
>
>  static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
>  static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>                 struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
>  {
> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
>         struct adreno_smmu_priv *priv;
>
>         smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>         priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>         priv->set_stall = qcom_adreno_smmu_set_stall;
>         priv->resume_translation = qcom_adreno_smmu_resume_translation;
> +       priv->set_prr_bit = NULL;
> +       priv->set_prr_addr = NULL;
> +
> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {

fwiw, it seems like PRR actually works on sc7180, which is _not_
mmu-500, so I guess the support actually goes back further.

I'm curious if we can just rely on this being supported by any hw that
has a6xx or newer?

BR,
-R

> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
> +       }
>
>         return 0;
>  }
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index e2aeb511ae90..2dbf3243b5ad 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>  #define ARM_SMMU_SCTLR_M               BIT(0)
>
>  #define ARM_SMMU_CB_ACTLR              0x4
> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>
>  #define ARM_SMMU_CB_RESUME             0x8
>  #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> index c637e0997f6d..614665153b3e 100644
> --- a/include/linux/adreno-smmu-priv.h
> +++ b/include/linux/adreno-smmu-priv.h
> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
>   *                 the GPU driver must call resume_translation()
>   * @resume_translation: Resume translation after a fault
>   *
> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
> + *             targets without PRR support. Exercise caution and verify target
> + *             capabilities before invoking these callbacks to prevent potential
> + *             runtime errors or unexpected behavior.
> + *
> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> + *                ACTLR register bits, currently used to configure
> + *                Partially-Resident-Region (PRR) bit for feature's
> + *                setup and reset sequence as requested.
> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> + *                physical address of PRR page passed from
> + *                GPU driver.
>   *
>   * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>   * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
>      void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>      void (*set_stall)(const void *cookie, bool enabled);
>      void (*resume_translation)(const void *cookie, bool terminate);
> +    void (*set_prr_bit)(const void *cookie, bool set);
> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>  };
>
>  #endif /* __ADRENO_SMMU_PRIV_H */
> --
> 2.34.1
>
Rob Clark Nov. 20, 2024, 10:10 p.m. UTC | #2
On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@gmail.com> wrote:
>
> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
> >
> > Add an adreno-smmu-priv interface for drm/msm to call
> > into arm-smmu-qcom and initiate the PRR bit setup or reset
> > sequence as per request.
> >
> > This will be used by GPU to setup the PRR bit and related
> > configuration registers through adreno-smmu private
> > interface instead of directly poking the smmu hardware.
> >
> > Suggested-by: Rob Clark <robdclark@gmail.com>
> > Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> > ---
> >  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
> >  drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >  include/linux/adreno-smmu-priv.h           | 14 ++++++++
> >  3 files changed, 53 insertions(+)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > index d26f5aea248e..0e4f3fbda961 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > @@ -16,6 +16,8 @@
> >
> >  #define QCOM_DUMMY_VAL -1
> >
> > +#define GFX_ACTLR_PRR          (1 << 5)
> > +
> >  static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> >  {
> >         return container_of(smmu, struct qcom_smmu, smmu);
> > @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >         arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >  }
> >
> > +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> > +{
> > +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> > +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> > +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> > +       u32 reg = 0;
> > +
> > +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> > +       reg &= ~GFX_ACTLR_PRR;
> > +       if (set)
> > +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> > +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> > +}
> > +
> > +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> > +{
> > +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> > +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> > +
> > +       writel_relaxed(lower_32_bits(page_addr),
> > +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> > +
> > +       writel_relaxed(upper_32_bits(page_addr),
> > +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> > +}
> > +
> >  #define QCOM_ADRENO_SMMU_GPU_SID 0
> >
> >  static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> > @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
> >  static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >                 struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >  {
> > +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
> >         struct adreno_smmu_priv *priv;
> >
> >         smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> > @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >         priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >         priv->set_stall = qcom_adreno_smmu_set_stall;
> >         priv->resume_translation = qcom_adreno_smmu_resume_translation;
> > +       priv->set_prr_bit = NULL;
> > +       priv->set_prr_addr = NULL;
> > +
> > +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> > +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
>
> fwiw, it seems like PRR actually works on sc7180, which is _not_
> mmu-500, so I guess the support actually goes back further.
>
> I'm curious if we can just rely on this being supported by any hw that
> has a6xx or newer?


Also, unrelated, but we can't assume the smmu is powered when drm
driver calls set_prr_bit/addr, could you add in runpm get/put around
the register access?

Otherwise Conner and I have vk sparse residency working now

BR,
-R

>
> > +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> > +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
> > +       }
> >
> >         return 0;
> >  }
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > index e2aeb511ae90..2dbf3243b5ad 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> > @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >  #define ARM_SMMU_SCTLR_M               BIT(0)
> >
> >  #define ARM_SMMU_CB_ACTLR              0x4
> > +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> > +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >
> >  #define ARM_SMMU_CB_RESUME             0x8
> >  #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> > diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> > index c637e0997f6d..614665153b3e 100644
> > --- a/include/linux/adreno-smmu-priv.h
> > +++ b/include/linux/adreno-smmu-priv.h
> > @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
> >   *                 the GPU driver must call resume_translation()
> >   * @resume_translation: Resume translation after a fault
> >   *
> > + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
> > + *             targets without PRR support. Exercise caution and verify target
> > + *             capabilities before invoking these callbacks to prevent potential
> > + *             runtime errors or unexpected behavior.
> > + *
> > + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> > + *                ACTLR register bits, currently used to configure
> > + *                Partially-Resident-Region (PRR) bit for feature's
> > + *                setup and reset sequence as requested.
> > + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> > + *                physical address of PRR page passed from
> > + *                GPU driver.
> >   *
> >   * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >   * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> > @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
> >      void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >      void (*set_stall)(const void *cookie, bool enabled);
> >      void (*resume_translation)(const void *cookie, bool terminate);
> > +    void (*set_prr_bit)(const void *cookie, bool set);
> > +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
> >  };
> >
> >  #endif /* __ADRENO_SMMU_PRIV_H */
> > --
> > 2.34.1
> >
Bibek Kumar Patro Nov. 22, 2024, 4:19 p.m. UTC | #3
On 11/20/2024 10:47 PM, Rob Clark wrote:
> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>> Add an adreno-smmu-priv interface for drm/msm to call
>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>> sequence as per request.
>>
>> This will be used by GPU to setup the PRR bit and related
>> configuration registers through adreno-smmu private
>> interface instead of directly poking the smmu hardware.
>>
>> Suggested-by: Rob Clark <robdclark@gmail.com>
>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>   include/linux/adreno-smmu-priv.h           | 14 ++++++++
>>   3 files changed, 53 insertions(+)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> index d26f5aea248e..0e4f3fbda961 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> @@ -16,6 +16,8 @@
>>
>>   #define QCOM_DUMMY_VAL -1
>>
>> +#define GFX_ACTLR_PRR          (1 << 5)
>> +
>>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>   {
>>          return container_of(smmu, struct qcom_smmu, smmu);
>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>          arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>   }
>>
>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>> +{
>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>> +       u32 reg = 0;
>> +
>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>> +       reg &= ~GFX_ACTLR_PRR;
>> +       if (set)
>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>> +}
>> +
>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
>> +{
>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>> +
>> +       writel_relaxed(lower_32_bits(page_addr),
>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>> +
>> +       writel_relaxed(upper_32_bits(page_addr),
>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>> +}
>> +
>>   #define QCOM_ADRENO_SMMU_GPU_SID 0
>>
>>   static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
>>   static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>                  struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
>>   {
>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
>>          struct adreno_smmu_priv *priv;
>>
>>          smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>          priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>          priv->set_stall = qcom_adreno_smmu_set_stall;
>>          priv->resume_translation = qcom_adreno_smmu_resume_translation;
>> +       priv->set_prr_bit = NULL;
>> +       priv->set_prr_addr = NULL;
>> +
>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
> 
> fwiw, it seems like PRR actually works on sc7180, which is _not_
> mmu-500, so I guess the support actually goes back further.
> 

As far as I have checked, sc7180 uses a previous variant of the SMMU,
so targets on this variant have different steps to set the PRR bit
(they do not have both the PRR bit and the PRR page registers).

Only MMU-500 targets support PRR with both the PRR bit and the
PRR page address registers. Revisiting our discussions on v13
regarding this, I recall that this is why we started selecting the
PRR procedure based on the SMMU compatible string instead of the
reserved-memory node [1], i.e. the specific sequence is selected based
on the SMMU variant (as identified by the compatible string).

So for now, only MMU-500 based selection is supported as part of
this series; support for other SMMU variants will be added
subsequently.

> I'm curious if we can just rely on this being supported by any hw that
> has a6xx or newer?
> 

I'd need to check, for targets based on a6xx and newer, what
procedure is planned to support the PRR feature. I'll follow up with
that information on this thread.

[1]: 
https://lore.kernel.org/all/5790afa3-f9c0-4720-9804-8a7ff3d91854@quicinc.com/#:~:text=%3E%20I%20guess%20if,part%20as%20well.

Thanks & regards,
Bibek

> BR,
> -R
> 
>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
>> +       }
>>
>>          return 0;
>>   }
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> index e2aeb511ae90..2dbf3243b5ad 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>   #define ARM_SMMU_SCTLR_M               BIT(0)
>>
>>   #define ARM_SMMU_CB_ACTLR              0x4
>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>
>>   #define ARM_SMMU_CB_RESUME             0x8
>>   #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>> index c637e0997f6d..614665153b3e 100644
>> --- a/include/linux/adreno-smmu-priv.h
>> +++ b/include/linux/adreno-smmu-priv.h
>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
>>    *                 the GPU driver must call resume_translation()
>>    * @resume_translation: Resume translation after a fault
>>    *
>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
>> + *             targets without PRR support. Exercise caution and verify target
>> + *             capabilities before invoking these callbacks to prevent potential
>> + *             runtime errors or unexpected behavior.
>> + *
>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
>> + *                ACTLR register bits, currently used to configure
>> + *                Partially-Resident-Region (PRR) bit for feature's
>> + *                setup and reset sequence as requested.
>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
>> + *                physical address of PRR page passed from
>> + *                GPU driver.
>>    *
>>    * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>    * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
>>       void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>       void (*set_stall)(const void *cookie, bool enabled);
>>       void (*resume_translation)(const void *cookie, bool terminate);
>> +    void (*set_prr_bit)(const void *cookie, bool set);
>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>>   };
>>
>>   #endif /* __ADRENO_SMMU_PRIV_H */
>> --
>> 2.34.1
>>
Bibek Kumar Patro Nov. 22, 2024, 4:20 p.m. UTC | #4
On 11/21/2024 3:40 AM, Rob Clark wrote:
> On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@gmail.com> wrote:
>>
>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
>> <quic_bibekkum@quicinc.com> wrote:
>>>
>>> Add an adreno-smmu-priv interface for drm/msm to call
>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>> sequence as per request.
>>>
>>> This will be used by GPU to setup the PRR bit and related
>>> configuration registers through adreno-smmu private
>>> interface instead of directly poking the smmu hardware.
>>>
>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>> ---
>>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>   include/linux/adreno-smmu-priv.h           | 14 ++++++++
>>>   3 files changed, 53 insertions(+)
>>>
>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>> index d26f5aea248e..0e4f3fbda961 100644
>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>> @@ -16,6 +16,8 @@
>>>
>>>   #define QCOM_DUMMY_VAL -1
>>>
>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>> +
>>>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>   {
>>>          return container_of(smmu, struct qcom_smmu, smmu);
>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>          arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>   }
>>>
>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>>> +{
>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>> +       u32 reg = 0;
>>> +
>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>> +       reg &= ~GFX_ACTLR_PRR;
>>> +       if (set)
>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>> +}
>>> +
>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
>>> +{
>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>> +
>>> +       writel_relaxed(lower_32_bits(page_addr),
>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>> +
>>> +       writel_relaxed(upper_32_bits(page_addr),
>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>> +}
>>> +
>>>   #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>
>>>   static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
>>>   static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>                  struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
>>>   {
>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
>>>          struct adreno_smmu_priv *priv;
>>>
>>>          smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>          priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>          priv->set_stall = qcom_adreno_smmu_set_stall;
>>>          priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>> +       priv->set_prr_bit = NULL;
>>> +       priv->set_prr_addr = NULL;
>>> +
>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
>>
>> fwiw, it seems like PRR actually works on sc7180, which is _not_
>> mmu-500, so I guess the support actually goes back further.
>>
>> I'm curious if we can just rely on this being supported by any hw that
>> has a6xx or newer?
> 
> 
> Also, unrelated, but we can't assume the smmu is powered when drm
> driver calls set_prr_bit/addr, could you add in runpm get/put around
> the register access?
> 

I see, thanks for this observation.
It is certainly possible that the SMMU is powered off when these
registers are accessed.
I will add the suggested runpm ops around the register access.

> Otherwise Conner and I have vk sparse residency working now
> 

Sorry, I did not quite follow this. Did you mean that vk sparse
residency is now working using this patch?

Thanks & regards,
Bibek

> BR,
> -R
> 
>>
>>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
>>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
>>> +       }
>>>
>>>          return 0;
>>>   }
>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>> index e2aeb511ae90..2dbf3243b5ad 100644
>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>   #define ARM_SMMU_SCTLR_M               BIT(0)
>>>
>>>   #define ARM_SMMU_CB_ACTLR              0x4
>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>
>>>   #define ARM_SMMU_CB_RESUME             0x8
>>>   #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>> index c637e0997f6d..614665153b3e 100644
>>> --- a/include/linux/adreno-smmu-priv.h
>>> +++ b/include/linux/adreno-smmu-priv.h
>>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
>>>    *                 the GPU driver must call resume_translation()
>>>    * @resume_translation: Resume translation after a fault
>>>    *
>>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
>>> + *             targets without PRR support. Exercise caution and verify target
>>> + *             capabilities before invoking these callbacks to prevent potential
>>> + *             runtime errors or unexpected behavior.
>>> + *
>>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
>>> + *                ACTLR register bits, currently used to configure
>>> + *                Partially-Resident-Region (PRR) bit for feature's
>>> + *                setup and reset sequence as requested.
>>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
>>> + *                physical address of PRR page passed from
>>> + *                GPU driver.
>>>    *
>>>    * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>    * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
>>>       void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>       void (*set_stall)(const void *cookie, bool enabled);
>>>       void (*resume_translation)(const void *cookie, bool terminate);
>>> +    void (*set_prr_bit)(const void *cookie, bool set);
>>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>>>   };
>>>
>>>   #endif /* __ADRENO_SMMU_PRIV_H */
>>> --
>>> 2.34.1
>>>
Rob Clark Nov. 22, 2024, 4:34 p.m. UTC | #5
On Fri, Nov 22, 2024 at 8:20 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 11/21/2024 3:40 AM, Rob Clark wrote:
> > On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@gmail.com> wrote:
> >>
> >> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
> >> <quic_bibekkum@quicinc.com> wrote:
> >>>
> >>> Add an adreno-smmu-priv interface for drm/msm to call
> >>> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >>> sequence as per request.
> >>>
> >>> This will be used by GPU to setup the PRR bit and related
> >>> configuration registers through adreno-smmu private
> >>> interface instead of directly poking the smmu hardware.
> >>>
> >>> Suggested-by: Rob Clark <robdclark@gmail.com>
> >>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>> ---
> >>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
> >>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>>   include/linux/adreno-smmu-priv.h           | 14 ++++++++
> >>>   3 files changed, 53 insertions(+)
> >>>
> >>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>> index d26f5aea248e..0e4f3fbda961 100644
> >>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>> @@ -16,6 +16,8 @@
> >>>
> >>>   #define QCOM_DUMMY_VAL -1
> >>>
> >>> +#define GFX_ACTLR_PRR          (1 << 5)
> >>> +
> >>>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> >>>   {
> >>>          return container_of(smmu, struct qcom_smmu, smmu);
> >>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>>          arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>>   }
> >>>
> >>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> >>> +{
> >>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >>> +       u32 reg = 0;
> >>> +
> >>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >>> +       reg &= ~GFX_ACTLR_PRR;
> >>> +       if (set)
> >>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>> +}
> >>> +
> >>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> >>> +{
> >>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>> +
> >>> +       writel_relaxed(lower_32_bits(page_addr),
> >>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>> +
> >>> +       writel_relaxed(upper_32_bits(page_addr),
> >>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>> +}
> >>> +
> >>>   #define QCOM_ADRENO_SMMU_GPU_SID 0
> >>>
> >>>   static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> >>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
> >>>   static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>                  struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >>>   {
> >>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
> >>>          struct adreno_smmu_priv *priv;
> >>>
> >>>          smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> >>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>          priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >>>          priv->set_stall = qcom_adreno_smmu_set_stall;
> >>>          priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >>> +       priv->set_prr_bit = NULL;
> >>> +       priv->set_prr_addr = NULL;
> >>> +
> >>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> >>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
> >>
> >> fwiw, it seems like PRR actually works on sc7180, which is _not_
> >> mmu-500, so I guess the support actually goes back further.
> >>
> >> I'm curious if we can just rely on this being supported by any hw that
> >> has a6xx or newer?
> >
> >
> > Also, unrelated, but we can't assume the smmu is powered when drm
> > driver calls set_prr_bit/addr, could you add in runpm get/put around
> > the register access?
> >
>
> I see, thanks for this observation.
> It's surely a possible case, when they access these registers
> SMMU state is off.
> I will add the suggested runpm ops around the register access.
>
> > Otherwise Conner and I have vk sparse residency working now
> >
>
> Sorry, could not get this. Did you mean that vk sparse residency
> is working now using this patch?

Yes, vk sparse residency is working with this patch (plus addition of
runpm get/put, plus drm/msm patches plus turnip patches) ;-)

BR,
-R

> Thanks & regards,
> Bibek
>
> > BR,
> > -R
> >
> >>
> >>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> >>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
> >>> +       }
> >>>
> >>>          return 0;
> >>>   }
> >>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>> index e2aeb511ae90..2dbf3243b5ad 100644
> >>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >>>   #define ARM_SMMU_SCTLR_M               BIT(0)
> >>>
> >>>   #define ARM_SMMU_CB_ACTLR              0x4
> >>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> >>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >>>
> >>>   #define ARM_SMMU_CB_RESUME             0x8
> >>>   #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> >>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> >>> index c637e0997f6d..614665153b3e 100644
> >>> --- a/include/linux/adreno-smmu-priv.h
> >>> +++ b/include/linux/adreno-smmu-priv.h
> >>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
> >>>    *                 the GPU driver must call resume_translation()
> >>>    * @resume_translation: Resume translation after a fault
> >>>    *
> >>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
> >>> + *             targets without PRR support. Exercise caution and verify target
> >>> + *             capabilities before invoking these callbacks to prevent potential
> >>> + *             runtime errors or unexpected behavior.
> >>> + *
> >>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> >>> + *                ACTLR register bits, currently used to configure
> >>> + *                Partially-Resident-Region (PRR) bit for feature's
> >>> + *                setup and reset sequence as requested.
> >>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> >>> + *                physical address of PRR page passed from
> >>> + *                GPU driver.
> >>>    *
> >>>    * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >>>    * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> >>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
> >>>       void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >>>       void (*set_stall)(const void *cookie, bool enabled);
> >>>       void (*resume_translation)(const void *cookie, bool terminate);
> >>> +    void (*set_prr_bit)(const void *cookie, bool set);
> >>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
> >>>   };
> >>>
> >>>   #endif /* __ADRENO_SMMU_PRIV_H */
> >>> --
> >>> 2.34.1
> >>>
>
Rob Clark Nov. 22, 2024, 5:03 p.m. UTC | #6
On Fri, Nov 22, 2024 at 8:19 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 11/20/2024 10:47 PM, Rob Clark wrote:
> > On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
> > <quic_bibekkum@quicinc.com> wrote:
> >>
> >> Add an adreno-smmu-priv interface for drm/msm to call
> >> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >> sequence as per request.
> >>
> >> This will be used by GPU to setup the PRR bit and related
> >> configuration registers through adreno-smmu private
> >> interface instead of directly poking the smmu hardware.
> >>
> >> Suggested-by: Rob Clark <robdclark@gmail.com>
> >> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >> ---
> >>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
> >>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>   include/linux/adreno-smmu-priv.h           | 14 ++++++++
> >>   3 files changed, 53 insertions(+)
> >>
> >> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >> index d26f5aea248e..0e4f3fbda961 100644
> >> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >> @@ -16,6 +16,8 @@
> >>
> >>   #define QCOM_DUMMY_VAL -1
> >>
> >> +#define GFX_ACTLR_PRR          (1 << 5)
> >> +
> >>   static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> >>   {
> >>          return container_of(smmu, struct qcom_smmu, smmu);
> >> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>          arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>   }
> >>
> >> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> >> +{
> >> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >> +       u32 reg = 0;
> >> +
> >> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >> +       reg &= ~GFX_ACTLR_PRR;
> >> +       if (set)
> >> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >> +}
> >> +
> >> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> >> +{
> >> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >> +
> >> +       writel_relaxed(lower_32_bits(page_addr),
> >> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >> +
> >> +       writel_relaxed(upper_32_bits(page_addr),
> >> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >> +}
> >> +
> >>   #define QCOM_ADRENO_SMMU_GPU_SID 0
> >>
> >>   static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> >> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
> >>   static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>                  struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >>   {
> >> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
> >>          struct adreno_smmu_priv *priv;
> >>
> >>          smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> >> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>          priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >>          priv->set_stall = qcom_adreno_smmu_set_stall;
> >>          priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >> +       priv->set_prr_bit = NULL;
> >> +       priv->set_prr_addr = NULL;
> >> +
> >> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> >> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
> >
> > fwiw, it seems like PRR actually works on sc7180, which is _not_
> > mmu-500, so I guess the support actually goes back further.
> >
>
> As I checked sc7180 was on previous variant of SMMU,
> so targets on this variant have different steps to set PRR bit.
> <Do not have both prr bit and PRR page registers>.

Experimentally, I have to set both the PRR LADDR/UADDR regs and
ACTLR.PRR bit on sc7180 to get the sparse-residency tests passing.  So
the underlying hw seems to work in the same way as mmu-500.  _But_
this is on a sc7180 chromebook, the situation might be different
depending on fw on things that have QC hyp.

> It's MMU-500 targets only where PRR support is with both PRR bit
> and PRR page addr registers. As I was re-visiting our discussions on v13
> regarding this - I remember that's why we started using the SMMU-
> compatible string based PRR procedure selection instead of the reserved-
> memory node. [1] i.e Based on SMMU variant (as selected by compatible
> string), specific sequence will be selected.
>
> So for now only MMU-500 based selection has been supported as part of
> this series and will add subsequent support for other SMMU-variants
> thereafter.
>
> > I'm curious if we can just rely on this being supported by any hw that
> > has a6xx or newer?
> >
>
> I'd need to check on targets which will be based on a6xx onwards, on
> what will be the procedure planned to support PRR feature. I'll update
> the information over this space.

One of the open questions about the drm/msm sparse-memory patchset is
whether we need to expose to userspace whether PRR is supported, or if
we can just rely on sparse-binding support implying sparse residency
(ie, PRR) support.  All a6xx devices support per-process pgtables,
which is the only requirement for basic sparseBinding support.  But
PRR is required to additionally expose
sparseResidencyBuffer/sparseResidencyImage2D.

I think, long term, turnip probably will want to drop support for
older kernels and remove support for legacy buffer mapping.  But if we
have some a6xx devices without PRR, then to do that we'd need to
decouple sparse binding and sparse residency.  (Vulkan allows a driver
to expose the former without the latter.)

BR,
-R

> [1]:
> https://lore.kernel.org/all/5790afa3-f9c0-4720-9804-8a7ff3d91854@quicinc.com/#:~:text=%3E%20I%20guess%20if,part%20as%20well.
>
> Thanks & regards,
> Bibek
>
> > BR,
> > -R
> >
> >> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> >> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
> >> +       }
> >>
> >>          return 0;
> >>   }
> >> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >> index e2aeb511ae90..2dbf3243b5ad 100644
> >> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >>   #define ARM_SMMU_SCTLR_M               BIT(0)
> >>
> >>   #define ARM_SMMU_CB_ACTLR              0x4
> >> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> >> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >>
> >>   #define ARM_SMMU_CB_RESUME             0x8
> >>   #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> >> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> >> index c637e0997f6d..614665153b3e 100644
> >> --- a/include/linux/adreno-smmu-priv.h
> >> +++ b/include/linux/adreno-smmu-priv.h
> >> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
> >>    *                 the GPU driver must call resume_translation()
> >>    * @resume_translation: Resume translation after a fault
> >>    *
> >> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
> >> + *             targets without PRR support. Exercise caution and verify target
> >> + *             capabilities before invoking these callbacks to prevent potential
> >> + *             runtime errors or unexpected behavior.
> >> + *
> >> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> >> + *                ACTLR register bits, currently used to configure
> >> + *                Partially-Resident-Region (PRR) bit for feature's
> >> + *                setup and reset sequence as requested.
> >> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> >> + *                physical address of PRR page passed from
> >> + *                GPU driver.
> >>    *
> >>    * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >>    * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> >> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
> >>       void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >>       void (*set_stall)(const void *cookie, bool enabled);
> >>       void (*resume_translation)(const void *cookie, bool terminate);
> >> +    void (*set_prr_bit)(const void *cookie, bool set);
> >> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
> >>   };
> >>
> >>   #endif /* __ADRENO_SMMU_PRIV_H */
> >> --
> >> 2.34.1
> >>
>
Bibek Kumar Patro Dec. 4, 2024, 11:27 a.m. UTC | #7
On 11/22/2024 10:33 PM, Rob Clark wrote:
> On Fri, Nov 22, 2024 at 8:19 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 11/20/2024 10:47 PM, Rob Clark wrote:
>>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>> sequence as per request.
>>>>
>>>> This will be used by GPU to setup the PRR bit and related
>>>> configuration registers through adreno-smmu private
>>>> interface instead of directly poking the smmu hardware.
>>>>
>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>> ---
>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>>>    drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>    include/linux/adreno-smmu-priv.h           | 14 ++++++++
>>>>    3 files changed, 53 insertions(+)
>>>>
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> index d26f5aea248e..0e4f3fbda961 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> @@ -16,6 +16,8 @@
>>>>
>>>>    #define QCOM_DUMMY_VAL -1
>>>>
>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>> +
>>>>    static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>>    {
>>>>           return container_of(smmu, struct qcom_smmu, smmu);
>>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>           arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>    }
>>>>
>>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>>>> +{
>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>> +       u32 reg = 0;
>>>> +
>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>> +       if (set)
>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>> +}
>>>> +
>>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
>>>> +{
>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>> +
>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>> +
>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>> +}
>>>> +
>>>>    #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>
>>>>    static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
>>>>    static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>                   struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
>>>>    {
>>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
>>>>           struct adreno_smmu_priv *priv;
>>>>
>>>>           smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
>>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>           priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>           priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>           priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>> +       priv->set_prr_bit = NULL;
>>>> +       priv->set_prr_addr = NULL;
>>>> +
>>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
>>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
>>>
>>> fwiw, it seems like PRR actually works on sc7180, which is _not_
>>> mmu-500, so I guess the support actually goes back further.
>>>
>>
>> As I checked sc7180 was on previous variant of SMMU,
>> so targets on this variant have different steps to set PRR bit.
>> <Do not have both prr bit and PRR page registers>.
> 
> Experimentally, I have to set both the PRR LADDR/UADDR regs and
> ACTLR.PRR bit on sc7180 to get the sparse-residency tests passing.  So
> the underlying hw seems to work in the same way as mmu-500.  _But_
> this is on a sc7180 chromebook, the situation might be different
> depending on fw on things that have QC hyp.
> 

I checked on the sc7180 chipset, which is based on smmu-v2,
this time by looking for these offsets specifically. I can see that the
nomenclature of the PRR-related registers is a bit different
compared to the MMU-500 variant.
Also, the implementation register is 64-bit instead of
dual 32-bit as in the case of MMU-500, and the PRR bit is not marked in
the ACTLR register offset.

So it turns out PRR is supported, but with some modifications, and
it can be handled only with the same compatible-string-based approach,
as per my understanding.

The plan for the current patch was to provide support for MMU-500 based
targets without breaking any legacy targets, so can we take up the PRR
support for legacy targets in a different series once our evaluation is
done on smmu-v2?
We would explore this PRR feature further for smmu-v2 based targets
before supporting it.

Thanks & regards,
Bibek

>> It's MMU-500 targets only where PRR support is with both PRR bit
>> and PRR page addr registers. As I was re-visiting our discussions on v13
>> regarding this - I remember that's why we started using the SMMU-
>> compatible string based PRR procedure selection instead of the reserved-
>> memory node. [1] i.e Based on SMMU variant (as selected by compatible
>> string), specific sequence will be selected.
>>
>> So for now only MMU-500 based selection has been supported as part of
>> this series and will add subsequent support for other SMMU-variants
>> thereafter.
>>
>>> I'm curious if we can just rely on this being supported by any hw that
>>> has a6xx or newer?
>>>
>>
>> I'd need to check on targets which will be based on a6xx onwards, on
>> what will be the procedure planned to support PRR feature. I'll update
>> the information over this space.
> 
> One of the open questions about the drm/msm sparse-memory patchset is
> whether we need to expose to userspace whether PRR is supported, or if
> we can just rely on sparse-binding support implying sparse residency
> (ie, PRR) support. All a6xx devices support per-process pgtables,
> which is the only requirement for basic sparseBinding support.  But
> PRR is required to additionally expose
> sparseResidencyBuffer/sparseResidencyImage2D.
> 
> I think, long term, turnip probably will want to drop support for
> older kernels and remove support for legacy buffer mapping.  But if we
> have some a6xx devices without PRR, then to do that we'd need to
> decouple sparse binding and sparse residency.  (Vulkan allows a driver
> to expose the former without the latter.)
> 
> BR,
> -R
> 
>> [1]:
>> https://lore.kernel.org/all/5790afa3-f9c0-4720-9804-8a7ff3d91854@quicinc.com/#:~:text=%3E%20I%20guess%20if,part%20as%20well.
>>
>> Thanks & regards,
>> Bibek
>>
>>> BR,
>>> -R
>>>
>>>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
>>>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
>>>> +       }
>>>>
>>>>           return 0;
>>>>    }
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>> index e2aeb511ae90..2dbf3243b5ad 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>    #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>
>>>>    #define ARM_SMMU_CB_ACTLR              0x4
>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>
>>>>    #define ARM_SMMU_CB_RESUME             0x8
>>>>    #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>> index c637e0997f6d..614665153b3e 100644
>>>> --- a/include/linux/adreno-smmu-priv.h
>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
>>>>     *                 the GPU driver must call resume_translation()
>>>>     * @resume_translation: Resume translation after a fault
>>>>     *
>>>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
>>>> + *             targets without PRR support. Exercise caution and verify target
>>>> + *             capabilities before invoking these callbacks to prevent potential
>>>> + *             runtime errors or unexpected behavior.
>>>> + *
>>>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
>>>> + *                ACTLR register bits, currently used to configure
>>>> + *                Partially-Resident-Region (PRR) bit for feature's
>>>> + *                setup and reset sequence as requested.
>>>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
>>>> + *                physical address of PRR page passed from
>>>> + *                GPU driver.
>>>>     *
>>>>     * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>     * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
>>>>        void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>        void (*set_stall)(const void *cookie, bool enabled);
>>>>        void (*resume_translation)(const void *cookie, bool terminate);
>>>> +    void (*set_prr_bit)(const void *cookie, bool set);
>>>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>>>>    };
>>>>
>>>>    #endif /* __ADRENO_SMMU_PRIV_H */
>>>> --
>>>> 2.34.1
>>>>
>>
Bibek Kumar Patro Dec. 4, 2024, 11:27 a.m. UTC | #8
On 11/22/2024 10:04 PM, Rob Clark wrote:
> On Fri, Nov 22, 2024 at 8:20 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 11/21/2024 3:40 AM, Rob Clark wrote:
>>> On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@gmail.com> wrote:
>>>>
>>>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>
>>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>>> sequence as per request.
>>>>>
>>>>> This will be used by GPU to setup the PRR bit and related
>>>>> configuration registers through adreno-smmu private
>>>>> interface instead of directly poking the smmu hardware.
>>>>>
>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>> ---
>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>>    include/linux/adreno-smmu-priv.h           | 14 ++++++++
>>>>>    3 files changed, 53 insertions(+)
>>>>>
>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> index d26f5aea248e..0e4f3fbda961 100644
>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> @@ -16,6 +16,8 @@
>>>>>
>>>>>    #define QCOM_DUMMY_VAL -1
>>>>>
>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>> +
>>>>>    static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>>>    {
>>>>>           return container_of(smmu, struct qcom_smmu, smmu);
>>>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>>           arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>>    }
>>>>>
>>>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>>>>> +{
>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>>> +       u32 reg = 0;
>>>>> +
>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>>> +       if (set)
>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>> +}
>>>>> +
>>>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
>>>>> +{
>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>> +
>>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>> +
>>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>> +}
>>>>> +
>>>>>    #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>>
>>>>>    static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
>>>>>    static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>                   struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
>>>>>    {
>>>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
>>>>>           struct adreno_smmu_priv *priv;
>>>>>
>>>>>           smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
>>>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>           priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>>           priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>>           priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>>> +       priv->set_prr_bit = NULL;
>>>>> +       priv->set_prr_addr = NULL;
>>>>> +
>>>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
>>>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
>>>>
>>>> fwiw, it seems like PRR actually works on sc7180, which is _not_
>>>> mmu-500, so I guess the support actually goes back further.
>>>>
>>>> I'm curious if we can just rely on this being supported by any hw that
>>>> has a6xx or newer?
>>>
>>>
>>> Also, unrelated, but we can't assume the smmu is powered when drm
>>> driver calls set_prr_bit/addr, could you add in runpm get/put around
>>> the register access?
>>>
>>
>> I see, thanks for this observation.
>> It's surely a possible case, when they access these registers
>> SMMU state is off.
>> I will add the suggested runpm ops around the register access.
>>
>>> Otherwise Conner and I have vk sparse residency working now
>>>
>>
>> Sorry, could not get this. Did you mean that vk sparse residency
>> is working now using this patch?
> 
> Yes, vk sparse residency is working with this patch (plus addition of
> runpm get/put, plus drm/msm patches plus turnip patches) ;-)
> 

Thanks for testing the sparse residency feature with our patch, Rob.
I have an additional query regarding the adreno private interfaces.
Specifically, I was referring to other interfaces such as
qcom_adreno_smmu_get_fault_info [1]. It appears that we do not have a
runpm get/put around the register access in this context.

Could you please clarify whether we need an SMMU vote around register
access in the case of PRR? Or, IMO, should the users of this callback
ensure they put a vote before accessing the cb?

[1]: 
https://lore.kernel.org/all/20210610214431.539029-1-robdclark@gmail.com/

Thanks & regards,
Bibek

> BR,
> -R
> 
>> Thanks & regards,
>> Bibek
>>
>>> BR,
>>> -R
>>>
>>>>
>>>>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
>>>>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
>>>>> +       }
>>>>>
>>>>>           return 0;
>>>>>    }
>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>> index e2aeb511ae90..2dbf3243b5ad 100644
>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>>    #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>>
>>>>>    #define ARM_SMMU_CB_ACTLR              0x4
>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>>
>>>>>    #define ARM_SMMU_CB_RESUME             0x8
>>>>>    #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>>> index c637e0997f6d..614665153b3e 100644
>>>>> --- a/include/linux/adreno-smmu-priv.h
>>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
>>>>>     *                 the GPU driver must call resume_translation()
>>>>>     * @resume_translation: Resume translation after a fault
>>>>>     *
>>>>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
>>>>> + *             targets without PRR support. Exercise caution and verify target
>>>>> + *             capabilities before invoking these callbacks to prevent potential
>>>>> + *             runtime errors or unexpected behavior.
>>>>> + *
>>>>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
>>>>> + *                ACTLR register bits, currently used to configure
>>>>> + *                Partially-Resident-Region (PRR) bit for feature's
>>>>> + *                setup and reset sequence as requested.
>>>>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
>>>>> + *                physical address of PRR page passed from
>>>>> + *                GPU driver.
>>>>>     *
>>>>>     * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>>     * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
>>>>>        void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>>        void (*set_stall)(const void *cookie, bool enabled);
>>>>>        void (*resume_translation)(const void *cookie, bool terminate);
>>>>> +    void (*set_prr_bit)(const void *cookie, bool set);
>>>>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>>>>>    };
>>>>>
>>>>>    #endif /* __ADRENO_SMMU_PRIV_H */
>>>>> --
>>>>> 2.34.1
>>>>>
>>
Rob Clark Dec. 4, 2024, 3:21 p.m. UTC | #9
On Wed, Dec 4, 2024 at 3:27 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 11/22/2024 10:33 PM, Rob Clark wrote:
> > On Fri, Nov 22, 2024 at 8:19 AM Bibek Kumar Patro
> > <quic_bibekkum@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 11/20/2024 10:47 PM, Rob Clark wrote:
> >>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
> >>> <quic_bibekkum@quicinc.com> wrote:
> >>>>
> >>>> Add an adreno-smmu-priv interface for drm/msm to call
> >>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >>>> sequence as per request.
> >>>>
> >>>> This will be used by GPU to setup the PRR bit and related
> >>>> configuration registers through adreno-smmu private
> >>>> interface instead of directly poking the smmu hardware.
> >>>>
> >>>> Suggested-by: Rob Clark <robdclark@gmail.com>
> >>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>>> ---
> >>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
> >>>>    drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>>>    include/linux/adreno-smmu-priv.h           | 14 ++++++++
> >>>>    3 files changed, 53 insertions(+)
> >>>>
> >>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>> index d26f5aea248e..0e4f3fbda961 100644
> >>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>> @@ -16,6 +16,8 @@
> >>>>
> >>>>    #define QCOM_DUMMY_VAL -1
> >>>>
> >>>> +#define GFX_ACTLR_PRR          (1 << 5)
> >>>> +
> >>>>    static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> >>>>    {
> >>>>           return container_of(smmu, struct qcom_smmu, smmu);
> >>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>>>           arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>>>    }
> >>>>
> >>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> >>>> +{
> >>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >>>> +       u32 reg = 0;
> >>>> +
> >>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >>>> +       reg &= ~GFX_ACTLR_PRR;
> >>>> +       if (set)
> >>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>>> +}
> >>>> +
> >>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> >>>> +{
> >>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>> +
> >>>> +       writel_relaxed(lower_32_bits(page_addr),
> >>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>>> +
> >>>> +       writel_relaxed(upper_32_bits(page_addr),
> >>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>>> +}
> >>>> +
> >>>>    #define QCOM_ADRENO_SMMU_GPU_SID 0
> >>>>
> >>>>    static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> >>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
> >>>>    static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>                   struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >>>>    {
> >>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
> >>>>           struct adreno_smmu_priv *priv;
> >>>>
> >>>>           smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> >>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>           priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >>>>           priv->set_stall = qcom_adreno_smmu_set_stall;
> >>>>           priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >>>> +       priv->set_prr_bit = NULL;
> >>>> +       priv->set_prr_addr = NULL;
> >>>> +
> >>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> >>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
> >>>
> >>> fwiw, it seems like PRR actually works on sc7180, which is _not_
> >>> mmu-500, so I guess the support actually goes back further.
> >>>
> >>
> >> As I checked sc7180 was on previous variant of SMMU,
> >> so targets on this variant have different steps to set PRR bit.
> >> <Do not have both prr bit and PRR page registers>.
> >
> > Experimentally, I have to set both the PRR LADDR/UADDR regs and
> > ACTLR.PRR bit on sc7180 to get the sparse-residency tests passing.  So
> > the underlying hw seems to work in the same way as mmu-500.  _But_
> > this is on a sc7180 chromebook, the situation might be different
> > depending on fw on things that have QC hyp.
> >
>
> I checked on sc7180 chipset which is based on the smmu-v2,
> this time by looking for these offsets specifically. I can see the
> nomenclature of the PRR related registers are a bit different
> compared to MMU-500 variant.
> Also the implementation register is 64 bit instead of
> dual 32 bit as in case of MMU-500. and PRR bit is not marked in
> ACTLR register offset.

Interesting.. in my experiments it needed both the ACTLR.PRR bit set
and the LADDR/UADDR regs written.  Maybe it was just a happy coincidence
that two 32b writes worked?

> So turns out PRR is supported but with some modification and
> can be carried out with same compatible based approach only - as per
> my understanding.
>
> In current patch plan was to provide support for MMU-500 based targets
> and won't break any legacy targets, so we can take the PRR support
> for legacy targets in different series once our evaluation is done on
> smmu-v2 ?

I guess it wouldn't be the end of the world, but it would mean that
drm would need to expose PRR support to userspace separately from
sparse binding support.  Maybe we need to do that anyways.  (I'm not
really sure how many different a6xx+smmu-v2 devices are out there, but
I guess they are all based on the same generation of snapdragon?)

BR,
-R

> We would explore more on this PRR feature for smmu-v2 based targets,
> before supporting it.
>
> Thanks & regards,
> Bibek
>
> >> It's MMU-500 targets only where PRR support is with both PRR bit
> >> and PRR page addr registers. As I was re-visiting our discussions on v13
> >> regarding this - I remember that's why we started using the SMMU-
> >> compatible string based PRR procedure selection instead of the reserved-
> >> memory node. [1] i.e Based on SMMU variant (as selected by compatible
> >> string), specific sequence will be selected.
> >>
> >> So for now only MMU-500 based selection has been supported as part of
> >> this series and will add subsequent support for other SMMU-variants
> >> thereafter.
> >>
> >>> I'm curious if we can just rely on this being supported by any hw that
> >>> has a6xx or newer?
> >>>
> >>
> >> I'd need to check on targets which will be based on a6xx onwards, on
> >> what will be the procedure planned to support PRR feature. I'll update
> >> the information over this space.
> >
> > One of the open questions about the drm/msm sparse-memory patchset is
> > whether we need to expose to userspace whether PRR is supported, or if
> > we can just rely on sparse-binding support implying sparse residency
> > (ie, PRR) support. All a6xx devices support per-process pgtables,
> > which is the only requirement for basic sparseBinding support.  But
> > PRR is required to additionally expose
> > sparseResidencyBuffer/sparseResidencyImage2D.
> >
> > I think, long term, turnip probably will want to drop support for
> > older kernels and remove support for legacy buffer mapping.  But if we
> > have some a6xx devices without PRR, then to do that we'd need to
> > decouple sparse binding and sparse residency.  (Vulkan allows a driver
> > to expose the former without the latter.)
> >
> > BR,
> > -R
> >
> >> [1]:
> >> https://lore.kernel.org/all/5790afa3-f9c0-4720-9804-8a7ff3d91854@quicinc.com/#:~:text=%3E%20I%20guess%20if,part%20as%20well.
> >>
> >> Thanks & regards,
> >> Bibek
> >>
> >>> BR,
> >>> -R
> >>>
> >>>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> >>>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
> >>>> +       }
> >>>>
> >>>>           return 0;
> >>>>    }
> >>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>> index e2aeb511ae90..2dbf3243b5ad 100644
> >>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >>>>    #define ARM_SMMU_SCTLR_M               BIT(0)
> >>>>
> >>>>    #define ARM_SMMU_CB_ACTLR              0x4
> >>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> >>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >>>>
> >>>>    #define ARM_SMMU_CB_RESUME             0x8
> >>>>    #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> >>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> >>>> index c637e0997f6d..614665153b3e 100644
> >>>> --- a/include/linux/adreno-smmu-priv.h
> >>>> +++ b/include/linux/adreno-smmu-priv.h
> >>>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
> >>>>     *                 the GPU driver must call resume_translation()
> >>>>     * @resume_translation: Resume translation after a fault
> >>>>     *
> >>>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
> >>>> + *             targets without PRR support. Exercise caution and verify target
> >>>> + *             capabilities before invoking these callbacks to prevent potential
> >>>> + *             runtime errors or unexpected behavior.
> >>>> + *
> >>>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> >>>> + *                ACTLR register bits, currently used to configure
> >>>> + *                Partially-Resident-Region (PRR) bit for feature's
> >>>> + *                setup and reset sequence as requested.
> >>>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> >>>> + *                physical address of PRR page passed from
> >>>> + *                GPU driver.
> >>>>     *
> >>>>     * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >>>>     * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> >>>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
> >>>>        void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >>>>        void (*set_stall)(const void *cookie, bool enabled);
> >>>>        void (*resume_translation)(const void *cookie, bool terminate);
> >>>> +    void (*set_prr_bit)(const void *cookie, bool set);
> >>>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
> >>>>    };
> >>>>
> >>>>    #endif /* __ADRENO_SMMU_PRIV_H */
> >>>> --
> >>>> 2.34.1
> >>>>
> >>
>
Rob Clark Dec. 4, 2024, 3:24 p.m. UTC | #10
On Wed, Dec 4, 2024 at 3:28 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 11/22/2024 10:04 PM, Rob Clark wrote:
> > On Fri, Nov 22, 2024 at 8:20 AM Bibek Kumar Patro
> > <quic_bibekkum@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 11/21/2024 3:40 AM, Rob Clark wrote:
> >>> On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@gmail.com> wrote:
> >>>>
> >>>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
> >>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>
> >>>>> Add an adreno-smmu-priv interface for drm/msm to call
> >>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >>>>> sequence as per request.
> >>>>>
> >>>>> This will be used by GPU to setup the PRR bit and related
> >>>>> configuration registers through adreno-smmu private
> >>>>> interface instead of directly poking the smmu hardware.
> >>>>>
> >>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
> >>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>>>> ---
> >>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
> >>>>>    drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>>>>    include/linux/adreno-smmu-priv.h           | 14 ++++++++
> >>>>>    3 files changed, 53 insertions(+)
> >>>>>
> >>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>> index d26f5aea248e..0e4f3fbda961 100644
> >>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>> @@ -16,6 +16,8 @@
> >>>>>
> >>>>>    #define QCOM_DUMMY_VAL -1
> >>>>>
> >>>>> +#define GFX_ACTLR_PRR          (1 << 5)
> >>>>> +
> >>>>>    static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> >>>>>    {
> >>>>>           return container_of(smmu, struct qcom_smmu, smmu);
> >>>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>>>>           arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>>>>    }
> >>>>>
> >>>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> >>>>> +{
> >>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >>>>> +       u32 reg = 0;
> >>>>> +
> >>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >>>>> +       reg &= ~GFX_ACTLR_PRR;
> >>>>> +       if (set)
> >>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>>>> +}
> >>>>> +
> >>>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> >>>>> +{
> >>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>> +
> >>>>> +       writel_relaxed(lower_32_bits(page_addr),
> >>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>>>> +
> >>>>> +       writel_relaxed(upper_32_bits(page_addr),
> >>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>>>> +}
> >>>>> +
> >>>>>    #define QCOM_ADRENO_SMMU_GPU_SID 0
> >>>>>
> >>>>>    static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> >>>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
> >>>>>    static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>>                   struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >>>>>    {
> >>>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
> >>>>>           struct adreno_smmu_priv *priv;
> >>>>>
> >>>>>           smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> >>>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>>           priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >>>>>           priv->set_stall = qcom_adreno_smmu_set_stall;
> >>>>>           priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >>>>> +       priv->set_prr_bit = NULL;
> >>>>> +       priv->set_prr_addr = NULL;
> >>>>> +
> >>>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> >>>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
> >>>>
> >>>> fwiw, it seems like PRR actually works on sc7180, which is _not_
> >>>> mmu-500, so I guess the support actually goes back further.
> >>>>
> >>>> I'm curious if we can just rely on this being supported by any hw that
> >>>> has a6xx or newer?
> >>>
> >>>
> >>> Also, unrelated, but we can't assume the smmu is powered when drm
> >>> driver calls set_prr_bit/addr, could you add in runpm get/put around
> >>> the register access?
> >>>
> >>
> >> I see, thanks for this observation.
> >> It's surely a possible case, when they access these registers
> >> SMMU state is off.
> >> I will add the suggested runpm ops around the register access.
> >>
> >>> Otherwise Conner and I have vk sparse residency working now
> >>>
> >>
> >> Sorry, could not get this. Did you mean that vk sparse residency
> >> is working now using this patch?
> >
> > Yes, vk sparse residency is working with this patch (plus addition of
> > runpm get/put, plus drm/msm patches plus turnip patches) ;-)
> >
>
> Thanks for testing the sparse residency feature with our patch Rob,
> I have an additional query regarding the adreno private interfaces.
> Specifically, I was referring to other interfaces such as
> qcom_adreno_smmu_get_fault_info [1]. It appears that we do not have a
> runpm get/put around the register access in this context.

get_fault_info() is called from the iommu fault handler callback, so
from the fault irq handler, which is why it didn't need the runpm
get/put.  Maybe it is bad to make this assumption about usage, but
then again adreno_smmu_priv isn't really a general purpose interface.

> Could you please clarify whether we need an SMMU vote around register
> access in the case of PRR? IMO, should the users of this callback ensure
> they put a vote before accessing the cb?

How can drm vote for the smmu device?  I guess it could power itself
up and rely on the device-link.. but powering up the entire gpu is
pretty overkill in this case.  I think it is best for the vote to
happen in the PRR callbacks.

BR,
-R

> [1]:
> https://lore.kernel.org/all/20210610214431.539029-1-robdclark@gmail.com/
>
> Thanks & regards,
> Bibek
>
> > BR,
> > -R
> >
> >> Thanks & regards,
> >> Bibek
> >>
> >>> BR,
> >>> -R
> >>>
> >>>>
> >>>>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> >>>>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
> >>>>> +       }
> >>>>>
> >>>>>           return 0;
> >>>>>    }
> >>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>> index e2aeb511ae90..2dbf3243b5ad 100644
> >>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >>>>>    #define ARM_SMMU_SCTLR_M               BIT(0)
> >>>>>
> >>>>>    #define ARM_SMMU_CB_ACTLR              0x4
> >>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> >>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >>>>>
> >>>>>    #define ARM_SMMU_CB_RESUME             0x8
> >>>>>    #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> >>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> >>>>> index c637e0997f6d..614665153b3e 100644
> >>>>> --- a/include/linux/adreno-smmu-priv.h
> >>>>> +++ b/include/linux/adreno-smmu-priv.h
> >>>>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
> >>>>>     *                 the GPU driver must call resume_translation()
> >>>>>     * @resume_translation: Resume translation after a fault
> >>>>>     *
> >>>>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
> >>>>> + *             targets without PRR support. Exercise caution and verify target
> >>>>> + *             capabilities before invoking these callbacks to prevent potential
> >>>>> + *             runtime errors or unexpected behavior.
> >>>>> + *
> >>>>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> >>>>> + *                ACTLR register bits, currently used to configure
> >>>>> + *                Partially-Resident-Region (PRR) bit for feature's
> >>>>> + *                setup and reset sequence as requested.
> >>>>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> >>>>> + *                physical address of PRR page passed from
> >>>>> + *                GPU driver.
> >>>>>     *
> >>>>>     * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >>>>>     * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> >>>>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
> >>>>>        void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >>>>>        void (*set_stall)(const void *cookie, bool enabled);
> >>>>>        void (*resume_translation)(const void *cookie, bool terminate);
> >>>>> +    void (*set_prr_bit)(const void *cookie, bool set);
> >>>>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
> >>>>>    };
> >>>>>
> >>>>>    #endif /* __ADRENO_SMMU_PRIV_H */
> >>>>> --
> >>>>> 2.34.1
> >>>>>
> >>
>
Bibek Kumar Patro Dec. 5, 2024, 6:53 p.m. UTC | #11
On 12/4/2024 8:51 PM, Rob Clark wrote:
> On Wed, Dec 4, 2024 at 3:27 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 11/22/2024 10:33 PM, Rob Clark wrote:
>>> On Fri, Nov 22, 2024 at 8:19 AM Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>>
>>>>
>>>> On 11/20/2024 10:47 PM, Rob Clark wrote:
>>>>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>
>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>>>> sequence as per request.
>>>>>>
>>>>>> This will be used by GPU to setup the PRR bit and related
>>>>>> configuration registers through adreno-smmu private
>>>>>> interface instead of directly poking the smmu hardware.
>>>>>>
>>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>> ---
>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>>>     include/linux/adreno-smmu-priv.h           | 14 ++++++++
>>>>>>     3 files changed, 53 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> index d26f5aea248e..0e4f3fbda961 100644
>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> @@ -16,6 +16,8 @@
>>>>>>
>>>>>>     #define QCOM_DUMMY_VAL -1
>>>>>>
>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>>> +
>>>>>>     static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>>>>     {
>>>>>>            return container_of(smmu, struct qcom_smmu, smmu);
>>>>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>>>            arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>>>     }
>>>>>>
>>>>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>>>>>> +{
>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>>>> +       u32 reg = 0;
>>>>>> +
>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>>>> +       if (set)
>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>> +}
>>>>>> +
>>>>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
>>>>>> +{
>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>> +
>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>> +
>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>> +}
>>>>>> +
>>>>>>     #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>>>
>>>>>>     static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
>>>>>>     static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>                    struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
>>>>>>     {
>>>>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
>>>>>>            struct adreno_smmu_priv *priv;
>>>>>>
>>>>>>            smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
>>>>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>            priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>>>            priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>>>            priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>>>> +       priv->set_prr_bit = NULL;
>>>>>> +       priv->set_prr_addr = NULL;
>>>>>> +
>>>>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
>>>>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
>>>>>
>>>>> fwiw, it seems like PRR actually works on sc7180, which is _not_
>>>>> mmu-500, so I guess the support actually goes back further.
>>>>>
>>>>
>>>> As I checked sc7180 was on previous variant of SMMU,
>>>> so targets on this variant have different steps to set PRR bit.
>>>> <Do not have both prr bit and PRR page registers>.
>>>
>>> Experimentally, I have to set both the PRR LADDR/UADDR regs and
>>> ACTLR.PRR bit on sc7180 to get the sparse-residency tests passing.  So
>>> the underlying hw seems to work in the same way as mmu-500.  _But_
>>> this is on a sc7180 chromebook, the situation might be different
>>> depending on fw on things that have QC hyp.
>>>
>>
>> I checked on sc7180 chipset which is based on the smmu-v2,
>> this time by looking for these offsets specifically. I can see the
>> nomenclature of the PRR related registers are a bit different
>> compared to MMU-500 variant.
>> Also the implementation register is 64 bit instead of
>> dual 32 bit as in case of MMU-500. and PRR bit is not marked in
>> ACTLR register offset.
> 
> Interesting.. in my experiments it needed both the ACTLR.PRR bit set
> and the LADDR/UADDR.  Maybe it was just happy coincidence that two 32b
> writes worked?
> 

I think it's similar to the case we discussed [1] in v11
on why it is not a good idea to use writeq on 32-bit regs.
So some corner cases for unaligned 32b addresses might get truncated.

[1]: 
https://lore.kernel.org/all/ae35bf9b-4401-4a99-abd7-c0d9d399a46b@quicinc.com/#:~:text=So%20I%20think-,writeq,-for%2064bit%20write


>> So turns out PRR is supported but with some modification and
>> can be carried out with same compatible based approach only - as per
>> my understanding.
>>
>> In current patch plan was to provide support for MMU-500 based targets
>> and won't break any legacy targets, so we can take the PRR support
>> for legacy targets in different series once our evaluation is done on
>> smmu-v2 ?
> 
> I guess it wouldn't be the end of the world, but it would mean that
> drm would need to expose PRR support to userspace separately from
> sparse binding support.  Maybe we need to do that anyways.  (I'm not
> really sure how many different a6xx+smmu-v2 devices are out there, but
> I guess they are all based on the same generation of snapdragon?)
> 

We would need to audit all the smmu-v2 based devices using a6xx
before I can comment on this, but they should mostly be in the
same generation of Snapdragon (scxx/smxx).

Thanks & regards,
bibek

> BR,
> -R
> 
>> We would explore more on this PRR feature for smmu-v2 based targets,
>> before supporting it.
>>
>> Thanks & regards,
>> Bibek
>>
>>>> It's MMU-500 targets only where PRR support is with both PRR bit
>>>> and PRR page addr registers. As I was re-visiting our discussions on v13
>>>> regarding this - I remember that's why we started using the SMMU-
>>>> compatible string based PRR procedure selection instead of the reserved-
>>>> memory node. [1] i.e Based on SMMU variant (as selected by compatible
>>>> string), specific sequence will be selected.
>>>>
>>>> So for now only MMU-500 based selection has been supported as part of
>>>> this series and will add subsequent support for other SMMU-variants
>>>> thereafter.
>>>>
>>>>> I'm curious if we can just rely on this being supported by any hw that
>>>>> has a6xx or newer?
>>>>>
>>>>
>>>> I'd need to check on targets which will be based on a6xx onwards, on
>>>> what will be the procedure planned to support PRR feature. I'll update
>>>> the information over this space.
>>>
>>> One of the open questions about the drm/msm sparse-memory patchset is
>>> whether we need to expose to userspace whether PRR is supported, or if
>>> we can just rely on sparse-binding support implying sparse residency
>>> (ie, PRR) support. All a6xx devices support per-process pgtables,
>>> which is the only requirement for basic sparseBinding support.  But
>>> PRR is required to additionally expose
>>> sparseResidencyBuffer/sparseResidencyImage2D.
>>>
>>> I think, long term, turnip probably will want to drop support for
>>> older kernels and remove support for legacy buffer mapping.  But if we
>>> have some a6xx devices without PRR, then to do that we'd need to
>>> decouple sparse binding and sparse residency.  (Vulkan allows a driver
>>> to expose the former without the latter.)
>>>
>>> BR,
>>> -R
>>>
>>>> [1]:
>>>> https://lore.kernel.org/all/5790afa3-f9c0-4720-9804-8a7ff3d91854@quicinc.com/#:~:text=%3E%20I%20guess%20if,part%20as%20well.
>>>>
>>>> Thanks & regards,
>>>> Bibek
>>>>
>>>>> BR,
>>>>> -R
>>>>>
>>>>>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
>>>>>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
>>>>>> +       }
>>>>>>
>>>>>>            return 0;
>>>>>>     }
>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>> index e2aeb511ae90..2dbf3243b5ad 100644
>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>>>     #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>>>
>>>>>>     #define ARM_SMMU_CB_ACTLR              0x4
>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>>>
>>>>>>     #define ARM_SMMU_CB_RESUME             0x8
>>>>>>     #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>>>> index c637e0997f6d..614665153b3e 100644
>>>>>> --- a/include/linux/adreno-smmu-priv.h
>>>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>>>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
>>>>>>      *                 the GPU driver must call resume_translation()
>>>>>>      * @resume_translation: Resume translation after a fault
>>>>>>      *
>>>>>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
>>>>>> + *             targets without PRR support. Exercise caution and verify target
>>>>>> + *             capabilities before invoking these callbacks to prevent potential
>>>>>> + *             runtime errors or unexpected behavior.
>>>>>> + *
>>>>>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
>>>>>> + *                ACTLR register bits, currently used to configure
>>>>>> + *                Partially-Resident-Region (PRR) bit for feature's
>>>>>> + *                setup and reset sequence as requested.
>>>>>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
>>>>>> + *                physical address of PRR page passed from
>>>>>> + *                GPU driver.
>>>>>>      *
>>>>>>      * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>>>      * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>>>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
>>>>>>         void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>>>         void (*set_stall)(const void *cookie, bool enabled);
>>>>>>         void (*resume_translation)(const void *cookie, bool terminate);
>>>>>> +    void (*set_prr_bit)(const void *cookie, bool set);
>>>>>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>>>>>>     };
>>>>>>
>>>>>>     #endif /* __ADRENO_SMMU_PRIV_H */
>>>>>> --
>>>>>> 2.34.1
>>>>>>
>>>>
>>
Bibek Kumar Patro Dec. 6, 2024, 12:36 p.m. UTC | #12
On 12/4/2024 8:54 PM, Rob Clark wrote:
> On Wed, Dec 4, 2024 at 3:28 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 11/22/2024 10:04 PM, Rob Clark wrote:
>>> On Fri, Nov 22, 2024 at 8:20 AM Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>>
>>>>
>>>> On 11/21/2024 3:40 AM, Rob Clark wrote:
>>>>> On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@gmail.com> wrote:
>>>>>>
>>>>>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>
>>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>>>>> sequence as per request.
>>>>>>>
>>>>>>> This will be used by GPU to setup the PRR bit and related
>>>>>>> configuration registers through adreno-smmu private
>>>>>>> interface instead of directly poking the smmu hardware.
>>>>>>>
>>>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>>> ---
>>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>>>>     include/linux/adreno-smmu-priv.h           | 14 ++++++++
>>>>>>>     3 files changed, 53 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>> index d26f5aea248e..0e4f3fbda961 100644
>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>> @@ -16,6 +16,8 @@
>>>>>>>
>>>>>>>     #define QCOM_DUMMY_VAL -1
>>>>>>>
>>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>>>> +
>>>>>>>     static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>>>>>     {
>>>>>>>            return container_of(smmu, struct qcom_smmu, smmu);
>>>>>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>>>>            arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>>>>     }
>>>>>>>
>>>>>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>>>>>>> +{
>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>>>>> +       u32 reg = 0;
>>>>>>> +
>>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>>>>> +       if (set)
>>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
>>>>>>> +{
>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>>> +
>>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>>> +
>>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>>> +}
>>>>>>> +
>>>>>>>     #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>>>>
>>>>>>>     static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>>>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
>>>>>>>     static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>>                    struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
>>>>>>>     {
>>>>>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
>>>>>>>            struct adreno_smmu_priv *priv;
>>>>>>>
>>>>>>>            smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
>>>>>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>>            priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>>>>            priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>>>>            priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>>>>> +       priv->set_prr_bit = NULL;
>>>>>>> +       priv->set_prr_addr = NULL;
>>>>>>> +
>>>>>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
>>>>>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
>>>>>>
>>>>>> fwiw, it seems like PRR actually works on sc7180, which is _not_
>>>>>> mmu-500, so I guess the support actually goes back further.
>>>>>>
>>>>>> I'm curious if we can just rely on this being supported by any hw that
>>>>>> has a6xx or newer?
>>>>>
>>>>>
>>>>> Also, unrelated, but we can't assume the smmu is powered when drm
>>>>> driver calls set_prr_bit/addr, could you add in runpm get/put around
>>>>> the register access?
>>>>>
>>>>
>>>> I see, thanks for this observation.
>>>> It's surely a possible case, when they access these registers
>>>> SMMU state is off.
>>>> I will add the suggested runpm ops around the register access.
>>>>
>>>>> Otherwise Conner and I have vk sparse residency working now
>>>>>
>>>>
>>>> Sorry, could not get this. Did you mean that vk sparse residency
>>>> is working now using this patch?
>>>
>>> Yes, vk sparse residency is working with this patch (plus addition of
>>> runpm get/put, plus drm/msm patches plus turnip patches) ;-)
>>>
>>
>> Thanks for testing the sparse residency feature with our patch Rob,
>> I have an additional query regarding the adreno private interfaces.
>> Specifically, I was referring to other interfaces such as
>> qcom_adreno_smmu_get_fault_info [1]. It appears that we do not have a
>> runpm get/put around the register access in this context.
> 
> get_fault_info() is called from the iommu fault handler callback, so
> from the fault irq handler, which is why it didn't need the runpm
> get/put.  Maybe it is bad to make this assumption about usage, but
> then again adreno_smmu_priv isn't really a general purpose interface.
> 

Ah okay, got it.
I was just going through all the adreno_smmu_priv interfaces to get a
better understanding of their interaction with the smmu, and it seems
that, apart from PRR and get_fault_info(), set_ttbr0_cfg() and
resume_translation() also access registers without voting.
Should we put runpm get/put there as well, or can these be excluded?
<Just curious about the thought process behind this: is it because of
some sequence governing when to put a vote, like the reg access being
in the middle of an smmu power cycle and not at the beginning or end?>

>> Could you please clarify whether we need an SMMU vote around register
>> access in the case of PRR? IMO, should the users of this callback ensure
>> they put a vote before accessing the cb?
> 
> How can drm vote for the smmu device?  I guess it could power up
> itself and rely on device-link.. but that is pretty overkill to power
> up the entire gpu in this case.  I think it is best for the vote to
> happen in the PRR callbacks.
> 

Okay I see, the GPU can only power itself up through <gpu_state_get I
assume> but won't have exclusive access to power on only the smmu.

In case we go forward with putting the vote in the PRR callbacks for the
SMMU, I was planning to move the existing arm_smmu_rpm_put/get() from
arm_smmu.c to its header file so that the same helpers can be used in
arm_smmu_qcom.c. What are your thoughts on this?

Thanks & regards,
Bibek

> BR,
> -R
> 
>> [1]:
>> https://lore.kernel.org/all/20210610214431.539029-1-robdclark@gmail.com/
>>
>> Thanks & regards,
>> Bibek
>>
>>> BR,
>>> -R
>>>
>>>> Thanks & regards,
>>>> Bibek
>>>>
>>>>> BR,
>>>>> -R
>>>>>
>>>>>>
>>>>>>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
>>>>>>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
>>>>>>> +       }
>>>>>>>
>>>>>>>            return 0;
>>>>>>>     }
>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>> index e2aeb511ae90..2dbf3243b5ad 100644
>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>>>>     #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>>>>
>>>>>>>     #define ARM_SMMU_CB_ACTLR              0x4
>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>>>>
>>>>>>>     #define ARM_SMMU_CB_RESUME             0x8
>>>>>>>     #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>>>>> index c637e0997f6d..614665153b3e 100644
>>>>>>> --- a/include/linux/adreno-smmu-priv.h
>>>>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>>>>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
>>>>>>>      *                 the GPU driver must call resume_translation()
>>>>>>>      * @resume_translation: Resume translation after a fault
>>>>>>>      *
>>>>>>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
>>>>>>> + *             targets without PRR support. Exercise caution and verify target
>>>>>>> + *             capabilities before invoking these callbacks to prevent potential
>>>>>>> + *             runtime errors or unexpected behavior.
>>>>>>> + *
>>>>>>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
>>>>>>> + *                ACTLR register bits, currently used to configure
>>>>>>> + *                Partially-Resident-Region (PRR) bit for feature's
>>>>>>> + *                setup and reset sequence as requested.
>>>>>>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
>>>>>>> + *                physical address of PRR page passed from
>>>>>>> + *                GPU driver.
>>>>>>>      *
>>>>>>>      * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>>>>      * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>>>>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
>>>>>>>         void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>>>>         void (*set_stall)(const void *cookie, bool enabled);
>>>>>>>         void (*resume_translation)(const void *cookie, bool terminate);
>>>>>>> +    void (*set_prr_bit)(const void *cookie, bool set);
>>>>>>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>>>>>>>     };
>>>>>>>
>>>>>>>     #endif /* __ADRENO_SMMU_PRIV_H */
>>>>>>> --
>>>>>>> 2.34.1
>>>>>>>
>>>>
>>
Rob Clark Dec. 6, 2024, 3:18 p.m. UTC | #13
On Fri, Dec 6, 2024 at 4:36 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 12/4/2024 8:54 PM, Rob Clark wrote:
> > On Wed, Dec 4, 2024 at 3:28 AM Bibek Kumar Patro
> > <quic_bibekkum@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 11/22/2024 10:04 PM, Rob Clark wrote:
> >>> On Fri, Nov 22, 2024 at 8:20 AM Bibek Kumar Patro
> >>> <quic_bibekkum@quicinc.com> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On 11/21/2024 3:40 AM, Rob Clark wrote:
> >>>>> On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@gmail.com> wrote:
> >>>>>>
> >>>>>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
> >>>>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>>>
> >>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
> >>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >>>>>>> sequence as per request.
> >>>>>>>
> >>>>>>> This will be used by GPU to setup the PRR bit and related
> >>>>>>> configuration registers through adreno-smmu private
> >>>>>>> interface instead of directly poking the smmu hardware.
> >>>>>>>
> >>>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
> >>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>>>>>> ---
> >>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
> >>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>>>>>>     include/linux/adreno-smmu-priv.h           | 14 ++++++++
> >>>>>>>     3 files changed, 53 insertions(+)
> >>>>>>>
> >>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>> index d26f5aea248e..0e4f3fbda961 100644
> >>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>> @@ -16,6 +16,8 @@
> >>>>>>>
> >>>>>>>     #define QCOM_DUMMY_VAL -1
> >>>>>>>
> >>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
> >>>>>>> +
> >>>>>>>     static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> >>>>>>>     {
> >>>>>>>            return container_of(smmu, struct qcom_smmu, smmu);
> >>>>>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>>>>>>            arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>>>>>>     }
> >>>>>>>
> >>>>>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> >>>>>>> +{
> >>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >>>>>>> +       u32 reg = 0;
> >>>>>>> +
> >>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >>>>>>> +       reg &= ~GFX_ACTLR_PRR;
> >>>>>>> +       if (set)
> >>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> >>>>>>> +{
> >>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>>>> +
> >>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
> >>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>>>>>> +
> >>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
> >>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>     #define QCOM_ADRENO_SMMU_GPU_SID 0
> >>>>>>>
> >>>>>>>     static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> >>>>>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
> >>>>>>>     static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>>>>                    struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >>>>>>>     {
> >>>>>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
> >>>>>>>            struct adreno_smmu_priv *priv;
> >>>>>>>
> >>>>>>>            smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> >>>>>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>>>>            priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >>>>>>>            priv->set_stall = qcom_adreno_smmu_set_stall;
> >>>>>>>            priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >>>>>>> +       priv->set_prr_bit = NULL;
> >>>>>>> +       priv->set_prr_addr = NULL;
> >>>>>>> +
> >>>>>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> >>>>>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
> >>>>>>
> >>>>>> fwiw, it seems like PRR actually works on sc7180, which is _not_
> >>>>>> mmu-500, so I guess the support actually goes back further.
> >>>>>>
> >>>>>> I'm curious if we can just rely on this being supported by any hw that
> >>>>>> has a6xx or newer?
> >>>>>
> >>>>>
> >>>>> Also, unrelated, but we can't assume the smmu is powered when drm
> >>>>> driver calls set_prr_bit/addr, could you add in runpm get/put around
> >>>>> the register access?
> >>>>>
> >>>>
> >>>> I see, thanks for this observation.
> >>>> It's surely a possible case, when they access these registers
> >>>> SMMU state is off.
> >>>> I will add the suggested runpm ops around the register access.
> >>>>
> >>>>> Otherwise Conner and I have vk sparse residency working now
> >>>>>
> >>>>
> >>>> Sorry, could not get this. Did you mean that vk sparse residency
> >>>> is working now using this patch?
> >>>
> >>> Yes, vk sparse residency is working with this patch (plus addition of
> >>> runpm get/put, plus drm/msm patches plus turnip patches) ;-)
> >>>
> >>
> >> Thanks for testing the sparse residency feature with our patch Rob,
> >> I have an additional query regarding the adreno private interfaces.
> >> Specifically, I was referring to other interfaces such as
> >> qcom_adreno_smmu_get_fault_info [1]. It appears that we do not have a
> >> runpm get/put around the register access in this context.
> >
> > get_fault_info() is called from the iommu fault handler callback, so
> > from the fault irq handler, which is why it didn't need the runpm
> > get/put.  Maybe it is bad to make this assumption about usage, but
> > then again adreno_smmu_priv isn't really a general purpose interface.
> >
>
> Ah okay, got it.
> I was just going through all the adreno_smmmu_priv interfaces just
> to get a better understanding of it's interaction with smmu and it seems
> like apart from PRR and get_fault_info(), set_ttbr0_cfg(),
> resume_translation() is also having reg access but not voting.
> Should we put runpm_put/get here as well or these can be excluded.
> <Just curious about the thought process behind this, is it because of
> some sequence when to put a vote, like reg access in middle of smmu
> power cycle and not in beginning or end.>

I think we just haven't needed it, or noticed that we needed it,
outside of setting up prr.

As I mentioned, get_fault_info() is called from the fault irq, so we
know the smmu is already powered.

As far as set_ttbr0_cfg(), it probably works just because
arm_smmu_write_context_bank() ends up getting called again in the
resume path, so if the smmu is suspended when set_ttbr0_cfg() is
called, the writes just get ignored.  But the updated cfg is
re-applied to the hw when it is resumed.  Probably the same situation
with resume_translation(), i.e. if the smmu is suspended there are no
translations to resume.

Maybe it would be more correct in set_ttbr0_cfg() and
resume_translation() to do a pm_runtime_get_if_in_use() and skip the
hw writes if the smmu is suspended.

>
> >> Could you please clarify whether we need an SMMU vote around register
> >> access in the case of PRR? IMO, should the users of this callback ensure
> >> they put a vote before accessing the cb?
> >
> > How can drm vote for the smmu device?  I guess it could power up
> > itself and rely on device-link.. but that is pretty overkill to power
> > up the entire gpu in this case.  I think it is best for the vote to
> > happen in the PRR callbacks.
> >
>
> Okay I see, GPU can only power itself up through <gpu_state_get I
> assume> but won't have exclusive access to power on the smmu only.
>
> Incase we go forward to put vote in PRR callback for SMMU, I was
> planning that we can refactor existing arm_smmu_rpm_put/get() from
> arm_smmu.c to it's header file so that the same can be used in
> arm_smmu_qcom.c ? What's your thoughts on this?

I had briefly thought of doing the same.  But the main reason for
those helpers is common arm-smmu code that is used on non-qcom
platforms where runpm is not enabled.  In arm-smmu-qcom.c, we know
that runpm is enabled, so we could just use
pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() directly.

BR,
-R

> Thanks & regards,
> Bibek
>
> > BR,
> > -R
> >
> >> [1]:
> >> https://lore.kernel.org/all/20210610214431.539029-1-robdclark@gmail.com/
> >>
> >> Thanks & regards,
> >> Bibek
> >>
> >>> BR,
> >>> -R
> >>>
> >>>> Thanks & regards,
> >>>> Bibek
> >>>>
> >>>>> BR,
> >>>>> -R
> >>>>>
> >>>>>>
> >>>>>>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
> >>>>>>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
> >>>>>>> +       }
> >>>>>>>
> >>>>>>>            return 0;
> >>>>>>>     }
> >>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>>>> index e2aeb511ae90..2dbf3243b5ad 100644
> >>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >>>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >>>>>>>     #define ARM_SMMU_SCTLR_M               BIT(0)
> >>>>>>>
> >>>>>>>     #define ARM_SMMU_CB_ACTLR              0x4
> >>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> >>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >>>>>>>
> >>>>>>>     #define ARM_SMMU_CB_RESUME             0x8
> >>>>>>>     #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> >>>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> >>>>>>> index c637e0997f6d..614665153b3e 100644
> >>>>>>> --- a/include/linux/adreno-smmu-priv.h
> >>>>>>> +++ b/include/linux/adreno-smmu-priv.h
> >>>>>>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
> >>>>>>>      *                 the GPU driver must call resume_translation()
> >>>>>>>      * @resume_translation: Resume translation after a fault
> >>>>>>>      *
> >>>>>>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are NULL terminated for
> >>>>>>> + *             targets without PRR support. Exercise caution and verify target
> >>>>>>> + *             capabilities before invoking these callbacks to prevent potential
> >>>>>>> + *             runtime errors or unexpected behavior.
> >>>>>>> + *
> >>>>>>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
> >>>>>>> + *                ACTLR register bits, currently used to configure
> >>>>>>> + *                Partially-Resident-Region (PRR) bit for feature's
> >>>>>>> + *                setup and reset sequence as requested.
> >>>>>>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
> >>>>>>> + *                physical address of PRR page passed from
> >>>>>>> + *                GPU driver.
> >>>>>>>      *
> >>>>>>>      * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >>>>>>>      * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> >>>>>>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
> >>>>>>>         void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >>>>>>>         void (*set_stall)(const void *cookie, bool enabled);
> >>>>>>>         void (*resume_translation)(const void *cookie, bool terminate);
> >>>>>>> +    void (*set_prr_bit)(const void *cookie, bool set);
> >>>>>>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
> >>>>>>>     };
> >>>>>>>
> >>>>>>>     #endif /* __ADRENO_SMMU_PRIV_H */
> >>>>>>> --
> >>>>>>> 2.34.1
> >>>>>>>
> >>>>
> >>
>
Bibek Kumar Patro Dec. 11, 2024, 1:29 p.m. UTC | #14
On 12/6/2024 8:48 PM, Rob Clark wrote:
> On Fri, Dec 6, 2024 at 4:36 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 12/4/2024 8:54 PM, Rob Clark wrote:
>>> On Wed, Dec 4, 2024 at 3:28 AM Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>>
>>>>
>>>> On 11/22/2024 10:04 PM, Rob Clark wrote:
>>>>> On Fri, Nov 22, 2024 at 8:20 AM Bibek Kumar Patro
>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 11/21/2024 3:40 AM, Rob Clark wrote:
>>>>>>> On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@gmail.com> wrote:
>>>>>>>>
>>>>>>>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
>>>>>>>> <quic_bibekkum@quicinc.com> wrote:
>>>>>>>>>
>>>>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>>>>>>> sequence as per request.
>>>>>>>>>
>>>>>>>>> This will be used by GPU to setup the PRR bit and related
>>>>>>>>> configuration registers through adreno-smmu private
>>>>>>>>> interface instead of directly poking the smmu hardware.
>>>>>>>>>
>>>>>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
>>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>>>>> ---
>>>>>>>>>      drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
>>>>>>>>>      drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>>>>>>      include/linux/adreno-smmu-priv.h           | 14 ++++++++
>>>>>>>>>      3 files changed, 53 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>> index d26f5aea248e..0e4f3fbda961 100644
>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>>> @@ -16,6 +16,8 @@
>>>>>>>>>
>>>>>>>>>      #define QCOM_DUMMY_VAL -1
>>>>>>>>>
>>>>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>>>>>> +
>>>>>>>>>      static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
>>>>>>>>>      {
>>>>>>>>>             return container_of(smmu, struct qcom_smmu, smmu);
>>>>>>>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>>>>>>             arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>>>>>>      }
>>>>>>>>>
>>>>>>>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
>>>>>>>>> +{
>>>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>>>>>>> +       u32 reg = 0;
>>>>>>>>> +
>>>>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>>>>>>> +       reg &= ~GFX_ACTLR_PRR;
>>>>>>>>> +       if (set)
>>>>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
>>>>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
>>>>>>>>> +{
>>>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>>>>>>> +
>>>>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>>>>>>> +
>>>>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>      #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>>>>>>
>>>>>>>>>      static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>>>>>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
>>>>>>>>>      static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>>>>                     struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
>>>>>>>>>      {
>>>>>>>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
>>>>>>>>>             struct adreno_smmu_priv *priv;
>>>>>>>>>
>>>>>>>>>             smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
>>>>>>>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>>>>>>             priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>>>>>>             priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>>>>>>             priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>>>>>>> +       priv->set_prr_bit = NULL;
>>>>>>>>> +       priv->set_prr_addr = NULL;
>>>>>>>>> +
>>>>>>>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
>>>>>>>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
>>>>>>>>
>>>>>>>> fwiw, it seems like PRR actually works on sc7180, which is _not_
>>>>>>>> mmu-500, so I guess the support actually goes back further.
>>>>>>>>
>>>>>>>> I'm curious if we can just rely on this being supported by any hw that
>>>>>>>> has a6xx or newer?
>>>>>>>
>>>>>>>
>>>>>>> Also, unrelated, but we can't assume the smmu is powered when drm
>>>>>>> driver calls set_prr_bit/addr, could you add in runpm get/put around
>>>>>>> the register access?
>>>>>>>
>>>>>>
>>>>>> I see, thanks for this observation.
>>>>>> It's surely a possible case, when they access these registers
>>>>>> SMMU state is off.
>>>>>> I will add the suggested runpm ops around the register access.
>>>>>>
>>>>>>> Otherwise Conner and I have vk sparse residency working now
>>>>>>>
>>>>>>
>>>>>> Sorry, could not get this. Did you mean that vk sparse residency
>>>>>> is working now using this patch?
>>>>>
>>>>> Yes, vk sparse residency is working with this patch (plus addition of
>>>>> runpm get/put, plus drm/msm patches plus turnip patches) ;-)
>>>>>
>>>>
>>>> Thanks for testing the sparse residency feature with our patch Rob,
>>>> I have an additional query regarding the adreno private interfaces.
>>>> Specifically, I was referring to other interfaces such as
>>>> qcom_adreno_smmu_get_fault_info [1]. It appears that we do not have a
>>>> runpm get/put around the register access in this context.
>>>
>>> get_fault_info() is called from the iommu fault handler callback, so
>>> from the fault irq handler, which is why it didn't need the runpm
>>> get/put.  Maybe it is bad to make this assumption about usage, but
>>> then again adreno_smmu_priv isn't really a general purpose interface.
>>>
>>
>> Ah okay, got it.
>> I was just going through all the adreno_smmmu_priv interfaces just
>> to get a better understanding of it's interaction with smmu and it seems
>> like apart from PRR and get_fault_info(), set_ttbr0_cfg(),
>> resume_translation() is also having reg access but not voting.
>> Should we put runpm_put/get here as well or these can be excluded.
>> <Just curious about the thought process behind this, is it because of
>> some sequence when to put a vote, like reg access in middle of smmu
>> power cycle and not in beginning or end.>
> 
> I think we just haven't needed it, or noticed that we needed it,
> outside of setting up prr.
> 
> As I mentioned, get_fault_info() is called from the fault irq, so we
> know the smmu is already powered.
> 

okay got it, that makes sense.

> As far as set_ttbr0_cfg(), it probably works just because
> arm_smmu_write_context_bank() ends up getting called again in the
> resume path, so if the smmu is suspended when set_ttbr0_cfg() is
> called, the writes just get ignored.  But the updated cfg is
> re-applied to the hw when it is resumed.  Probably the same situation
> with resume_translation().. ie. if the smmu is suspended there are no
> translations to resume.
> 
> Maybe it would be more correct in set_ttbr0_cfg() and
> resume_translation() to do a pm_runtime_get_if_in_use() and skip the
> hw writes if the smmu is suspended.
> 
>>
>>>> Could you please clarify whether we need an SMMU vote around register
>>>> access in the case of PRR? IMO, should the users of this callback ensure
>>>> they put a vote before accessing the cb?
>>>
>>> How can drm vote for the smmu device?  I guess it could power up
>>> itself and rely on device-link.. but that is pretty overkill to power
>>> up the entire gpu in this case.  I think it is best for the vote to
>>> happen in the PRR callbacks.
>>>
>>
>> Okay I see, GPU can only power itself up through <gpu_state_get I
>> assume> but won't have exclusive access to power on the smmu only.
>>
>> In case we go forward with putting the vote in the PRR callbacks for SMMU,
>> I was planning to move the existing arm_smmu_rpm_put/get() helpers from
>> arm_smmu.c to its header file so that they can be used in
>> arm_smmu_qcom.c. What are your thoughts on this?
> 
> I had briefly thought of doing the same.  But the main reason for
> those helpers is common arm-smmu code that is used on non-qcom
> platforms where runpm is not enabled.  In arm-smmu-qcom.c, we know
> that runpm is enabled, so we could just use
> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() directly.
> 

Okay, I see; so we do not need the pm_runtime_enabled() check on qcom
platforms before placing the vote.
I am currently modifying only this patch, to add
pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() directly
around the register accesses in the PRR-related adreno private interfaces.
I will send the updated patch as part of v18 shortly.

Additionally, can we evaluate the use of pm_runtime operations for
set_ttbr0_cfg() and resume_translation() in a separate series?

Thanks & regards,
Bibek

> BR,
> -R
> 
>> Thanks & regards,
>> Bibek
>>
>>> BR,
>>> -R
>>>
>>>> [1]:
>>>> https://lore.kernel.org/all/20210610214431.539029-1-robdclark@gmail.com/
>>>>
>>>> Thanks & regards,
>>>> Bibek
>>>>
>>>>> BR,
>>>>> -R
>>>>>
>>>>>> Thanks & regards,
>>>>>> Bibek
>>>>>>
>>>>>>> BR,
>>>>>>> -R
>>>>>>>
>>>>>>>>
>>>>>>>>> +               priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
>>>>>>>>> +               priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
>>>>>>>>> +       }
>>>>>>>>>
>>>>>>>>>             return 0;
>>>>>>>>>      }
>>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>>>> index e2aeb511ae90..2dbf3243b5ad 100644
>>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>>>>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>>>>>>      #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>>>>>>
>>>>>>>>>      #define ARM_SMMU_CB_ACTLR              0x4
>>>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>>>>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>>>>>>
>>>>>>>>>      #define ARM_SMMU_CB_RESUME             0x8
>>>>>>>>>      #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>>>>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>>>>>>> index c637e0997f6d..614665153b3e 100644
>>>>>>>>> --- a/include/linux/adreno-smmu-priv.h
>>>>>>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>>>>>>> @@ -50,6 +50,18 @@ struct adreno_smmu_fault_info {
>>>>>>>>>       *                 the GPU driver must call resume_translation()
>>>>>>>>>       * @resume_translation: Resume translation after a fault
>>>>>>>>>       *
>>>>>>>>> + * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are set to NULL for
>>>>>>>>> + *             targets without PRR support. Check that these callbacks are
>>>>>>>>> + *             non-NULL (i.e. that the target supports PRR) before invoking
>>>>>>>>> + *             them, to avoid a NULL pointer dereference.
>>>>>>>>> + *
>>>>>>>>> + * @set_prr_bit:   Extendible interface to be used by GPU to modify the
>>>>>>>>> + *                ACTLR register bits, currently used to configure
>>>>>>>>> + *                Partially-Resident-Region (PRR) bit for feature's
>>>>>>>>> + *                setup and reset sequence as requested.
>>>>>>>>> + * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
>>>>>>>>> + *                physical address of PRR page passed from
>>>>>>>>> + *                GPU driver.
>>>>>>>>>       *
>>>>>>>>>       * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>>>>>>       * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>>>>>>> @@ -67,6 +79,8 @@ struct adreno_smmu_priv {
>>>>>>>>>          void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>>>>>>          void (*set_stall)(const void *cookie, bool enabled);
>>>>>>>>>          void (*resume_translation)(const void *cookie, bool terminate);
>>>>>>>>> +    void (*set_prr_bit)(const void *cookie, bool set);
>>>>>>>>> +    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
>>>>>>>>>      };
>>>>>>>>>
>>>>>>>>>      #endif /* __ADRENO_SMMU_PRIV_H */
>>>>>>>>> --
>>>>>>>>> 2.34.1
>>>>>>>>>
>>>>>>
>>>>
>>
Rob Clark Dec. 11, 2024, 3:22 p.m. UTC | #15
On Wed, Dec 11, 2024 at 5:30 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 12/6/2024 8:48 PM, Rob Clark wrote:
> > On Fri, Dec 6, 2024 at 4:36 AM Bibek Kumar Patro
> > <quic_bibekkum@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 12/4/2024 8:54 PM, Rob Clark wrote:
> >>> On Wed, Dec 4, 2024 at 3:28 AM Bibek Kumar Patro
> >>> <quic_bibekkum@quicinc.com> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On 11/22/2024 10:04 PM, Rob Clark wrote:
> >>>>> On Fri, Nov 22, 2024 at 8:20 AM Bibek Kumar Patro
> >>>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>> On 11/21/2024 3:40 AM, Rob Clark wrote:
> >>>>>>> On Wed, Nov 20, 2024 at 9:17 AM Rob Clark <robdclark@gmail.com> wrote:
> >>>>>>>>
> >>>>>>>> On Thu, Nov 14, 2024 at 8:10 AM Bibek Kumar Patro
> >>>>>>>> <quic_bibekkum@quicinc.com> wrote:
> >>>>>>>>>
> >>>>>>>>> Add an adreno-smmu-priv interface for drm/msm to call
> >>>>>>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >>>>>>>>> sequence as per request.
> >>>>>>>>>
> >>>>>>>>> This will be used by GPU to setup the PRR bit and related
> >>>>>>>>> configuration registers through adreno-smmu private
> >>>>>>>>> interface instead of directly poking the smmu hardware.
> >>>>>>>>>
> >>>>>>>>> Suggested-by: Rob Clark <robdclark@gmail.com>
> >>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>>>>>>>> ---
> >>>>>>>>>      drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 ++++++++++++++++++++++
> >>>>>>>>>      drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>>>>>>>>      include/linux/adreno-smmu-priv.h           | 14 ++++++++
> >>>>>>>>>      3 files changed, 53 insertions(+)
> >>>>>>>>>
> >>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>>> index d26f5aea248e..0e4f3fbda961 100644
> >>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>>>>> @@ -16,6 +16,8 @@
> >>>>>>>>>
> >>>>>>>>>      #define QCOM_DUMMY_VAL -1
> >>>>>>>>>
> >>>>>>>>> +#define GFX_ACTLR_PRR          (1 << 5)
> >>>>>>>>> +
> >>>>>>>>>      static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
> >>>>>>>>>      {
> >>>>>>>>>             return container_of(smmu, struct qcom_smmu, smmu);
> >>>>>>>>> @@ -99,6 +101,32 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>>>>>>>>             arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>>>>>>>>      }
> >>>>>>>>>
> >>>>>>>>> +static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
> >>>>>>>>> +{
> >>>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>>>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >>>>>>>>> +       u32 reg = 0;
> >>>>>>>>> +
> >>>>>>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >>>>>>>>> +       reg &= ~GFX_ACTLR_PRR;
> >>>>>>>>> +       if (set)
> >>>>>>>>> +               reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
> >>>>>>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >>>>>>>>> +}
> >>>>>>>>> +
> >>>>>>>>> +static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
> >>>>>>>>> +{
> >>>>>>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >>>>>>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >>>>>>>>> +
> >>>>>>>>> +       writel_relaxed(lower_32_bits(page_addr),
> >>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >>>>>>>>> +
> >>>>>>>>> +       writel_relaxed(upper_32_bits(page_addr),
> >>>>>>>>> +                               smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >>>>>>>>> +}
> >>>>>>>>> +
> >>>>>>>>>      #define QCOM_ADRENO_SMMU_GPU_SID 0
> >>>>>>>>>
> >>>>>>>>>      static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> >>>>>>>>> @@ -210,6 +238,7 @@ static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
> >>>>>>>>>      static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>>>>>>                     struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
> >>>>>>>>>      {
> >>>>>>>>> +       const struct device_node *np = smmu_domain->smmu->dev->of_node;
> >>>>>>>>>             struct adreno_smmu_priv *priv;
> >>>>>>>>>
> >>>>>>>>>             smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
> >>>>>>>>> @@ -239,6 +268,14 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>>>>>>>>             priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >>>>>>>>>             priv->set_stall = qcom_adreno_smmu_set_stall;
> >>>>>>>>>             priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >>>>>>>>> +       priv->set_prr_bit = NULL;
> >>>>>>>>> +       priv->set_prr_addr = NULL;
> >>>>>>>>> +
> >>>>>>>>> +       if (of_device_is_compatible(np, "qcom,smmu-500") &&
> >>>>>>>>> +                       of_device_is_compatible(np, "qcom,adreno-smmu")) {
> >>>>>>>>
> >>>>>>>> fwiw, it seems like PRR actually works on sc7180, which is _not_
> >>>>>>>> mmu-500, so I guess the support actually goes back further.
> >>>>>>>>
> >>>>>>>> I'm curious if we can just rely on this being supported by any hw that
> >>>>>>>> has a6xx or newer?
> >>>>>>>
> >>>>>>>
> >>>>>>> Also, unrelated, but we can't assume the smmu is powered when drm
> >>>>>>> driver calls set_prr_bit/addr, could you add in runpm get/put around
> >>>>>>> the register access?
> >>>>>>>
> >>>>>>
> >>>>>> I see, thanks for this observation.
> >>>>>> It's surely a possible case, when they access these registers
> >>>>>> SMMU state is off.
> >>>>>> I will add the suggested runpm ops around the register access.
> >>>>>>
> >>>>>>> Otherwise Conner and I have vk sparse residency working now
> >>>>>>>
> >>>>>>
> >>>>>> Sorry, could not get this. Did you mean that vk sparse residency
> >>>>>> is working now using this patch?
> >>>>>
> >>>>> Yes, vk sparse residency is working with this patch (plus addition of
> >>>>> runpm get/put, plus drm/msm patches plus turnip patches) ;-)
> >>>>>
> >>>>
> >>>> Thanks for testing the sparse residency feature with our patch Rob,
> >>>> I have an additional query regarding the adreno private interfaces.
> >>>> Specifically, I was referring to other interfaces such as
> >>>> qcom_adreno_smmu_get_fault_info [1]. It appears that we do not have a
> >>>> runpm get/put around the register access in this context.
> >>>
> >>> get_fault_info() is called from the iommu fault handler callback, so
> >>> from the fault irq handler, which is why it didn't need the runpm
> >>> get/put.  Maybe it is bad to make this assumption about usage, but
> >>> then again adreno_smmu_priv isn't really a general purpose interface.
> >>>
> >>
> >> Ah okay, got it.
> >> I was just going through all the adreno_smmu_priv interfaces
> >> to get a better understanding of their interaction with smmu and it seems
> >> like apart from PRR and get_fault_info(), set_ttbr0_cfg() and
> >> resume_translation() also have reg access but no voting.
> >> Should we put runpm_put/get here as well or these can be excluded.
> >> <Just curious about the thought process behind this, is it because of
> >> some sequence when to put a vote, like reg access in middle of smmu
> >> power cycle and not in beginning or end.>
> >
> > I think we just haven't needed it, or noticed that we needed it,
> > outside of setting up prr.
> >
> > As I mentioned, get_fault_info() is called from the fault irq, so we
> > know the smmu is already powered.
> >
>
> okay got it, that makes sense.
>
> > As far as set_ttbr0_cfg(), it probably works just because
> > arm_smmu_write_context_bank() ends up getting called again in the
> > resume path, so if the smmu is suspended when set_ttbr0_cfg() is
> > called, the writes just get ignored.  But the updated cfg is
> > re-applied to the hw when it is resumed.  Probably the same situation
> > with resume_translation().. ie. if the smmu is suspended there are no
> > translations to resume.
> >
> > Maybe it would be more correct in set_ttbr0_cfg() and
> > resume_translation() to do a pm_runtime_get_if_in_use() and skip the
> > hw writes if the smmu is suspended.
> >
> >>
> >>>> Could you please clarify whether we need an SMMU vote around register
> >>>> access in the case of PRR? IMO, should the users of this callback ensure
> >>>> they put a vote before accessing the cb?
> >>>
> >>> How can drm vote for the smmu device?  I guess it could power up
> >>> itself and rely on device-link.. but that is pretty overkill to power
> >>> up the entire gpu in this case.  I think it is best for the vote to
> >>> happen in the PRR callbacks.
> >>>
> >>
> >> Okay I see, GPU can only power itself up through <gpu_state_get I
> >> assume> but won't have exclusive access to power on the smmu only.
> >>
> >> In case we go forward with putting the vote in the PRR callbacks for SMMU,
> >> I was planning to move the existing arm_smmu_rpm_put/get() helpers from
> >> arm_smmu.c to its header file so that they can be used in
> >> arm_smmu_qcom.c. What are your thoughts on this?
> >
> > I had briefly thought of doing the same.  But the main reason for
> > those helpers is common arm-smmu code that is used on non-qcom
> > platforms where runpm is not enabled.  In arm-smmu-qcom.c, we know
> > that runpm is enabled, so we could just use
> > pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() directly.
> >
>
> Okay, I see; so we do not need the pm_runtime_enabled() check on qcom
> platforms before placing the vote.
> I am currently modifying only this patch, to add
> pm_runtime_resume_and_get()/pm_runtime_put_autosuspend() directly
> around the register accesses in the PRR-related adreno private interfaces.
> I will send the updated patch as part of v18 shortly.
>
> Additionally, can we evaluate the use of pm_runtime operations for
> set_ttbr0_cfg() and resume_translation() in a separate series?

Yup, sounds good

BR,
-R
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index d26f5aea248e..0e4f3fbda961 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -16,6 +16,8 @@ 

 #define QCOM_DUMMY_VAL	-1

+#define GFX_ACTLR_PRR          (1 << 5)
+
 static struct qcom_smmu *to_qcom_smmu(struct arm_smmu_device *smmu)
 {
 	return container_of(smmu, struct qcom_smmu, smmu);
@@ -99,6 +101,32 @@  static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
 	arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
 }

+static void qcom_adreno_smmu_set_prr_bit(const void *cookie, bool set)
+{
+	struct arm_smmu_domain *smmu_domain = (void *)cookie;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+	u32 reg = 0;
+
+	reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
+	reg &= ~GFX_ACTLR_PRR;
+	if (set)
+		reg |= FIELD_PREP(GFX_ACTLR_PRR, 1);
+	arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
+}
+
+static void qcom_adreno_smmu_set_prr_addr(const void *cookie, phys_addr_t page_addr)
+{
+	struct arm_smmu_domain *smmu_domain = (void *)cookie;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+
+	writel_relaxed(lower_32_bits(page_addr),
+				smmu->base + ARM_SMMU_GFX_PRR_CFG_LADDR);
+
+	writel_relaxed(upper_32_bits(page_addr),
+				smmu->base + ARM_SMMU_GFX_PRR_CFG_UADDR);
+}
+
 #define QCOM_ADRENO_SMMU_GPU_SID 0

 static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
@@ -210,6 +238,7 @@  static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
 static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
 		struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
 {
+	const struct device_node *np = smmu_domain->smmu->dev->of_node;
 	struct adreno_smmu_priv *priv;

 	smmu_domain->cfg.flush_walk_prefer_tlbiasid = true;
@@ -239,6 +268,14 @@  static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
 	priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
 	priv->set_stall = qcom_adreno_smmu_set_stall;
 	priv->resume_translation = qcom_adreno_smmu_resume_translation;
+	priv->set_prr_bit = NULL;
+	priv->set_prr_addr = NULL;
+
+	if (of_device_is_compatible(np, "qcom,smmu-500") &&
+			of_device_is_compatible(np, "qcom,adreno-smmu")) {
+		priv->set_prr_bit = qcom_adreno_smmu_set_prr_bit;
+		priv->set_prr_addr = qcom_adreno_smmu_set_prr_addr;
+	}

 	return 0;
 }
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index e2aeb511ae90..2dbf3243b5ad 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -154,6 +154,8 @@  enum arm_smmu_cbar_type {
 #define ARM_SMMU_SCTLR_M		BIT(0)

 #define ARM_SMMU_CB_ACTLR		0x4
+#define ARM_SMMU_GFX_PRR_CFG_LADDR	0x6008
+#define ARM_SMMU_GFX_PRR_CFG_UADDR	0x600C

 #define ARM_SMMU_CB_RESUME		0x8
 #define ARM_SMMU_RESUME_TERMINATE	BIT(0)
diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
index c637e0997f6d..614665153b3e 100644
--- a/include/linux/adreno-smmu-priv.h
+++ b/include/linux/adreno-smmu-priv.h
@@ -50,6 +50,18 @@  struct adreno_smmu_fault_info {
  *                 the GPU driver must call resume_translation()
  * @resume_translation: Resume translation after a fault
  *
+ * *CAUTION* : PRR callbacks (set_prr_bit/set_prr_addr) are set to NULL for
+ *             targets without PRR support. Check that these callbacks are
+ *             non-NULL (i.e. that the target supports PRR) before invoking
+ *             them, to avoid a NULL pointer dereference.
+ *
+ * @set_prr_bit:   Extendible interface to be used by GPU to modify the
+ *		   ACTLR register bits, currently used to configure
+ *		   Partially-Resident-Region (PRR) bit for feature's
+ *		   setup and reset sequence as requested.
+ * @set_prr_addr:  Configure the PRR_CFG_*ADDR register with the
+ *		   physical address of PRR page passed from
+ *		   GPU driver.
  *
  * The GPU driver (drm/msm) and adreno-smmu work together for controlling
  * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
@@ -67,6 +79,8 @@  struct adreno_smmu_priv {
     void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
     void (*set_stall)(const void *cookie, bool enabled);
     void (*resume_translation)(const void *cookie, bool terminate);
+    void (*set_prr_bit)(const void *cookie, bool set);
+    void (*set_prr_addr)(const void *cookie, phys_addr_t page_addr);
 };

 #endif /* __ADRENO_SMMU_PRIV_H */