diff mbox series

[v11,6/6] iommu/arm-smmu: add support for PRR bit setup

Message ID 20240605121713.3596499-7-quic_bibekkum@quicinc.com (mailing list archive)
State New, archived
Headers show
Series iommu/arm-smmu: introduction of ACTLR implementation for Qualcomm SoCs | expand

Commit Message

Bibek Kumar Patro June 5, 2024, 12:17 p.m. UTC
Add an adreno-smmu-priv interface for drm/msm to call
into arm-smmu-qcom and initiate the PRR bit setup or reset
sequence as per request.

This will be used by GPU side to setup the PRR bit and
related configuration registers through adreno-smmu private
interface instead of directly poking the smmu hardware.

Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 21 +++++++++++++++++++++
 drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
 include/linux/adreno-smmu-priv.h           |  5 ++++-
 3 files changed, 27 insertions(+), 1 deletion(-)

--
2.34.1

Comments

Rob Clark June 11, 2024, 7:02 p.m. UTC | #1
On Wed, Jun 5, 2024 at 5:18 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
> Add an adreno-smmu-priv interface for drm/msm to call
> into arm-smmu-qcom and initiate the PRR bit setup or reset
> sequence as per request.
>
> This will be used by GPU side to setup the PRR bit and
> related configuration registers through adreno-smmu private
> interface instead of directly poking the smmu hardware.
>
> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 21 +++++++++++++++++++++
>  drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>  include/linux/adreno-smmu-priv.h           |  5 ++++-
>  3 files changed, 27 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index 8dabc26fa10e..2f4ee22f740a 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -28,6 +28,7 @@
>  #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
>  #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
>  #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
> +#define GFX_ACTLR_PRR          (1 << 5)
>
>  static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>         { 0x0800, 0x24e1, PREFETCH_DEFAULT | CMTLB },
> @@ -212,6 +213,25 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>         arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>  }
>
> +static void qcom_adreno_smmu_set_actlr_bit(const void *cookie, phys_addr_t page_addr, bool set)
> +{

_set_actlr_bit() is a bit more of an implementation detail.  Maybe
_set_prr() would be a better name?

Also, the version of this patch that I typed up (but haven't sent to
list yet) took a `struct page *` instead of a phys_addr_t.. passing
NULL would disable PRR, so I didn't need the third arg

> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> +       u32 reg = 0;
> +
> +       writel_relaxed(lower_32_bits(page_addr),
> +                               (void *)smmu->ioaddr + ARM_SMMU_GFX_PRR_CFG_LADDR);
> +
> +       writel_relaxed(upper_32_bits(page_addr),
> +                               (void *)smmu->ioaddr + ARM_SMMU_GFX_PRR_CFG_UADDR);

I know downstream writes it as upper+lower, but I'd guess we could
just writeq, couldn't we?

> +
> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> +       reg |= FIELD_PREP(GFX_ACTLR_PRR, set ? 1 : 0);

this won't clear the ENABLE_PRR bit if we try to disable it after
enabling (unless this bit is read-as-zero

Also, can we give a name to PRR?  I'm guessing it is something like
physical-range-remap?

BR,
-R

> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> +
> +}
> +
>  #define QCOM_ADRENO_SMMU_GPU_SID 0
>
>  static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> @@ -384,6 +404,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>         priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>         priv->set_stall = qcom_adreno_smmu_set_stall;
>         priv->resume_translation = qcom_adreno_smmu_resume_translation;
> +       priv->set_actlr_bit = qcom_adreno_smmu_set_actlr_bit;
>
>         actlrvar = qsmmu->data->actlrvar;
>         if (!actlrvar)
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index d9c2ef8c1653..3076bef49e20 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>  #define ARM_SMMU_SCTLR_M               BIT(0)
>
>  #define ARM_SMMU_CB_ACTLR              0x4
> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>
>  #define ARM_SMMU_CB_RESUME             0x8
>  #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> index c637e0997f6d..448e191eeb52 100644
> --- a/include/linux/adreno-smmu-priv.h
> +++ b/include/linux/adreno-smmu-priv.h
> @@ -49,7 +49,9 @@ struct adreno_smmu_fault_info {
>   *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
>   *                 the GPU driver must call resume_translation()
>   * @resume_translation: Resume translation after a fault
> - *
> + * @set_actlr_bits: Extendible interface to be used  by GPU to modify the
> + *                 ACTLR bits, currently used to intitate PRR bit setup or
> + *                 reset sequence for ACTLR registers as requested.
>   *
>   * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>   * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> @@ -67,6 +69,7 @@ struct adreno_smmu_priv {
>      void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>      void (*set_stall)(const void *cookie, bool enabled);
>      void (*resume_translation)(const void *cookie, bool terminate);
> +    void (*set_actlr_bit)(const void *cookie, phys_addr_t page_addr, bool set);
>  };
>
>  #endif /* __ADRENO_SMMU_PRIV_H */
> --
> 2.34.1
>
Bibek Kumar Patro June 13, 2024, 8:11 a.m. UTC | #2
On 6/12/2024 12:32 AM, Rob Clark wrote:
> On Wed, Jun 5, 2024 at 5:18 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>> Add an adreno-smmu-priv interface for drm/msm to call
>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>> sequence as per request.
>>
>> This will be used by GPU side to setup the PRR bit and
>> related configuration registers through adreno-smmu private
>> interface instead of directly poking the smmu hardware.
>>
>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 21 +++++++++++++++++++++
>>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>   include/linux/adreno-smmu-priv.h           |  5 ++++-
>>   3 files changed, 27 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> index 8dabc26fa10e..2f4ee22f740a 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> @@ -28,6 +28,7 @@
>>   #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
>>   #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
>>   #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
>> +#define GFX_ACTLR_PRR          (1 << 5)
>>
>>   static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>          { 0x0800, 0x24e1, PREFETCH_DEFAULT | CMTLB },
>> @@ -212,6 +213,25 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>          arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>   }
>>
>> +static void qcom_adreno_smmu_set_actlr_bit(const void *cookie, phys_addr_t page_addr, bool set)
>> +{
> 
> _set_actlr_bit() is a bit more of an implementation detail.  Maybe
> _set_prr() would be a better name?
> 

Yes set_prr sounds more explanatory. Infact Initially planned to name it 
as set_actlr_prr() but later changed it to set_actlr_bit() so to keep 
this interface extendible.
Incase if gfx driver in future wants to control some other ACTLR bit as
well along with PRR bit, then we can extend this same interface to
handle other bits.
So any additional adreno-smmu-priv interface would note be needed, and
set_actlr_bit can itself be used for gfx handling of all required ACTLR 
bits.
I think we can name it set_actlr_prr() for now, and later can change the 
name incase we extend it. What's your thought on this?

> Also, the version of this patch that I typed up (but haven't sent to
> list yet) took a `struct page *` instead of a phys_addr_t.. passing
> NULL would disable PRR, so I didn't need the third arg
> 
>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>> +       u32 reg = 0;
>> +
>> +       writel_relaxed(lower_32_bits(page_addr),
>> +                               (void *)smmu->ioaddr + ARM_SMMU_GFX_PRR_CFG_LADDR);
>> +
>> +       writel_relaxed(upper_32_bits(page_addr),
>> +                               (void *)smmu->ioaddr + ARM_SMMU_GFX_PRR_CFG_UADDR);
> 
> I know downstream writes it as upper+lower, but I'd guess we could
> just writeq, couldn't we?
> 

Actually ARM_SMMU_GFX_PRR_CFG_LADDR, ARM_SMMU_GFX_PRR_CFG_UADDR both are 
separate 32 bit registers. So I think writeq for 64bit write might not 
work, as these are not 64 bit registers nor these two are separated by 
64 aligned address.

>> +
>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>> +       reg |= FIELD_PREP(GFX_ACTLR_PRR, set ? 1 : 0);
> 
> this won't clear the ENABLE_PRR bit if we try to disable it after
> enabling (unless this bit is read-as-zero
> 

Ah okay right, got it. Thanks for pointing this out.
Will take care of this in next version.
will use set/reset in side if() instead with different
bitwise ops for set and reset.


> Also, can we give a name to PRR?  I'm guessing it is something like
> physical-range-remap?

Yes sure. I checked on this, PRR here actually stands for 
partially-resident-region.
So would be better If we add expansion for PRR in a comment ?
Because if we expand PRR in variables/defines then wouldn't
the names become too long?

Thanks & regards,
Bibek

> 
> BR,
> -R
> 
>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>> +
>> +}
>> +
>>   #define QCOM_ADRENO_SMMU_GPU_SID 0
>>
>>   static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>> @@ -384,6 +404,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>          priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>          priv->set_stall = qcom_adreno_smmu_set_stall;
>>          priv->resume_translation = qcom_adreno_smmu_resume_translation;
>> +       priv->set_actlr_bit = qcom_adreno_smmu_set_actlr_bit;
>>
>>          actlrvar = qsmmu->data->actlrvar;
>>          if (!actlrvar)
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> index d9c2ef8c1653..3076bef49e20 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>   #define ARM_SMMU_SCTLR_M               BIT(0)
>>
>>   #define ARM_SMMU_CB_ACTLR              0x4
>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>
>>   #define ARM_SMMU_CB_RESUME             0x8
>>   #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>> index c637e0997f6d..448e191eeb52 100644
>> --- a/include/linux/adreno-smmu-priv.h
>> +++ b/include/linux/adreno-smmu-priv.h
>> @@ -49,7 +49,9 @@ struct adreno_smmu_fault_info {
>>    *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
>>    *                 the GPU driver must call resume_translation()
>>    * @resume_translation: Resume translation after a fault
>> - *
>> + * @set_actlr_bits: Extendible interface to be used  by GPU to modify the
>> + *                 ACTLR bits, currently used to intitate PRR bit setup or
>> + *                 reset sequence for ACTLR registers as requested.
>>    *
>>    * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>    * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>> @@ -67,6 +69,7 @@ struct adreno_smmu_priv {
>>       void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>       void (*set_stall)(const void *cookie, bool enabled);
>>       void (*resume_translation)(const void *cookie, bool terminate);
>> +    void (*set_actlr_bit)(const void *cookie, phys_addr_t page_addr, bool set);
>>   };
>>
>>   #endif /* __ADRENO_SMMU_PRIV_H */
>> --
>> 2.34.1
>>
Rob Clark June 26, 2024, 6:46 p.m. UTC | #3
On Thu, Jun 13, 2024 at 1:11 AM Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 6/12/2024 12:32 AM, Rob Clark wrote:
> > On Wed, Jun 5, 2024 at 5:18 AM Bibek Kumar Patro
> > <quic_bibekkum@quicinc.com> wrote:
> >>
> >> Add an adreno-smmu-priv interface for drm/msm to call
> >> into arm-smmu-qcom and initiate the PRR bit setup or reset
> >> sequence as per request.
> >>
> >> This will be used by GPU side to setup the PRR bit and
> >> related configuration registers through adreno-smmu private
> >> interface instead of directly poking the smmu hardware.
> >>
> >> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >> ---
> >>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 21 +++++++++++++++++++++
> >>   drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
> >>   include/linux/adreno-smmu-priv.h           |  5 ++++-
> >>   3 files changed, 27 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >> index 8dabc26fa10e..2f4ee22f740a 100644
> >> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >> @@ -28,6 +28,7 @@
> >>   #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
> >>   #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
> >>   #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
> >> +#define GFX_ACTLR_PRR          (1 << 5)
> >>
> >>   static const struct actlr_config sc7280_apps_actlr_cfg[] = {
> >>          { 0x0800, 0x24e1, PREFETCH_DEFAULT | CMTLB },
> >> @@ -212,6 +213,25 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
> >>          arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
> >>   }
> >>
> >> +static void qcom_adreno_smmu_set_actlr_bit(const void *cookie, phys_addr_t page_addr, bool set)
> >> +{
> >
> > _set_actlr_bit() is a bit more of an implementation detail.  Maybe
> > _set_prr() would be a better name?
> >
>
> Yes set_prr sounds more explanatory. Infact Initially planned to name it
> as set_actlr_prr() but later changed it to set_actlr_bit() so to keep
> this interface extendible.
> Incase if gfx driver in future wants to control some other ACTLR bit as
> well along with PRR bit, then we can extend this same interface to
> handle other bits.
> So any additional adreno-smmu-priv interface would note be needed, and
> set_actlr_bit can itself be used for gfx handling of all required ACTLR
> bits.
> I think we can name it set_actlr_prr() for now, and later can change the
> name incase we extend it. What's your thought on this?

I think set_prr() or set_acltr_prr() is fine.. we can worry about
making it more generic if and when there are other ACLTR bits the gpu
wants to control

> > Also, the version of this patch that I typed up (but haven't sent to
> > list yet) took a `struct page *` instead of a phys_addr_t.. passing
> > NULL would disable PRR, so I didn't need the third arg
> >
> >> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
> >> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> >> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
> >> +       u32 reg = 0;
> >> +
> >> +       writel_relaxed(lower_32_bits(page_addr),
> >> +                               (void *)smmu->ioaddr + ARM_SMMU_GFX_PRR_CFG_LADDR);
> >> +
> >> +       writel_relaxed(upper_32_bits(page_addr),
> >> +                               (void *)smmu->ioaddr + ARM_SMMU_GFX_PRR_CFG_UADDR);
> >
> > I know downstream writes it as upper+lower, but I'd guess we could
> > just writeq, couldn't we?
> >
>
> Actually ARM_SMMU_GFX_PRR_CFG_LADDR, ARM_SMMU_GFX_PRR_CFG_UADDR both are
> separate 32 bit registers. So I think writeq for 64bit write might not
> work, as these are not 64 bit registers nor these two are separated by
> 64 aligned address.

ahh, ok

> >> +
> >> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
> >> +       reg |= FIELD_PREP(GFX_ACTLR_PRR, set ? 1 : 0);
> >
> > this won't clear the ENABLE_PRR bit if we try to disable it after
> > enabling (unless this bit is read-as-zero
> >
>
> Ah okay right, got it. Thanks for pointing this out.
> Will take care of this in next version.
> will use set/reset in side if() instead with different
> bitwise ops for set and reset.
>
>
> > Also, can we give a name to PRR?  I'm guessing it is something like
> > physical-range-remap?
>
> Yes sure. I checked on this, PRR here actually stands for
> partially-resident-region.

ahh, ok, that makes sense

> So would be better If we add expansion for PRR in a comment ?
> Because if we expand PRR in variables/defines then wouldn't
> the names become too long?

Yeah, I was just thinking to mention it in the comment for set_prr()

BR,
-R

>
> Thanks & regards,
> Bibek
>
> >
> > BR,
> > -R
> >
> >> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
> >> +
> >> +}
> >> +
> >>   #define QCOM_ADRENO_SMMU_GPU_SID 0
> >>
> >>   static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
> >> @@ -384,6 +404,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
> >>          priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
> >>          priv->set_stall = qcom_adreno_smmu_set_stall;
> >>          priv->resume_translation = qcom_adreno_smmu_resume_translation;
> >> +       priv->set_actlr_bit = qcom_adreno_smmu_set_actlr_bit;
> >>
> >>          actlrvar = qsmmu->data->actlrvar;
> >>          if (!actlrvar)
> >> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >> index d9c2ef8c1653..3076bef49e20 100644
> >> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> >> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
> >>   #define ARM_SMMU_SCTLR_M               BIT(0)
> >>
> >>   #define ARM_SMMU_CB_ACTLR              0x4
> >> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
> >> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
> >>
> >>   #define ARM_SMMU_CB_RESUME             0x8
> >>   #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
> >> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
> >> index c637e0997f6d..448e191eeb52 100644
> >> --- a/include/linux/adreno-smmu-priv.h
> >> +++ b/include/linux/adreno-smmu-priv.h
> >> @@ -49,7 +49,9 @@ struct adreno_smmu_fault_info {
> >>    *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
> >>    *                 the GPU driver must call resume_translation()
> >>    * @resume_translation: Resume translation after a fault
> >> - *
> >> + * @set_actlr_bits: Extendible interface to be used  by GPU to modify the
> >> + *                 ACTLR bits, currently used to intitate PRR bit setup or
> >> + *                 reset sequence for ACTLR registers as requested.
> >>    *
> >>    * The GPU driver (drm/msm) and adreno-smmu work together for controlling
> >>    * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
> >> @@ -67,6 +69,7 @@ struct adreno_smmu_priv {
> >>       void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
> >>       void (*set_stall)(const void *cookie, bool enabled);
> >>       void (*resume_translation)(const void *cookie, bool terminate);
> >> +    void (*set_actlr_bit)(const void *cookie, phys_addr_t page_addr, bool set);
> >>   };
> >>
> >>   #endif /* __ADRENO_SMMU_PRIV_H */
> >> --
> >> 2.34.1
> >>
Bibek Kumar Patro June 28, 2024, 2:11 p.m. UTC | #4
On 6/27/2024 12:16 AM, Rob Clark wrote:
> On Thu, Jun 13, 2024 at 1:11 AM Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 6/12/2024 12:32 AM, Rob Clark wrote:
>>> On Wed, Jun 5, 2024 at 5:18 AM Bibek Kumar Patro
>>> <quic_bibekkum@quicinc.com> wrote:
>>>>
>>>> Add an adreno-smmu-priv interface for drm/msm to call
>>>> into arm-smmu-qcom and initiate the PRR bit setup or reset
>>>> sequence as per request.
>>>>
>>>> This will be used by GPU side to setup the PRR bit and
>>>> related configuration registers through adreno-smmu private
>>>> interface instead of directly poking the smmu hardware.
>>>>
>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>> ---
>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 21 +++++++++++++++++++++
>>>>    drivers/iommu/arm/arm-smmu/arm-smmu.h      |  2 ++
>>>>    include/linux/adreno-smmu-priv.h           |  5 ++++-
>>>>    3 files changed, 27 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> index 8dabc26fa10e..2f4ee22f740a 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> @@ -28,6 +28,7 @@
>>>>    #define PREFETCH_SHALLOW       (1 << PREFETCH_SHIFT)
>>>>    #define PREFETCH_MODERATE      (2 << PREFETCH_SHIFT)
>>>>    #define PREFETCH_DEEP          (3 << PREFETCH_SHIFT)
>>>> +#define GFX_ACTLR_PRR          (1 << 5)
>>>>
>>>>    static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>>>           { 0x0800, 0x24e1, PREFETCH_DEFAULT | CMTLB },
>>>> @@ -212,6 +213,25 @@ static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
>>>>           arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
>>>>    }
>>>>
>>>> +static void qcom_adreno_smmu_set_actlr_bit(const void *cookie, phys_addr_t page_addr, bool set)
>>>> +{
>>>
>>> _set_actlr_bit() is a bit more of an implementation detail.  Maybe
>>> _set_prr() would be a better name?
>>>
>>
>> Yes set_prr sounds more explanatory. Infact Initially planned to name it
>> as set_actlr_prr() but later changed it to set_actlr_bit() so to keep
>> this interface extendible.
>> Incase if gfx driver in future wants to control some other ACTLR bit as
>> well along with PRR bit, then we can extend this same interface to
>> handle other bits.
>> So any additional adreno-smmu-priv interface would note be needed, and
>> set_actlr_bit can itself be used for gfx handling of all required ACTLR
>> bits.
>> I think we can name it set_actlr_prr() for now, and later can change the
>> name incase we extend it. What's your thought on this?
> 
> I think set_prr() or set_acltr_prr() is fine.. we can worry about
> making it more generic if and when there are other ACLTR bits the gpu
> wants to control
> 

Ack, sounds good to me as well. I've used set_prr() for now.

>>> Also, the version of this patch that I typed up (but haven't sent to
>>> list yet) took a `struct page *` instead of a phys_addr_t.. passing
>>> NULL would disable PRR, so I didn't need the third arg
>>>
>>>> +       struct arm_smmu_domain *smmu_domain = (void *)cookie;
>>>> +       struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
>>>> +       struct arm_smmu_device *smmu = smmu_domain->smmu;
>>>> +       u32 reg = 0;
>>>> +
>>>> +       writel_relaxed(lower_32_bits(page_addr),
>>>> +                               (void *)smmu->ioaddr + ARM_SMMU_GFX_PRR_CFG_LADDR);
>>>> +
>>>> +       writel_relaxed(upper_32_bits(page_addr),
>>>> +                               (void *)smmu->ioaddr + ARM_SMMU_GFX_PRR_CFG_UADDR);
>>>
>>> I know downstream writes it as upper+lower, but I'd guess we could
>>> just writeq, couldn't we?
>>>
>>
>> Actually ARM_SMMU_GFX_PRR_CFG_LADDR, ARM_SMMU_GFX_PRR_CFG_UADDR both are
>> separate 32 bit registers. So I think writeq for 64bit write might not
>> work, as these are not 64 bit registers nor these two are separated by
>> 64 aligned address.
> 
> ahh, ok
> 
>>>> +
>>>> +       reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
>>>> +       reg |= FIELD_PREP(GFX_ACTLR_PRR, set ? 1 : 0);
>>>
>>> this won't clear the ENABLE_PRR bit if we try to disable it after
>>> enabling (unless this bit is read-as-zero
>>>
>>
>> Ah okay right, got it. Thanks for pointing this out.
>> Will take care of this in next version.
>> will use set/reset in side if() instead with different
>> bitwise ops for set and reset.
>>
>>
>>> Also, can we give a name to PRR?  I'm guessing it is something like
>>> physical-range-remap?
>>
>> Yes sure. I checked on this, PRR here actually stands for
>> partially-resident-region.
> 
> ahh, ok, that makes sense
> 
>> So would be better If we add expansion for PRR in a comment ?
>> Because if we expand PRR in variables/defines then wouldn't
>> the names become too long?
> 
> Yeah, I was just thinking to mention it in the comment for set_prr()
> 

Yes, that seemed to be an appropriate place to add the expansion.
Mentioned it in the comment for now.

Thanks & regards,
Bibek

> BR,
> -R
> 
>>
>> Thanks & regards,
>> Bibek
>>
>>>
>>> BR,
>>> -R
>>>
>>>> +       arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
>>>> +
>>>> +}
>>>> +
>>>>    #define QCOM_ADRENO_SMMU_GPU_SID 0
>>>>
>>>>    static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
>>>> @@ -384,6 +404,7 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
>>>>           priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
>>>>           priv->set_stall = qcom_adreno_smmu_set_stall;
>>>>           priv->resume_translation = qcom_adreno_smmu_resume_translation;
>>>> +       priv->set_actlr_bit = qcom_adreno_smmu_set_actlr_bit;
>>>>
>>>>           actlrvar = qsmmu->data->actlrvar;
>>>>           if (!actlrvar)
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>> index d9c2ef8c1653..3076bef49e20 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
>>>> @@ -154,6 +154,8 @@ enum arm_smmu_cbar_type {
>>>>    #define ARM_SMMU_SCTLR_M               BIT(0)
>>>>
>>>>    #define ARM_SMMU_CB_ACTLR              0x4
>>>> +#define ARM_SMMU_GFX_PRR_CFG_LADDR     0x6008
>>>> +#define ARM_SMMU_GFX_PRR_CFG_UADDR     0x600C
>>>>
>>>>    #define ARM_SMMU_CB_RESUME             0x8
>>>>    #define ARM_SMMU_RESUME_TERMINATE      BIT(0)
>>>> diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
>>>> index c637e0997f6d..448e191eeb52 100644
>>>> --- a/include/linux/adreno-smmu-priv.h
>>>> +++ b/include/linux/adreno-smmu-priv.h
>>>> @@ -49,7 +49,9 @@ struct adreno_smmu_fault_info {
>>>>     *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
>>>>     *                 the GPU driver must call resume_translation()
>>>>     * @resume_translation: Resume translation after a fault
>>>> - *
>>>> + * @set_actlr_bits: Extendible interface to be used  by GPU to modify the
>>>> + *                 ACTLR bits, currently used to intitate PRR bit setup or
>>>> + *                 reset sequence for ACTLR registers as requested.
>>>>     *
>>>>     * The GPU driver (drm/msm) and adreno-smmu work together for controlling
>>>>     * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
>>>> @@ -67,6 +69,7 @@ struct adreno_smmu_priv {
>>>>        void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
>>>>        void (*set_stall)(const void *cookie, bool enabled);
>>>>        void (*resume_translation)(const void *cookie, bool terminate);
>>>> +    void (*set_actlr_bit)(const void *cookie, phys_addr_t page_addr, bool set);
>>>>    };
>>>>
>>>>    #endif /* __ADRENO_SMMU_PRIV_H */
>>>> --
>>>> 2.34.1
>>>>
>
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 8dabc26fa10e..2f4ee22f740a 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -28,6 +28,7 @@ 
 #define PREFETCH_SHALLOW	(1 << PREFETCH_SHIFT)
 #define PREFETCH_MODERATE	(2 << PREFETCH_SHIFT)
 #define PREFETCH_DEEP		(3 << PREFETCH_SHIFT)
+#define GFX_ACTLR_PRR		(1 << 5)

 static const struct actlr_config sc7280_apps_actlr_cfg[] = {
 	{ 0x0800, 0x24e1, PREFETCH_DEFAULT | CMTLB },
@@ -212,6 +213,25 @@  static void qcom_adreno_smmu_resume_translation(const void *cookie, bool termina
 	arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_RESUME, reg);
 }

+static void qcom_adreno_smmu_set_actlr_bit(const void *cookie, phys_addr_t page_addr, bool set)
+{
+	struct arm_smmu_domain *smmu_domain = (void *)cookie;
+	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
+	struct arm_smmu_device *smmu = smmu_domain->smmu;
+	u32 reg = 0;
+
+	writel_relaxed(lower_32_bits(page_addr),
+				(void *)smmu->ioaddr + ARM_SMMU_GFX_PRR_CFG_LADDR);
+
+	writel_relaxed(upper_32_bits(page_addr),
+				(void *)smmu->ioaddr + ARM_SMMU_GFX_PRR_CFG_UADDR);
+
+	reg =  arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR);
+	reg |= FIELD_PREP(GFX_ACTLR_PRR, set ? 1 : 0);
+	arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_ACTLR, reg);
+
+}
+
 #define QCOM_ADRENO_SMMU_GPU_SID 0

 static bool qcom_adreno_smmu_is_gpu_device(struct device *dev)
@@ -384,6 +404,7 @@  static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
 	priv->get_fault_info = qcom_adreno_smmu_get_fault_info;
 	priv->set_stall = qcom_adreno_smmu_set_stall;
 	priv->resume_translation = qcom_adreno_smmu_resume_translation;
+	priv->set_actlr_bit = qcom_adreno_smmu_set_actlr_bit;

 	actlrvar = qsmmu->data->actlrvar;
 	if (!actlrvar)
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index d9c2ef8c1653..3076bef49e20 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -154,6 +154,8 @@  enum arm_smmu_cbar_type {
 #define ARM_SMMU_SCTLR_M		BIT(0)

 #define ARM_SMMU_CB_ACTLR		0x4
+#define ARM_SMMU_GFX_PRR_CFG_LADDR	0x6008
+#define ARM_SMMU_GFX_PRR_CFG_UADDR	0x600C

 #define ARM_SMMU_CB_RESUME		0x8
 #define ARM_SMMU_RESUME_TERMINATE	BIT(0)
diff --git a/include/linux/adreno-smmu-priv.h b/include/linux/adreno-smmu-priv.h
index c637e0997f6d..448e191eeb52 100644
--- a/include/linux/adreno-smmu-priv.h
+++ b/include/linux/adreno-smmu-priv.h
@@ -49,7 +49,9 @@  struct adreno_smmu_fault_info {
  *                 before set_ttbr0_cfg().  If stalling on fault is enabled,
  *                 the GPU driver must call resume_translation()
  * @resume_translation: Resume translation after a fault
- *
+ * @set_actlr_bits: Extendible interface to be used  by GPU to modify the
+ *		    ACTLR bits, currently used to intitate PRR bit setup or
+ *		    reset sequence for ACTLR registers as requested.
  *
  * The GPU driver (drm/msm) and adreno-smmu work together for controlling
  * the GPU's SMMU instance.  This is by necessity, as the GPU is directly
@@ -67,6 +69,7 @@  struct adreno_smmu_priv {
     void (*get_fault_info)(const void *cookie, struct adreno_smmu_fault_info *info);
     void (*set_stall)(const void *cookie, bool enabled);
     void (*resume_translation)(const void *cookie, bool terminate);
+    void (*set_actlr_bit)(const void *cookie, phys_addr_t page_addr, bool set);
 };

 #endif /* __ADRENO_SMMU_PRIV_H */