diff mbox series

[v4,1/5] iommu/arm-smmu: Save additional information on context fault

Message ID 20250304-msm-gpu-fault-fixes-next-v4-1-be14be37f4c3@gmail.com (mailing list archive)
State Handled Elsewhere
Headers show
Series iommu/arm-smmu, drm/msm: Fixes for stall-on-fault | expand

Commit Message

Connor Abbott March 4, 2025, 4:56 p.m. UTC
This will be used by drm/msm for GPU page faults, replacing the manual
register reading it does.

Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c |  4 ++--
 drivers/iommu/arm/arm-smmu/arm-smmu.c            | 27 +++++++++++++-----------
 drivers/iommu/arm/arm-smmu/arm-smmu.h            |  5 ++++-
 3 files changed, 21 insertions(+), 15 deletions(-)

Comments

Rob Clark March 5, 2025, 7:09 p.m. UTC | #1
On Tue, Mar 4, 2025 at 8:57 AM Connor Abbott <cwabbott0@gmail.com> wrote:
>
> This will be used by drm/msm for GPU page faults, replacing the manual
> register reading it does.
>
> Signed-off-by: Connor Abbott <cwabbott0@gmail.com>

Reviewed-by: Rob Clark <robdclark@gmail.com>

> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c |  4 ++--
>  drivers/iommu/arm/arm-smmu/arm-smmu.c            | 27 +++++++++++++-----------
>  drivers/iommu/arm/arm-smmu/arm-smmu.h            |  5 ++++-
>  3 files changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> index 548783f3f8e89fd978367afa65c473002f66e2e7..ae4fdbbce6ba80440f539557a39866a932360d4e 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> @@ -400,7 +400,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
>
>         if (list_empty(&tbu_list)) {
>                 ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> -                                        cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> +                                        cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
>
>                 if (ret == -ENOSYS)
>                         arm_smmu_print_context_fault_info(smmu, idx, &cfi);
> @@ -412,7 +412,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
>         phys_soft = ops->iova_to_phys(ops, cfi.iova);
>
>         tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> -                                cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> +                                cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
>         if (!tmp || tmp == -EBUSY) {
>                 ret = IRQ_HANDLED;
>                 resume = ARM_SMMU_RESUME_TERMINATE;
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index ade4684c14c9b2724a71e2457288dbfaf7562c83..a9213e0f1579d1e3be0bfba75eea1d5de23117de 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -409,9 +409,12 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
>                                       struct arm_smmu_context_fault_info *cfi)
>  {
>         cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
> +       cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0);
>         cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
> -       cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> +       cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> +       cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1);
>         cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
> +       cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR);
>  }
>
>  void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx,
> @@ -419,7 +422,7 @@ void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx,
>  {
>         dev_err(smmu->dev,
>                 "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
> -               cfi->fsr, cfi->iova, cfi->fsynr, cfi->cbfrsynra, idx);
> +               cfi->fsr, cfi->iova, cfi->fsynr0, cfi->cbfrsynra, idx);
>
>         dev_err(smmu->dev, "FSR    = %08x [%s%sFormat=%u%s%s%s%s%s%s%s%s], SID=0x%x\n",
>                 cfi->fsr,
> @@ -437,15 +440,15 @@ void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx,
>                 cfi->cbfrsynra);
>
>         dev_err(smmu->dev, "FSYNR0 = %08x [S1CBNDX=%u%s%s%s%s%s%s PLVL=%u]\n",
> -               cfi->fsynr,
> -               (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_S1CBNDX, cfi->fsynr),
> -               (cfi->fsynr & ARM_SMMU_CB_FSYNR0_AFR) ? " AFR" : "",
> -               (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PTWF) ? " PTWF" : "",
> -               (cfi->fsynr & ARM_SMMU_CB_FSYNR0_NSATTR) ? " NSATTR" : "",
> -               (cfi->fsynr & ARM_SMMU_CB_FSYNR0_IND) ? " IND" : "",
> -               (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PNU) ? " PNU" : "",
> -               (cfi->fsynr & ARM_SMMU_CB_FSYNR0_WNR) ? " WNR" : "",
> -               (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_PLVL, cfi->fsynr));
> +               cfi->fsynr0,
> +               (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_S1CBNDX, cfi->fsynr0),
> +               (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_AFR) ? " AFR" : "",
> +               (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_PTWF) ? " PTWF" : "",
> +               (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_NSATTR) ? " NSATTR" : "",
> +               (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_IND) ? " IND" : "",
> +               (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_PNU) ? " PNU" : "",
> +               (cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_WNR) ? " WNR" : "",
> +               (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_PLVL, cfi->fsynr0));
>  }
>
>  static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
> @@ -464,7 +467,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
>                 return IRQ_NONE;
>
>         ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> -               cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> +               cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
>
>         if (ret == -ENOSYS && __ratelimit(&rs))
>                 arm_smmu_print_context_fault_info(smmu, idx, &cfi);
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> index e2aeb511ae903302e3c15d2cf5f22e2a26ac2346..d3bc77dcd4d40f25bc70f3289616fb866649b022 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
> @@ -543,9 +543,12 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu);
>
>  struct arm_smmu_context_fault_info {
>         unsigned long iova;
> +       u64 ttbr0;
>         u32 fsr;
> -       u32 fsynr;
> +       u32 fsynr0;
> +       u32 fsynr1;
>         u32 cbfrsynra;
> +       u32 contextidr;
>  };
>
>  void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
>
> --
> 2.47.1
>
Will Deacon March 11, 2025, 6:05 p.m. UTC | #2
On Tue, Mar 04, 2025 at 11:56:47AM -0500, Connor Abbott wrote:
> This will be used by drm/msm for GPU page faults, replacing the manual
> register reading it does.
> 
> Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c |  4 ++--
>  drivers/iommu/arm/arm-smmu/arm-smmu.c            | 27 +++++++++++++-----------
>  drivers/iommu/arm/arm-smmu/arm-smmu.h            |  5 ++++-
>  3 files changed, 21 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> index 548783f3f8e89fd978367afa65c473002f66e2e7..ae4fdbbce6ba80440f539557a39866a932360d4e 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> @@ -400,7 +400,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
>  
>  	if (list_empty(&tbu_list)) {
>  		ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> -					 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> +					 cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
>  
>  		if (ret == -ENOSYS)
>  			arm_smmu_print_context_fault_info(smmu, idx, &cfi);
> @@ -412,7 +412,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
>  	phys_soft = ops->iova_to_phys(ops, cfi.iova);
>  
>  	tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> -				 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> +				 cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
>  	if (!tmp || tmp == -EBUSY) {
>  		ret = IRQ_HANDLED;
>  		resume = ARM_SMMU_RESUME_TERMINATE;
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> index ade4684c14c9b2724a71e2457288dbfaf7562c83..a9213e0f1579d1e3be0bfba75eea1d5de23117de 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> @@ -409,9 +409,12 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
>  				      struct arm_smmu_context_fault_info *cfi)
>  {
>  	cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
> +	cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0);
>  	cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
> -	cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> +	cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> +	cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1);

We already have an implementation hook (->get_fault_info()) which the
qcom SMMU driver can override with qcom_adreno_smmu_get_fault_info().
That thing dumps these registers already so if we're moving that into
the core SMMU driver, let's get rid of the hook and move everybody over
rather than having it done in both places.

>  	cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
> +	cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR);

I think the CONTEXTIDR register is stage-1 only, so we shouldn't dump
it for stage-2 domains.

Will
Connor Abbott March 11, 2025, 10:36 p.m. UTC | #3
On Tue, Mar 11, 2025 at 2:06 PM Will Deacon <will@kernel.org> wrote:
>
> On Tue, Mar 04, 2025 at 11:56:47AM -0500, Connor Abbott wrote:
> > This will be used by drm/msm for GPU page faults, replacing the manual
> > register reading it does.
> >
> > Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
> > ---
> >  drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c |  4 ++--
> >  drivers/iommu/arm/arm-smmu/arm-smmu.c            | 27 +++++++++++++-----------
> >  drivers/iommu/arm/arm-smmu/arm-smmu.h            |  5 ++++-
> >  3 files changed, 21 insertions(+), 15 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> > index 548783f3f8e89fd978367afa65c473002f66e2e7..ae4fdbbce6ba80440f539557a39866a932360d4e 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> > @@ -400,7 +400,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
> >
> >       if (list_empty(&tbu_list)) {
> >               ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> > -                                      cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> > +                                      cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> >
> >               if (ret == -ENOSYS)
> >                       arm_smmu_print_context_fault_info(smmu, idx, &cfi);
> > @@ -412,7 +412,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
> >       phys_soft = ops->iova_to_phys(ops, cfi.iova);
> >
> >       tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> > -                              cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> > +                              cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> >       if (!tmp || tmp == -EBUSY) {
> >               ret = IRQ_HANDLED;
> >               resume = ARM_SMMU_RESUME_TERMINATE;
> > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > index ade4684c14c9b2724a71e2457288dbfaf7562c83..a9213e0f1579d1e3be0bfba75eea1d5de23117de 100644
> > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > @@ -409,9 +409,12 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
> >                                     struct arm_smmu_context_fault_info *cfi)
> >  {
> >       cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
> > +     cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0);
> >       cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
> > -     cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> > +     cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> > +     cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1);
>
> We already have an implementation hook (->get_fault_info()) which the
> qcom SMMU driver can override with qcom_adreno_smmu_get_fault_info().
> That thing dumps these registers already so if we're moving that into
> the core SMMU driver, let's get rid of the hook and move everybody over
> rather than having it done in both places.

As you probably saw, the next commit moves over
qcom_adreno_smmu_get_fault_info() to use this. The current back door
used by drm/msm to access these functions is specific to adreno_smmu
and there isn't an equivalent interface to allow it to call a generic
SMMU function so it isn't possible to move it entirely to the core. At
least not without a bigger refactoring that isn't justified for this
series that is just trying to fix things.

>
> >       cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
> > +     cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR);
>
> I think the CONTEXTIDR register is stage-1 only, so we shouldn't dump
> it for stage-2 domains.
>
> Will

Does it matter if we read the register though, as long as users are
aware of this and don't use its value for anything?

Connor
Will Deacon March 12, 2025, 1:05 p.m. UTC | #4
On Tue, Mar 11, 2025 at 06:36:38PM -0400, Connor Abbott wrote:
> On Tue, Mar 11, 2025 at 2:06 PM Will Deacon <will@kernel.org> wrote:
> >
> > On Tue, Mar 04, 2025 at 11:56:47AM -0500, Connor Abbott wrote:
> > > This will be used by drm/msm for GPU page faults, replacing the manual
> > > register reading it does.
> > >
> > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
> > > ---
> > >  drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c |  4 ++--
> > >  drivers/iommu/arm/arm-smmu/arm-smmu.c            | 27 +++++++++++++-----------
> > >  drivers/iommu/arm/arm-smmu/arm-smmu.h            |  5 ++++-
> > >  3 files changed, 21 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> > > index 548783f3f8e89fd978367afa65c473002f66e2e7..ae4fdbbce6ba80440f539557a39866a932360d4e 100644
> > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> > > @@ -400,7 +400,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
> > >
> > >       if (list_empty(&tbu_list)) {
> > >               ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> > > -                                      cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> > > +                                      cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> > >
> > >               if (ret == -ENOSYS)
> > >                       arm_smmu_print_context_fault_info(smmu, idx, &cfi);
> > > @@ -412,7 +412,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
> > >       phys_soft = ops->iova_to_phys(ops, cfi.iova);
> > >
> > >       tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> > > -                              cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> > > +                              cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> > >       if (!tmp || tmp == -EBUSY) {
> > >               ret = IRQ_HANDLED;
> > >               resume = ARM_SMMU_RESUME_TERMINATE;
> > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > index ade4684c14c9b2724a71e2457288dbfaf7562c83..a9213e0f1579d1e3be0bfba75eea1d5de23117de 100644
> > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > @@ -409,9 +409,12 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
> > >                                     struct arm_smmu_context_fault_info *cfi)
> > >  {
> > >       cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
> > > +     cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0);
> > >       cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
> > > -     cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> > > +     cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> > > +     cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1);
> >
> > We already have an implementation hook (->get_fault_info()) which the
> > qcom SMMU driver can override with qcom_adreno_smmu_get_fault_info().
> > That thing dumps these registers already so if we're moving that into
> > the core SMMU driver, let's get rid of the hook and move everybody over
> > rather than having it done in both places.
> 
> As you probably saw, the next commit moves over
> qcom_adreno_smmu_get_fault_info() to use this. The current back door
> used by drm/msm to access these functions is specific to adreno_smmu
> and there isn't an equivalent interface to allow it to call a generic
> SMMU function so it isn't possible to move it entirely to the core. At
> least not without a bigger refactoring that isn't justified for this
> series that is just trying to fix things.

Ok :(

> > >       cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
> > > +     cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR);
> >
> > I think the CONTEXTIDR register is stage-1 only, so we shouldn't dump
> > it for stage-2 domains.
> >
> Does it matter if we read the register though, as long as users are
> aware of this and don't use its value for anything?

I think the contents are "UNKNOWN", so it could be hugely confusing even
if they just got logged someplace. Why is it difficult to avoid touching
it for stage-2?

Will
Rob Clark March 12, 2025, 2:59 p.m. UTC | #5
On Wed, Mar 12, 2025 at 6:05 AM Will Deacon <will@kernel.org> wrote:
>
> On Tue, Mar 11, 2025 at 06:36:38PM -0400, Connor Abbott wrote:
> > On Tue, Mar 11, 2025 at 2:06 PM Will Deacon <will@kernel.org> wrote:
> > >
> > > On Tue, Mar 04, 2025 at 11:56:47AM -0500, Connor Abbott wrote:
> > > > This will be used by drm/msm for GPU page faults, replacing the manual
> > > > register reading it does.
> > > >
> > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com>
> > > > ---
> > > >  drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c |  4 ++--
> > > >  drivers/iommu/arm/arm-smmu/arm-smmu.c            | 27 +++++++++++++-----------
> > > >  drivers/iommu/arm/arm-smmu/arm-smmu.h            |  5 ++++-
> > > >  3 files changed, 21 insertions(+), 15 deletions(-)
> > > >
> > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> > > > index 548783f3f8e89fd978367afa65c473002f66e2e7..ae4fdbbce6ba80440f539557a39866a932360d4e 100644
> > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
> > > > @@ -400,7 +400,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
> > > >
> > > >       if (list_empty(&tbu_list)) {
> > > >               ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> > > > -                                      cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> > > > +                                      cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> > > >
> > > >               if (ret == -ENOSYS)
> > > >                       arm_smmu_print_context_fault_info(smmu, idx, &cfi);
> > > > @@ -412,7 +412,7 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
> > > >       phys_soft = ops->iova_to_phys(ops, cfi.iova);
> > > >
> > > >       tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
> > > > -                              cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> > > > +                              cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
> > > >       if (!tmp || tmp == -EBUSY) {
> > > >               ret = IRQ_HANDLED;
> > > >               resume = ARM_SMMU_RESUME_TERMINATE;
> > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > index ade4684c14c9b2724a71e2457288dbfaf7562c83..a9213e0f1579d1e3be0bfba75eea1d5de23117de 100644
> > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > @@ -409,9 +409,12 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
> > > >                                     struct arm_smmu_context_fault_info *cfi)
> > > >  {
> > > >       cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
> > > > +     cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0);
> > > >       cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
> > > > -     cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> > > > +     cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> > > > +     cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1);
> > >
> > > We already have an implementation hook (->get_fault_info()) which the
> > > qcom SMMU driver can override with qcom_adreno_smmu_get_fault_info().
> > > That thing dumps these registers already so if we're moving that into
> > > the core SMMU driver, let's get rid of the hook and move everybody over
> > > rather than having it done in both places.
> >
> > As you probably saw, the next commit moves over
> > qcom_adreno_smmu_get_fault_info() to use this. The current back door
> > used by drm/msm to access these functions is specific to adreno_smmu
> > and there isn't an equivalent interface to allow it to call a generic
> > SMMU function so it isn't possible to move it entirely to the core. At
> > least not without a bigger refactoring that isn't justified for this
> > series that is just trying to fix things.
>
> Ok :(
>
> > > >       cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
> > > > +     cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR);
> > >
> > > I think the CONTEXTIDR register is stage-1 only, so we shouldn't dump
> > > it for stage-2 domains.
> > >
> > Does it matter if we read the register though, as long as users are
> > aware of this and don't use its value for anything?
>
> I think the contents are "UNKNOWN", so it could be hugely confusing even
> if they just got logged someplace. Why is it difficult to avoid touching
> it for stage-2?
>
> Will

Fwiw, we are only ever using stage-1

BR,
-R
Will Deacon March 12, 2025, 4:47 p.m. UTC | #6
On Wed, Mar 12, 2025 at 07:59:52AM -0700, Rob Clark wrote:
> On Wed, Mar 12, 2025 at 6:05 AM Will Deacon <will@kernel.org> wrote:
> > On Tue, Mar 11, 2025 at 06:36:38PM -0400, Connor Abbott wrote:
> > > On Tue, Mar 11, 2025 at 2:06 PM Will Deacon <will@kernel.org> wrote:
> > > > On Tue, Mar 04, 2025 at 11:56:47AM -0500, Connor Abbott wrote:
> > > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > > index ade4684c14c9b2724a71e2457288dbfaf7562c83..a9213e0f1579d1e3be0bfba75eea1d5de23117de 100644
> > > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > > @@ -409,9 +409,12 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
> > > > >                                     struct arm_smmu_context_fault_info *cfi)
> > > > >  {
> > > > >       cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
> > > > > +     cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0);
> > > > >       cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
> > > > > -     cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> > > > > +     cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> > > > > +     cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1);
> > > >
> > > > We already have an implementation hook (->get_fault_info()) which the
> > > > qcom SMMU driver can override with qcom_adreno_smmu_get_fault_info().
> > > > That thing dumps these registers already so if we're moving that into
> > > > the core SMMU driver, let's get rid of the hook and move everybody over
> > > > rather than having it done in both places.
> > >
> > > As you probably saw, the next commit moves over
> > > qcom_adreno_smmu_get_fault_info() to use this. The current back door
> > > used by drm/msm to access these functions is specific to adreno_smmu
> > > and there isn't an equivalent interface to allow it to call a generic
> > > SMMU function so it isn't possible to move it entirely to the core. At
> > > least not without a bigger refactoring that isn't justified for this
> > > series that is just trying to fix things.
> >
> > Ok :(
> >
> > > > >       cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
> > > > > +     cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR);
> > > >
> > > > I think the CONTEXTIDR register is stage-1 only, so we shouldn't dump
> > > > it for stage-2 domains.
> > > >
> > > Does it matter if we read the register though, as long as users are
> > > aware of this and don't use its value for anything?
> >
> > I think the contents are "UNKNOWN", so it could be hugely confusing even
> > if they just got logged someplace. Why is it difficult to avoid touching
> > it for stage-2?
> >
> Fwiw, we are only ever using stage-1

Sure, but this is in arm-smmu.c which is used by other people and supports
both stages.

Will
Rob Clark March 12, 2025, 5:23 p.m. UTC | #7
On Wed, Mar 12, 2025 at 9:47 AM Will Deacon <will@kernel.org> wrote:
>
> On Wed, Mar 12, 2025 at 07:59:52AM -0700, Rob Clark wrote:
> > On Wed, Mar 12, 2025 at 6:05 AM Will Deacon <will@kernel.org> wrote:
> > > On Tue, Mar 11, 2025 at 06:36:38PM -0400, Connor Abbott wrote:
> > > > On Tue, Mar 11, 2025 at 2:06 PM Will Deacon <will@kernel.org> wrote:
> > > > > On Tue, Mar 04, 2025 at 11:56:47AM -0500, Connor Abbott wrote:
> > > > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > > > index ade4684c14c9b2724a71e2457288dbfaf7562c83..a9213e0f1579d1e3be0bfba75eea1d5de23117de 100644
> > > > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> > > > > > @@ -409,9 +409,12 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
> > > > > >                                     struct arm_smmu_context_fault_info *cfi)
> > > > > >  {
> > > > > >       cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
> > > > > > +     cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0);
> > > > > >       cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
> > > > > > -     cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> > > > > > +     cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> > > > > > +     cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1);
> > > > >
> > > > > We already have an implementation hook (->get_fault_info()) which the
> > > > > qcom SMMU driver can override with qcom_adreno_smmu_get_fault_info().
> > > > > That thing dumps these registers already so if we're moving that into
> > > > > the core SMMU driver, let's get rid of the hook and move everybody over
> > > > > rather than having it done in both places.
> > > >
> > > > As you probably saw, the next commit moves over
> > > > qcom_adreno_smmu_get_fault_info() to use this. The current back door
> > > > used by drm/msm to access these functions is specific to adreno_smmu
> > > > and there isn't an equivalent interface to allow it to call a generic
> > > > SMMU function so it isn't possible to move it entirely to the core. At
> > > > least not without a bigger refactoring that isn't justified for this
> > > > series that is just trying to fix things.
> > >
> > > Ok :(
> > >
> > > > > >       cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
> > > > > > +     cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR);
> > > > >
> > > > > I think the CONTEXTIDR register is stage-1 only, so we shouldn't dump
> > > > > it for stage-2 domains.
> > > > >
> > > > Does it matter if we read the register though, as long as users are
> > > > aware of this and don't use its value for anything?
> > >
> > > I think the contents are "UNKNOWN", so it could be hugely confusing even
> > > if they just got logged someplace. Why is it difficult to avoid touching
> > > it for stage-2?
> > >
> > Fwiw, we are only ever using stage-1
>
> Sure, but this is in arm-smmu.c which is used by other people and supports
> both stages.

Sure, but no one else is using this field in the fault-info.  So maybe
the addition of a comment in the struct would be enough if it isn't
going to cause an SError/etc to read it for S2 cb?

BR,
-R
Robin Murphy March 12, 2025, 6:01 p.m. UTC | #8
On 12/03/2025 5:23 pm, Rob Clark wrote:
> On Wed, Mar 12, 2025 at 9:47 AM Will Deacon <will@kernel.org> wrote:
>>
>> On Wed, Mar 12, 2025 at 07:59:52AM -0700, Rob Clark wrote:
>>> On Wed, Mar 12, 2025 at 6:05 AM Will Deacon <will@kernel.org> wrote:
>>>> On Tue, Mar 11, 2025 at 06:36:38PM -0400, Connor Abbott wrote:
>>>>> On Tue, Mar 11, 2025 at 2:06 PM Will Deacon <will@kernel.org> wrote:
>>>>>> On Tue, Mar 04, 2025 at 11:56:47AM -0500, Connor Abbott wrote:
>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>>>>>>> index ade4684c14c9b2724a71e2457288dbfaf7562c83..a9213e0f1579d1e3be0bfba75eea1d5de23117de 100644
>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
>>>>>>> @@ -409,9 +409,12 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
>>>>>>>                                      struct arm_smmu_context_fault_info *cfi)
>>>>>>>   {
>>>>>>>        cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
>>>>>>> +     cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0);
>>>>>>>        cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
>>>>>>> -     cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
>>>>>>> +     cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
>>>>>>> +     cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1);
>>>>>>
>>>>>> We already have an implementation hook (->get_fault_info()) which the
>>>>>> qcom SMMU driver can override with qcom_adreno_smmu_get_fault_info().
>>>>>> That thing dumps these registers already so if we're moving that into
>>>>>> the core SMMU driver, let's get rid of the hook and move everybody over
>>>>>> rather than having it done in both places.
>>>>>
>>>>> As you probably saw, the next commit moves over
>>>>> qcom_adreno_smmu_get_fault_info() to use this. The current back door
>>>>> used by drm/msm to access these functions is specific to adreno_smmu
>>>>> and there isn't an equivalent interface to allow it to call a generic
>>>>> SMMU function so it isn't possible to move it entirely to the core. At
>>>>> least not without a bigger refactoring that isn't justified for this
>>>>> series that is just trying to fix things.
>>>>
>>>> Ok :(
>>>>
>>>>>>>        cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
>>>>>>> +     cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR);
>>>>>>
>>>>>> I think the CONTEXTIDR register is stage-1 only, so we shouldn't dump
>>>>>> it for stage-2 domains.
>>>>>>
>>>>> Does it matter if we read the register though, as long as users are
>>>>> aware of this and don't use its value for anything?
>>>>
>>>> I think the contents are "UNKNOWN", so it could be hugely confusing even
>>>> if they just got logged someplace. Why is it difficult to avoid touching
>>>> it for stage-2?
>>>>
>>> Fwiw, we are only ever using stage-1
>>
>> Sure, but this is in arm-smmu.c which is used by other people and supports
>> both stages.
> 
> Sure, but no one else is using this field in the fault-info.  So maybe
> the addition of a comment in the struct would be enough if it isn't
> going to cause an SError/etc to read it for S2 cb?

Any worthwhile comment isn't going to be significantly shorter or 
clearer than 1 extra line of "if (smmu_domain->stage == 
ARM_SMMU_DOMAIN_S1)"...

TBH it's the Qualcomm register-middle-man firmware I'd be more worried 
about than real hardware, given how touchy it can be even with register 
accesses which *should* be well defined. But then I guess it also has 
the habit of killing the system if anything other than the GPU dares 
cause a fault in the first place, so maybe it OK?

If anyone still uses Arm Fast Models SMMUv1/2 components it'll probably 
squawk an annoying warning there too - ISTR I had at least one patch 
motivated by that in the past.

Thanks,
Robin.
Rob Clark March 12, 2025, 8:20 p.m. UTC | #9
On Wed, Mar 12, 2025 at 11:01 AM Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 12/03/2025 5:23 pm, Rob Clark wrote:
> > On Wed, Mar 12, 2025 at 9:47 AM Will Deacon <will@kernel.org> wrote:
> >>
> >> On Wed, Mar 12, 2025 at 07:59:52AM -0700, Rob Clark wrote:
> >>> On Wed, Mar 12, 2025 at 6:05 AM Will Deacon <will@kernel.org> wrote:
> >>>> On Tue, Mar 11, 2025 at 06:36:38PM -0400, Connor Abbott wrote:
> >>>>> On Tue, Mar 11, 2025 at 2:06 PM Will Deacon <will@kernel.org> wrote:
> >>>>>> On Tue, Mar 04, 2025 at 11:56:47AM -0500, Connor Abbott wrote:
> >>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> >>>>>>> index ade4684c14c9b2724a71e2457288dbfaf7562c83..a9213e0f1579d1e3be0bfba75eea1d5de23117de 100644
> >>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
> >>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
> >>>>>>> @@ -409,9 +409,12 @@ void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
> >>>>>>>                                      struct arm_smmu_context_fault_info *cfi)
> >>>>>>>   {
> >>>>>>>        cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
> >>>>>>> +     cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0);
> >>>>>>>        cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
> >>>>>>> -     cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> >>>>>>> +     cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
> >>>>>>> +     cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1);
> >>>>>>
> >>>>>> We already have an implementation hook (->get_fault_info()) which the
> >>>>>> qcom SMMU driver can override with qcom_adreno_smmu_get_fault_info().
> >>>>>> That thing dumps these registers already so if we're moving that into
> >>>>>> the core SMMU driver, let's get rid of the hook and move everybody over
> >>>>>> rather than having it done in both places.
> >>>>>
> >>>>> As you probably saw, the next commit moves over
> >>>>> qcom_adreno_smmu_get_fault_info() to use this. The current back door
> >>>>> used by drm/msm to access these functions is specific to adreno_smmu
> >>>>> and there isn't an equivalent interface to allow it to call a generic
> >>>>> SMMU function so it isn't possible to move it entirely to the core. At
> >>>>> least not without a bigger refactoring that isn't justified for this
> >>>>> series that is just trying to fix things.
> >>>>
> >>>> Ok :(
> >>>>
> >>>>>>>        cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
> >>>>>>> +     cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR);
> >>>>>>
> >>>>>> I think the CONTEXTIDR register is stage-1 only, so we shouldn't dump
> >>>>>> it for stage-2 domains.
> >>>>>>
> >>>>> Does it matter if we read the register though, as long as users are
> >>>>> aware of this and don't use its value for anything?
> >>>>
> >>>> I think the contents are "UNKNOWN", so it could be hugely confusing even
> >>>> if they just got logged someplace. Why is it difficult to avoid touching
> >>>> it for stage-2?
> >>>>
> >>> Fwiw, we are only ever using stage-1
> >>
> >> Sure, but this is in arm-smmu.c which is used by other people and supports
> >> both stages.
> >
> > Sure, but no one else is using this field in the fault-info.  So maybe
> > the addition of a comment in the struct would be enough if it isn't
> > going to cause an SError/etc to read it for S2 cb?
>
> Any worthwhile comment isn't going to be significantly shorter or
> clearer than 1 extra line of "if (smmu_domain->stage ==
> ARM_SMMU_DOMAIN_S1)"...

Just that smmu_domain isn't passed to
arm_smmu_read_context_fault_info(), so it would add some extra churn
on top of that one extra line

> TBH it's the Qualcomm register-middle-man firmware I'd be more worried
> about than real hardware, given how touchy it can be even with register
> accesses which *should* be well defined. But then I guess it also has
> the habit of killing the system if anything other than the GPU dares
> cause a fault in the first place, so maybe it OK?

If we have qc hyp, then we don't get S2 in the first place ;-)

BR,
-R

> If anyone still uses Arm Fast Models SMMUv1/2 components it'll probably
> squawk an annoying warning there too - ISTR I had at least one patch
> motivated by that in the past.
>
> Thanks,
> Robin.
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
index 548783f3f8e89fd978367afa65c473002f66e2e7..ae4fdbbce6ba80440f539557a39866a932360d4e 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
@@ -400,7 +400,7 @@  irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
 
 	if (list_empty(&tbu_list)) {
 		ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
-					 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
+					 cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
 
 		if (ret == -ENOSYS)
 			arm_smmu_print_context_fault_info(smmu, idx, &cfi);
@@ -412,7 +412,7 @@  irqreturn_t qcom_smmu_context_fault(int irq, void *dev)
 	phys_soft = ops->iova_to_phys(ops, cfi.iova);
 
 	tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
-				 cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
+				 cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
 	if (!tmp || tmp == -EBUSY) {
 		ret = IRQ_HANDLED;
 		resume = ARM_SMMU_RESUME_TERMINATE;
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index ade4684c14c9b2724a71e2457288dbfaf7562c83..a9213e0f1579d1e3be0bfba75eea1d5de23117de 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -409,9 +409,12 @@  void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,
 				      struct arm_smmu_context_fault_info *cfi)
 {
 	cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
+	cfi->ttbr0 = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_TTBR0);
 	cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
-	cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
+	cfi->fsynr0 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
+	cfi->fsynr1 = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR1);
 	cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
+	cfi->contextidr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_CONTEXTIDR);
 }
 
 void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx,
@@ -419,7 +422,7 @@  void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx,
 {
 	dev_err(smmu->dev,
 		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
-		cfi->fsr, cfi->iova, cfi->fsynr, cfi->cbfrsynra, idx);
+		cfi->fsr, cfi->iova, cfi->fsynr0, cfi->cbfrsynra, idx);
 
 	dev_err(smmu->dev, "FSR    = %08x [%s%sFormat=%u%s%s%s%s%s%s%s%s], SID=0x%x\n",
 		cfi->fsr,
@@ -437,15 +440,15 @@  void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx,
 		cfi->cbfrsynra);
 
 	dev_err(smmu->dev, "FSYNR0 = %08x [S1CBNDX=%u%s%s%s%s%s%s PLVL=%u]\n",
-		cfi->fsynr,
-		(u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_S1CBNDX, cfi->fsynr),
-		(cfi->fsynr & ARM_SMMU_CB_FSYNR0_AFR) ? " AFR" : "",
-		(cfi->fsynr & ARM_SMMU_CB_FSYNR0_PTWF) ? " PTWF" : "",
-		(cfi->fsynr & ARM_SMMU_CB_FSYNR0_NSATTR) ? " NSATTR" : "",
-		(cfi->fsynr & ARM_SMMU_CB_FSYNR0_IND) ? " IND" : "",
-		(cfi->fsynr & ARM_SMMU_CB_FSYNR0_PNU) ? " PNU" : "",
-		(cfi->fsynr & ARM_SMMU_CB_FSYNR0_WNR) ? " WNR" : "",
-		(u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_PLVL, cfi->fsynr));
+		cfi->fsynr0,
+		(u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_S1CBNDX, cfi->fsynr0),
+		(cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_AFR) ? " AFR" : "",
+		(cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_PTWF) ? " PTWF" : "",
+		(cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_NSATTR) ? " NSATTR" : "",
+		(cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_IND) ? " IND" : "",
+		(cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_PNU) ? " PNU" : "",
+		(cfi->fsynr0 & ARM_SMMU_CB_FSYNR0_WNR) ? " WNR" : "",
+		(u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_PLVL, cfi->fsynr0));
 }
 
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
@@ -464,7 +467,7 @@  static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
 		return IRQ_NONE;
 
 	ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova,
-		cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
+		cfi.fsynr0 & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
 
 	if (ret == -ENOSYS && __ratelimit(&rs))
 		arm_smmu_print_context_fault_info(smmu, idx, &cfi);
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h
index e2aeb511ae903302e3c15d2cf5f22e2a26ac2346..d3bc77dcd4d40f25bc70f3289616fb866649b022 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h
@@ -543,9 +543,12 @@  int arm_mmu500_reset(struct arm_smmu_device *smmu);
 
 struct arm_smmu_context_fault_info {
 	unsigned long iova;
+	u64 ttbr0;
 	u32 fsr;
-	u32 fsynr;
+	u32 fsynr0;
+	u32 fsynr1;
 	u32 cbfrsynra;
+	u32 contextidr;
 };
 
 void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx,