Message ID | 20250304-msm-gpu-fault-fixes-next-v4-4-be14be37f4c3@gmail.com (mailing list archive) |
---|---|
State | Handled Elsewhere |
Headers | show |
Series | iommu/arm-smmu, drm/msm: Fixes for stall-on-fault | expand |
On Tue, Mar 4, 2025 at 8:57 AM Connor Abbott <cwabbott0@gmail.com> wrote: > > Up until now we have only called the set_stall callback during > initialization when the device is off. But we will soon start calling it > to temporarily disable stall-on-fault when the device is on, so handle > that by checking if the device is on and writing SCTLR. > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> Reviewed-by: Rob Clark <robdclark@gmail.com> > --- > drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 30 +++++++++++++++++++++++++++--- > 1 file changed, 27 insertions(+), 3 deletions(-) > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > index a428e53add08d451fb2152e3ab80e0fba936e214..d34a0d917013bb3d5a24b3ce72f48e3b38474da2 100644 > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > @@ -77,12 +77,36 @@ static void qcom_adreno_smmu_set_stall(const void *cookie, bool enabled) > { > struct arm_smmu_domain *smmu_domain = (void *)cookie; > struct arm_smmu_cfg *cfg = &smmu_domain->cfg; > - struct qcom_smmu *qsmmu = to_qcom_smmu(smmu_domain->smmu); > + struct arm_smmu_device *smmu = smmu_domain->smmu; > + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); > + u32 mask = BIT(cfg->cbndx); > + bool stall_changed = !!(qsmmu->stall_enabled & mask) != enabled; > + unsigned long flags; > > if (enabled) > - qsmmu->stall_enabled |= BIT(cfg->cbndx); > + qsmmu->stall_enabled |= mask; > else > - qsmmu->stall_enabled &= ~BIT(cfg->cbndx); > + qsmmu->stall_enabled &= ~mask; > + > + /* > + * If the device is on and we changed the setting, update the register. > + */ > + if (stall_changed && pm_runtime_get_if_active(smmu->dev) > 0) { > + spin_lock_irqsave(&smmu_domain->cb_lock, flags); > + > + u32 reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR); > + > + if (enabled) > + reg |= ARM_SMMU_SCTLR_CFCFG; > + else > + reg &= ~ARM_SMMU_SCTLR_CFCFG; > + > + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR, reg); > + > + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); > + > + pm_runtime_put_autosuspend(smmu->dev); > + } > } > > static void qcom_adreno_smmu_resume_translation(const void *cookie, bool terminate) > > -- > 2.47.1 >
On Tue, Mar 04, 2025 at 11:56:50AM -0500, Connor Abbott wrote: > Up until now we have only called the set_stall callback during > initialization when the device is off. But we will soon start calling it > to temporarily disable stall-on-fault when the device is on, so handle > that by checking if the device is on and writing SCTLR. > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > --- > drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 30 +++++++++++++++++++++++++++--- > 1 file changed, 27 insertions(+), 3 deletions(-) > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > index a428e53add08d451fb2152e3ab80e0fba936e214..d34a0d917013bb3d5a24b3ce72f48e3b38474da2 100644 > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > @@ -77,12 +77,36 @@ static void qcom_adreno_smmu_set_stall(const void *cookie, bool enabled) > { > struct arm_smmu_domain *smmu_domain = (void *)cookie; > struct arm_smmu_cfg *cfg = &smmu_domain->cfg; > - struct qcom_smmu *qsmmu = to_qcom_smmu(smmu_domain->smmu); > + struct arm_smmu_device *smmu = smmu_domain->smmu; > + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); > + u32 mask = BIT(cfg->cbndx); > + bool stall_changed = !!(qsmmu->stall_enabled & mask) != enabled; > + unsigned long flags; > > if (enabled) > - qsmmu->stall_enabled |= BIT(cfg->cbndx); > + qsmmu->stall_enabled |= mask; > else > - qsmmu->stall_enabled &= ~BIT(cfg->cbndx); > + qsmmu->stall_enabled &= ~mask; > + > + /* > + * If the device is on and we changed the setting, update the register. > + */ > + if (stall_changed && pm_runtime_get_if_active(smmu->dev) > 0) { > + spin_lock_irqsave(&smmu_domain->cb_lock, flags); > + > + u32 reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR); > + > + if (enabled) > + reg |= ARM_SMMU_SCTLR_CFCFG; > + else > + reg &= ~ARM_SMMU_SCTLR_CFCFG; > + > + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR, reg); Are you sure you don't need TLB invalidation for this to take effect? I think some fields in the SCTLR can be cached in the TLB but you'll need to check whether or not that applies to CFCFG. Will
On Tue, Mar 11, 2025 at 2:11 PM Will Deacon <will@kernel.org> wrote: > > On Tue, Mar 04, 2025 at 11:56:50AM -0500, Connor Abbott wrote: > > Up until now we have only called the set_stall callback during > > initialization when the device is off. But we will soon start calling it > > to temporarily disable stall-on-fault when the device is on, so handle > > that by checking if the device is on and writing SCTLR. > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > --- > > drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 30 +++++++++++++++++++++++++++--- > > 1 file changed, 27 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > > index a428e53add08d451fb2152e3ab80e0fba936e214..d34a0d917013bb3d5a24b3ce72f48e3b38474da2 100644 > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > > @@ -77,12 +77,36 @@ static void qcom_adreno_smmu_set_stall(const void *cookie, bool enabled) > > { > > struct arm_smmu_domain *smmu_domain = (void *)cookie; > > struct arm_smmu_cfg *cfg = &smmu_domain->cfg; > > - struct qcom_smmu *qsmmu = to_qcom_smmu(smmu_domain->smmu); > > + struct arm_smmu_device *smmu = smmu_domain->smmu; > > + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); > > + u32 mask = BIT(cfg->cbndx); > > + bool stall_changed = !!(qsmmu->stall_enabled & mask) != enabled; > > + unsigned long flags; > > > > if (enabled) > > - qsmmu->stall_enabled |= BIT(cfg->cbndx); > > + qsmmu->stall_enabled |= mask; > > else > > - qsmmu->stall_enabled &= ~BIT(cfg->cbndx); > > + qsmmu->stall_enabled &= ~mask; > > + > > + /* > > + * If the device is on and we changed the setting, update the register. > > + */ > > + if (stall_changed && pm_runtime_get_if_active(smmu->dev) > 0) { > > + spin_lock_irqsave(&smmu_domain->cb_lock, flags); > > + > > + u32 reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR); > > + > > + if (enabled) > > + reg |= ARM_SMMU_SCTLR_CFCFG; > > + else > > + reg &= ~ARM_SMMU_SCTLR_CFCFG; > > + > > + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR, reg); > > Are you sure you don't need TLB invalidation for this to take effect? I > think some fields in the SCTLR can be cached in the TLB but you'll need > to check whether or not that applies to CFCFG. > > Will I think it should be fine because CFCFG only controls behavior when there's a context fault and there can't be TLB entries for entries that cause a context fault: "The architecture permits the caching of any translation table entry that has been returned from memory without a fault and that does not, as a result of that entry, cause a Translation Fault or an Access Flag fault." Connor
On Tue, Mar 11, 2025 at 04:01:00PM -0400, Connor Abbott wrote: > On Tue, Mar 11, 2025 at 2:11 PM Will Deacon <will@kernel.org> wrote: > > > > On Tue, Mar 04, 2025 at 11:56:50AM -0500, Connor Abbott wrote: > > > Up until now we have only called the set_stall callback during > > > initialization when the device is off. But we will soon start calling it > > > to temporarily disable stall-on-fault when the device is on, so handle > > > that by checking if the device is on and writing SCTLR. > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > --- > > > drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 30 +++++++++++++++++++++++++++--- > > > 1 file changed, 27 insertions(+), 3 deletions(-) > > > > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > > > index a428e53add08d451fb2152e3ab80e0fba936e214..d34a0d917013bb3d5a24b3ce72f48e3b38474da2 100644 > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > > > @@ -77,12 +77,36 @@ static void qcom_adreno_smmu_set_stall(const void *cookie, bool enabled) > > > { > > > struct arm_smmu_domain *smmu_domain = (void *)cookie; > > > struct arm_smmu_cfg *cfg = &smmu_domain->cfg; > > > - struct qcom_smmu *qsmmu = to_qcom_smmu(smmu_domain->smmu); > > > + struct arm_smmu_device *smmu = smmu_domain->smmu; > > > + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); > > > + u32 mask = BIT(cfg->cbndx); > > > + bool stall_changed = !!(qsmmu->stall_enabled & mask) != enabled; > > > + unsigned long flags; > > > > > > if (enabled) > > > - qsmmu->stall_enabled |= BIT(cfg->cbndx); > > > + qsmmu->stall_enabled |= mask; > > > else > > > - qsmmu->stall_enabled &= ~BIT(cfg->cbndx); > > > + qsmmu->stall_enabled &= ~mask; > > > + > > > + /* > > > + * If the device is on and we changed the setting, update the register. > > > + */ > > > + if (stall_changed && pm_runtime_get_if_active(smmu->dev) > 0) { > > > + spin_lock_irqsave(&smmu_domain->cb_lock, flags); > > > + > > > + u32 reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR); > > > + > > > + if (enabled) > > > + reg |= ARM_SMMU_SCTLR_CFCFG; > > > + else > > > + reg &= ~ARM_SMMU_SCTLR_CFCFG; > > > + > > > + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR, reg); > > > > Are you sure you don't need TLB invalidation for this to take effect? I > > think some fields in the SCTLR can be cached in the TLB but you'll need > > to check whether or not that applies to CFCFG. > > > > I think it should be fine because CFCFG only controls behavior when > there's a context fault and there can't be TLB entries for entries > that cause a context fault: "The architecture permits the caching of > any translation table entry that has been returned from memory without > a fault and that does not, as a result of that entry, cause a > Translation Fault or an Access Flag fault." Ok, but what about other types of fault? For example, a permission fault or an address size fault? Will
On Wed, Mar 12, 2025 at 8:49 AM Will Deacon <will@kernel.org> wrote: > > On Tue, Mar 11, 2025 at 04:01:00PM -0400, Connor Abbott wrote: > > On Tue, Mar 11, 2025 at 2:11 PM Will Deacon <will@kernel.org> wrote: > > > > > > On Tue, Mar 04, 2025 at 11:56:50AM -0500, Connor Abbott wrote: > > > > Up until now we have only called the set_stall callback during > > > > initialization when the device is off. But we will soon start calling it > > > > to temporarily disable stall-on-fault when the device is on, so handle > > > > that by checking if the device is on and writing SCTLR. > > > > > > > > Signed-off-by: Connor Abbott <cwabbott0@gmail.com> > > > > --- > > > > drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 30 +++++++++++++++++++++++++++--- > > > > 1 file changed, 27 insertions(+), 3 deletions(-) > > > > > > > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > > > > index a428e53add08d451fb2152e3ab80e0fba936e214..d34a0d917013bb3d5a24b3ce72f48e3b38474da2 100644 > > > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > > > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c > > > > @@ -77,12 +77,36 @@ static void qcom_adreno_smmu_set_stall(const void *cookie, bool enabled) > > > > { > > > > struct arm_smmu_domain *smmu_domain = (void *)cookie; > > > > struct arm_smmu_cfg *cfg = &smmu_domain->cfg; > > > > - struct qcom_smmu *qsmmu = to_qcom_smmu(smmu_domain->smmu); > > > > + struct arm_smmu_device *smmu = smmu_domain->smmu; > > > > + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); > > > > + u32 mask = BIT(cfg->cbndx); > > > > + bool stall_changed = !!(qsmmu->stall_enabled & mask) != enabled; > > > > + unsigned long flags; > > > > > > > > if (enabled) > > > > - qsmmu->stall_enabled |= BIT(cfg->cbndx); > > > > + qsmmu->stall_enabled |= mask; > > > > else > > > > - qsmmu->stall_enabled &= ~BIT(cfg->cbndx); > > > > + qsmmu->stall_enabled &= ~mask; > > > > + > > > > + /* > > > > + * If the device is on and we changed the setting, update the register. > > > > + */ > > > > + if (stall_changed && pm_runtime_get_if_active(smmu->dev) > 0) { > > > > + spin_lock_irqsave(&smmu_domain->cb_lock, flags); > > > > + > > > > + u32 reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR); > > > > + > > > > + if (enabled) > > > > + reg |= ARM_SMMU_SCTLR_CFCFG; > > > > + else > > > > + reg &= ~ARM_SMMU_SCTLR_CFCFG; > > > > + > > > > + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR, reg); > > > > > > Are you sure you don't need TLB invalidation for this to take effect? I > > > think some fields in the SCTLR can be cached in the TLB but you'll need > > > to check whether or not that applies to CFCFG. > > > > > > > I think it should be fine because CFCFG only controls behavior when > > there's a context fault and there can't be TLB entries for entries > > that cause a context fault: "The architecture permits the caching of > > any translation table entry that has been returned from memory without > > a fault and that does not, as a result of that entry, cause a > > Translation Fault or an Access Flag fault." > > Ok, but what about other types of fault? For example, a permission fault > or an address size fault? > > Will I'm not sure, but the pseudocode for context faults mentions resampling CFCFG after a fault happens ("We have a fault and must resample FSR, CFCFG and HUPCF") so I don't think it would be legal to cache it. Also in practice this does seem to work. Does that answer it? Connor
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index a428e53add08d451fb2152e3ab80e0fba936e214..d34a0d917013bb3d5a24b3ce72f48e3b38474da2 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -77,12 +77,36 @@ static void qcom_adreno_smmu_set_stall(const void *cookie, bool enabled) { struct arm_smmu_domain *smmu_domain = (void *)cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - struct qcom_smmu *qsmmu = to_qcom_smmu(smmu_domain->smmu); + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct qcom_smmu *qsmmu = to_qcom_smmu(smmu); + u32 mask = BIT(cfg->cbndx); + bool stall_changed = !!(qsmmu->stall_enabled & mask) != enabled; + unsigned long flags; if (enabled) - qsmmu->stall_enabled |= BIT(cfg->cbndx); + qsmmu->stall_enabled |= mask; else - qsmmu->stall_enabled &= ~BIT(cfg->cbndx); + qsmmu->stall_enabled &= ~mask; + + /* + * If the device is on and we changed the setting, update the register. + */ + if (stall_changed && pm_runtime_get_if_active(smmu->dev) > 0) { + spin_lock_irqsave(&smmu_domain->cb_lock, flags); + + u32 reg = arm_smmu_cb_read(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR); + + if (enabled) + reg |= ARM_SMMU_SCTLR_CFCFG; + else + reg &= ~ARM_SMMU_SCTLR_CFCFG; + + arm_smmu_cb_write(smmu, cfg->cbndx, ARM_SMMU_CB_SCTLR, reg); + + spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); + + pm_runtime_put_autosuspend(smmu->dev); + } } static void qcom_adreno_smmu_resume_translation(const void *cookie, bool terminate)
Up until now we have only called the set_stall callback during initialization when the device is off. But we will soon start calling it to temporarily disable stall-on-fault when the device is on, so handle that by checking if the device is on and writing SCTLR. Signed-off-by: Connor Abbott <cwabbott0@gmail.com> --- drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-)