diff mbox series

[v2,4/5] drm/panthor: Let IRQ handlers clear the interrupts themselves

Message ID 20250401182348.252422-5-boris.brezillon@collabora.com (mailing list archive)
State New
Headers show
Series drm/panthor: Misc fixes | expand

Commit Message

Boris Brezillon April 1, 2025, 6:23 p.m. UTC
The MMU handler needs to be in control of the MMU interrupt clears because
clearing the interrupt also unblocks the writer/reader that triggered
the fault, and we don't want it to be unblocked until we've had a chance
to process the IRQ.

Since clearing the interrupt is just one line, let's make it explicit
instead of doing it in the generic code path.

Changes in v2:
- Move the MMU_INT_CLEAR around

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
---
 drivers/gpu/drm/panthor/panthor_device.h | 2 --
 drivers/gpu/drm/panthor/panthor_fw.c     | 2 ++
 drivers/gpu/drm/panthor/panthor_gpu.c    | 2 ++
 drivers/gpu/drm/panthor/panthor_mmu.c    | 5 +++++
 4 files changed, 9 insertions(+), 2 deletions(-)

Comments

Liviu Dudau April 1, 2025, 8:36 p.m. UTC | #1
On Tue, Apr 01, 2025 at 08:23:47PM +0200, Boris Brezillon wrote:
> MMU handler needs to be in control of the job interrupt clears because
> clearing the interrupt also unblocks the writer/reader that triggered
> the fault, and we don't want it to be unblocked until we've had a chance
> to process the IRQ.
> 
> Since clearing the interrupt is just one line, let's make it explicit
> instead of doing it in the generic code path.
> 
> Changes in v2:
> - Move the MMU_INT_CLEAR around
> 
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>

Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>

Best regards,
Liviu

> ---
>  drivers/gpu/drm/panthor/panthor_device.h | 2 --
>  drivers/gpu/drm/panthor/panthor_fw.c     | 2 ++
>  drivers/gpu/drm/panthor/panthor_gpu.c    | 2 ++
>  drivers/gpu/drm/panthor/panthor_mmu.c    | 5 +++++
>  4 files changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> index da6574021664..4c27b6d85f46 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.h
> +++ b/drivers/gpu/drm/panthor/panthor_device.h
> @@ -383,8 +383,6 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
>  		if (!status)									\
>  			break;									\
>  												\
> -		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status);				\
> -												\
>  		__handler(ptdev, status);							\
>  		ret = IRQ_HANDLED;								\
>  	}											\
> diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
> index 0f52766a3120..446bb377b953 100644
> --- a/drivers/gpu/drm/panthor/panthor_fw.c
> +++ b/drivers/gpu/drm/panthor/panthor_fw.c
> @@ -1008,6 +1008,8 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
>  
>  static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
>  {
> +	gpu_write(ptdev, JOB_INT_CLEAR, status);
> +
>  	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
>  		ptdev->fw->booted = true;
>  
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 671049020afa..32d678a0114e 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -150,6 +150,8 @@ static void panthor_gpu_init_info(struct panthor_device *ptdev)
>  
>  static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
>  {
> +	gpu_write(ptdev, GPU_INT_CLEAR, status);
> +
>  	if (status & GPU_IRQ_FAULT) {
>  		u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS);
>  		u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) |
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 7cca97d298ea..4ac95a31907d 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -1710,6 +1710,11 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
>  			access_type, access_type_name(ptdev, fault_status),
>  			source_id);
>  
> +		/* We don't handle VM faults at the moment, so let's just clear the
> +		 * interrupt and let the writer/reader crash.
> +		 */
> +		gpu_write(ptdev, MMU_INT_CLEAR, mask);
> +
>  		/* Ignore MMU interrupts on this AS until it's been
>  		 * re-enabled.
>  		 */
> -- 
> 2.49.0
>
Steven Price April 2, 2025, 10:20 a.m. UTC | #2
On 01/04/2025 19:23, Boris Brezillon wrote:
> MMU handler needs to be in control of the job interrupt clears because
> clearing the interrupt also unblocks the writer/reader that triggered
> the fault, and we don't want it to be unblocked until we've had a chance
> to process the IRQ.
> 
> Since clearing the interrupt is just one line, let's make it explicit
> instead of doing it in the generic code path.
> 
> Changes in v2:
> - Move the MMU_INT_CLEAR around
> 
> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>

Looking at this made me look at panthor_mmu_irq_handler() and I can't
understand this bit of code:

> static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
> {
> 	bool has_unhandled_faults = false;
> 
> 	status = panthor_mmu_fault_mask(ptdev, status);
> 	while (status) {
> 		u32 as = ffs(status | (status >> 16)) - 1;

panthor_mmu_fault_mask() masks status with GENMASK(15, 0), so AFAICT
(status >> 16) is always 0.

Which isn't a big issue, other than you are now only clearing IRQs which
are recognised by the loop.

So this changes the behaviour to not clear interrupt bits 16-31 (the
COMMAND_COMPLETED bits). I believe we don't actually care about them and
AFAICT we always mask those interrupt bits in the IRQ mask - so it
should be safe...

TLDR; I think this change is fine, but I'm not sure if it's what you
intended and it would be good to include something in the commit message
about the functional change.

Thanks,
Steve

> ---
>  drivers/gpu/drm/panthor/panthor_device.h | 2 --
>  drivers/gpu/drm/panthor/panthor_fw.c     | 2 ++
>  drivers/gpu/drm/panthor/panthor_gpu.c    | 2 ++
>  drivers/gpu/drm/panthor/panthor_mmu.c    | 5 +++++
>  4 files changed, 9 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> index da6574021664..4c27b6d85f46 100644
> --- a/drivers/gpu/drm/panthor/panthor_device.h
> +++ b/drivers/gpu/drm/panthor/panthor_device.h
> @@ -383,8 +383,6 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
>  		if (!status)									\
>  			break;									\
>  												\
> -		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status);				\
> -												\
>  		__handler(ptdev, status);							\
>  		ret = IRQ_HANDLED;								\
>  	}											\
> diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
> index 0f52766a3120..446bb377b953 100644
> --- a/drivers/gpu/drm/panthor/panthor_fw.c
> +++ b/drivers/gpu/drm/panthor/panthor_fw.c
> @@ -1008,6 +1008,8 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
>  
>  static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
>  {
> +	gpu_write(ptdev, JOB_INT_CLEAR, status);
> +
>  	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
>  		ptdev->fw->booted = true;
>  
> diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> index 671049020afa..32d678a0114e 100644
> --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> @@ -150,6 +150,8 @@ static void panthor_gpu_init_info(struct panthor_device *ptdev)
>  
>  static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
>  {
> +	gpu_write(ptdev, GPU_INT_CLEAR, status);
> +
>  	if (status & GPU_IRQ_FAULT) {
>  		u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS);
>  		u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) |
> diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> index 7cca97d298ea..4ac95a31907d 100644
> --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> @@ -1710,6 +1710,11 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
>  			access_type, access_type_name(ptdev, fault_status),
>  			source_id);
>  
> +		/* We don't handle VM faults at the moment, so let's just clear the
> +		 * interrupt and let the writer/reader crash.
> +		 */
> +		gpu_write(ptdev, MMU_INT_CLEAR, mask);
> +
>  		/* Ignore MMU interrupts on this AS until it's been
>  		 * re-enabled.
>  		 */
Boris Brezillon April 2, 2025, 10:52 a.m. UTC | #3
On Wed, 2 Apr 2025 11:20:17 +0100
Steven Price <steven.price@arm.com> wrote:

> On 01/04/2025 19:23, Boris Brezillon wrote:
> > MMU handler needs to be in control of the job interrupt clears because
> > clearing the interrupt also unblocks the writer/reader that triggered
> > the fault, and we don't want it to be unblocked until we've had a chance
> > to process the IRQ.
> > 
> > Since clearing the interrupt is just one line, let's make it explicit
> > instead of doing it in the generic code path.
> > 
> > Changes in v2:
> > - Move the MMU_INT_CLEAR around
> > 
> > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>  
> 
> Looking at this made me look at panthor_mmu_irq_handler() and I can't
> understand this bit of code:
> 
> > static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
> > {
> > 	bool has_unhandled_faults = false;
> > 
> > 	status = panthor_mmu_fault_mask(ptdev, status);
> > 	while (status) {
> > 		u32 as = ffs(status | (status >> 16)) - 1;  
> 
> panthor_mmu_fault_mask() masks status with GENMASK(15, 0), so AFAICT
> (status >> 16) is always 0.
> 
> Which isn't a big issue, other than you are now only clearing IRQs which
> are recognised by the loop.
> 
> So this changes the behaviour to not clear interrupt bits 16-31 (the
> COMMAND_COMPLETED bits). I believe we don't actually care about them and
> AFAICT we always mask those interrupt bits in the IRQ mask - so it
> should be safe...
> 
> TLDR; I think this change is fine, but I'm not sure if it's what you
> intended and it would be good to include something in the commit message
> about the functional change.

Didn't realize it was changing the behavior, but I was aware we were
only unmasking fault interrupts, and I left COMPLETED ones uncleared on
purpose. I'll add a comment about that in the commit message.

> 
> Thanks,
> Steve
> 
> > ---
> >  drivers/gpu/drm/panthor/panthor_device.h | 2 --
> >  drivers/gpu/drm/panthor/panthor_fw.c     | 2 ++
> >  drivers/gpu/drm/panthor/panthor_gpu.c    | 2 ++
> >  drivers/gpu/drm/panthor/panthor_mmu.c    | 5 +++++
> >  4 files changed, 9 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
> > index da6574021664..4c27b6d85f46 100644
> > --- a/drivers/gpu/drm/panthor/panthor_device.h
> > +++ b/drivers/gpu/drm/panthor/panthor_device.h
> > @@ -383,8 +383,6 @@ static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
> >  		if (!status)									\
> >  			break;									\
> >  												\
> > -		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status);				\
> > -												\
> >  		__handler(ptdev, status);							\
> >  		ret = IRQ_HANDLED;								\
> >  	}											\
> > diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
> > index 0f52766a3120..446bb377b953 100644
> > --- a/drivers/gpu/drm/panthor/panthor_fw.c
> > +++ b/drivers/gpu/drm/panthor/panthor_fw.c
> > @@ -1008,6 +1008,8 @@ static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
> >  
> >  static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
> >  {
> > +	gpu_write(ptdev, JOB_INT_CLEAR, status);
> > +
> >  	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
> >  		ptdev->fw->booted = true;
> >  
> > diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
> > index 671049020afa..32d678a0114e 100644
> > --- a/drivers/gpu/drm/panthor/panthor_gpu.c
> > +++ b/drivers/gpu/drm/panthor/panthor_gpu.c
> > @@ -150,6 +150,8 @@ static void panthor_gpu_init_info(struct panthor_device *ptdev)
> >  
> >  static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
> >  {
> > +	gpu_write(ptdev, GPU_INT_CLEAR, status);
> > +
> >  	if (status & GPU_IRQ_FAULT) {
> >  		u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS);
> >  		u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) |
> > diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
> > index 7cca97d298ea..4ac95a31907d 100644
> > --- a/drivers/gpu/drm/panthor/panthor_mmu.c
> > +++ b/drivers/gpu/drm/panthor/panthor_mmu.c
> > @@ -1710,6 +1710,11 @@ static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
> >  			access_type, access_type_name(ptdev, fault_status),
> >  			source_id);
> >  
> > +		/* We don't handle VM faults at the moment, so let's just clear the
> > +		 * interrupt and let the writer/reader crash.
> > +		 */
> > +		gpu_write(ptdev, MMU_INT_CLEAR, mask);
> > +
> >  		/* Ignore MMU interrupts on this AS until it's been
> >  		 * re-enabled.
> >  		 */  
>
Boris Brezillon April 2, 2025, 10:58 a.m. UTC | #4
On Wed, 2 Apr 2025 11:20:17 +0100
Steven Price <steven.price@arm.com> wrote:

> > static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
> > {
> > 	bool has_unhandled_faults = false;
> > 
> > 	status = panthor_mmu_fault_mask(ptdev, status);
> > 	while (status) {
> > 		u32 as = ffs(status | (status >> 16)) - 1;  
> 
> panthor_mmu_fault_mask() masks status with GENMASK(15, 0), so AFAICT
> (status >> 16) is always 0.
> 
> Which isn't a big issue, other than you are now only clearing IRQs which
> are recognised by the loop.

Quick note on that: I think it was written when I was trying to add
support for CSF in panfrost, and panthor_mmu_fault_mask() (at the time
it was panfrost_mmu_fault_mask()) was a way to have a different
fault_mask for JM and CSF. Now that JM support in panthor is being
considered [1], I'd prefer to keep this logic, even if it seems silly
right now.

[1]https://gitlab.freedesktop.org/panfrost/linux/-/merge_requests/16
Steven Price April 2, 2025, 11:03 a.m. UTC | #5
On 02/04/2025 11:58, Boris Brezillon wrote:
> On Wed, 2 Apr 2025 11:20:17 +0100
> Steven Price <steven.price@arm.com> wrote:
> 
>>> static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
>>> {
>>> 	bool has_unhandled_faults = false;
>>>
>>> 	status = panthor_mmu_fault_mask(ptdev, status);
>>> 	while (status) {
>>> 		u32 as = ffs(status | (status >> 16)) - 1;  
>>
>> panthor_mmu_fault_mask() masks status with GENMASK(15, 0), so AFAICT
>> (status >> 16) is always 0.
>>
>> Which isn't a big issue, other than you are now only clearing IRQs which
>> are recognised by the loop.
> 
> Quick note on that: I think it's been written when I was trying to add
> support for CSF in panfrost, and panthor_mmu_fault_mask() (at the time
> it was panfrost_mmu_fault_mask()) was a way to have a different
> fault_mask for JM and CSF. Now that JM support in panthor is being
> considered [1], I'd prefer to keep this logic, even if it seems silly
> right now.
> 
> [1]https://gitlab.freedesktop.org/panfrost/linux/-/merge_requests/16

Yeah no problem - it was mostly that when I looked at the code it
appears to be expecting 'status' to have bits set in the top half, but
we then only clear the bits in the lower half. It took a bit of digging
to satisfy myself that the IRQ mask always masks out the top bits.

With an updated commit message I think this is fine.

JM support in panthor is a whole other discussion ;)

Thanks,
Steve
diff mbox series

Patch

diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
index da6574021664..4c27b6d85f46 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -383,8 +383,6 @@  static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *da
 		if (!status)									\
 			break;									\
 												\
-		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status);				\
-												\
 		__handler(ptdev, status);							\
 		ret = IRQ_HANDLED;								\
 	}											\
diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
index 0f52766a3120..446bb377b953 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.c
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -1008,6 +1008,8 @@  static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
 
 static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
 {
+	gpu_write(ptdev, JOB_INT_CLEAR, status);
+
 	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
 		ptdev->fw->booted = true;
 
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
index 671049020afa..32d678a0114e 100644
--- a/drivers/gpu/drm/panthor/panthor_gpu.c
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -150,6 +150,8 @@  static void panthor_gpu_init_info(struct panthor_device *ptdev)
 
 static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
 {
+	gpu_write(ptdev, GPU_INT_CLEAR, status);
+
 	if (status & GPU_IRQ_FAULT) {
 		u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS);
 		u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) |
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
index 7cca97d298ea..4ac95a31907d 100644
--- a/drivers/gpu/drm/panthor/panthor_mmu.c
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -1710,6 +1710,11 @@  static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
 			access_type, access_type_name(ptdev, fault_status),
 			source_id);
 
+		/* We don't handle VM faults at the moment, so let's just clear the
+		 * interrupt and let the writer/reader crash.
+		 */
+		gpu_write(ptdev, MMU_INT_CLEAR, mask);
+
 		/* Ignore MMU interrupts on this AS until it's been
 		 * re-enabled.
 		 */