diff mbox series

drm/i915/uncore: keep track of last mmio accesses

Message ID 20220404181844.2649726-1-lucas.demarchi@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/uncore: keep track of last mmio accesses | expand

Commit Message

Lucas De Marchi April 4, 2022, 6:18 p.m. UTC
Sine gen6 we use FPGA_DBG register to detect unclaimed MMIO registers.
This register is in the display engine IP and can only ever detect
unclaimed accesses to registers in this area. However sometimes there
are reports of this triggering for registers in other areas, which
should not be possible.

This keeps track of the last 4 registers which should hopefully be
sufficient to understand where these are coming from. And without
increasing the debug struct too much:

Before:
	struct intel_uncore_mmio_debug {
		spinlock_t                 lock;                 /*     0    64 */
		/* --- cacheline 1 boundary (64 bytes) --- */
		int                        unclaimed_mmio_check; /*    64     4 */
		int                        saved_mmio_check;     /*    68     4 */
		u32                        suspend_count;        /*    72     4 */

		/* size: 80, cachelines: 2, members: 4 */
		/* padding: 4 */
		/* last cacheline: 16 bytes */
	};

After:
	struct intel_uncore_mmio_debug {
		spinlock_t                 lock;                 /*     0    64 */
		/* --- cacheline 1 boundary (64 bytes) --- */
		int                        unclaimed_mmio_check; /*    64     4 */
		int                        saved_mmio_check;     /*    68     4 */
		u32                        last_reg[4];          /*    72    16 */
		u32                        last_reg_pos;         /*    88     4 */
		u32                        suspend_count;        /*    92     4 */

		/* size: 96, cachelines: 2, members: 6 */
		/* last cacheline: 32 bytes */
	};

Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
---
 drivers/gpu/drm/i915/intel_uncore.c | 18 +++++++++++++++++-
 drivers/gpu/drm/i915/intel_uncore.h |  4 ++++
 2 files changed, 21 insertions(+), 1 deletion(-)

Comments

Matt Roper April 4, 2022, 8:38 p.m. UTC | #1
On Mon, Apr 04, 2022 at 11:18:44AM -0700, Lucas De Marchi wrote:
> Sine gen6 we use FPGA_DBG register to detect unclaimed MMIO registers.
> This register is in the display engine IP and can only ever detect
> unclaimed accesses to registers in this area. However sometimes there
> are reports of this triggering for registers in other areas, which
> should not be possible.
> 
> This keeps track of the last 4 registers which should hopefully be
> sufficient to understand where these are coming from. And without
> increasing the debug struct too much:

Doesn't the unclaimed access checking happen within uncore->lock,
guaranteeing that an unclaimed access triggered by an uncore read/write
is always from the current one and not a previous one?  Presumably if
the wrong access is being identified, then the true culprit is probably
a __raw_uncore_{read,write} that doesn't have any checking of its own
and doesn't use the uncore lock?  I think we could probably confirm this
theory by updating __unclaimed_reg_debug() to drop the "!before"
condition and print a slightly different message if we detect an
unclaimed access has already happened before we do the new operation.


Matt

> 
> Before:
> 	struct intel_uncore_mmio_debug {
> 		spinlock_t                 lock;                 /*     0    64 */
> 		/* --- cacheline 1 boundary (64 bytes) --- */
> 		int                        unclaimed_mmio_check; /*    64     4 */
> 		int                        saved_mmio_check;     /*    68     4 */
> 		u32                        suspend_count;        /*    72     4 */
> 
> 		/* size: 80, cachelines: 2, members: 4 */
> 		/* padding: 4 */
> 		/* last cacheline: 16 bytes */
> 	};
> 
> After:
> 	struct intel_uncore_mmio_debug {
> 		spinlock_t                 lock;                 /*     0    64 */
> 		/* --- cacheline 1 boundary (64 bytes) --- */
> 		int                        unclaimed_mmio_check; /*    64     4 */
> 		int                        saved_mmio_check;     /*    68     4 */
> 		u32                        last_reg[4];          /*    72    16 */
> 		u32                        last_reg_pos;         /*    88     4 */
> 		u32                        suspend_count;        /*    92     4 */
> 
> 		/* size: 96, cachelines: 2, members: 6 */
> 		/* last cacheline: 32 bytes */
> 	};
> 
> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_uncore.c | 18 +++++++++++++++++-
>  drivers/gpu/drm/i915/intel_uncore.h |  4 ++++
>  2 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 8b9caaaacc21..31a23b0e2907 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1509,9 +1509,25 @@ __unclaimed_reg_debug(struct intel_uncore *uncore,
>  		     check_for_unclaimed_mmio(uncore) && !before,
>  		     "Unclaimed %s register 0x%x\n",
>  		     read ? "read from" : "write to",
> -		     i915_mmio_reg_offset(reg)))
> +		     i915_mmio_reg_offset(reg))) {
> +		unsigned int i;
> +
>  		/* Only report the first N failures */
>  		uncore->i915->params.mmio_debug--;
> +
> +		drm_dbg(&uncore->i915->drm, "Last register accesses:\n");
> +		for (i = uncore->debug->last_reg_pos;
> +		     i < uncore->debug->last_reg_pos + INTEL_UNCORE_MMIO_DEBUG_REG_COUNT;
> +		     i++)
> +			drm_dbg(&uncore->i915->drm, "0x%x\n",
> +				uncore->debug->last_reg[i % INTEL_UNCORE_MMIO_DEBUG_REG_COUNT]);
> +	}
> +
> +	if (!before) {
> +		uncore->debug->last_reg[uncore->debug->last_reg_pos++] =
> +			i915_mmio_reg_offset(reg);
> +		uncore->debug->last_reg_pos %= INTEL_UNCORE_MMIO_DEBUG_REG_COUNT;
> +	}
>  }
>  
>  static inline void
> diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
> index 52fe3d89dd2b..5b5d2858ae11 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.h
> +++ b/drivers/gpu/drm/i915/intel_uncore.h
> @@ -38,10 +38,14 @@ struct intel_runtime_pm;
>  struct intel_uncore;
>  struct intel_gt;
>  
> +#define INTEL_UNCORE_MMIO_DEBUG_REG_COUNT	4
> +
>  struct intel_uncore_mmio_debug {
>  	spinlock_t lock; /** lock is also taken in irq contexts. */
>  	int unclaimed_mmio_check;
>  	int saved_mmio_check;
> +	u32 last_reg[INTEL_UNCORE_MMIO_DEBUG_REG_COUNT];
> +	u32 last_reg_pos;
>  	u32 suspend_count;
>  };
>  
> -- 
> 2.35.1
>
Lucas De Marchi April 4, 2022, 11:53 p.m. UTC | #2
On Mon, Apr 04, 2022 at 01:38:58PM -0700, Matt Roper wrote:
>On Mon, Apr 04, 2022 at 11:18:44AM -0700, Lucas De Marchi wrote:
>> Sine gen6 we use FPGA_DBG register to detect unclaimed MMIO registers.
>> This register is in the display engine IP and can only ever detect
>> unclaimed accesses to registers in this area. However sometimes there
>> are reports of this triggering for registers in other areas, which
>> should not be possible.
>>
>> This keeps track of the last 4 registers which should hopefully be
>> sufficient to understand where these are coming from. And without
>> increasing the debug struct too much:
>
>Doesn't the unclaimed access checking happen within uncore->lock,
>guaranteeing that an unclaimed access triggered by an uncore read/write
>is always from the current one and not a previous one?  Presumably if
>the wrong access is being identified, then the true culprit is probably
>a __raw_uncore_{read,write} that doesn't have any checking of its own
>and doesn't use the uncore lock?  I think we could probably confirm this
>theory by updating __unclaimed_reg_debug() to drop the "!before"
>condition and print a slightly different message if we detect an
>unclaimed access has already happened before we do the new operation.

I was actually thinking this could come from the 2 mmio reads being
batched and the hw not updating the FPGA_DBG in between.

__raw_uncore_* being used could be true.... from what I can see we'd
need to check users of intel_de_read_fw().

Lucas De Marchi

>
>
>Matt
>
>>
>> Before:
>> 	struct intel_uncore_mmio_debug {
>> 		spinlock_t                 lock;                 /*     0    64 */
>> 		/* --- cacheline 1 boundary (64 bytes) --- */
>> 		int                        unclaimed_mmio_check; /*    64     4 */
>> 		int                        saved_mmio_check;     /*    68     4 */
>> 		u32                        suspend_count;        /*    72     4 */
>>
>> 		/* size: 80, cachelines: 2, members: 4 */
>> 		/* padding: 4 */
>> 		/* last cacheline: 16 bytes */
>> 	};
>>
>> After:
>> 	struct intel_uncore_mmio_debug {
>> 		spinlock_t                 lock;                 /*     0    64 */
>> 		/* --- cacheline 1 boundary (64 bytes) --- */
>> 		int                        unclaimed_mmio_check; /*    64     4 */
>> 		int                        saved_mmio_check;     /*    68     4 */
>> 		u32                        last_reg[4];          /*    72    16 */
>> 		u32                        last_reg_pos;         /*    88     4 */
>> 		u32                        suspend_count;        /*    92     4 */
>>
>> 		/* size: 96, cachelines: 2, members: 6 */
>> 		/* last cacheline: 32 bytes */
>> 	};
>>
>> Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
>> ---
>>  drivers/gpu/drm/i915/intel_uncore.c | 18 +++++++++++++++++-
>>  drivers/gpu/drm/i915/intel_uncore.h |  4 ++++
>>  2 files changed, 21 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
>> index 8b9caaaacc21..31a23b0e2907 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -1509,9 +1509,25 @@ __unclaimed_reg_debug(struct intel_uncore *uncore,
>>  		     check_for_unclaimed_mmio(uncore) && !before,
>>  		     "Unclaimed %s register 0x%x\n",
>>  		     read ? "read from" : "write to",
>> -		     i915_mmio_reg_offset(reg)))
>> +		     i915_mmio_reg_offset(reg))) {
>> +		unsigned int i;
>> +
>>  		/* Only report the first N failures */
>>  		uncore->i915->params.mmio_debug--;
>> +
>> +		drm_dbg(&uncore->i915->drm, "Last register accesses:\n");
>> +		for (i = uncore->debug->last_reg_pos;
>> +		     i < uncore->debug->last_reg_pos + INTEL_UNCORE_MMIO_DEBUG_REG_COUNT;
>> +		     i++)
>> +			drm_dbg(&uncore->i915->drm, "0x%x\n",
>> +				uncore->debug->last_reg[i % INTEL_UNCORE_MMIO_DEBUG_REG_COUNT]);
>> +	}
>> +
>> +	if (!before) {
>> +		uncore->debug->last_reg[uncore->debug->last_reg_pos++] =
>> +			i915_mmio_reg_offset(reg);
>> +		uncore->debug->last_reg_pos %= INTEL_UNCORE_MMIO_DEBUG_REG_COUNT;
>> +	}
>>  }
>>
>>  static inline void
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
>> index 52fe3d89dd2b..5b5d2858ae11 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.h
>> +++ b/drivers/gpu/drm/i915/intel_uncore.h
>> @@ -38,10 +38,14 @@ struct intel_runtime_pm;
>>  struct intel_uncore;
>>  struct intel_gt;
>>
>> +#define INTEL_UNCORE_MMIO_DEBUG_REG_COUNT	4
>> +
>>  struct intel_uncore_mmio_debug {
>>  	spinlock_t lock; /** lock is also taken in irq contexts. */
>>  	int unclaimed_mmio_check;
>>  	int saved_mmio_check;
>> +	u32 last_reg[INTEL_UNCORE_MMIO_DEBUG_REG_COUNT];
>> +	u32 last_reg_pos;
>>  	u32 suspend_count;
>>  };
>>
>> --
>> 2.35.1
>>
>
>-- 
>Matt Roper
>Graphics Software Engineer
>VTT-OSGC Platform Enablement
>Intel Corporation
>(916) 356-2795
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 8b9caaaacc21..31a23b0e2907 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1509,9 +1509,25 @@  __unclaimed_reg_debug(struct intel_uncore *uncore,
 		     check_for_unclaimed_mmio(uncore) && !before,
 		     "Unclaimed %s register 0x%x\n",
 		     read ? "read from" : "write to",
-		     i915_mmio_reg_offset(reg)))
+		     i915_mmio_reg_offset(reg))) {
+		unsigned int i;
+
 		/* Only report the first N failures */
 		uncore->i915->params.mmio_debug--;
+
+		drm_dbg(&uncore->i915->drm, "Last register accesses:\n");
+		for (i = uncore->debug->last_reg_pos;
+		     i < uncore->debug->last_reg_pos + INTEL_UNCORE_MMIO_DEBUG_REG_COUNT;
+		     i++)
+			drm_dbg(&uncore->i915->drm, "0x%x\n",
+				uncore->debug->last_reg[i % INTEL_UNCORE_MMIO_DEBUG_REG_COUNT]);
+	}
+
+	if (!before) {
+		uncore->debug->last_reg[uncore->debug->last_reg_pos++] =
+			i915_mmio_reg_offset(reg);
+		uncore->debug->last_reg_pos %= INTEL_UNCORE_MMIO_DEBUG_REG_COUNT;
+	}
 }
 
 static inline void
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index 52fe3d89dd2b..5b5d2858ae11 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -38,10 +38,14 @@  struct intel_runtime_pm;
 struct intel_uncore;
 struct intel_gt;
 
+#define INTEL_UNCORE_MMIO_DEBUG_REG_COUNT	4
+
 struct intel_uncore_mmio_debug {
 	spinlock_t lock; /** lock is also taken in irq contexts. */
 	int unclaimed_mmio_check;
 	int saved_mmio_check;
+	u32 last_reg[INTEL_UNCORE_MMIO_DEBUG_REG_COUNT];
+	u32 last_reg_pos;
 	u32 suspend_count;
 };