diff mbox

[4/4] drm/i915: Reduce frequency of unspecific HSW reg debugging

Message ID 1441309905-2744-5-git-send-email-paulo.r.zanoni@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zanoni, Paulo R Sept. 3, 2015, 7:51 p.m. UTC
From: Chris Wilson <chris@chris-wilson.co.uk>

Delay the expensive read on the FPGA_DBG register from once per mmio to
once per forcewake section when we are doing the general wellbeing
check rather than the targetted error detection. This almost reduces
the overhead of the debug facility (for example when submitting execlists)
to zero whilst keeping the debug checks around.

v2: Enable one-shot mmio debugging from the interrupt check as well as a
    safeguard to catch invalid display writes from outside the powerwell.
v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
    removal

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/intel_uncore.c | 52 +++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 23 deletions(-)

Comments

Mika Kuoppala Sept. 4, 2015, 7:02 a.m. UTC | #1
Paulo Zanoni <paulo.r.zanoni@intel.com> writes:

> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> Delay the expensive read on the FPGA_DBG register from once per mmio to
> once per forcewake section when we are doing the general wellbeing
> check rather than the targetted error detection. This almost reduces
> the overhead of the debug facility (for example when submitting execlists)
> to zero whilst keeping the debug checks around.
>
> v2: Enable one-shot mmio debugging from the interrupt check as well as a
>     safeguard to catch invalid display writes from outside the powerwell.
> v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
>     removal
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_uncore.c | 52 +++++++++++++++++++++----------------
>  1 file changed, 29 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 8844c314..1fe63fc 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -148,6 +148,31 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
>  }
>  
>  static void
> +hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv)
> +{
> +	static bool mmio_debug_once = true;
> +
> +	if (i915.mmio_debug || !mmio_debug_once)
> +		return;
> +
> +	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
> +		DRM_ERROR("Unclaimed register detected, "
> +			  "enabling oneshot unclaimed register reporting. "
> +			  "Please use i915.mmio_debug=N for more information.\n");
> +		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
> +		i915.mmio_debug = mmio_debug_once--;
> +	}
> +}
> +
> +static void
> +fw_domains_put_debug(struct drm_i915_private *dev_priv,
> +		     enum forcewake_domains fw_domains)
> +{
> +	hsw_unclaimed_reg_detect(dev_priv);
> +	fw_domains_put(dev_priv, fw_domains);
> +}
> +
> +static void
>  fw_domains_posting_read(struct drm_i915_private *dev_priv)
>  {
>  	struct intel_uncore_forcewake_domain *d;
> @@ -627,26 +652,6 @@ hsw_unclaimed_reg_debug(struct drm_i915_private *dev_priv, u32 reg, bool read,
>  	}
>  }
>  
> -static void
> -hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv, u32 reg)
> -{
> -	static bool mmio_debug_once = true;
> -
> -	if (!UNCLAIMED_CHECK_RANGE(reg))
> -		return;
> -

You lost the range check in the moved version of function.
-Mika

> -	if (i915.mmio_debug || !mmio_debug_once)
> -		return;
> -
> -	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
> -		DRM_ERROR("Unclaimed register detected, "
> -			  "enabling oneshot unclaimed register reporting. "
> -			  "Please use i915.mmio_debug=N for more information.\n");
> -		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
> -		i915.mmio_debug = mmio_debug_once--;
> -	}
> -}
> -
>  #define GEN2_READ_HEADER(x) \
>  	u##x val = 0; \
>  	assert_device_not_suspended(dev_priv);
> @@ -900,7 +905,6 @@ hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace)
>  		gen6_gt_check_fifodbg(dev_priv); \
>  	} \
>  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
> -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>  	GEN6_WRITE_FOOTER; \
>  }
>  
> @@ -942,7 +946,6 @@ gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace
>  		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
>  	__raw_i915_write##x(dev_priv, reg, val); \
>  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
> -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>  	GEN6_WRITE_FOOTER; \
>  }
>  
> @@ -1008,7 +1011,6 @@ gen9_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, \
>  		__force_wake_get(dev_priv, fw_engine); \
>  	__raw_i915_write##x(dev_priv, reg, val); \
>  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
> -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>  	GEN6_WRITE_FOOTER; \
>  }
>  
> @@ -1194,6 +1196,10 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev)
>  			       FORCEWAKE, FORCEWAKE_ACK);
>  	}
>  
> +	if (HAS_FPGA_DBG_UNCLAIMED(dev) &&
> +	    dev_priv->uncore.funcs.force_wake_put == fw_domains_put)
> +		dev_priv->uncore.funcs.force_wake_put = fw_domains_put_debug;
> +
>  	/* All future platforms are expected to require complex power gating */
>  	WARN_ON(dev_priv->uncore.fw_domains == 0);
>  }
> -- 
> 2.5.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter Sept. 4, 2015, 8:27 a.m. UTC | #2
On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
> 
> Delay the expensive read on the FPGA_DBG register from once per mmio to
> once per forcewake section when we are doing the general wellbeing
> check rather than the targetted error detection. This almost reduces
> the overhead of the debug facility (for example when submitting execlists)
> to zero whilst keeping the debug checks around.
> 
> v2: Enable one-shot mmio debugging from the interrupt check as well as a
>     safeguard to catch invalid display writes from outside the powerwell.
> v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
>     removal
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

I'm unclear how this interacts (or how it sould interact) with patch 2:
Forcwake is mostly for GT registers, but patch 2 also tries to optimize
forcwake for GT registers. Do we really need both?
-Daniel

> ---
>  drivers/gpu/drm/i915/intel_uncore.c | 52 +++++++++++++++++++++----------------
>  1 file changed, 29 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 8844c314..1fe63fc 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -148,6 +148,31 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
>  }
>  
>  static void
> +hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv)
> +{
> +	static bool mmio_debug_once = true;
> +
> +	if (i915.mmio_debug || !mmio_debug_once)
> +		return;
> +
> +	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
> +		DRM_ERROR("Unclaimed register detected, "
> +			  "enabling oneshot unclaimed register reporting. "
> +			  "Please use i915.mmio_debug=N for more information.\n");
> +		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
> +		i915.mmio_debug = mmio_debug_once--;
> +	}
> +}
> +
> +static void
> +fw_domains_put_debug(struct drm_i915_private *dev_priv,
> +		     enum forcewake_domains fw_domains)
> +{
> +	hsw_unclaimed_reg_detect(dev_priv);
> +	fw_domains_put(dev_priv, fw_domains);
> +}
> +
> +static void
>  fw_domains_posting_read(struct drm_i915_private *dev_priv)
>  {
>  	struct intel_uncore_forcewake_domain *d;
> @@ -627,26 +652,6 @@ hsw_unclaimed_reg_debug(struct drm_i915_private *dev_priv, u32 reg, bool read,
>  	}
>  }
>  
> -static void
> -hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv, u32 reg)
> -{
> -	static bool mmio_debug_once = true;
> -
> -	if (!UNCLAIMED_CHECK_RANGE(reg))
> -		return;
> -
> -	if (i915.mmio_debug || !mmio_debug_once)
> -		return;
> -
> -	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
> -		DRM_ERROR("Unclaimed register detected, "
> -			  "enabling oneshot unclaimed register reporting. "
> -			  "Please use i915.mmio_debug=N for more information.\n");
> -		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
> -		i915.mmio_debug = mmio_debug_once--;
> -	}
> -}
> -
>  #define GEN2_READ_HEADER(x) \
>  	u##x val = 0; \
>  	assert_device_not_suspended(dev_priv);
> @@ -900,7 +905,6 @@ hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace)
>  		gen6_gt_check_fifodbg(dev_priv); \
>  	} \
>  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
> -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>  	GEN6_WRITE_FOOTER; \
>  }
>  
> @@ -942,7 +946,6 @@ gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace
>  		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
>  	__raw_i915_write##x(dev_priv, reg, val); \
>  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
> -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>  	GEN6_WRITE_FOOTER; \
>  }
>  
> @@ -1008,7 +1011,6 @@ gen9_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, \
>  		__force_wake_get(dev_priv, fw_engine); \
>  	__raw_i915_write##x(dev_priv, reg, val); \
>  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
> -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>  	GEN6_WRITE_FOOTER; \
>  }
>  
> @@ -1194,6 +1196,10 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev)
>  			       FORCEWAKE, FORCEWAKE_ACK);
>  	}
>  
> +	if (HAS_FPGA_DBG_UNCLAIMED(dev) &&
> +	    dev_priv->uncore.funcs.force_wake_put == fw_domains_put)
> +		dev_priv->uncore.funcs.force_wake_put = fw_domains_put_debug;
> +
>  	/* All future platforms are expected to require complex power gating */
>  	WARN_ON(dev_priv->uncore.fw_domains == 0);
>  }
> -- 
> 2.5.0
>
Mika Kuoppala Sept. 4, 2015, 8:40 a.m. UTC | #3
Daniel Vetter <daniel@ffwll.ch> writes:

> On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
>> From: Chris Wilson <chris@chris-wilson.co.uk>
>> 
>> Delay the expensive read on the FPGA_DBG register from once per mmio to
>> once per forcewake section when we are doing the general wellbeing
>> check rather than the targetted error detection. This almost reduces
>> the overhead of the debug facility (for example when submitting execlists)
>> to zero whilst keeping the debug checks around.
>> 
>> v2: Enable one-shot mmio debugging from the interrupt check as well as a
>>     safeguard to catch invalid display writes from outside the powerwell.
>> v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
>>     removal
>> 
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>
> I'm unclear how this interacts (or how it sould interact) with patch 2:
> Forcwake is mostly for GT registers, but patch 2 also tries to optimize
> forcwake for GT registers. Do we really need both?

Assuming the hardware detects access to unpowered domains and
to unregistered ranges by setting this bit, I would say that patch 2
is not needed. One could argue that patch 2 is somewhat harmful as
current register access pattern affects the detection.

Also the commit message in patch 2 is not valid wrt the code.

With skl, the debug bit seems to decay with time, instead of being
sticky. So in there we could argue that in patch 4/4, the reading
should be done before (and after) the forcewake scope.

Paulo, have you tried if this bit detects access to unpowered
domain with hsw/bdw?

-Mika

> -Daniel
>
>> ---
>>  drivers/gpu/drm/i915/intel_uncore.c | 52 +++++++++++++++++++++----------------
>>  1 file changed, 29 insertions(+), 23 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
>> index 8844c314..1fe63fc 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -148,6 +148,31 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
>>  }
>>  
>>  static void
>> +hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv)
>> +{
>> +	static bool mmio_debug_once = true;
>> +
>> +	if (i915.mmio_debug || !mmio_debug_once)
>> +		return;
>> +
>> +	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
>> +		DRM_ERROR("Unclaimed register detected, "
>> +			  "enabling oneshot unclaimed register reporting. "
>> +			  "Please use i915.mmio_debug=N for more information.\n");
>> +		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
>> +		i915.mmio_debug = mmio_debug_once--;
>> +	}
>> +}
>> +
>> +static void
>> +fw_domains_put_debug(struct drm_i915_private *dev_priv,
>> +		     enum forcewake_domains fw_domains)
>> +{
>> +	hsw_unclaimed_reg_detect(dev_priv);
>> +	fw_domains_put(dev_priv, fw_domains);
>> +}
>> +
>> +static void
>>  fw_domains_posting_read(struct drm_i915_private *dev_priv)
>>  {
>>  	struct intel_uncore_forcewake_domain *d;
>> @@ -627,26 +652,6 @@ hsw_unclaimed_reg_debug(struct drm_i915_private *dev_priv, u32 reg, bool read,
>>  	}
>>  }
>>  
>> -static void
>> -hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv, u32 reg)
>> -{
>> -	static bool mmio_debug_once = true;
>> -
>> -	if (!UNCLAIMED_CHECK_RANGE(reg))
>> -		return;
>> -
>> -	if (i915.mmio_debug || !mmio_debug_once)
>> -		return;
>> -
>> -	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
>> -		DRM_ERROR("Unclaimed register detected, "
>> -			  "enabling oneshot unclaimed register reporting. "
>> -			  "Please use i915.mmio_debug=N for more information.\n");
>> -		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
>> -		i915.mmio_debug = mmio_debug_once--;
>> -	}
>> -}
>> -
>>  #define GEN2_READ_HEADER(x) \
>>  	u##x val = 0; \
>>  	assert_device_not_suspended(dev_priv);
>> @@ -900,7 +905,6 @@ hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace)
>>  		gen6_gt_check_fifodbg(dev_priv); \
>>  	} \
>>  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
>> -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>>  	GEN6_WRITE_FOOTER; \
>>  }
>>  
>> @@ -942,7 +946,6 @@ gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace
>>  		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
>>  	__raw_i915_write##x(dev_priv, reg, val); \
>>  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
>> -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>>  	GEN6_WRITE_FOOTER; \
>>  }
>>  
>> @@ -1008,7 +1011,6 @@ gen9_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, \
>>  		__force_wake_get(dev_priv, fw_engine); \
>>  	__raw_i915_write##x(dev_priv, reg, val); \
>>  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
>> -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>>  	GEN6_WRITE_FOOTER; \
>>  }
>>  
>> @@ -1194,6 +1196,10 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev)
>>  			       FORCEWAKE, FORCEWAKE_ACK);
>>  	}
>>  
>> +	if (HAS_FPGA_DBG_UNCLAIMED(dev) &&
>> +	    dev_priv->uncore.funcs.force_wake_put == fw_domains_put)
>> +		dev_priv->uncore.funcs.force_wake_put = fw_domains_put_debug;
>> +
>>  	/* All future platforms are expected to require complex power gating */
>>  	WARN_ON(dev_priv->uncore.fw_domains == 0);
>>  }
>> -- 
>> 2.5.0
>> 
>
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Daniel Vetter Sept. 4, 2015, 8:59 a.m. UTC | #4
On Fri, Sep 04, 2015 at 11:40:26AM +0300, Mika Kuoppala wrote:
> Daniel Vetter <daniel@ffwll.ch> writes:
> 
> > On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
> >> From: Chris Wilson <chris@chris-wilson.co.uk>
> >> 
> >> Delay the expensive read on the FPGA_DBG register from once per mmio to
> >> once per forcewake section when we are doing the general wellbeing
> >> check rather than the targetted error detection. This almost reduces
> >> the overhead of the debug facility (for example when submitting execlists)
> >> to zero whilst keeping the debug checks around.
> >> 
> >> v2: Enable one-shot mmio debugging from the interrupt check as well as a
> >>     safeguard to catch invalid display writes from outside the powerwell.
> >> v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
> >>     removal
> >> 
> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> >> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> >
> > I'm unclear how this interacts (or how it sould interact) with patch 2:
> > Forcwake is mostly for GT registers, but patch 2 also tries to optimize
> > forcwake for GT registers. Do we really need both?
> 
> Assuming the hardware detects access to unpowered domains and
> to unregistered ranges by setting this bit, I would say that patch 2
> is not needed. One could argue that patch 2 is somewhat harmful as
> current register access pattern affects the detection.
> 
> Also the commit message in patch 2 is not valid wrt the code.
> 
> With skl, the debug bit seems to decay with time, instead of being
> sticky. So in there we could argue that in patch 4/4, the reading
> should be done before (and after) the forcewake scope.

Do we know where the bits decay? Could it be that the firmware (dmc) does
something with it, or maybe it gets reset when we change display power
wells?

> Paulo, have you tried if this bit detects access to unpowered
> domain with hsw/bdw?

Yup, that's the main reason we have this, it's great for catching
rpm/power wells bugs.
-Daniel
Mika Kuoppala Sept. 4, 2015, 11:45 a.m. UTC | #5
Daniel Vetter <daniel@ffwll.ch> writes:

> On Fri, Sep 04, 2015 at 11:40:26AM +0300, Mika Kuoppala wrote:
>> Daniel Vetter <daniel@ffwll.ch> writes:
>> 
>> > On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
>> >> From: Chris Wilson <chris@chris-wilson.co.uk>
>> >> 
>> >> Delay the expensive read on the FPGA_DBG register from once per mmio to
>> >> once per forcewake section when we are doing the general wellbeing
>> >> check rather than the targetted error detection. This almost reduces
>> >> the overhead of the debug facility (for example when submitting execlists)
>> >> to zero whilst keeping the debug checks around.
>> >> 
>> >> v2: Enable one-shot mmio debugging from the interrupt check as well as a
>> >>     safeguard to catch invalid display writes from outside the powerwell.
>> >> v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
>> >>     removal
>> >> 
>> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> >> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>> >> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> >
>> > I'm unclear how this interacts (or how it sould interact) with patch 2:
>> > Forcwake is mostly for GT registers, but patch 2 also tries to optimize
>> > forcwake for GT registers. Do we really need both?
>> 
>> Assuming the hardware detects access to unpowered domains and
>> to unregistered ranges by setting this bit, I would say that patch 2
>> is not needed. One could argue that patch 2 is somewhat harmful as
>> current register access pattern affects the detection.
>> 
>> Also the commit message in patch 2 is not valid wrt the code.
>> 
>> With skl, the debug bit seems to decay with time, instead of being
>> sticky. So in there we could argue that in patch 4/4, the reading
>> should be done before (and after) the forcewake scope.
>
> Do we know where the bits decay? Could it be that the firmware (dmc) does
> something with it, or maybe it gets reset when we change display power
> wells?

Now when trying to actually measure the decay time, I can't reproduce
the same behaviour anymore. Now it is sticky. Up until the display
power off clears the register without explicit clearing write.

Now I just wonder why I saw decay in just less than millisecond,
few weeks back. I am willing to blame my imperfect test setup on this,
and something just cleared it behind my back.

-Mika



>
>> Paulo, have you tried if this bit detects access to unpowered
>> domain with hsw/bdw?
>
> Yup, that's the main reason we have this, it's great for catching
> rpm/power wells bugs.
> -Daniel
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Mika Kuoppala Sept. 4, 2015, 12:18 p.m. UTC | #6
Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:

> Daniel Vetter <daniel@ffwll.ch> writes:
>
>> On Fri, Sep 04, 2015 at 11:40:26AM +0300, Mika Kuoppala wrote:
>>> Daniel Vetter <daniel@ffwll.ch> writes:
>>> 
>>> > On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
>>> >> From: Chris Wilson <chris@chris-wilson.co.uk>
>>> >> 
>>> >> Delay the expensive read on the FPGA_DBG register from once per mmio to
>>> >> once per forcewake section when we are doing the general wellbeing
>>> >> check rather than the targetted error detection. This almost reduces
>>> >> the overhead of the debug facility (for example when submitting execlists)
>>> >> to zero whilst keeping the debug checks around.
>>> >> 
>>> >> v2: Enable one-shot mmio debugging from the interrupt check as well as a
>>> >>     safeguard to catch invalid display writes from outside the powerwell.
>>> >> v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
>>> >>     removal
>>> >> 
>>> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> >> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>> >> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>> >
>>> > I'm unclear how this interacts (or how it sould interact) with patch 2:
>>> > Forcwake is mostly for GT registers, but patch 2 also tries to optimize
>>> > forcwake for GT registers. Do we really need both?
>>> 
>>> Assuming the hardware detects access to unpowered domains and
>>> to unregistered ranges by setting this bit, I would say that patch 2
>>> is not needed. One could argue that patch 2 is somewhat harmful as
>>> current register access pattern affects the detection.
>>> 
>>> Also the commit message in patch 2 is not valid wrt the code.
>>> 
>>> With skl, the debug bit seems to decay with time, instead of being
>>> sticky. So in there we could argue that in patch 4/4, the reading
>>> should be done before (and after) the forcewake scope.
>>
>> Do we know where the bits decay? Could it be that the firmware (dmc) does
>> something with it, or maybe it gets reset when we change display power
>> wells?
>
> Now when trying to actually measure the decay time, I can't reproduce
> the same behaviour anymore. Now it is sticky. Up until the display
> power off clears the register without explicit clearing write.
>
> Now I just wonder why I saw decay in just less than millisecond,
> few weeks back. I am willing to blame my imperfect test setup on this,
> and something just cleared it behind my back.
>

Well, apparently this is in display power well, like Daniel
noted.  Which means that if display is off, the write won't
stick. (but stays in some pci write buffer for 0.5usecs?
until it vanishes, thus the decay)

Test setup was otherwise water proof, but if one does
skl debugging from console and bdw debugging through
ssh, the display power well states are quite different...

<blinks>

Perks of this job is that you get to laugh a alot,
to yourself :)

-Mika


> -Mika
>
>
>
>>
>>> Paulo, have you tried if this bit detects access to unpowered
>>> domain with hsw/bdw?
>>
>> Yup, that's the main reason we have this, it's great for catching
>> rpm/power wells bugs.
>> -Daniel
>> -- 
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Zanoni, Paulo R Sept. 4, 2015, 1:39 p.m. UTC | #7
Em Sex, 2015-09-04 às 10:02 +0300, Mika Kuoppala escreveu:
> Paulo Zanoni <paulo.r.zanoni@intel.com> writes:

> 

> > From: Chris Wilson <chris@chris-wilson.co.uk>

> > 

> > Delay the expensive read on the FPGA_DBG register from once per 

> > mmio to

> > once per forcewake section when we are doing the general wellbeing

> > check rather than the targetted error detection. This almost 

> > reduces

> > the overhead of the debug facility (for example when submitting 

> > execlists)

> > to zero whilst keeping the debug checks around.

> > 

> > v2: Enable one-shot mmio debugging from the interrupt check as well 

> > as a

> >     safeguard to catch invalid display writes from outside the 

> > powerwell.

> > v3 (from Paulo): rebase since gen9 addition and 

> > intel_uncore_check_errors

> >     removal

> > 

> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

> > Cc: Mika Kuoppala <mika.kuoppala@intel.com>

> > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

> > ---

> >  drivers/gpu/drm/i915/intel_uncore.c | 52 +++++++++++++++++++++----

> > ------------

> >  1 file changed, 29 insertions(+), 23 deletions(-)

> > 

> > diff --git a/drivers/gpu/drm/i915/intel_uncore.c 

> > b/drivers/gpu/drm/i915/intel_uncore.c

> > index 8844c314..1fe63fc 100644

> > --- a/drivers/gpu/drm/i915/intel_uncore.c

> > +++ b/drivers/gpu/drm/i915/intel_uncore.c

> > @@ -148,6 +148,31 @@ fw_domains_put(struct drm_i915_private 

> > *dev_priv, enum forcewake_domains fw_doma

> >  }

> >  

> >  static void

> > +hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv)

> > +{

> > +	static bool mmio_debug_once = true;

> > +

> > +	if (i915.mmio_debug || !mmio_debug_once)

> > +		return;

> > +

> > +	if (__raw_i915_read32(dev_priv, FPGA_DBG) & 

> > FPGA_DBG_RM_NOCLAIM) {

> > +		DRM_ERROR("Unclaimed register detected, "

> > +			  "enabling oneshot unclaimed register 

> > reporting. "

> > +			  "Please use i915.mmio_debug=N for more 

> > information.\n");

> > +		__raw_i915_write32(dev_priv, FPGA_DBG, 

> > FPGA_DBG_RM_NOCLAIM);

> > +		i915.mmio_debug = mmio_debug_once--;

> > +	}

> > +}

> > +

> > +static void

> > +fw_domains_put_debug(struct drm_i915_private *dev_priv,

> > +		     enum forcewake_domains fw_domains)

> > +{

> > +	hsw_unclaimed_reg_detect(dev_priv);

> > +	fw_domains_put(dev_priv, fw_domains);

> > +}

> > +

> > +static void

> >  fw_domains_posting_read(struct drm_i915_private *dev_priv)

> >  {

> >  	struct intel_uncore_forcewake_domain *d;

> > @@ -627,26 +652,6 @@ hsw_unclaimed_reg_debug(struct 

> > drm_i915_private *dev_priv, u32 reg, bool read,

> >  	}

> >  }

> >  

> > -static void

> > -hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv, u32 

> > reg)

> > -{

> > -	static bool mmio_debug_once = true;

> > -

> > -	if (!UNCLAIMED_CHECK_RANGE(reg))

> > -		return;

> > -

> 

> You lost the range check in the moved version of function.


This was not an accident. After moving the function, the "reg" argument
doesn't make sense anymore, since the function is now called from
fw_domains_put() instead of the register macros.


> -Mika

> 

> > -	if (i915.mmio_debug || !mmio_debug_once)

> > -		return;

> > -

> > -	if (__raw_i915_read32(dev_priv, FPGA_DBG) & 

> > FPGA_DBG_RM_NOCLAIM) {

> > -		DRM_ERROR("Unclaimed register detected, "

> > -			  "enabling oneshot unclaimed register 

> > reporting. "

> > -			  "Please use i915.mmio_debug=N for more 

> > information.\n");

> > -		__raw_i915_write32(dev_priv, FPGA_DBG, 

> > FPGA_DBG_RM_NOCLAIM);

> > -		i915.mmio_debug = mmio_debug_once--;

> > -	}

> > -}

> > -

> >  #define GEN2_READ_HEADER(x) \

> >  	u##x val = 0; \

> >  	assert_device_not_suspended(dev_priv);

> > @@ -900,7 +905,6 @@ hsw_write##x(struct drm_i915_private *dev_priv, 

> > off_t reg, u##x val, bool trace)

> >  		gen6_gt_check_fifodbg(dev_priv); \

> >  	} \

> >  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \

> > -	hsw_unclaimed_reg_detect(dev_priv, reg); \

> >  	GEN6_WRITE_FOOTER; \

> >  }

> >  

> > @@ -942,7 +946,6 @@ gen8_write##x(struct drm_i915_private 

> > *dev_priv, off_t reg, u##x val, bool trace

> >  		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \

> >  	__raw_i915_write##x(dev_priv, reg, val); \

> >  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \

> > -	hsw_unclaimed_reg_detect(dev_priv, reg); \

> >  	GEN6_WRITE_FOOTER; \

> >  }

> >  

> > @@ -1008,7 +1011,6 @@ gen9_write##x(struct drm_i915_private 

> > *dev_priv, off_t reg, u##x val, \

> >  		__force_wake_get(dev_priv, fw_engine); \

> >  	__raw_i915_write##x(dev_priv, reg, val); \

> >  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \

> > -	hsw_unclaimed_reg_detect(dev_priv, reg); \

> >  	GEN6_WRITE_FOOTER; \

> >  }

> >  

> > @@ -1194,6 +1196,10 @@ static void 

> > intel_uncore_fw_domains_init(struct drm_device *dev)

> >  			       FORCEWAKE, FORCEWAKE_ACK);

> >  	}

> >  

> > +	if (HAS_FPGA_DBG_UNCLAIMED(dev) &&

> > +	    dev_priv->uncore.funcs.force_wake_put == 

> > fw_domains_put)

> > +		dev_priv->uncore.funcs.force_wake_put = 

> > fw_domains_put_debug;

> > +

> >  	/* All future platforms are expected to require complex 

> > power gating */

> >  	WARN_ON(dev_priv->uncore.fw_domains == 0);

> >  }
Zanoni, Paulo R Sept. 4, 2015, 1:46 p.m. UTC | #8
Em Sex, 2015-09-04 às 11:40 +0300, Mika Kuoppala escreveu:
> Daniel Vetter <daniel@ffwll.ch> writes:

> 

> > On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:

> > > From: Chris Wilson <chris@chris-wilson.co.uk>

> > > 

> > > Delay the expensive read on the FPGA_DBG register from once per 

> > > mmio to

> > > once per forcewake section when we are doing the general 

> > > wellbeing

> > > check rather than the targetted error detection. This almost 

> > > reduces

> > > the overhead of the debug facility (for example when submitting 

> > > execlists)

> > > to zero whilst keeping the debug checks around.

> > > 

> > > v2: Enable one-shot mmio debugging from the interrupt check as 

> > > well as a

> > >     safeguard to catch invalid display writes from outside the 

> > > powerwell.

> > > v3 (from Paulo): rebase since gen9 addition and 

> > > intel_uncore_check_errors

> > >     removal

> > > 

> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>

> > > Cc: Mika Kuoppala <mika.kuoppala@intel.com>

> > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

> > 

> > I'm unclear how this interacts (or how it sould interact) with 

> > patch 2:

> > Forcwake is mostly for GT registers, but patch 2 also tries to 

> > optimize

> > forcwake for GT registers. Do we really need both?

> 

> Assuming the hardware detects access to unpowered domains and

> to unregistered ranges by setting this bit, I would say that patch 2

> is not needed. One could argue that patch 2 is somewhat harmful as

> current register access pattern affects the detection.


Can you please elaborate? I'm more in favor of keeping patch 2 instead
of this. But I'm operating under the assumption that we only flip
FPGA_DBG bit 31 to 1 if we do an access inside the range. See the
discussion on patch 2. So doing the checks outside the range is useless
since it won't catch problems inside i915.ko.

> 

> Also the commit message in patch 2 is not valid wrt the code.


Why?

> 

> With skl, the debug bit seems to decay with time, instead of being

> sticky. So in there we could argue that in patch 4/4, the reading

> should be done before (and after) the forcewake scope.


I tested my patch on SKL and it still does detect the same problems it
was detecting before. Maybe we could write a little debugfs file to do
the accesses in the mentioned pattern and check the forcewake theory.

> 

> Paulo, have you tried if this bit detects access to unpowered

> domain with hsw/bdw?


FPGA_DBG bit 31 becomes 1 when we access an existing register on an
unpowered power well on HSW/BDW.

> 

> -Mika

> 

> > -Daniel

> > 

> > > ---

> > >  drivers/gpu/drm/i915/intel_uncore.c | 52 +++++++++++++++++++++--

> > > --------------

> > >  1 file changed, 29 insertions(+), 23 deletions(-)

> > > 

> > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c 

> > > b/drivers/gpu/drm/i915/intel_uncore.c

> > > index 8844c314..1fe63fc 100644

> > > --- a/drivers/gpu/drm/i915/intel_uncore.c

> > > +++ b/drivers/gpu/drm/i915/intel_uncore.c

> > > @@ -148,6 +148,31 @@ fw_domains_put(struct drm_i915_private 

> > > *dev_priv, enum forcewake_domains fw_doma

> > >  }

> > >  

> > >  static void

> > > +hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv)

> > > +{

> > > +	static bool mmio_debug_once = true;

> > > +

> > > +	if (i915.mmio_debug || !mmio_debug_once)

> > > +		return;

> > > +

> > > +	if (__raw_i915_read32(dev_priv, FPGA_DBG) & 

> > > FPGA_DBG_RM_NOCLAIM) {

> > > +		DRM_ERROR("Unclaimed register detected, "

> > > +			  "enabling oneshot unclaimed register 

> > > reporting. "

> > > +			  "Please use i915.mmio_debug=N for more 

> > > information.\n");

> > > +		__raw_i915_write32(dev_priv, FPGA_DBG, 

> > > FPGA_DBG_RM_NOCLAIM);

> > > +		i915.mmio_debug = mmio_debug_once--;

> > > +	}

> > > +}

> > > +

> > > +static void

> > > +fw_domains_put_debug(struct drm_i915_private *dev_priv,

> > > +		     enum forcewake_domains fw_domains)

> > > +{

> > > +	hsw_unclaimed_reg_detect(dev_priv);

> > > +	fw_domains_put(dev_priv, fw_domains);

> > > +}

> > > +

> > > +static void

> > >  fw_domains_posting_read(struct drm_i915_private *dev_priv)

> > >  {

> > >  	struct intel_uncore_forcewake_domain *d;

> > > @@ -627,26 +652,6 @@ hsw_unclaimed_reg_debug(struct 

> > > drm_i915_private *dev_priv, u32 reg, bool read,

> > >  	}

> > >  }

> > >  

> > > -static void

> > > -hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv, u32 

> > > reg)

> > > -{

> > > -	static bool mmio_debug_once = true;

> > > -

> > > -	if (!UNCLAIMED_CHECK_RANGE(reg))

> > > -		return;

> > > -

> > > -	if (i915.mmio_debug || !mmio_debug_once)

> > > -		return;

> > > -

> > > -	if (__raw_i915_read32(dev_priv, FPGA_DBG) & 

> > > FPGA_DBG_RM_NOCLAIM) {

> > > -		DRM_ERROR("Unclaimed register detected, "

> > > -			  "enabling oneshot unclaimed register 

> > > reporting. "

> > > -			  "Please use i915.mmio_debug=N for more 

> > > information.\n");

> > > -		__raw_i915_write32(dev_priv, FPGA_DBG, 

> > > FPGA_DBG_RM_NOCLAIM);

> > > -		i915.mmio_debug = mmio_debug_once--;

> > > -	}

> > > -}

> > > -

> > >  #define GEN2_READ_HEADER(x) \

> > >  	u##x val = 0; \

> > >  	assert_device_not_suspended(dev_priv);

> > > @@ -900,7 +905,6 @@ hsw_write##x(struct drm_i915_private 

> > > *dev_priv, off_t reg, u##x val, bool trace)

> > >  		gen6_gt_check_fifodbg(dev_priv); \

> > >  	} \

> > >  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \

> > > -	hsw_unclaimed_reg_detect(dev_priv, reg); \

> > >  	GEN6_WRITE_FOOTER; \

> > >  }

> > >  

> > > @@ -942,7 +946,6 @@ gen8_write##x(struct drm_i915_private 

> > > *dev_priv, off_t reg, u##x val, bool trace

> > >  		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \

> > >  	__raw_i915_write##x(dev_priv, reg, val); \

> > >  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \

> > > -	hsw_unclaimed_reg_detect(dev_priv, reg); \

> > >  	GEN6_WRITE_FOOTER; \

> > >  }

> > >  

> > > @@ -1008,7 +1011,6 @@ gen9_write##x(struct drm_i915_private 

> > > *dev_priv, off_t reg, u##x val, \

> > >  		__force_wake_get(dev_priv, fw_engine); \

> > >  	__raw_i915_write##x(dev_priv, reg, val); \

> > >  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \

> > > -	hsw_unclaimed_reg_detect(dev_priv, reg); \

> > >  	GEN6_WRITE_FOOTER; \

> > >  }

> > >  

> > > @@ -1194,6 +1196,10 @@ static void 

> > > intel_uncore_fw_domains_init(struct drm_device *dev)

> > >  			       FORCEWAKE, FORCEWAKE_ACK);

> > >  	}

> > >  

> > > +	if (HAS_FPGA_DBG_UNCLAIMED(dev) &&

> > > +	    dev_priv->uncore.funcs.force_wake_put == 

> > > fw_domains_put)

> > > +		dev_priv->uncore.funcs.force_wake_put = 

> > > fw_domains_put_debug;

> > > +

> > >  	/* All future platforms are expected to require complex 

> > > power gating */

> > >  	WARN_ON(dev_priv->uncore.fw_domains == 0);

> > >  }

> > > -- 

> > > 2.5.0

> > > 

> >
Mika Kuoppala Sept. 4, 2015, 1:57 p.m. UTC | #9
"Zanoni, Paulo R" <paulo.r.zanoni@intel.com> writes:

> Em Sex, 2015-09-04 às 11:40 +0300, Mika Kuoppala escreveu:
>> Daniel Vetter <daniel@ffwll.ch> writes:
>> 
>> > On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
>> > > From: Chris Wilson <chris@chris-wilson.co.uk>
>> > > 
>> > > Delay the expensive read on the FPGA_DBG register from once per 
>> > > mmio to
>> > > once per forcewake section when we are doing the general 
>> > > wellbeing
>> > > check rather than the targetted error detection. This almost 
>> > > reduces
>> > > the overhead of the debug facility (for example when submitting 
>> > > execlists)
>> > > to zero whilst keeping the debug checks around.
>> > > 
>> > > v2: Enable one-shot mmio debugging from the interrupt check as 
>> > > well as a
>> > >     safeguard to catch invalid display writes from outside the 
>> > > powerwell.
>> > > v3 (from Paulo): rebase since gen9 addition and 
>> > > intel_uncore_check_errors
>> > >     removal
>> > > 
>> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>> > > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>> > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> > 
>> > I'm unclear how this interacts (or how it sould interact) with 
>> > patch 2:
>> > Forcwake is mostly for GT registers, but patch 2 also tries to 
>> > optimize
>> > forcwake for GT registers. Do we really need both?
>> 
>> Assuming the hardware detects access to unpowered domains and
>> to unregistered ranges by setting this bit, I would say that patch 2
>> is not needed. One could argue that patch 2 is somewhat harmful as
>> current register access pattern affects the detection.
>
> Can you please elaborate? I'm more in favor of keeping patch 2 instead
> of this. But I'm operating under the assumption that we only flip
> FPGA_DBG bit 31 to 1 if we do an access inside the range. See the
> discussion on patch 2. So doing the checks outside the range is useless
> since it won't catch problems inside i915.ko.
>

Your answer with the example program cleared some confusion I had.
I have to test run that example on skl.

But lets imagine the situation userpsace does access unclaimed
range, and driver access pattern after the event is outside of
that detection range. We never sample the bit. We will notice the
unclaimed access only until drivers does something inside the
range. Is this acceptable?

If our goal is to catch problems only inside i915.ko, then
the range check is not harmful.

>> 
>> Also the commit message in patch 2 is not valid wrt the code.
>
> Why?
>

"For the normal use case (with i915.mmio_debug=0), this commit will
 avoid the extra __raw_i915_read32() call for every register outside..."

This was for v1 of the patch? It seems that whatever mmio_debug
has is secondary. The read is avoided always if the reg is outside
the range.

Thanks,
-Mika

>> 
>> With skl, the debug bit seems to decay with time, instead of being
>> sticky. So in there we could argue that in patch 4/4, the reading
>> should be done before (and after) the forcewake scope.
>
> I tested my patch on SKL and it still does detect the same problems it
> was detecting before. Maybe we could write a little debugfs file to do
> the accesses in the mentioned pattern and check the forcewake theory.
>
>> 
>> Paulo, have you tried if this bit detects access to unpowered
>> domain with hsw/bdw?
>
> FPGA_DBG bit 31 becomes 1 when we access an existing register on an
> unpowered power well on HSW/BDW.
>
>> 
>> -Mika
>> 
>> > -Daniel
>> > 
>> > > ---
>> > >  drivers/gpu/drm/i915/intel_uncore.c | 52 +++++++++++++++++++++--
>> > > --------------
>> > >  1 file changed, 29 insertions(+), 23 deletions(-)
>> > > 
>> > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
>> > > b/drivers/gpu/drm/i915/intel_uncore.c
>> > > index 8844c314..1fe63fc 100644
>> > > --- a/drivers/gpu/drm/i915/intel_uncore.c
>> > > +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> > > @@ -148,6 +148,31 @@ fw_domains_put(struct drm_i915_private 
>> > > *dev_priv, enum forcewake_domains fw_doma
>> > >  }
>> > >  
>> > >  static void
>> > > +hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv)
>> > > +{
>> > > +	static bool mmio_debug_once = true;
>> > > +
>> > > +	if (i915.mmio_debug || !mmio_debug_once)
>> > > +		return;
>> > > +
>> > > +	if (__raw_i915_read32(dev_priv, FPGA_DBG) & 
>> > > FPGA_DBG_RM_NOCLAIM) {
>> > > +		DRM_ERROR("Unclaimed register detected, "
>> > > +			  "enabling oneshot unclaimed register 
>> > > reporting. "
>> > > +			  "Please use i915.mmio_debug=N for more 
>> > > information.\n");
>> > > +		__raw_i915_write32(dev_priv, FPGA_DBG, 
>> > > FPGA_DBG_RM_NOCLAIM);
>> > > +		i915.mmio_debug = mmio_debug_once--;
>> > > +	}
>> > > +}
>> > > +
>> > > +static void
>> > > +fw_domains_put_debug(struct drm_i915_private *dev_priv,
>> > > +		     enum forcewake_domains fw_domains)
>> > > +{
>> > > +	hsw_unclaimed_reg_detect(dev_priv);
>> > > +	fw_domains_put(dev_priv, fw_domains);
>> > > +}
>> > > +
>> > > +static void
>> > >  fw_domains_posting_read(struct drm_i915_private *dev_priv)
>> > >  {
>> > >  	struct intel_uncore_forcewake_domain *d;
>> > > @@ -627,26 +652,6 @@ hsw_unclaimed_reg_debug(struct 
>> > > drm_i915_private *dev_priv, u32 reg, bool read,
>> > >  	}
>> > >  }
>> > >  
>> > > -static void
>> > > -hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv, u32 
>> > > reg)
>> > > -{
>> > > -	static bool mmio_debug_once = true;
>> > > -
>> > > -	if (!UNCLAIMED_CHECK_RANGE(reg))
>> > > -		return;
>> > > -
>> > > -	if (i915.mmio_debug || !mmio_debug_once)
>> > > -		return;
>> > > -
>> > > -	if (__raw_i915_read32(dev_priv, FPGA_DBG) & 
>> > > FPGA_DBG_RM_NOCLAIM) {
>> > > -		DRM_ERROR("Unclaimed register detected, "
>> > > -			  "enabling oneshot unclaimed register 
>> > > reporting. "
>> > > -			  "Please use i915.mmio_debug=N for more 
>> > > information.\n");
>> > > -		__raw_i915_write32(dev_priv, FPGA_DBG, 
>> > > FPGA_DBG_RM_NOCLAIM);
>> > > -		i915.mmio_debug = mmio_debug_once--;
>> > > -	}
>> > > -}
>> > > -
>> > >  #define GEN2_READ_HEADER(x) \
>> > >  	u##x val = 0; \
>> > >  	assert_device_not_suspended(dev_priv);
>> > > @@ -900,7 +905,6 @@ hsw_write##x(struct drm_i915_private 
>> > > *dev_priv, off_t reg, u##x val, bool trace)
>> > >  		gen6_gt_check_fifodbg(dev_priv); \
>> > >  	} \
>> > >  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
>> > > -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>> > >  	GEN6_WRITE_FOOTER; \
>> > >  }
>> > >  
>> > > @@ -942,7 +946,6 @@ gen8_write##x(struct drm_i915_private 
>> > > *dev_priv, off_t reg, u##x val, bool trace
>> > >  		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
>> > >  	__raw_i915_write##x(dev_priv, reg, val); \
>> > >  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
>> > > -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>> > >  	GEN6_WRITE_FOOTER; \
>> > >  }
>> > >  
>> > > @@ -1008,7 +1011,6 @@ gen9_write##x(struct drm_i915_private 
>> > > *dev_priv, off_t reg, u##x val, \
>> > >  		__force_wake_get(dev_priv, fw_engine); \
>> > >  	__raw_i915_write##x(dev_priv, reg, val); \
>> > >  	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
>> > > -	hsw_unclaimed_reg_detect(dev_priv, reg); \
>> > >  	GEN6_WRITE_FOOTER; \
>> > >  }
>> > >  
>> > > @@ -1194,6 +1196,10 @@ static void 
>> > > intel_uncore_fw_domains_init(struct drm_device *dev)
>> > >  			       FORCEWAKE, FORCEWAKE_ACK);
>> > >  	}
>> > >  
>> > > +	if (HAS_FPGA_DBG_UNCLAIMED(dev) &&
>> > > +	    dev_priv->uncore.funcs.force_wake_put == 
>> > > fw_domains_put)
>> > > +		dev_priv->uncore.funcs.force_wake_put = 
>> > > fw_domains_put_debug;
>> > > +
>> > >  	/* All future platforms are expected to require complex 
>> > > power gating */
>> > >  	WARN_ON(dev_priv->uncore.fw_domains == 0);
>> > >  }
>> > > -- 
>> > > 2.5.0
>> > > 
>> >
Paulo Zanoni Sept. 4, 2015, 2:08 p.m. UTC | #10
2015-09-04 10:57 GMT-03:00 Mika Kuoppala <mika.kuoppala@linux.intel.com>:
> "Zanoni, Paulo R" <paulo.r.zanoni@intel.com> writes:
>
>> Em Sex, 2015-09-04 às 11:40 +0300, Mika Kuoppala escreveu:
>>> Daniel Vetter <daniel@ffwll.ch> writes:
>>>
>>> > On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
>>> > > From: Chris Wilson <chris@chris-wilson.co.uk>
>>> > >
>>> > > Delay the expensive read on the FPGA_DBG register from once per
>>> > > mmio to
>>> > > once per forcewake section when we are doing the general
>>> > > wellbeing
>>> > > check rather than the targetted error detection. This almost
>>> > > reduces
>>> > > the overhead of the debug facility (for example when submitting
>>> > > execlists)
>>> > > to zero whilst keeping the debug checks around.
>>> > >
>>> > > v2: Enable one-shot mmio debugging from the interrupt check as
>>> > > well as a
>>> > >     safeguard to catch invalid display writes from outside the
>>> > > powerwell.
>>> > > v3 (from Paulo): rebase since gen9 addition and
>>> > > intel_uncore_check_errors
>>> > >     removal
>>> > >
>>> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> > > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>>> > > Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>> > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>> >
>>> > I'm unclear how this interacts (or how it sould interact) with
>>> > patch 2:
>>> > Forcwake is mostly for GT registers, but patch 2 also tries to
>>> > optimize
>>> > forcwake for GT registers. Do we really need both?
>>>
>>> Assuming the hardware detects access to unpowered domains and
>>> to unregistered ranges by setting this bit, I would say that patch 2
>>> is not needed. One could argue that patch 2 is somewhat harmful as
>>> current register access pattern affects the detection.
>>
>> Can you please elaborate? I'm more in favor of keeping patch 2 instead
>> of this. But I'm operating under the assumption that we only flip
>> FPGA_DBG bit 31 to 1 if we do an access inside the range. See the
>> discussion on patch 2. So doing the checks outside the range is useless
>> since it won't catch problems inside i915.ko.
>>
>
> Your answer with the example program cleared some confusion I had.
> I have to test run that example on skl.
>
> But lets imagine the situation userpsace does access unclaimed
> range, and driver access pattern after the event is outside of
> that detection range. We never sample the bit. We will notice the
> unclaimed access only until drivers does something inside the
> range. Is this acceptable?

That is a good question. If it turns out something actually erases the
FPGA_DBG contents after some time (which is what you seem to have
observed a few weeks ago), then we may have a problem. But if the bit
just stays there (my original assumption), eventually we will do MMIO
access in the display range (such as when the WM does a page flip or
the user moves the mouse) and we will detect it. But I don't have a
definitive "yes this will always be acceptable" answer. We can
probably imagine corner cases.

Maybe the real question is: which one is more acceptable? The (i)
forcewake put, (ii) the display register access, (iii) both or (iv)
none? And let's not forget that the "none" solution is the one that
keep hsw_unclaimed_reg_detect() appearing on "perf".

>
> If our goal is to catch problems only inside i915.ko, then
> the range check is not harmful.

The goal is mainly for i915.ko, but we should also be able to detect
other accesses (and not let bad user space DoS our driver while doing
it). History tells me that 99.9% of the cases the problem is on
i915.ko.

>
>>>
>>> Also the commit message in patch 2 is not valid wrt the code.
>>
>> Why?
>>
>
> "For the normal use case (with i915.mmio_debug=0), this commit will
>  avoid the extra __raw_i915_read32() call for every register outside..."
>
> This was for v1 of the patch? It seems that whatever mmio_debug
> has is secondary. The read is avoided always if the reg is outside
> the range.

I was just trying to highlight that the use case we really need to
care about is i915.mmio_debug=0 since it's the default. Maybe I should
rewrite the sentence.

>
> Thanks,
> -Mika
>
>>>
>>> With skl, the debug bit seems to decay with time, instead of being
>>> sticky. So in there we could argue that in patch 4/4, the reading
>>> should be done before (and after) the forcewake scope.
>>
>> I tested my patch on SKL and it still does detect the same problems it
>> was detecting before. Maybe we could write a little debugfs file to do
>> the accesses in the mentioned pattern and check the forcewake theory.
>>
>>>
>>> Paulo, have you tried if this bit detects access to unpowered
>>> domain with hsw/bdw?
>>
>> FPGA_DBG bit 31 becomes 1 when we access an existing register on an
>> unpowered power well on HSW/BDW.
>>
>>>
>>> -Mika
>>>
>>> > -Daniel
>>> >
>>> > > ---
>>> > >  drivers/gpu/drm/i915/intel_uncore.c | 52 +++++++++++++++++++++--
>>> > > --------------
>>> > >  1 file changed, 29 insertions(+), 23 deletions(-)
>>> > >
>>> > > diff --git a/drivers/gpu/drm/i915/intel_uncore.c
>>> > > b/drivers/gpu/drm/i915/intel_uncore.c
>>> > > index 8844c314..1fe63fc 100644
>>> > > --- a/drivers/gpu/drm/i915/intel_uncore.c
>>> > > +++ b/drivers/gpu/drm/i915/intel_uncore.c
>>> > > @@ -148,6 +148,31 @@ fw_domains_put(struct drm_i915_private
>>> > > *dev_priv, enum forcewake_domains fw_doma
>>> > >  }
>>> > >
>>> > >  static void
>>> > > +hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv)
>>> > > +{
>>> > > +        static bool mmio_debug_once = true;
>>> > > +
>>> > > +        if (i915.mmio_debug || !mmio_debug_once)
>>> > > +                return;
>>> > > +
>>> > > +        if (__raw_i915_read32(dev_priv, FPGA_DBG) &
>>> > > FPGA_DBG_RM_NOCLAIM) {
>>> > > +                DRM_ERROR("Unclaimed register detected, "
>>> > > +                          "enabling oneshot unclaimed register
>>> > > reporting. "
>>> > > +                          "Please use i915.mmio_debug=N for more
>>> > > information.\n");
>>> > > +                __raw_i915_write32(dev_priv, FPGA_DBG,
>>> > > FPGA_DBG_RM_NOCLAIM);
>>> > > +                i915.mmio_debug = mmio_debug_once--;
>>> > > +        }
>>> > > +}
>>> > > +
>>> > > +static void
>>> > > +fw_domains_put_debug(struct drm_i915_private *dev_priv,
>>> > > +                     enum forcewake_domains fw_domains)
>>> > > +{
>>> > > +        hsw_unclaimed_reg_detect(dev_priv);
>>> > > +        fw_domains_put(dev_priv, fw_domains);
>>> > > +}
>>> > > +
>>> > > +static void
>>> > >  fw_domains_posting_read(struct drm_i915_private *dev_priv)
>>> > >  {
>>> > >          struct intel_uncore_forcewake_domain *d;
>>> > > @@ -627,26 +652,6 @@ hsw_unclaimed_reg_debug(struct
>>> > > drm_i915_private *dev_priv, u32 reg, bool read,
>>> > >          }
>>> > >  }
>>> > >
>>> > > -static void
>>> > > -hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv, u32
>>> > > reg)
>>> > > -{
>>> > > -        static bool mmio_debug_once = true;
>>> > > -
>>> > > -        if (!UNCLAIMED_CHECK_RANGE(reg))
>>> > > -                return;
>>> > > -
>>> > > -        if (i915.mmio_debug || !mmio_debug_once)
>>> > > -                return;
>>> > > -
>>> > > -        if (__raw_i915_read32(dev_priv, FPGA_DBG) &
>>> > > FPGA_DBG_RM_NOCLAIM) {
>>> > > -                DRM_ERROR("Unclaimed register detected, "
>>> > > -                          "enabling oneshot unclaimed register
>>> > > reporting. "
>>> > > -                          "Please use i915.mmio_debug=N for more
>>> > > information.\n");
>>> > > -                __raw_i915_write32(dev_priv, FPGA_DBG,
>>> > > FPGA_DBG_RM_NOCLAIM);
>>> > > -                i915.mmio_debug = mmio_debug_once--;
>>> > > -        }
>>> > > -}
>>> > > -
>>> > >  #define GEN2_READ_HEADER(x) \
>>> > >          u##x val = 0; \
>>> > >          assert_device_not_suspended(dev_priv);
>>> > > @@ -900,7 +905,6 @@ hsw_write##x(struct drm_i915_private
>>> > > *dev_priv, off_t reg, u##x val, bool trace)
>>> > >                  gen6_gt_check_fifodbg(dev_priv); \
>>> > >          } \
>>> > >          hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
>>> > > -        hsw_unclaimed_reg_detect(dev_priv, reg); \
>>> > >          GEN6_WRITE_FOOTER; \
>>> > >  }
>>> > >
>>> > > @@ -942,7 +946,6 @@ gen8_write##x(struct drm_i915_private
>>> > > *dev_priv, off_t reg, u##x val, bool trace
>>> > >                  __force_wake_get(dev_priv, FORCEWAKE_RENDER); \
>>> > >          __raw_i915_write##x(dev_priv, reg, val); \
>>> > >          hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
>>> > > -        hsw_unclaimed_reg_detect(dev_priv, reg); \
>>> > >          GEN6_WRITE_FOOTER; \
>>> > >  }
>>> > >
>>> > > @@ -1008,7 +1011,6 @@ gen9_write##x(struct drm_i915_private
>>> > > *dev_priv, off_t reg, u##x val, \
>>> > >                  __force_wake_get(dev_priv, fw_engine); \
>>> > >          __raw_i915_write##x(dev_priv, reg, val); \
>>> > >          hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
>>> > > -        hsw_unclaimed_reg_detect(dev_priv, reg); \
>>> > >          GEN6_WRITE_FOOTER; \
>>> > >  }
>>> > >
>>> > > @@ -1194,6 +1196,10 @@ static void
>>> > > intel_uncore_fw_domains_init(struct drm_device *dev)
>>> > >                                 FORCEWAKE, FORCEWAKE_ACK);
>>> > >          }
>>> > >
>>> > > +        if (HAS_FPGA_DBG_UNCLAIMED(dev) &&
>>> > > +            dev_priv->uncore.funcs.force_wake_put ==
>>> > > fw_domains_put)
>>> > > +                dev_priv->uncore.funcs.force_wake_put =
>>> > > fw_domains_put_debug;
>>> > > +
>>> > >          /* All future platforms are expected to require complex
>>> > > power gating */
>>> > >          WARN_ON(dev_priv->uncore.fw_domains == 0);
>>> > >  }
>>> > > --
>>> > > 2.5.0
>>> > >
>>> >
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter Sept. 4, 2015, 2:53 p.m. UTC | #11
On Fri, Sep 04, 2015 at 03:18:14PM +0300, Mika Kuoppala wrote:
> Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:
> 
> > Daniel Vetter <daniel@ffwll.ch> writes:
> >
> >> On Fri, Sep 04, 2015 at 11:40:26AM +0300, Mika Kuoppala wrote:
> >>> Daniel Vetter <daniel@ffwll.ch> writes:
> >>> 
> >>> > On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
> >>> >> From: Chris Wilson <chris@chris-wilson.co.uk>
> >>> >> 
> >>> >> Delay the expensive read on the FPGA_DBG register from once per mmio to
> >>> >> once per forcewake section when we are doing the general wellbeing
> >>> >> check rather than the targetted error detection. This almost reduces
> >>> >> the overhead of the debug facility (for example when submitting execlists)
> >>> >> to zero whilst keeping the debug checks around.
> >>> >> 
> >>> >> v2: Enable one-shot mmio debugging from the interrupt check as well as a
> >>> >>     safeguard to catch invalid display writes from outside the powerwell.
> >>> >> v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
> >>> >>     removal
> >>> >> 
> >>> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> >> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> >>> >> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> >>> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> >>> >
> >>> > I'm unclear how this interacts (or how it sould interact) with patch 2:
> >>> > Forcwake is mostly for GT registers, but patch 2 also tries to optimize
> >>> > forcwake for GT registers. Do we really need both?
> >>> 
> >>> Assuming the hardware detects access to unpowered domains and
> >>> to unregistered ranges by setting this bit, I would say that patch 2
> >>> is not needed. One could argue that patch 2 is somewhat harmful as
> >>> current register access pattern affects the detection.
> >>> 
> >>> Also the commit message in patch 2 is not valid wrt the code.
> >>> 
> >>> With skl, the debug bit seems to decay with time, instead of being
> >>> sticky. So in there we could argue that in patch 4/4, the reading
> >>> should be done before (and after) the forcewake scope.
> >>
> >> Do we know where the bits decay? Could it be that the firmware (dmc) does
> >> something with it, or maybe it gets reset when we change display power
> >> wells?
> >
> > Now when trying to actually measure the decay time, I can't reproduce
> > the same behaviour anymore. Now it is sticky. Up until the display
> > power off clears the register without explicit clearing write.
> >
> > Now I just wonder why I saw decay in just less than millisecond,
> > few weeks back. I am willing to blame my imperfect test setup on this,
> > and something just cleared it behind my back.
> >
> 
> Well, apparently this is in display power well, like Daniel
> noted.  Which means that if display is off, the write won't
> stick. (but stays in some pci write buffer for 0.5usecs?
> until it vanishes, thus the decay)
> 
> Test setup was otherwise water proof, but if one does
> skl debugging from console and bdw debugging through
> ssh, the display power well states are quite different...

Hm, doesn't that mean that we can't do delayed checking since what we
really want is spot unclaimed register writes when the power well is off?
At least it was really useful to catch the oddball runtime pm management
bug. So perhaps we indeed want the range-based filter and not this patch
here?
-Daniel
Ville Syrjälä Sept. 4, 2015, 3:16 p.m. UTC | #12
On Fri, Sep 04, 2015 at 04:53:28PM +0200, Daniel Vetter wrote:
> On Fri, Sep 04, 2015 at 03:18:14PM +0300, Mika Kuoppala wrote:
> > Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:
> > 
> > > Daniel Vetter <daniel@ffwll.ch> writes:
> > >
> > >> On Fri, Sep 04, 2015 at 11:40:26AM +0300, Mika Kuoppala wrote:
> > >>> Daniel Vetter <daniel@ffwll.ch> writes:
> > >>> 
> > >>> > On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
> > >>> >> From: Chris Wilson <chris@chris-wilson.co.uk>
> > >>> >> 
> > >>> >> Delay the expensive read on the FPGA_DBG register from once per mmio to
> > >>> >> once per forcewake section when we are doing the general wellbeing
> > >>> >> check rather than the targetted error detection. This almost reduces
> > >>> >> the overhead of the debug facility (for example when submitting execlists)
> > >>> >> to zero whilst keeping the debug checks around.
> > >>> >> 
> > >>> >> v2: Enable one-shot mmio debugging from the interrupt check as well as a
> > >>> >>     safeguard to catch invalid display writes from outside the powerwell.
> > >>> >> v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
> > >>> >>     removal
> > >>> >> 
> > >>> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > >>> >> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > >>> >> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > >>> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > >>> >
> > >>> > I'm unclear how this interacts (or how it sould interact) with patch 2:
> > >>> > Forcwake is mostly for GT registers, but patch 2 also tries to optimize
> > >>> > forcwake for GT registers. Do we really need both?
> > >>> 
> > >>> Assuming the hardware detects access to unpowered domains and
> > >>> to unregistered ranges by setting this bit, I would say that patch 2
> > >>> is not needed. One could argue that patch 2 is somewhat harmful as
> > >>> current register access pattern affects the detection.
> > >>> 
> > >>> Also the commit message in patch 2 is not valid wrt the code.
> > >>> 
> > >>> With skl, the debug bit seems to decay with time, instead of being
> > >>> sticky. So in there we could argue that in patch 4/4, the reading
> > >>> should be done before (and after) the forcewake scope.
> > >>
> > >> Do we know where the bits decay? Could it be that the firmware (dmc) does
> > >> something with it, or maybe it gets reset when we change display power
> > >> wells?
> > >
> > > Now when trying to actually measure the decay time, I can't reproduce
> > > the same behaviour anymore. Now it is sticky. Up until the display
> > > power off clears the register without explicit clearing write.
> > >
> > > Now I just wonder why I saw decay in just less than millisecond,
> > > few weeks back. I am willing to blame my imperfect test setup on this,
> > > and something just cleared it behind my back.
> > >
> > 
> > Well, apparently this is in display power well, like Daniel
> > noted.  Which means that if display is off, the write won't
> > stick. (but stays in some pci write buffer for 0.5usecs?
> > until it vanishes, thus the decay)
> > 
> > Test setup was otherwise water proof, but if one does
> > skl debugging from console and bdw debugging through
> > ssh, the display power well states are quite different...
> 
> Hm, doesn't that mean that we can't do delayed checking since what we
> really want is spot unclaimed register writes when the power well is off?
> At least it was really useful to catch the oddball runtime pm management
> bug. So perhaps we indeed want the range-based filter and not this patch
> here?

Would it also make sense to enable/disable the checks from the power
well enable/disable hooks, so that we would minimize the overhead when
the power well is actually enabled?
Ville Syrjälä Sept. 4, 2015, 3:20 p.m. UTC | #13
On Fri, Sep 04, 2015 at 06:16:19PM +0300, Ville Syrjälä wrote:
> On Fri, Sep 04, 2015 at 04:53:28PM +0200, Daniel Vetter wrote:
> > On Fri, Sep 04, 2015 at 03:18:14PM +0300, Mika Kuoppala wrote:
> > > Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:
> > > 
> > > > Daniel Vetter <daniel@ffwll.ch> writes:
> > > >
> > > >> On Fri, Sep 04, 2015 at 11:40:26AM +0300, Mika Kuoppala wrote:
> > > >>> Daniel Vetter <daniel@ffwll.ch> writes:
> > > >>> 
> > > >>> > On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
> > > >>> >> From: Chris Wilson <chris@chris-wilson.co.uk>
> > > >>> >> 
> > > >>> >> Delay the expensive read on the FPGA_DBG register from once per mmio to
> > > >>> >> once per forcewake section when we are doing the general wellbeing
> > > >>> >> check rather than the targetted error detection. This almost reduces
> > > >>> >> the overhead of the debug facility (for example when submitting execlists)
> > > >>> >> to zero whilst keeping the debug checks around.
> > > >>> >> 
> > > >>> >> v2: Enable one-shot mmio debugging from the interrupt check as well as a
> > > >>> >>     safeguard to catch invalid display writes from outside the powerwell.
> > > >>> >> v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
> > > >>> >>     removal
> > > >>> >> 
> > > >>> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > >>> >> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > >>> >> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > > >>> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > >>> >
> > > >>> > I'm unclear how this interacts (or how it sould interact) with patch 2:
> > > >>> > Forcwake is mostly for GT registers, but patch 2 also tries to optimize
> > > >>> > forcwake for GT registers. Do we really need both?
> > > >>> 
> > > >>> Assuming the hardware detects access to unpowered domains and
> > > >>> to unregistered ranges by setting this bit, I would say that patch 2
> > > >>> is not needed. One could argue that patch 2 is somewhat harmful as
> > > >>> current register access pattern affects the detection.
> > > >>> 
> > > >>> Also the commit message in patch 2 is not valid wrt the code.
> > > >>> 
> > > >>> With skl, the debug bit seems to decay with time, instead of being
> > > >>> sticky. So in there we could argue that in patch 4/4, the reading
> > > >>> should be done before (and after) the forcewake scope.
> > > >>
> > > >> Do we know where the bits decay? Could it be that the firmware (dmc) does
> > > >> something with it, or maybe it gets reset when we change display power
> > > >> wells?
> > > >
> > > > Now when trying to actually measure the decay time, I can't reproduce
> > > > the same behaviour anymore. Now it is sticky. Up until the display
> > > > power off clears the register without explicit clearing write.
> > > >
> > > > Now I just wonder why I saw decay in just less than millisecond,
> > > > few weeks back. I am willing to blame my imperfect test setup on this,
> > > > and something just cleared it behind my back.
> > > >
> > > 
> > > Well, apparently this is in display power well, like Daniel
> > > noted.  Which means that if display is off, the write won't
> > > stick. (but stays in some pci write buffer for 0.5usecs?
> > > until it vanishes, thus the decay)
> > > 
> > > Test setup was otherwise water proof, but if one does
> > > skl debugging from console and bdw debugging through
> > > ssh, the display power well states are quite different...
> > 
> > Hm, doesn't that mean that we can't do delayed checking since what we
> > really want is spot unclaimed register writes when the power well is off?
> > At least it was really useful to catch the oddball runtime pm management
> > bug. So perhaps we indeed want the range-based filter and not this patch
> > here?
> 
> Would it also make sense to enable/disable the checks from the power
> well enable/disable hooks, so that we would minimize the overhead when
> the power well is actually enabled?

Answering myself... That would potentially lose detection of totally
invalid register (ie. ones that don't even exist) accesses while the
power well is on. Assuming it can detect such things.
Daniel Vetter Sept. 4, 2015, 3:23 p.m. UTC | #14
On Fri, Sep 04, 2015 at 06:16:19PM +0300, Ville Syrjälä wrote:
> On Fri, Sep 04, 2015 at 04:53:28PM +0200, Daniel Vetter wrote:
> > On Fri, Sep 04, 2015 at 03:18:14PM +0300, Mika Kuoppala wrote:
> > > Mika Kuoppala <mika.kuoppala@linux.intel.com> writes:
> > > 
> > > > Daniel Vetter <daniel@ffwll.ch> writes:
> > > >
> > > >> On Fri, Sep 04, 2015 at 11:40:26AM +0300, Mika Kuoppala wrote:
> > > >>> Daniel Vetter <daniel@ffwll.ch> writes:
> > > >>> 
> > > >>> > On Thu, Sep 03, 2015 at 04:51:45PM -0300, Paulo Zanoni wrote:
> > > >>> >> From: Chris Wilson <chris@chris-wilson.co.uk>
> > > >>> >> 
> > > >>> >> Delay the expensive read on the FPGA_DBG register from once per mmio to
> > > >>> >> once per forcewake section when we are doing the general wellbeing
> > > >>> >> check rather than the targetted error detection. This almost reduces
> > > >>> >> the overhead of the debug facility (for example when submitting execlists)
> > > >>> >> to zero whilst keeping the debug checks around.
> > > >>> >> 
> > > >>> >> v2: Enable one-shot mmio debugging from the interrupt check as well as a
> > > >>> >>     safeguard to catch invalid display writes from outside the powerwell.
> > > >>> >> v3 (from Paulo): rebase since gen9 addition and intel_uncore_check_errors
> > > >>> >>     removal
> > > >>> >> 
> > > >>> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > >>> >> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > >>> >> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > > >>> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > >>> >
> > > >>> > I'm unclear how this interacts (or how it sould interact) with patch 2:
> > > >>> > Forcwake is mostly for GT registers, but patch 2 also tries to optimize
> > > >>> > forcwake for GT registers. Do we really need both?
> > > >>> 
> > > >>> Assuming the hardware detects access to unpowered domains and
> > > >>> to unregistered ranges by setting this bit, I would say that patch 2
> > > >>> is not needed. One could argue that patch 2 is somewhat harmful as
> > > >>> current register access pattern affects the detection.
> > > >>> 
> > > >>> Also the commit message in patch 2 is not valid wrt the code.
> > > >>> 
> > > >>> With skl, the debug bit seems to decay with time, instead of being
> > > >>> sticky. So in there we could argue that in patch 4/4, the reading
> > > >>> should be done before (and after) the forcewake scope.
> > > >>
> > > >> Do we know where the bits decay? Could it be that the firmware (dmc) does
> > > >> something with it, or maybe it gets reset when we change display power
> > > >> wells?
> > > >
> > > > Now when trying to actually measure the decay time, I can't reproduce
> > > > the same behaviour anymore. Now it is sticky. Up until the display
> > > > power off clears the register without explicit clearing write.
> > > >
> > > > Now I just wonder why I saw decay in just less than millisecond,
> > > > few weeks back. I am willing to blame my imperfect test setup on this,
> > > > and something just cleared it behind my back.
> > > >
> > > 
> > > Well, apparently this is in display power well, like Daniel
> > > noted.  Which means that if display is off, the write won't
> > > stick. (but stays in some pci write buffer for 0.5usecs?
> > > until it vanishes, thus the decay)
> > > 
> > > Test setup was otherwise water proof, but if one does
> > > skl debugging from console and bdw debugging through
> > > ssh, the display power well states are quite different...
> > 
> > Hm, doesn't that mean that we can't do delayed checking since what we
> > really want is spot unclaimed register writes when the power well is off?
> > At least it was really useful to catch the oddball runtime pm management
> > bug. So perhaps we indeed want the range-based filter and not this patch
> > here?
> 
> Would it also make sense to enable/disable the checks from the power
> well enable/disable hooks, so that we would minimize the overhead when
> the power well is actually enabled?

Since we have Russian dolls power wells we could only completely disable
them when everything is on, which is only when you have more than 1 screen
connected. That's not too many systems :(
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 8844c314..1fe63fc 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -148,6 +148,31 @@  fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma
 }
 
 static void
+hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv)
+{
+	static bool mmio_debug_once = true;
+
+	if (i915.mmio_debug || !mmio_debug_once)
+		return;
+
+	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
+		DRM_ERROR("Unclaimed register detected, "
+			  "enabling oneshot unclaimed register reporting. "
+			  "Please use i915.mmio_debug=N for more information.\n");
+		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+		i915.mmio_debug = mmio_debug_once--;
+	}
+}
+
+static void
+fw_domains_put_debug(struct drm_i915_private *dev_priv,
+		     enum forcewake_domains fw_domains)
+{
+	hsw_unclaimed_reg_detect(dev_priv);
+	fw_domains_put(dev_priv, fw_domains);
+}
+
+static void
 fw_domains_posting_read(struct drm_i915_private *dev_priv)
 {
 	struct intel_uncore_forcewake_domain *d;
@@ -627,26 +652,6 @@  hsw_unclaimed_reg_debug(struct drm_i915_private *dev_priv, u32 reg, bool read,
 	}
 }
 
-static void
-hsw_unclaimed_reg_detect(struct drm_i915_private *dev_priv, u32 reg)
-{
-	static bool mmio_debug_once = true;
-
-	if (!UNCLAIMED_CHECK_RANGE(reg))
-		return;
-
-	if (i915.mmio_debug || !mmio_debug_once)
-		return;
-
-	if (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM) {
-		DRM_ERROR("Unclaimed register detected, "
-			  "enabling oneshot unclaimed register reporting. "
-			  "Please use i915.mmio_debug=N for more information.\n");
-		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-		i915.mmio_debug = mmio_debug_once--;
-	}
-}
-
 #define GEN2_READ_HEADER(x) \
 	u##x val = 0; \
 	assert_device_not_suspended(dev_priv);
@@ -900,7 +905,6 @@  hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace)
 		gen6_gt_check_fifodbg(dev_priv); \
 	} \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
-	hsw_unclaimed_reg_detect(dev_priv, reg); \
 	GEN6_WRITE_FOOTER; \
 }
 
@@ -942,7 +946,6 @@  gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace
 		__force_wake_get(dev_priv, FORCEWAKE_RENDER); \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
-	hsw_unclaimed_reg_detect(dev_priv, reg); \
 	GEN6_WRITE_FOOTER; \
 }
 
@@ -1008,7 +1011,6 @@  gen9_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, \
 		__force_wake_get(dev_priv, fw_engine); \
 	__raw_i915_write##x(dev_priv, reg, val); \
 	hsw_unclaimed_reg_debug(dev_priv, reg, false, false); \
-	hsw_unclaimed_reg_detect(dev_priv, reg); \
 	GEN6_WRITE_FOOTER; \
 }
 
@@ -1194,6 +1196,10 @@  static void intel_uncore_fw_domains_init(struct drm_device *dev)
 			       FORCEWAKE, FORCEWAKE_ACK);
 	}
 
+	if (HAS_FPGA_DBG_UNCLAIMED(dev) &&
+	    dev_priv->uncore.funcs.force_wake_put == fw_domains_put)
+		dev_priv->uncore.funcs.force_wake_put = fw_domains_put_debug;
+
 	/* All future platforms are expected to require complex power gating */
 	WARN_ON(dev_priv->uncore.fw_domains == 0);
 }