diff mbox

[4/6] drm/i915/vlv: Added 3 rendering specific Hw Workarounds in clock gating fn

Message ID 1390362310-15963-5-git-send-email-akash.goel@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

akash.goel@intel.com Jan. 22, 2014, 3:45 a.m. UTC
From: Akash Goel <akash.goel@intel.com>

Added 2 new rendering specific Workarounds
1. WaDisable_RenderCache_OperationalFlush
     Operational flush cannot be enabled on
     BWG A0 [Errata BWT006]
2. WaVSThreadDispatchOverride
     Performance optimization - Hw will decide which
     half slice the thread will dispatch, May not be
     really needed for VLV, as its single slice

Modified the implementation of 1 workaround
1. WaDisableL3Bank2xClockGate
    Disabling L3 clock gating- MMIO 940c[25] = 1

Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h |  3 +++
 drivers/gpu/drm/i915/intel_pm.c | 22 +++++++++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

Comments

Ville Syrjala Jan. 22, 2014, 11:10 a.m. UTC | #1
On Wed, Jan 22, 2014 at 09:15:08AM +0530, akash.goel@intel.com wrote:
> From: Akash Goel <akash.goel@intel.com>
> 
> Added 2 new rendering specific Workarounds
> 1. WaDisable_RenderCache_OperationalFlush
>      Operational flush cannot be enabled on
>      BWG A0 [Errata BWT006]
> 2. WaVSThreadDispatchOverride
>      Performance optimization - Hw will decide which
>      half slice the thread will dispatch, May not be
>      really needed for VLV, as its single slice
> 
> Modified the implementation of 1 workaround
> 1. WaDisableL3Bank2xClockGate
>     Disabling L3 clock gating- MMIO 940c[25] = 1

Three things in one patch -> needs to be split up.

> 
> Signed-off-by: Akash Goel <akash.goel@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h |  3 +++
>  drivers/gpu/drm/i915/intel_pm.c | 22 +++++++++++++++++++++-
>  2 files changed, 24 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index a699efd..d829754 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -934,6 +934,9 @@
>  #define   ECO_GATING_CX_ONLY	(1<<3)
>  #define   ECO_FLIP_DONE		(1<<0)
>  
> +#define GEN7_CACHE_MODE_0	0x07000 /* IVB+ only */
> +#define GEN7_RC_OP_FLUSH_ENABLE (1<<0)
> +
>  #define CACHE_MODE_1		0x7004 /* IVB+ */
>  #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1<<6)
>  
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 469170c..4c36ff8 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -4955,6 +4955,12 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
>  	I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
>  	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
>  
> +	/* WaDisable_RenderCache_OperationalFlush
> +	 * Clear bit 0, so we do a AND with the mask
> +	 * to keep other bits the same */
> +	I915_WRITE(GEN7_CACHE_MODE_0,  (I915_READ(GEN7_CACHE_MODE_0) |
> +			  _MASKED_BIT_DISABLE(GEN7_RC_OP_FLUSH_ENABLE)));

This should be disabled for everything gen4+. We don't seem to do it for
any other platform though, and I guess the reason is that it should
already default to disabled. So I'm not sure we need this, but if we do
then I'd prefer to do it consistently for all platforms.

> +
>  	/* WaForceL3Serialization:vlv */
>  	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
>  		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
> @@ -4991,10 +4997,24 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
>  		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
>  		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
>  
> -	I915_WRITE(GEN7_UCGCTL4, GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
> +	/* WaDisableL3Bank2xClockGate
> +	 * Disabling L3 clock gating- MMIO 940c[25] = 1
> +	 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
> +	I915_WRIT9E(GEN7_UCGCTL4,
> +		I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
>  
>  	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
>  
> +	/* WaVSThreadDispatchOverride
> +	 * Hw will decide which half slice the thread will dispatch.
> +	 * May not be needed for VLV, as its a single slice */
> +	I915_WRITE(GEN7_CACHE_MODE_0,
> +		I915_READ(GEN7_FF_THREAD_MODE) &
> +		(~GEN7_FF_VS_SCHED_LOAD_BALANCE));

I'm pretty sure my workaround series from last summer tried to clean up
this w/a for all platforms. I guess you didn't look at those patches.

> +
> +	/* WaDisable4x2SubspanOptimization,
> +	 * Disable combining of two 2x2 subspans into a 4x2 subspan
> +	 * Set chicken bit to disable subspan optimization */

This should also be another patch.

>  	I915_WRITE(CACHE_MODE_1,
>  		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
>  
> -- 
> 1.8.5.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a699efd..d829754 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -934,6 +934,9 @@ 
 #define   ECO_GATING_CX_ONLY	(1<<3)
 #define   ECO_FLIP_DONE		(1<<0)
 
+#define GEN7_CACHE_MODE_0	0x07000 /* IVB+ only */
+#define GEN7_RC_OP_FLUSH_ENABLE (1<<0)
+
 #define CACHE_MODE_1		0x7004 /* IVB+ */
 #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE (1<<6)
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 469170c..4c36ff8 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4955,6 +4955,12 @@  static void valleyview_init_clock_gating(struct drm_device *dev)
 	I915_WRITE(GEN7_L3CNTLREG1, I915_READ(GEN7_L3CNTLREG1) | GEN7_L3AGDIS);
 	I915_WRITE(GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
 
+	/* WaDisable_RenderCache_OperationalFlush
+	 * Clear bit 0, so we do a AND with the mask
+	 * to keep other bits the same */
+	I915_WRITE(GEN7_CACHE_MODE_0,  (I915_READ(GEN7_CACHE_MODE_0) |
+			  _MASKED_BIT_DISABLE(GEN7_RC_OP_FLUSH_ENABLE)));
+
 	/* WaForceL3Serialization:vlv */
 	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
 		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
@@ -4991,10 +4997,24 @@  static void valleyview_init_clock_gating(struct drm_device *dev)
 		   GEN6_RCPBUNIT_CLOCK_GATE_DISABLE |
 		   GEN6_RCCUNIT_CLOCK_GATE_DISABLE);
 
-	I915_WRITE(GEN7_UCGCTL4, GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
+	/* WaDisableL3Bank2xClockGate
+	 * Disabling L3 clock gating- MMIO 940c[25] = 1
+	 * Set bit 25, to disable L3_BANK_2x_CLK_GATING */
+	I915_WRITE(GEN7_UCGCTL4,
+		I915_READ(GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
 
 	I915_WRITE(MI_ARB_VLV, MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE);
 
+	/* WaVSThreadDispatchOverride
+	 * Hw will decide which half slice the thread will dispatch.
+	 * May not be needed for VLV, as its a single slice */
+	I915_WRITE(GEN7_CACHE_MODE_0,
+		I915_READ(GEN7_FF_THREAD_MODE) &
+		(~GEN7_FF_VS_SCHED_LOAD_BALANCE));
+
+	/* WaDisable4x2SubspanOptimization,
+	 * Disable combining of two 2x2 subspans into a 4x2 subspan
+	 * Set chicken bit to disable subspan optimization */
 	I915_WRITE(CACHE_MODE_1,
 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));