diff mbox

[v3,5/6] drm/i915/vlv:Implement the WA 'WaDisable_RenderCache_OperationalFlush'

Message ID 1396500134-26674-1-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

sourab.gupta@intel.com April 3, 2014, 4:42 a.m. UTC
From: Akash Goel <akash.goel@intel.com>

On Gen4+ platforms (except BDW), Render Cache Operational flush
cannot be enabled.
This WA is apparently required for all Gen4+ platforms,except BDW.
In BDW, the bit has been repurposed otherwise.
This has been tested only on vlv.

v2: Corrected the code regarding the wrong usage of
MASKED_BIT_DISABLE (Chris)

v3: Enhancing the scope of WA to Gen4+ platforms except BDW (Ville)

Signed-off-by: Akash Goel <akash.goel@intel.com>
Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h |  1 +
 drivers/gpu/drm/i915/intel_pm.c | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

Comments

Ville Syrjala April 4, 2014, 11:17 a.m. UTC | #1
On Thu, Apr 03, 2014 at 10:12:14AM +0530, sourab.gupta@intel.com wrote:
> From: Akash Goel <akash.goel@intel.com>
> 
> On Gen4+ platforms (except BDW), Render Cache Operational flush
> cannot be enabled.
> This WA is apparently required for all Gen4+ platforms,except BDW.
> In BDW, the bit has been repurposed otherwise.
> This has been tested only on vlv.
> 
> v2: Corrected the code regarding the wrong usage of
> MASKED_BIT_DISABLE (Chris)
> 
> v3: Enhancing the scope of WA to Gen4+ platforms except BDW (Ville)

Actually you you missed g4x,crestline,broadwater. Add it into those as
well, and you can add:
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

> 
> Signed-off-by: Akash Goel <akash.goel@intel.com>
> Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h |  1 +
>  drivers/gpu/drm/i915/intel_pm.c | 15 +++++++++++++++
>  2 files changed, 16 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 393f93e..366c0bf 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1060,6 +1060,7 @@ enum punit_power_well {
>  #define   ECO_FLIP_DONE		(1<<0)
>  
>  #define CACHE_MODE_0_GEN7	0x7000 /* IVB+ */
> +#define RC_OP_FLUSH_ENABLE (1<<0)
>  #define   HIZ_RAW_STALL_OPT_DISABLE (1<<2)
>  #define CACHE_MODE_1		0x7004 /* IVB+ */
>  #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE	(1<<6)
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 1454777..d181735 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -4624,6 +4624,9 @@ static void ironlake_init_clock_gating(struct drm_device *dev)
>  	I915_WRITE(CACHE_MODE_0,
>  		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
>  
> +	/* WaDisable_RenderCache_OperationalFlush:ilk */
> +	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
> +
>  	g4x_disable_trickle_feed(dev);
>  
>  	ibx_init_clock_gating(dev);
> @@ -4699,6 +4702,9 @@ static void gen6_init_clock_gating(struct drm_device *dev)
>  		I915_WRITE(GEN6_GT_MODE,
>  			   _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE));
>  
> +	/* WaDisable_RenderCache_OperationalFlush:snb */
> +	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
> +
>  	/*
>  	 * BSpec recoomends 8x4 when MSAA is used,
>  	 * however in practice 16x4 seems fastest.
> @@ -4938,6 +4944,9 @@ static void haswell_init_clock_gating(struct drm_device *dev)
>  	I915_WRITE(GEN7_FF_THREAD_MODE,
>  		   I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
>  
> +	/* WaDisable_RenderCache_OperationalFlush:hsw */
> +	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
> +
>  	/* enable HiZ Raw Stall Optimization */
>  	I915_WRITE(CACHE_MODE_0_GEN7,
>  		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
> @@ -4990,6 +4999,9 @@ static void ivybridge_init_clock_gating(struct drm_device *dev)
>  		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
>  			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
>  
> +	/* WaDisable_RenderCache_OperationalFlush:ivb */
> +	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
> +
>  	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
>  	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
>  		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
> @@ -5107,6 +5119,9 @@ static void valleyview_init_clock_gating(struct drm_device *dev)
>  		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
>  				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
>  
> +	/* WaDisable_RenderCache_OperationalFlush:vlv */
> +	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
> +
>  	/* WaForceL3Serialization:vlv */
>  	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
>  		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);
> -- 
> 1.8.5.1
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 393f93e..366c0bf 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1060,6 +1060,7 @@  enum punit_power_well {
 #define   ECO_FLIP_DONE		(1<<0)
 
 #define CACHE_MODE_0_GEN7	0x7000 /* IVB+ */
+#define RC_OP_FLUSH_ENABLE (1<<0)
 #define   HIZ_RAW_STALL_OPT_DISABLE (1<<2)
 #define CACHE_MODE_1		0x7004 /* IVB+ */
 #define   PIXEL_SUBSPAN_COLLECT_OPT_DISABLE	(1<<6)
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1454777..d181735 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4624,6 +4624,9 @@  static void ironlake_init_clock_gating(struct drm_device *dev)
 	I915_WRITE(CACHE_MODE_0,
 		   _MASKED_BIT_ENABLE(CM0_PIPELINED_RENDER_FLUSH_DISABLE));
 
+	/* WaDisable_RenderCache_OperationalFlush:ilk */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	g4x_disable_trickle_feed(dev);
 
 	ibx_init_clock_gating(dev);
@@ -4699,6 +4702,9 @@  static void gen6_init_clock_gating(struct drm_device *dev)
 		I915_WRITE(GEN6_GT_MODE,
 			   _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE));
 
+	/* WaDisable_RenderCache_OperationalFlush:snb */
+	I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	/*
 	 * BSpec recoomends 8x4 when MSAA is used,
 	 * however in practice 16x4 seems fastest.
@@ -4938,6 +4944,9 @@  static void haswell_init_clock_gating(struct drm_device *dev)
 	I915_WRITE(GEN7_FF_THREAD_MODE,
 		   I915_READ(GEN7_FF_THREAD_MODE) & ~GEN7_FF_VS_REF_CNT_FFME);
 
+	/* WaDisable_RenderCache_OperationalFlush:hsw */
+	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	/* enable HiZ Raw Stall Optimization */
 	I915_WRITE(CACHE_MODE_0_GEN7,
 		   _MASKED_BIT_DISABLE(HIZ_RAW_STALL_OPT_DISABLE));
@@ -4990,6 +4999,9 @@  static void ivybridge_init_clock_gating(struct drm_device *dev)
 		I915_WRITE(GEN7_HALF_SLICE_CHICKEN1,
 			   _MASKED_BIT_ENABLE(GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
+	/* WaDisable_RenderCache_OperationalFlush:ivb */
+	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
 	I915_WRITE(GEN7_COMMON_SLICE_CHICKEN1,
 		   GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
@@ -5107,6 +5119,9 @@  static void valleyview_init_clock_gating(struct drm_device *dev)
 		   _MASKED_BIT_ENABLE(GEN7_MAX_PS_THREAD_DEP |
 				      GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE));
 
+	/* WaDisable_RenderCache_OperationalFlush:vlv */
+	I915_WRITE(CACHE_MODE_0_GEN7, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE));
+
 	/* WaForceL3Serialization:vlv */
 	I915_WRITE(GEN7_L3SQCREG4, I915_READ(GEN7_L3SQCREG4) &
 		   ~L3SQ_URB_READ_CAM_MATCH_DISABLE);