diff mbox

[v2] drm/i915: Rework workaround init functions for BDW and CHV

Message ID 1409649257-12114-1-git-send-email-arun.siluvery@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

arun.siluvery@linux.intel.com Sept. 2, 2014, 9:14 a.m. UTC
This rework is based on suggestion from Chris.
Now w/a are organized in an array and all of them are emitted in
single fn instead of sending them individually. This approach is
clean and new w/a can be added with minimal changes.
The same array can be used when exporting them to debugfs and the
temporary array in the current implementation is not required.

v2: As suggested by Damien a new w/a reg definition is added.
This helps in initializing w/a that are unmasked registers.

Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 158 ++++++++++++++------------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  26 ++++++
 2 files changed, 93 insertions(+), 91 deletions(-)

Comments

Lespiau, Damien Sept. 2, 2014, 9:45 a.m. UTC | #1
On Tue, Sep 02, 2014 at 10:14:17AM +0100, Arun Siluvery wrote:
> This rework is based on suggestion from Chris.
> Now w/a are organized in an array and all of them are emitted in
> single fn instead of sending them individually. This approach is
> clean and new w/a can be added with minimal changes.
> The same array can be used when exporting them to debugfs and the
> temporary array in the current implementation is not required.
> 
> v2: As suggested by Damien a new w/a reg definition is added.
> This helps in initializing w/a that are unmasked registers.
> 
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 158 ++++++++++++++------------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  26 ++++++
>  2 files changed, 93 insertions(+), 91 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index c5e4dc7..3ed0ad5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -650,87 +650,80 @@ err:
>  	return ret;
>  }
>  
> -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
> -				       u32 addr, u32 value)
> +static int i915_request_emit_lri(struct intel_engine_cs *ring,
> +				 int num_registers,
> +				 const struct intel_wa_reg *lri_list)
>  {

[...]

> +	for (i = 0; i < num_registers; ++i) {
> +		u32 value;
> +		const struct intel_wa_reg *p = (lri_list + i);
>
> +		if (p->masked_register)
> +			value = (p->mask << 16) | p->value;
> +		else
> +			value = (I915_READ(p->addr) & ~p->mask) | p->value;

There's one case when this won't work, when several WAs for a single
'normal' register are defined. The read done here means only the last of
those W/As will end up being applied (because the last LRI to that
register will be the value that ends up in the register. We'll probably
need to coalesce all W/A defined for a single normal register into one
write.

Considering that, and the comment about the define, maybe the simplest
way to go forward with the patch is to forget the non-masked case for
the moment, especially as it's not used in this patch.

> +#define INIT_MASKED_WA(_addr, _value) \
> +	INIT_WA_REG(1, _addr, ((_value) >> 16), ((_value) & 0xFFFF))

Those outer '()' look unnecessary

> +#define INIT_UNMASKED_WA(_addr, _value) \
> +	INIT_WA_REG(0, _addr, ~(_value), _value)
> +

~value is not a mask. Consider values with one or more bits defined to 0.
Lespiau, Damien Sept. 2, 2014, 9:49 a.m. UTC | #2
On Tue, Sep 02, 2014 at 10:45:45AM +0100, Damien Lespiau wrote:
> There's one case when this won't work, when several WAs for a single
> 'normal' register are defined. The read done here means only the last of
> those W/As will end up being applied (because the last LRI to that
> register will be the value that ends up in the register. We'll probably
> need to coalesce all W/A defined for a single normal register into one
> write.

To more correct/clear, it's not the read that's the problem, in the case
where a normal register has several W/A, the last write will override
the previous ones.
Chris Wilson Sept. 2, 2014, 9:51 a.m. UTC | #3
On Tue, Sep 02, 2014 at 10:45:45AM +0100, Damien Lespiau wrote:
> On Tue, Sep 02, 2014 at 10:14:17AM +0100, Arun Siluvery wrote:
> > This rework is based on suggestion from Chris.
> > Now w/a are organized in an array and all of them are emitted in
> > single fn instead of sending them individually. This approach is
> > clean and new w/a can be added with minimal changes.
> > The same array can be used when exporting them to debugfs and the
> > temporary array in the current implementation is not required.
> > 
> > v2: As suggested by Damien a new w/a reg definition is added.
> > This helps in initializing w/a that are unmasked registers.
> > 
> > Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_ringbuffer.c | 158 ++++++++++++++------------------
> >  drivers/gpu/drm/i915/intel_ringbuffer.h |  26 ++++++
> >  2 files changed, 93 insertions(+), 91 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > index c5e4dc7..3ed0ad5 100644
> > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> > @@ -650,87 +650,80 @@ err:
> >  	return ret;
> >  }
> >  
> > -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
> > -				       u32 addr, u32 value)
> > +static int i915_request_emit_lri(struct intel_engine_cs *ring,
> > +				 int num_registers,
> > +				 const struct intel_wa_reg *lri_list)
> >  {
> 
> [...]
> 
> > +	for (i = 0; i < num_registers; ++i) {
> > +		u32 value;
> > +		const struct intel_wa_reg *p = (lri_list + i);
> >
> > +		if (p->masked_register)
> > +			value = (p->mask << 16) | p->value;
> > +		else
> > +			value = (I915_READ(p->addr) & ~p->mask) | p->value;
> 
> There's one case when this won't work, when several WAs for a single
> 'normal' register are defined. The read done here means only the last of
> those W/As will end up being applied (because the last LRI to that
> register will be the value that ends up in the register. We'll probably
> need to coalesce all W/A defined for a single normal register into one
> write.
> 
> Considering that, and the comment about the define, maybe the simplest
> way to go forward with the patch is to forget the non-masked case for
> the moment, especially as it's not used in this patch.

If you are still considering an in-kernel table, despite that it can be
done in userspace, design it to track all w/a and not just the ring
specific fixups.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c5e4dc7..3ed0ad5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -650,87 +650,80 @@  err:
 	return ret;
 }
 
-static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
-				       u32 addr, u32 value)
+static int i915_request_emit_lri(struct intel_engine_cs *ring,
+				 int num_registers,
+				 const struct intel_wa_reg *lri_list)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	int i;
+	int ret;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 
-	if (dev_priv->num_wa_regs > I915_MAX_WA_REGS)
-		return;
+	ret = intel_ring_begin(ring, (2 * num_registers + 1));
+	if (ret)
+		return ret;
 
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, addr);
-	intel_ring_emit(ring, value);
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_registers));
+	for (i = 0; i < num_registers; ++i) {
+		u32 value;
+		const struct intel_wa_reg *p = (lri_list + i);
 
-	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
-	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = (value) & 0xFFFF;
-	/* value is updated with the status of remaining bits of this
-	 * register when it is read from debugfs file
-	 */
-	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
-	dev_priv->num_wa_regs++;
+		if (p->masked_register)
+			value = (p->mask << 16) | p->value;
+		else
+			value = (I915_READ(p->addr) & ~p->mask) | p->value;
+		intel_ring_emit(ring, p->addr);
+		intel_ring_emit(ring, value);
+	}
 
-	return;
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
 }
 
-static int bdw8_init_workarounds(struct intel_engine_cs *ring)
-{
-	int ret;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+static const struct intel_wa_reg bdw_ring_init_context[] = {
 
 	/*
 	 * workarounds applied in this fn are part of register state context,
 	 * they need to be re-initialized followed by gpu reset, suspend/resume,
 	 * module reload.
 	 */
-	dev_priv->num_wa_regs = 0;
-	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
-	/*
-	 * update the number of dwords required based on the
-	 * actual number of workarounds applied
-	 */
-	ret = intel_ring_begin(ring, 24);
-	if (ret)
-		return ret;
 
 	/* WaDisablePartialInstShootdown:bdw */
 	/* WaDisableThreadStallDopClockGating:bdw */
 	/* FIXME: Unclear whether we really need this on production bdw. */
-	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
-					     | STALL_DOP_GATING_DISABLE));
+	INIT_MASKED_WA(GEN8_ROW_CHICKEN,
+		       _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) |
+		       _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)),
 
 	/* WaDisableDopClockGating:bdw May not be needed for production */
-	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
-			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+	INIT_MASKED_WA(GEN7_ROW_CHICKEN2,
+		       _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)),
 
 	/*
 	 * This GEN8_CENTROID_PIXEL_OPT_DIS W/A is only needed for
 	 * pre-production hardware
 	 */
-	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
-			   _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS
-					      | GEN8_SAMPLER_POWER_BYPASS_DIS));
+	INIT_MASKED_WA(HALF_SLICE_CHICKEN3,
+		       _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS) |
+		       _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)),
 
-	intel_ring_emit_wa(ring, GEN7_HALF_SLICE_CHICKEN1,
-			   _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE));
+	INIT_MASKED_WA(GEN7_HALF_SLICE_CHICKEN1,
+		       _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)),
 
-	intel_ring_emit_wa(ring, COMMON_SLICE_CHICKEN2,
-			   _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE));
+	INIT_MASKED_WA(COMMON_SLICE_CHICKEN2,
+		       _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)),
 
 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
 	 * workaround for for a possible hang in the unlikely event a TLB
 	 * invalidation occurs during a PSD flush.
 	 */
-	intel_ring_emit_wa(ring, HDC_CHICKEN0,
-			   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
+	INIT_MASKED_WA(HDC_CHICKEN0,
+		       _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT)),
 
 	/* Wa4x4STCOptimizationDisable:bdw */
-	intel_ring_emit_wa(ring, CACHE_MODE_1,
-			   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
+	INIT_MASKED_WA(CACHE_MODE_1,
+		       _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE)),
 
 	/*
 	 * BSpec recommends 8x4 when MSAA is used,
@@ -740,54 +733,40 @@  static int bdw8_init_workarounds(struct intel_engine_cs *ring)
 	 * disable bit, which we don't touch here, but it's good
 	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
 	 */
-	intel_ring_emit_wa(ring, GEN7_GT_MODE,
-			   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
-
-	intel_ring_advance(ring);
-
-	DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
-			 dev_priv->num_wa_regs);
-
-	return 0;
-}
-
-static int chv_init_workarounds(struct intel_engine_cs *ring)
-{
-	int ret;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	/*
-	 * workarounds applied in this fn are part of register state context,
-	 * they need to be re-initialized followed by gpu reset, suspend/resume,
-	 * module reload.
-	 */
-	dev_priv->num_wa_regs = 0;
-	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
-	ret = intel_ring_begin(ring, 12);
-	if (ret)
-		return ret;
+	INIT_MASKED_WA(GEN7_GT_MODE,
+		       (GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4)),
+};
 
+static const struct intel_wa_reg chv_ring_init_context[] = {
 	/* WaDisablePartialInstShootdown:chv */
-	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
-
 	/* WaDisableThreadStallDopClockGating:chv */
-	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
-			   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+	INIT_MASKED_WA(GEN8_ROW_CHICKEN,
+		       _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE) |
+		       _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)),
 
 	/* WaDisableDopClockGating:chv (pre-production hw) */
-	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
-			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
+	INIT_MASKED_WA(GEN7_ROW_CHICKEN2,
+		       _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE)),
 
 	/* WaDisableSamplerPowerBypass:chv (pre-production hw) */
-	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
-			   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
+	INIT_MASKED_WA(HALF_SLICE_CHICKEN3,
+		       _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)),
+};
 
-	intel_ring_advance(ring);
+static int ring_init_context(struct intel_engine_cs *ring)
+{
+	int ret = -EINVAL;
 
-	return 0;
+	if (IS_CHERRYVIEW(ring->dev))
+		ret = i915_request_emit_lri(ring,
+					    ARRAY_SIZE(chv_ring_init_context),
+					    chv_ring_init_context);
+	else
+		ret = i915_request_emit_lri(ring,
+					    ARRAY_SIZE(bdw_ring_init_context),
+					    bdw_ring_init_context);
+
+	return ret;
 }
 
 static int init_render_ring(struct intel_engine_cs *ring)
@@ -2275,10 +2254,7 @@  int intel_init_render_ring_buffer(struct drm_device *dev)
 					dev_priv->semaphore_obj = obj;
 			}
 		}
-		if (IS_CHERRYVIEW(dev))
-			ring->init_context = chv_init_workarounds;
-		else
-			ring->init_context = bdw_init_workarounds;
+		ring->init_context = ring_init_context;
 		ring->add_request = gen6_add_request;
 		ring->flush = gen8_render_ring_flush;
 		ring->irq_get = gen8_ring_get_irq;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 96479c8..c9ed06c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -317,6 +317,32 @@  struct  intel_engine_cs {
 	u32 (*get_cmd_length_mask)(u32 cmd_header);
 };
 
+/*
+ * Workaround register definition
+ * mask: bits correponding to the w/a
+ * value: represents reference values of w/a bits
+ */
+struct intel_wa_reg {
+	u32 masked_register : 1;
+	u32 addr;
+	u32 mask;
+	u32 value;
+};
+
+#define INIT_WA_REG(_masked, _addr, _mask, _value)	\
+{					\
+	.masked_register = _masked,	\
+	.addr = _addr,	\
+	.mask = _mask,	\
+	.value = _value,\
+}
+
+#define INIT_MASKED_WA(_addr, _value) \
+	INIT_WA_REG(1, _addr, ((_value) >> 16), ((_value) & 0xFFFF))
+
+#define INIT_UNMASKED_WA(_addr, _value) \
+	INIT_WA_REG(0, _addr, ~(_value), _value)
+
 bool intel_ring_initialized(struct intel_engine_cs *ring);
 
 static inline unsigned