diff mbox

[3/7] drm/i915/bdw: MI_FLUSH_DW a qword instead of dword

Message ID 1407176119-5294-3-git-send-email-rodrigo.vivi@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Rodrigo Vivi Aug. 4, 2014, 6:15 p.m. UTC
From: Ben Widawsky <benjamin.widawsky@intel.com>

The actual post sync op is "Write Immediate Data QWord." It is therefore
arguable that we should have always done a qword write.

The actual impetus for this patch is our decoder complains when we write
a dword and I was trying to eliminate the spurious errors. With this
patch, I've noticed a really strange reproducible error turns into a
different strange reproducible error - so it does indeed have some
effect of some sort.

This was also recommended to me by someone that is familiar with the
Windows driver.

It's based on top of the semaphore series, so it won't be easily
applied, I'd guess.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 95 +++++++++++++++++++++++++--------
 1 file changed, 74 insertions(+), 21 deletions(-)

Comments

Ben Widawsky Aug. 5, 2014, 1:21 a.m. UTC | #1
On Mon, Aug 04, 2014 at 11:15:15AM -0700, Rodrigo Vivi wrote:
> From: Ben Widawsky <benjamin.widawsky@intel.com>
> 
> The actual post sync op is "Write Immediate Data QWord." It is therefore
> arguable that we should have always done a qword write.
> 
> The actual impetus for this patch is our decoder complains when we write
> a dword and I was trying to eliminate the spurious errors. With this
> patch, I've noticed a really strange reproducible error turns into a
> different strange reproducible error - so it does indeed have some
> effect of some sort.
> 
> This was also recommended to me by someone that is familiar with the
> Windows driver.
> 
> It's based on top of the semaphore series, so it won't be easily
> applied, I'd guess.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

Did we determine this was needed for anything other than shutting up the
instruction decoder, for debug? Seems like if the existing stuff ain't
broke, don't fix it.

[snip]
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2908896..9a562b5 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -727,7 +727,7 @@  static int gen8_rcs_signal(struct intel_engine_cs *signaller,
 static int gen8_xcs_signal(struct intel_engine_cs *signaller,
 			   unsigned int num_dwords)
 {
-#define MBOX_UPDATE_DWORDS 6
+#define MBOX_UPDATE_DWORDS 8
 	struct drm_device *dev = signaller->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *waiter;
@@ -746,15 +746,18 @@  static int gen8_xcs_signal(struct intel_engine_cs *signaller,
 		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
 			continue;
 
-		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
+		intel_ring_emit(signaller, (MI_FLUSH_DW + 2) |
 					   MI_FLUSH_DW_OP_STOREDW);
 		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
 					   MI_FLUSH_DW_USE_GTT);
 		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
 		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
+		intel_ring_emit(signaller, 0); /* upper dword */
+
 		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
 					   MI_SEMAPHORE_TARGET(waiter->id));
 		intel_ring_emit(signaller, 0);
+		intel_ring_emit(signaller, MI_NOOP);
 	}
 
 	return 0;
@@ -1939,8 +1942,6 @@  static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
-	if (INTEL_INFO(ring->dev)->gen >= 8)
-		cmd += 1;
 	/*
 	 * Bspec vol 1c.5 - video engine command streamer:
 	 * "If ENABLED, all TLBs will be invalidated once the flush
@@ -1952,13 +1953,38 @@  static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
 			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	if (INTEL_INFO(ring->dev)->gen >= 8) {
-		intel_ring_emit(ring, 0); /* upper addr */
-		intel_ring_emit(ring, 0); /* value */
-	} else  {
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, MI_NOOP);
-	}
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+	return 0;
+}
+
+static int gen8_bsd_ring_flush(struct intel_engine_cs *ring,
+			       u32 invalidate, u32 flush)
+{
+	uint32_t cmd;
+	int ret;
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	cmd = MI_FLUSH_DW + 2;
+	/*
+	 * Bspec vol 1c.5 - video engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
+	if (invalidate & I915_GEM_GPU_DOMAINS)
+		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+	intel_ring_emit(ring, cmd);
+	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+	intel_ring_emit(ring, 0); /* upper addr */
+	intel_ring_emit(ring, 0); /* value */
+	intel_ring_emit(ring, 0); /* value */
+	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
 	return 0;
 }
@@ -2029,8 +2055,38 @@  gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 	return 0;
 }
 
-/* Blitter support (SandyBridge+) */
+static int gen8_ring_flush(struct intel_engine_cs *ring,
+			   u32 invalidate, u32 flush)
+{
+	uint32_t cmd;
+	int ret;
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	cmd = MI_FLUSH_DW + 2;
+	/*
+	 * Bspec vol 1c.3 - blitter engine command streamer:
+	 * "If ENABLED, all TLBs will be invalidated once the flush
+	 * operation is complete. This bit is only valid when the
+	 * Post-Sync Operation field is a value of 1h or 3h."
+	 */
+	if (invalidate & I915_GEM_DOMAIN_RENDER)
+		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+			MI_FLUSH_DW_OP_STOREDW;
+	intel_ring_emit(ring, cmd);
+	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
+	intel_ring_emit(ring, 0); /* upper addr */
+	intel_ring_emit(ring, 0); /* value */
+	intel_ring_emit(ring, 0); /* value */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
 
+/* Blitter support (SandyBridge+) */
 static int gen6_ring_flush(struct intel_engine_cs *ring,
 			   u32 invalidate, u32 flush)
 {
@@ -2043,8 +2099,7 @@  static int gen6_ring_flush(struct intel_engine_cs *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
-	if (INTEL_INFO(ring->dev)->gen >= 8)
-		cmd += 1;
+
 	/*
 	 * Bspec vol 1c.3 - blitter engine command streamer:
 	 * "If ENABLED, all TLBs will be invalidated once the flush
@@ -2056,13 +2111,8 @@  static int gen6_ring_flush(struct intel_engine_cs *ring,
 			MI_FLUSH_DW_OP_STOREDW;
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	if (INTEL_INFO(ring->dev)->gen >= 8) {
-		intel_ring_emit(ring, 0); /* upper addr */
-		intel_ring_emit(ring, 0); /* value */
-	} else  {
-		intel_ring_emit(ring, 0);
-		intel_ring_emit(ring, MI_NOOP);
-	}
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
 
 	if (IS_GEN7(dev) && !invalidate && flush)
@@ -2314,6 +2364,7 @@  int intel_init_bsd_ring_buffer(struct drm_device *dev)
 		if (INTEL_INFO(dev)->gen >= 8) {
 			ring->irq_enable_mask =
 				GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
+			ring->flush = gen8_bsd_ring_flush;
 			ring->irq_get = gen8_ring_get_irq;
 			ring->irq_put = gen8_ring_put_irq;
 			ring->dispatch_execbuffer =
@@ -2422,6 +2473,7 @@  int intel_init_blt_ring_buffer(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen >= 8) {
 		ring->irq_enable_mask =
 			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
+		ring->flush = gen8_ring_flush;
 		ring->irq_get = gen8_ring_get_irq;
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
@@ -2480,6 +2532,7 @@  int intel_init_vebox_ring_buffer(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen >= 8) {
 		ring->irq_enable_mask =
 			GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
+		ring->flush = gen8_ring_flush;
 		ring->irq_get = gen8_ring_get_irq;
 		ring->irq_put = gen8_ring_put_irq;
 		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;