diff mbox

[0137/1094] drm/i915: Include HW status page in error capture

Message ID 1413889294-31328-138-git-send-email-dheerajx.s.jamwal@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dheeraj Jamwal Oct. 21, 2014, 10:45 a.m. UTC
From: Chris Wilson <chris@chris-wilson.co.uk>

Many times in the past we have concluded that the cause of the GPU hang
has been that the hw status page was stale, usually because the GPU and
CPU disagreed over the address of the page. Having stumbled across yet
another issue that seems to be related to the HWSP, it is time to
include that information in the GPU error dump.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
(cherry picked from commit f3ce3821393e31a3f1a8ca6c24eb2d735a428445)

Signed-off-by: Dheeraj Jamwal <dheerajx.s.jamwal@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |    3 +-
 drivers/gpu/drm/i915/i915_gpu_error.c |   50 +++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ad474a9..b777be3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -318,6 +318,7 @@  struct drm_i915_error_state {
 	u32 tail[I915_NUM_RINGS];
 	u32 head[I915_NUM_RINGS];
 	u32 ctl[I915_NUM_RINGS];
+	u32 hws[I915_NUM_RINGS];
 	u32 ipeir[I915_NUM_RINGS];
 	u32 ipehr[I915_NUM_RINGS];
 	u32 instdone[I915_NUM_RINGS];
@@ -347,7 +348,7 @@  struct drm_i915_error_state {
 			int page_count;
 			u32 gtt_offset;
 			u32 *pages[0];
-		} *ringbuffer, *batchbuffer, *ctx;
+		} *ringbuffer, *batchbuffer, *ctx, *hws;
 		struct drm_i915_error_request {
 			long jiffies;
 			u32 seqno;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index d1b2623..60044e9 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -249,6 +249,7 @@  static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  HEAD: 0x%08x\n", error->head[ring]);
 	err_printf(m, "  TAIL: 0x%08x\n", error->tail[ring]);
 	err_printf(m, "  CTL: 0x%08x\n", error->ctl[ring]);
+	err_printf(m, "  HWS: 0x%08x\n", error->hws[ring]);
 	err_printf(m, "  ACTHD: 0x%08x\n", error->acthd[ring]);
 	err_printf(m, "  IPEIR: 0x%08x\n", error->ipeir[ring]);
 	err_printf(m, "  IPEHR: 0x%08x\n", error->ipehr[ring]);
@@ -390,6 +391,22 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			}
 		}
 
+		if ((obj = error->ring[i].hws)) {
+			err_printf(m, "%s --- HW Status = 0x%08x\n",
+				   dev_priv->ring[i].name,
+				   obj->gtt_offset);
+			offset = 0;
+			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
+				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
+					   offset,
+					   obj->pages[0][elt],
+					   obj->pages[0][elt+1],
+					   obj->pages[0][elt+2],
+					   obj->pages[0][elt+3]);
+					offset += 16;
+			}
+		}
+
 		if ((obj = error->ring[i].ctx)) {
 			err_printf(m, "%s --- HW Context = 0x%08x\n",
 				   dev_priv->ring[i].name,
@@ -472,6 +489,7 @@  static void i915_error_state_free(struct kref *error_ref)
 	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
 		i915_error_object_free(error->ring[i].batchbuffer);
 		i915_error_object_free(error->ring[i].ringbuffer);
+		i915_error_object_free(error->ring[i].hws);
 		i915_error_object_free(error->ring[i].ctx);
 		kfree(error->ring[i].requests);
 	}
@@ -787,6 +805,35 @@  static void i915_record_ring_state(struct drm_device *dev,
 	error->tail[ring->id] = I915_READ_TAIL(ring);
 	error->ctl[ring->id] = I915_READ_CTL(ring);
 
+	if (I915_NEED_GFX_HWS(dev)) {
+		int mmio;
+
+		if (IS_GEN7(dev)) {
+			switch (ring->id) {
+			default:
+			case RCS:
+				mmio = RENDER_HWS_PGA_GEN7;
+				break;
+			case BCS:
+				mmio = BLT_HWS_PGA_GEN7;
+				break;
+			case VCS:
+				mmio = BSD_HWS_PGA_GEN7;
+				break;
+			case VECS:
+				mmio = VEBOX_HWS_PGA_GEN7;
+				break;
+			}
+		} else if (IS_GEN6(ring->dev)) {
+			mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
+		} else {
+			/* XXX: gen8 returns to sanity */
+			mmio = RING_HWS_PGA(ring->mmio_base);
+		}
+
+		error->hws[ring->id] = I915_READ(mmio);
+	}
+
 	error->cpu_ring_head[ring->id] = ring->head;
 	error->cpu_ring_tail[ring->id] = ring->tail;
 
@@ -840,6 +887,9 @@  static void i915_gem_record_rings(struct drm_device *dev,
 		error->ring[i].ringbuffer =
 			i915_error_ggtt_object_create(dev_priv, ring->obj);
 
+		if (ring->status_page.obj)
+			error->ring[i].hws =
+				i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
 
 		i915_gem_record_active_context(ring, error, &error->ring[i]);