diff mbox

[2/2] drm/i915: include hangcheck action and score in error_state

Message ID 1378472608-14597-1-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala Sept. 6, 2013, 1:03 p.m. UTC
Score and action reveals what all the rings were doing
and why hang was declared. Add idle state so that
we can distinguish between waiting and idle ring.

v2: - add idle as a hangcheck action
    - consensed hangcheck status to single line (Chris)
    - mark active explicitly when we are making progress (Chris)

Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |    2 ++
 drivers/gpu/drm/i915/i915_gpu_error.c   |   24 ++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_irq.c         |    5 +++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |    1 +
 4 files changed, 32 insertions(+)

Comments

Daniel Vetter Sept. 6, 2013, 3:56 p.m. UTC | #1
On Fri, Sep 06, 2013 at 04:03:28PM +0300, Mika Kuoppala wrote:
> Score and action reveals what all the rings were doing
> and why hang was declared. Add idle state so that
> we can distinguish between waiting and idle ring.
> 
> v2: - add idle as a hangcheck action
>     - consensed hangcheck status to single line (Chris)
>     - mark active explicitly when we are making progress (Chris)
> 
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>

Both patches merged to dinq, thanks for patches and review.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c5f0aba..1fb01b5 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -328,6 +328,8 @@  struct drm_i915_error_state {
 	u32 *active_bo_count, *pinned_bo_count;
 	struct intel_overlay_error_state *overlay;
 	struct intel_display_error_state *display;
+	int hangcheck_score[I915_NUM_RINGS];
+	enum intel_ring_hangcheck_action hangcheck_action[I915_NUM_RINGS];
 };
 
 struct intel_crtc_config;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index aba9d74..c38d575 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -213,6 +213,24 @@  static void print_error_buffers(struct drm_i915_error_state_buf *m,
 	}
 }
 
+static const char *hangcheck_action_to_str(enum intel_ring_hangcheck_action a)
+{
+	switch (a) {
+	case HANGCHECK_IDLE:
+		return "idle";
+	case HANGCHECK_WAIT:
+		return "wait";
+	case HANGCHECK_ACTIVE:
+		return "active";
+	case HANGCHECK_KICK:
+		return "kick";
+	case HANGCHECK_HUNG:
+		return "hung";
+	}
+
+	return "unknown";
+}
+
 static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 				  struct drm_device *dev,
 				  struct drm_i915_error_state *error,
@@ -253,6 +271,9 @@  static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
 	err_printf(m, "  waiting: %s\n", yesno(error->waiting[ring]));
 	err_printf(m, "  ring->head: 0x%08x\n", error->cpu_ring_head[ring]);
 	err_printf(m, "  ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]);
+	err_printf(m, "  hangcheck: %s [%d]\n",
+		   hangcheck_action_to_str(error->hangcheck_action[ring]),
+		   error->hangcheck_score[ring]);
 }
 
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
@@ -718,6 +739,9 @@  static void i915_record_ring_state(struct drm_device *dev,
 
 	error->cpu_ring_head[ring->id] = ring->head;
 	error->cpu_ring_tail[ring->id] = ring->tail;
+
+	error->hangcheck_score[ring->id] = ring->hangcheck.score;
+	error->hangcheck_action[ring->id] = ring->hangcheck.action;
 }
 
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9e48cf2..5350ef5 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1975,6 +1975,8 @@  static void i915_hangcheck_elapsed(unsigned long data)
 
 		if (ring->hangcheck.seqno == seqno) {
 			if (ring_idle(ring, seqno)) {
+				ring->hangcheck.action = HANGCHECK_IDLE;
+
 				if (waitqueue_active(&ring->irq_queue)) {
 					/* Issue a wake-up to catch stuck h/w. */
 					DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
@@ -2003,6 +2005,7 @@  static void i915_hangcheck_elapsed(unsigned long data)
 								    acthd);
 
 				switch (ring->hangcheck.action) {
+				case HANGCHECK_IDLE:
 				case HANGCHECK_WAIT:
 					break;
 				case HANGCHECK_ACTIVE:
@@ -2018,6 +2021,8 @@  static void i915_hangcheck_elapsed(unsigned long data)
 				}
 			}
 		} else {
+			ring->hangcheck.action = HANGCHECK_ACTIVE;
+
 			/* Gradually reduce the count so that we catch DoS
 			 * attempts across multiple batches.
 			 */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ad2dd65..b5aac57 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -34,6 +34,7 @@  struct  intel_hw_status_page {
 #define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)
 
 enum intel_ring_hangcheck_action {
+	HANGCHECK_IDLE = 0,
 	HANGCHECK_WAIT,
 	HANGCHECK_ACTIVE,
 	HANGCHECK_KICK,