@@ -356,7 +356,6 @@ static void notify_ring(struct drm_device *dev,
wake_up_all(&ring->irq_queue);
if (i915_enable_hangcheck) {
- dev_priv->gpu_error.hangcheck_count = 0;
mod_timer(&dev_priv->gpu_error.hangcheck_timer,
round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
}
@@ -1818,52 +1817,58 @@ void i915_hangcheck_elapsed(unsigned long data)
struct drm_device *dev = (struct drm_device *)data;
drm_i915_private_t *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring;
- bool err = false, idle;
int i;
- u32 seqno[I915_NUM_RINGS];
- bool work_done;
+ int busy_count = 0, rings_hung = 0;
if (!i915_enable_hangcheck)
return;
- idle = true;
for_each_ring(ring, dev_priv, i) {
- seqno[i] = ring->get_seqno(ring, false);
- idle &= i915_hangcheck_ring_idle(ring, seqno[i], &err);
- }
+ u32 seqno;
+ bool idle, err = false;
+
+ seqno = ring->get_seqno(ring, false);
+ idle = i915_hangcheck_ring_idle(ring, seqno, &err);
- /* If all work is done then ACTHD clearly hasn't advanced. */
- if (idle) {
- if (err) {
- if (i915_hangcheck_hung(dev))
- return;
+ if (idle) {
+ if (err)
+ ring->hangcheck_score++;
+ else
+ ring->hangcheck_score = 0;
+ } else {
+ busy_count++;
- goto repeat;
+ if (ring->hangcheck_seqno == seqno) {
+ ring->hangcheck_score++;
+
+ /* If the ring is not waiting, raise
+ the score further */
+ if (i915_hangcheck_ring_hung(dev, ring))
+ ring->hangcheck_score++;
+ } else {
+ ring->hangcheck_score = 0;
+ }
}
- dev_priv->gpu_error.hangcheck_count = 0;
- return;
+ ring->hangcheck_seqno = seqno;
}
- work_done = false;
for_each_ring(ring, dev_priv, i) {
- if (ring->hangcheck_seqno != seqno[i]) {
- work_done = true;
- ring->hangcheck_seqno = seqno[i];
+ if (ring->hangcheck_score > 2) {
+ rings_hung++;
+ DRM_ERROR("%s seems hung\n", ring->name);
}
}
- if (!work_done) {
- if (i915_hangcheck_hung(dev))
- return;
- } else {
- dev_priv->gpu_error.hangcheck_count = 0;
- }
+ if (rings_hung)
+ return i915_handle_error(dev, true);
-repeat:
- /* Reset timer case chip hangs without another request being added */
- mod_timer(&dev_priv->gpu_error.hangcheck_timer,
- round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
+ if (busy_count)
+ /* Reset timer case chip hangs without another request
+ * being added */
+ mod_timer(&dev_priv->gpu_error.hangcheck_timer,
+ round_jiffies_up(jiffies +
+ DRM_I915_HANGCHECK_JIFFIES));
}
/* drm_dma.h hooks
@@ -138,6 +138,7 @@ struct intel_ring_buffer {
struct drm_i915_gem_object *last_context_obj;
u32 hangcheck_seqno;
+ int hangcheck_score;
void *private;
};
Add a per-ring score that marks the possible culprit for a GPU hang. If a ring is busy and not waiting, it will get the highest score across calls to i915_hangcheck_elapsed. This way we are most likely to find the ring that caused the hang among the waiting ones. Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com> --- drivers/gpu/drm/i915/i915_irq.c | 65 +++++++++++++++++-------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 2 files changed, 36 insertions(+), 30 deletions(-)