Message ID | 1458331676-567-3-git-send-email-arun.siluvery@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Arun Siluvery <arun.siluvery@linux.intel.com> writes: > [ text/plain ] > In preparation for engine reset, the wedged argument of i915_handle_error() > is extended to reflect as a mask of engines that are hung. This is further > passed down to error state capture functions which are also updated. > > Engine reset recovery mechanism uses this mask and schedules recovery work > for those particular engines. > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Mika Kuoppala <mika.kuoppala@intel.com> > Signed-off-by: Tomas Elf <tomas.elf@intel.com> > Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 4 ++-- > drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++---- > drivers/gpu/drm/i915/i915_irq.c | 16 ++++++++-------- > 3 files changed, 14 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 549a232..49ac065 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2735,7 +2735,7 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port); > /* i915_irq.c */ > void i915_queue_hangcheck(struct drm_device *dev); > __printf(3, 4) > -void i915_handle_error(struct drm_device *dev, bool wedged, > +void i915_handle_error(struct drm_device *dev, u32 engine_mask, > const char *fmt, ...); > > extern void intel_irq_init(struct drm_i915_private *dev_priv); > @@ -3321,7 +3321,7 @@ static inline void i915_error_state_buf_release( > { > kfree(eb->buf); > } > -void i915_capture_error_state(struct drm_device *dev, bool wedge, > +void i915_capture_error_state(struct drm_device *dev, u32 engine_mask, > const char *error_msg); > void i915_error_state_get(struct drm_device *dev, > struct i915_error_state_file_priv *error_priv); > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c > index db8600a..1f8ff06 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -1301,7 +1301,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, > > static void i915_error_capture_msg(struct drm_device *dev, > struct drm_i915_error_state *error, > - bool wedged, > + u32 engine_mask, > const char *error_msg) > { > struct drm_i915_private *dev_priv = dev->dev_private; > @@ -1324,7 +1324,7 @@ static void i915_error_capture_msg(struct drm_device *dev, > scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, > ", reason: %s, action: %s", > error_msg, > - wedged ? "reset" : "continue"); > + engine_mask ? "reset" : "continue"); > } > > static void i915_capture_gen_state(struct drm_i915_private *dev_priv, > @@ -1347,7 +1347,7 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, > * out a structure which becomes available in debugfs for user level tools > * to pick up. > */ > -void i915_capture_error_state(struct drm_device *dev, bool wedged, > +void i915_capture_error_state(struct drm_device *dev, u32 engine_mask, > const char *error_msg) > { > static bool warned; > @@ -1375,7 +1375,7 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged, > error->overlay = intel_overlay_capture_error_state(dev); > error->display = intel_display_capture_error_state(dev); > > - i915_error_capture_msg(dev, error, wedged, error_msg); > + i915_error_capture_msg(dev, error, engine_mask, error_msg); > DRM_INFO("%s\n", error->error_msg); > > spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c > index 8f3e330..a55a7cc 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -2653,14 +2653,14 @@ static void i915_report_and_clear_eir(struct drm_device *dev) > /** > * i915_handle_error - handle a gpu error > * @dev: drm device > - * > + * @engine_mask: mask representing engines that are hung > * Do some basic checking of register state at error time and > * dump it to the syslog. Also call i915_capture_error_state() to make > * sure we get a record and make it available in debugfs. Fire a uevent > * so userspace knows something bad happened (should trigger collection > * of a ring dump etc.). > */ > -void i915_handle_error(struct drm_device *dev, bool wedged, > +void i915_handle_error(struct drm_device *dev, u32 engine_mask, > const char *fmt, ...) > { > struct drm_i915_private *dev_priv = dev->dev_private; > @@ -2671,10 +2671,10 @@ void i915_handle_error(struct drm_device *dev, bool wedged, > vscnprintf(error_msg, sizeof(error_msg), fmt, args); > va_end(args); > > - i915_capture_error_state(dev, wedged, error_msg); > + i915_capture_error_state(dev, engine_mask, error_msg); > i915_report_and_clear_eir(dev); > > - if (wedged) { > + if (engine_mask) { > atomic_or(I915_RESET_IN_PROGRESS_FLAG, > &dev_priv->gpu_error.reset_counter); > > @@ -3033,7 +3033,7 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd) > */ > tmp = I915_READ_CTL(engine); > if (tmp & RING_WAIT) { > - i915_handle_error(dev, false, > + i915_handle_error(dev, 0, > "Kicking stuck wait on %s", > engine->name); > I915_WRITE_CTL(engine, tmp); > @@ -3045,7 +3045,7 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd) > default: > return HANGCHECK_HUNG; > case 1: > - i915_handle_error(dev, false, > + i915_handle_error(dev, 0, > "Kicking stuck semaphore on %s", > engine->name); > I915_WRITE_CTL(engine, tmp); > @@ -3189,12 +3189,12 @@ static void i915_hangcheck_elapsed(struct work_struct *work) > DRM_INFO("%s on %s\n", > stuck[i] ? "stuck" : "no progress", > engine->name); > - rings_hung++; > + rings_hung |= intel_engine_flag(engine); We can change the int to u32 when we rename rings_hung to engines_hung. Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> > } > } > > if (rings_hung) { > - i915_handle_error(dev, true, "Ring hung"); > + i915_handle_error(dev, rings_hung, "Engine(s) hung"); > goto out; > } > > -- > 1.9.1
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 549a232..49ac065 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2735,7 +2735,7 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port); /* i915_irq.c */ void i915_queue_hangcheck(struct drm_device *dev); __printf(3, 4) -void i915_handle_error(struct drm_device *dev, bool wedged, +void i915_handle_error(struct drm_device *dev, u32 engine_mask, const char *fmt, ...); extern void intel_irq_init(struct drm_i915_private *dev_priv); @@ -3321,7 +3321,7 @@ static inline void i915_error_state_buf_release( { kfree(eb->buf); } -void i915_capture_error_state(struct drm_device *dev, bool wedge, +void i915_capture_error_state(struct drm_device *dev, u32 engine_mask, const char *error_msg); void i915_error_state_get(struct drm_device *dev, struct i915_error_state_file_priv *error_priv); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index db8600a..1f8ff06 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1301,7 +1301,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, static void i915_error_capture_msg(struct drm_device *dev, struct drm_i915_error_state *error, - bool wedged, + u32 engine_mask, const char *error_msg) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -1324,7 +1324,7 @@ static void i915_error_capture_msg(struct drm_device *dev, scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", reason: %s, action: %s", error_msg, - wedged ? "reset" : "continue"); + engine_mask ? "reset" : "continue"); } static void i915_capture_gen_state(struct drm_i915_private *dev_priv, @@ -1347,7 +1347,7 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, * out a structure which becomes available in debugfs for user level tools * to pick up. */ -void i915_capture_error_state(struct drm_device *dev, bool wedged, +void i915_capture_error_state(struct drm_device *dev, u32 engine_mask, const char *error_msg) { static bool warned; @@ -1375,7 +1375,7 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged, error->overlay = intel_overlay_capture_error_state(dev); error->display = intel_display_capture_error_state(dev); - i915_error_capture_msg(dev, error, wedged, error_msg); + i915_error_capture_msg(dev, error, engine_mask, error_msg); DRM_INFO("%s\n", error->error_msg); spin_lock_irqsave(&dev_priv->gpu_error.lock, flags); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8f3e330..a55a7cc 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2653,14 +2653,14 @@ static void i915_report_and_clear_eir(struct drm_device *dev) /** * i915_handle_error - handle a gpu error * @dev: drm device - * + * @engine_mask: mask representing engines that are hung * Do some basic checking of register state at error time and * dump it to the syslog. Also call i915_capture_error_state() to make * sure we get a record and make it available in debugfs. Fire a uevent * so userspace knows something bad happened (should trigger collection * of a ring dump etc.). */ -void i915_handle_error(struct drm_device *dev, bool wedged, +void i915_handle_error(struct drm_device *dev, u32 engine_mask, const char *fmt, ...) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -2671,10 +2671,10 @@ void i915_handle_error(struct drm_device *dev, bool wedged, vscnprintf(error_msg, sizeof(error_msg), fmt, args); va_end(args); - i915_capture_error_state(dev, wedged, error_msg); + i915_capture_error_state(dev, engine_mask, error_msg); i915_report_and_clear_eir(dev); - if (wedged) { + if (engine_mask) { atomic_or(I915_RESET_IN_PROGRESS_FLAG, &dev_priv->gpu_error.reset_counter); @@ -3033,7 +3033,7 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd) */ tmp = I915_READ_CTL(engine); if (tmp & RING_WAIT) { - i915_handle_error(dev, false, + i915_handle_error(dev, 0, "Kicking stuck wait on %s", engine->name); I915_WRITE_CTL(engine, tmp); @@ -3045,7 +3045,7 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd) default: return HANGCHECK_HUNG; case 1: - i915_handle_error(dev, false, + i915_handle_error(dev, 0, "Kicking stuck semaphore on %s", engine->name); I915_WRITE_CTL(engine, tmp); @@ -3189,12 +3189,12 @@ static void i915_hangcheck_elapsed(struct work_struct *work) DRM_INFO("%s on %s\n", stuck[i] ? "stuck" : "no progress", engine->name); - rings_hung++; + rings_hung |= intel_engine_flag(engine); } } if (rings_hung) { - i915_handle_error(dev, true, "Ring hung"); + i915_handle_error(dev, rings_hung, "Engine(s) hung"); goto out; }