@@ -1824,6 +1824,27 @@ void i915_reset(struct drm_i915_private *dev_priv)
goto wakeup;
}
+/**
+ * i915_reset_engine - try to recover a single GPU engine after a hang
+ * @engine: engine to reset
+ *
+ * Placeholder for the per-engine reset path. No engine reset sequence is
+ * performed yet, so the call always reports failure and flags a reset as
+ * in progress so that recovery goes through the full GPU reset instead.
+ *
+ * Returns zero on successful reset or otherwise an error code; at present
+ * the result is always -ENODEV.
+ */
+int i915_reset_engine(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	/* FIXME: replace me with engine reset sequence */
+	int err = -ENODEV;
+
+	/*
+	 * Mark a reset as pending in the error state; the full gpu reset
+	 * is used to recover on error.
+	 */
+	set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags);
+
+	return err;
+}
+
+
static int i915_pm_suspend(struct device *kdev)
{
struct pci_dev *pdev = to_pci_dev(kdev);
@@ -781,6 +781,7 @@ struct intel_csr {
func(has_ddi); \
func(has_decoupled_mmio); \
func(has_dp_mst); \
+ func(has_engine_reset); \
func(has_fbc); \
func(has_fpga_dbg); \
func(has_full_ppgtt); \
@@ -3007,6 +3008,8 @@ extern void i915_driver_unload(struct drm_device *dev);
extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
extern void i915_reset(struct drm_i915_private *dev_priv);
+extern bool intel_has_engine_reset(struct drm_i915_private *dev_priv);
+extern int i915_reset_engine(struct intel_engine_cs *engine);
extern int intel_guc_reset(struct drm_i915_private *dev_priv);
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
@@ -2594,7 +2594,8 @@ static void i915_error_wake_up(struct drm_i915_private *dev_priv)
* Fire an error uevent so userspace can see that a hang or error
* was detected.
*/
-static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
+static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv,
+ u32 engine_mask)
{
struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj;
char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
@@ -2603,7 +2604,15 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
- DRM_DEBUG_DRIVER("resetting chip\n");
+ /*
+ * This event needs to be sent before performing gpu reset. When
+ * engine resets are supported we iterate through all engines and
+ * reset hung engines individually. To keep the event dispatch
+ * mechanism consistent with full gpu reset, this is only sent once
+ * even when multiple engines are hung. It is also safe to move this
+ * here because when we are in this function, we will definitely
+ * perform gpu reset.
+ */
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
/*
@@ -2614,30 +2623,55 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
* simulated reset via debugs, so get an RPM reference.
*/
intel_runtime_pm_get(dev_priv);
- intel_prepare_reset(dev_priv);
- do {
- /*
- * All state reset _must_ be completed before we update the
- * reset counter, for otherwise waiters might miss the reset
- * pending state and not properly drop locks, resulting in
- * deadlocks with the reset work.
- */
- if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
- i915_reset(dev_priv);
- mutex_unlock(&dev_priv->drm.struct_mutex);
+ /* If hardware supports it (GEN8+), try engine reset first */
+ if (intel_has_engine_reset(dev_priv)) {
+ struct intel_engine_cs *engine;
+ unsigned int tmp, ret;
+
+ for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
+ ret = i915_reset_engine(engine);
+ /* on failure fall back to full gpu reset for recovery */
+ if (ret)
+ break;
}
+ }
- /* We need to wait for anyone holding the lock to wakeup */
- } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
- I915_RESET_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE,
- HZ));
+ /*
+ * If the waiter already held the struct_mutex lock, it may have already
+ * triggered the GPU reset and the reset_in_progress can be false.
+ */
+ if (i915_reset_in_progress(&dev_priv->gpu_error)) {
+ DRM_DEBUG_DRIVER("resetting chip\n");
+ intel_prepare_reset(dev_priv);
+
+ do {
+ /*
+ * All state reset _must_ be completed before we update
+ * the reset counter, for otherwise waiters might miss
+ * the reset pending state and not properly drop locks,
+ * resulting in deadlocks with the reset work.
+ */
+ if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
+ i915_reset(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+ }
+
+ /*
+ * We need to wait for anyone holding the lock to
+ * wakeup.
+ */
+ } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags,
+ I915_RESET_IN_PROGRESS,
+ TASK_UNINTERRUPTIBLE,
+ HZ));
+
+ intel_finish_reset(dev_priv);
+ }
- intel_finish_reset(dev_priv);
intel_runtime_pm_put(dev_priv);
- if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags))
+ if (!i915_terminally_wedged(&dev_priv->gpu_error))
kobject_uevent_env(kobj,
KOBJ_CHANGE, reset_done_event);
@@ -2728,9 +2762,15 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
if (!engine_mask)
return;
- if (test_and_set_bit(I915_RESET_IN_PROGRESS,
- &dev_priv->gpu_error.flags))
- return;
+ /*
+ * Engine reset support is only available from Gen8 onwards so if
+ * it is not available or explicitly disabled, use full gpu reset.
+ */
+ if (!intel_has_engine_reset(dev_priv)) {
+ if (test_and_set_bit(I915_RESET_IN_PROGRESS,
+ &dev_priv->gpu_error.flags))
+ return;
+ }
/*
* Wakeup waiting processes so that the reset function
@@ -2746,7 +2786,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
*/
i915_error_wake_up(dev_priv);
- i915_reset_and_wakeup(dev_priv);
+ i915_reset_and_wakeup(dev_priv, engine_mask);
}
/* Called from drm generic code, passed 'crtc' which
@@ -308,7 +308,8 @@ static const struct intel_device_info intel_haswell_info = {
BDW_COLORS, \
.has_logical_ring_contexts = 1, \
.has_full_48bit_ppgtt = 1, \
- .has_64bit_reloc = 1
+ .has_64bit_reloc = 1, \
+ .has_engine_reset = 1
static const struct intel_device_info intel_broadwell_info = {
BDW_FEATURES,
@@ -339,6 +340,7 @@ static const struct intel_device_info intel_cherryview_info = {
.has_gmch_display = 1,
.has_aliasing_ppgtt = 1,
.has_full_ppgtt = 1,
+ .has_engine_reset = 1,
.display_mmio_offset = VLV_DISPLAY_BASE,
GEN_CHV_PIPEOFFSETS,
CURSOR_OFFSETS,
@@ -390,6 +392,7 @@ static const struct intel_device_info intel_skylake_gt3_info = {
.has_aliasing_ppgtt = 1, \
.has_full_ppgtt = 1, \
.has_full_48bit_ppgtt = 1, \
+ .has_engine_reset = 1, \
GEN_DEFAULT_PIPEOFFSETS, \
IVB_CURSOR_OFFSETS, \
BDW_COLORS
@@ -1851,6 +1851,17 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
return intel_get_gpu_reset(dev_priv) != NULL;
}
+/*
+ * Report whether per-engine reset may be used on this device.
+ *
+ * All three conditions must hold: the device info advertises
+ * has_engine_reset, no GuC execbuf client is present, and the i915.reset
+ * module parameter equals 2.
+ *
+ * When GuC submission is enabled, GuC manages ELSP and can initiate the
+ * engine reset too. For now, fall back to full GPU reset if it is enabled.
+ */
+bool intel_has_engine_reset(struct drm_i915_private *dev_priv)
+{
+	/* Platform does not advertise engine reset at all. */
+	if (!dev_priv->info.has_engine_reset)
+		return false;
+
+	/* A GuC execbuf client exists: leave recovery to full GPU reset. */
+	if (dev_priv->guc.execbuf_client)
+		return false;
+
+	/* Engine reset has to be selected via the module parameter. */
+	return i915.reset == 2;
+}
+
+
int intel_guc_reset(struct drm_i915_private *dev_priv)
{
int ret;