@@ -1794,7 +1794,7 @@ static int i915_resume_switcheroo(struct drm_device *dev)
}
/**
- * i915_reset - reset chip after a hang
+ * i915_reset_chip - reset chip after a hang
* @dev_priv: device private to reset
*
* Reset the chip. Useful if a hang is detected. Marks the device as wedged
@@ -1810,7 +1810,7 @@ static int i915_resume_switcheroo(struct drm_device *dev)
* - re-init interrupt state
* - re-init display
*/
-void i915_reset(struct drm_i915_private *dev_priv)
+void i915_reset_chip(struct drm_i915_private *dev_priv)
{
struct i915_gpu_error *error = &dev_priv->gpu_error;
int ret;
@@ -1821,6 +1821,8 @@ void i915_reset(struct drm_i915_private *dev_priv)
if (!test_bit(I915_RESET_HANDOFF, &error->flags))
return;
+ DRM_DEBUG_DRIVER("resetting chip\n");
+
/* Clear any previous failed attempts at recovery. Time to try again. */
if (!i915_gem_unset_wedged(dev_priv))
goto wakeup;
@@ -1884,6 +1886,49 @@ void i915_reset(struct drm_i915_private *dev_priv)
goto finish;
}
+/**
+ * i915_reset_engine - reset GPU engine to recover from a hang
+ * @engine: engine to reset
+ *
+ * Reset a specific GPU engine. Useful if a hang is detected. Currently a stub
+ * that always fails with -ENODEV. Returns zero on success, else an error code.
+ */
+int i915_reset_engine(struct intel_engine_cs *engine)
+{
+ /* FIXME: replace me with engine reset sequence */
+ return -ENODEV;
+}
+
+/**
+ * i915_reset - start either engine or full GPU reset to recover from a hang
+ * @dev_priv: device private
+ * @engine_mask: mask representing engines that are hung
+ *
+ * Wrapper function to initiate a GPU reset. When the platform supports it and
+ * exactly one engine is hung, attempt to reset only that engine. If the engine
+ * reset fails, is not supported, or @engine_mask names zero or several engines
+ * (where per-engine reset gains little), fall back to a full GPU reset.
+ *
+ * Caller must hold the struct_mutex.
+ */
+void i915_reset(struct drm_i915_private *dev_priv, u32 engine_mask)
+{
+	/* try engine reset first, but only for exactly one hung engine */
+	if (intel_has_reset_engine(dev_priv) &&
+	    engine_mask && !(engine_mask & (engine_mask - 1))) {
+		struct intel_engine_cs *engine =
+			dev_priv->engine[intel_engineid_from_flag(engine_mask)];
+
+		if (i915_reset_engine(engine))
+			goto reset_chip;
+
+		return;
+	}
+
+reset_chip:
+	i915_reset_chip(dev_priv);
+}
+
static int i915_pm_suspend(struct device *kdev)
{
struct pci_dev *pdev = to_pci_dev(kdev);
@@ -832,6 +832,7 @@ struct intel_csr {
func(has_ddi); \
func(has_decoupled_mmio); \
func(has_dp_mst); \
+ func(has_reset_engine); \
func(has_fbc); \
func(has_fpga_dbg); \
func(has_full_ppgtt); \
@@ -1636,6 +1637,9 @@ struct i915_gpu_error {
#define I915_RESET_HANDOFF 1
#define I915_WEDGED (BITS_PER_LONG - 1)
+ /* if available, engine-specific reset is tried before full gpu reset */
+ u32 reset_engine_mask;
+
/**
* Waitqueue to signal when a hang is detected. Used to for waiters
* to release the struct_mutex for the reset to procede.
@@ -3038,7 +3042,8 @@ extern int i915_driver_load(struct pci_dev *pdev,
extern void i915_driver_unload(struct drm_device *dev);
extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
-extern void i915_reset(struct drm_i915_private *dev_priv);
+extern void i915_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
+extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
extern int intel_guc_reset(struct drm_i915_private *dev_priv);
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
@@ -1016,7 +1016,8 @@ static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *req
return false;
__set_current_state(TASK_RUNNING);
- i915_reset(request->i915);
+ i915_reset(request->i915,
+ request->i915->gpu_error.reset_engine_mask);
return true;
}
@@ -2647,7 +2647,15 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
- DRM_DEBUG_DRIVER("resetting chip\n");
+	/*
+	 * This event needs to be sent before performing gpu reset. When
+	 * engine reset is supported, a single hung engine may be reset on
+	 * its own instead of the whole GPU. To keep the event dispatch
+	 * mechanism consistent with full gpu reset, this is only sent once
+	 * even when multiple engines are hung. It is safe to send it
+	 * unconditionally here because once we are in this function, we
+	 * will definitely perform a gpu reset.
+	 */
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
intel_prepare_reset(dev_priv);
@@ -2663,7 +2671,8 @@ static void i915_reset_and_wakeup(struct drm_i915_private *dev_priv)
* deadlocks with the reset work.
*/
if (mutex_trylock(&dev_priv->drm.struct_mutex)) {
- i915_reset(dev_priv);
+ i915_reset(dev_priv,
+ dev_priv->gpu_error.reset_engine_mask);
mutex_unlock(&dev_priv->drm.struct_mutex);
}
@@ -2780,6 +2789,12 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
&dev_priv->gpu_error.flags))
goto out;
+ /*
+ * Save which engines need reset; if engine support is available,
+ * we can just reset the hung engines.
+ */
+ dev_priv->gpu_error.reset_engine_mask = engine_mask;
+
i915_reset_and_wakeup(dev_priv);
out:
@@ -308,7 +308,8 @@ static const struct intel_device_info intel_haswell_info = {
BDW_COLORS, \
.has_logical_ring_contexts = 1, \
.has_full_48bit_ppgtt = 1, \
- .has_64bit_reloc = 1
+ .has_64bit_reloc = 1, \
+ .has_reset_engine = 1
static const struct intel_device_info intel_broadwell_info = {
BDW_FEATURES,
@@ -340,6 +341,7 @@ static const struct intel_device_info intel_cherryview_info = {
.has_gmch_display = 1,
.has_aliasing_ppgtt = 1,
.has_full_ppgtt = 1,
+ .has_reset_engine = 1,
.display_mmio_offset = VLV_DISPLAY_BASE,
GEN_CHV_PIPEOFFSETS,
CURSOR_OFFSETS,
@@ -389,6 +391,7 @@ static const struct intel_device_info intel_skylake_gt3_info = {
.has_aliasing_ppgtt = 1, \
.has_full_ppgtt = 1, \
.has_full_48bit_ppgtt = 1, \
+ .has_reset_engine = 1, \
GEN_DEFAULT_PIPEOFFSETS, \
IVB_CURSOR_OFFSETS, \
BDW_COLORS
@@ -438,6 +438,22 @@ intel_engine_flag(const struct intel_engine_cs *engine)
return 1 << engine->id;
}
+/* engine id for a one-bit engine_mask; note a zero mask also yields id 0 */
+static inline unsigned
+intel_engineid_from_flag(unsigned engine_mask)
+{
+	unsigned id;
+
+	GEM_BUG_ON(engine_mask & (engine_mask - 1));
+
+	for (id = 0; engine_mask > 1; engine_mask >>= 1)
+		id++;
+
+	GEM_BUG_ON(id >= I915_NUM_ENGINES);
+
+	return id;
+}
+
static inline void
intel_flush_status_page(struct intel_engine_cs *engine, int reg)
{
@@ -1751,6 +1751,17 @@ bool intel_has_gpu_reset(struct drm_i915_private *dev_priv)
return intel_get_gpu_reset(dev_priv) != NULL;
}
+/*
+ * Engine reset needs platform support, i915.reset == 2 and no GuC execbuf
+ * client: with GuC submission the GuC manages ELSP, so use full GPU reset.
+ */
+bool intel_has_reset_engine(struct drm_i915_private *dev_priv)
+{
+	if (!dev_priv->info.has_reset_engine || dev_priv->guc.execbuf_client)
+		return false;
+	return i915.reset == 2;
+}
+
int intel_guc_reset(struct drm_i915_private *dev_priv)
{
int ret;
@@ -307,7 +307,7 @@ static int igt_global_reset(void *arg)
mutex_lock(&i915->drm.struct_mutex);
reset_count = i915_reset_count(&i915->gpu_error);
- i915_reset(i915);
+ i915_reset(i915, ALL_ENGINES);
if (i915_reset_count(&i915->gpu_error) == reset_count) {
pr_err("No GPU reset recorded!\n");
@@ -472,7 +472,7 @@ static int igt_reset_queue(void *arg)
reset_count = fake_hangcheck(prev);
- i915_reset(i915);
+ i915_reset(i915, ALL_ENGINES);
GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
&i915->gpu_error.flags));