@@ -1831,7 +1831,7 @@ void i915_reset_chip(struct drm_i915_private *dev_priv)
pr_notice("drm/i915: Resetting chip after gpu hang\n");
disable_irq(dev_priv->drm.irq);
- ret = i915_gem_reset_prepare(dev_priv);
+ ret = i915_gem_reset_prepare(dev_priv, ALL_ENGINES);
if (ret) {
DRM_ERROR("GPU recovery failed\n");
intel_gpu_reset(dev_priv, ALL_ENGINES);
@@ -1873,7 +1873,7 @@ void i915_reset_chip(struct drm_i915_private *dev_priv)
i915_queue_hangcheck(dev_priv);
finish:
- i915_gem_reset_finish(dev_priv);
+ i915_gem_reset_finish(dev_priv, ALL_ENGINES);
enable_irq(dev_priv->drm.irq);
wakeup:
@@ -1892,11 +1892,91 @@ void i915_reset_chip(struct drm_i915_private *dev_priv)
*
* Reset a specific GPU engine. Useful if a hang is detected.
* Returns zero on successful reset or otherwise an error code.
+ *
+ * Caller must hold the struct_mutex.
+ *
+ * Procedure is:
+ *  - identify the request that caused the hang and drop it
+ *  - force the engine to idle by issuing a reset request
+ *  - reset the engine
+ *  - restart submissions to the engine
*/
int i915_reset_engine(struct intel_engine_cs *engine)
{
- /* FIXME: replace me with engine reset sequence */
- return -ENODEV;
+ int ret;
+ struct drm_i915_private *dev_priv = engine->i915;
+ struct i915_gpu_error *error = &dev_priv->gpu_error;
+
+ lockdep_assert_held(&dev_priv->drm.struct_mutex);
+ GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
+
+ if (!test_and_clear_bit(I915_RESET_HANDOFF, &error->flags))
+ return 0;
+
+ DRM_DEBUG_DRIVER("resetting %s\n", engine->name);
+
+ /*
+ * We need to first idle the engine by issuing a reset request and
+ * then perform a soft reset and re-initialize the hw state; the GT
+ * power needs to stay awake throughout this whole process, so hold
+ * a forcewake reference across it.
+ */
+ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+ disable_irq(dev_priv->drm.irq);
+ ret = i915_gem_reset_prepare(dev_priv, intel_engine_flag(engine));
+ if (ret) {
+ DRM_ERROR("Previous reset failed - promote to full reset\n");
+ goto error;
+ }
+
+ if (dev_priv->gt.active_requests)
+ engine_retire_requests(engine);
+
+ /*
+ * The request that caused the hang is stuck on the ELSP; identify
+ * the active request, drop it, and adjust the ring head to skip
+ * the offending request so the remaining queued requests resume.
+ */
+ i915_gem_reset_engine(engine);
+
+ /* Force the engine into an idle state before resetting it */
+ ret = intel_request_reset_engine(engine);
+ if (ret) {
+ DRM_ERROR("Failed to disable %s\n", engine->name);
+ goto error;
+ }
+
+ /* finally, reset engine */
+ ret = intel_gpu_reset(dev_priv, intel_engine_flag(engine));
+ if (ret) {
+ DRM_ERROR("Failed to reset %s, ret=%d\n", engine->name, ret);
+ intel_unrequest_reset_engine(engine);
+ goto error;
+ }
+
+ /* be sure the request reset bit gets cleared */
+ intel_unrequest_reset_engine(engine);
+
+ /* i915_gem_reset_prepare revoked the fences */
+ i915_gem_restore_fences(dev_priv);
+ i915_gem_reset_finish(dev_priv, intel_engine_flag(engine));
+
+ /* replay remaining requests in the queue */
+ ret = engine->init_hw(engine);
+ if (ret)
+ goto error;
+
+wakeup:
+ enable_irq(dev_priv->drm.irq);
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ wake_up_bit(&error->flags, I915_RESET_HANDOFF);
+ return ret;
+
+error:
+ /* use full gpu reset to recover on error */
+ set_bit(I915_RESET_HANDOFF, &error->flags);
+ goto wakeup;
}
/**
@@ -3044,6 +3044,8 @@ extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
extern void i915_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
+extern int intel_request_reset_engine(struct intel_engine_cs *engine);
+extern void intel_unrequest_reset_engine(struct intel_engine_cs *engine);
extern int intel_guc_reset(struct drm_i915_private *dev_priv);
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
@@ -3420,7 +3422,7 @@ int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno);
struct drm_i915_gem_request *
i915_gem_find_active_request(struct intel_engine_cs *engine);
-
+void engine_retire_requests(struct intel_engine_cs *engine);
void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
static inline bool i915_reset_backoff(struct i915_gpu_error *error)
@@ -3448,11 +3450,14 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
return READ_ONCE(error->reset_count);
}
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv,
+ unsigned int engine_mask);
void i915_gem_reset(struct drm_i915_private *dev_priv);
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
+void i915_gem_reset_finish(struct drm_i915_private *dev_priv,
+ unsigned int engine_mask);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
+void i915_gem_reset_engine(struct intel_engine_cs *engine);
void i915_gem_init_mmio(struct drm_i915_private *i915);
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
@@ -2741,14 +2741,15 @@ static bool engine_stalled(struct intel_engine_cs *engine)
return true;
}
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv,
+ unsigned int engine_mask)
{
struct intel_engine_cs *engine;
- enum intel_engine_id id;
+ unsigned int tmp;
int err = 0;
/* Ensure irq handler finishes, and not run again. */
- for_each_engine(engine, dev_priv, id) {
+ for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
struct drm_i915_gem_request *request;
/* Prevent the signaler thread from updating the request
@@ -2868,7 +2869,7 @@ static bool i915_gem_reset_request(struct drm_i915_gem_request *request)
return guilty;
}
-static void i915_gem_reset_engine(struct intel_engine_cs *engine)
+void i915_gem_reset_engine(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request;
@@ -2914,14 +2915,15 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
}
}
-void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
+void i915_gem_reset_finish(struct drm_i915_private *dev_priv,
+ unsigned int engine_mask)
{
struct intel_engine_cs *engine;
- enum intel_engine_id id;
+ unsigned int tmp;
lockdep_assert_held(&dev_priv->drm.struct_mutex);
- for_each_engine(engine, dev_priv, id) {
+ for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
tasklet_enable(&engine->irq_tasklet);
kthread_unpark(engine->breadcrumbs.signaler);
}
@@ -1177,7 +1177,7 @@ long i915_wait_request(struct drm_i915_gem_request *req,
return timeout;
}
-static void engine_retire_requests(struct intel_engine_cs *engine)
+void engine_retire_requests(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request, *next;
u32 seqno = intel_engine_get_seqno(engine);
@@ -1781,6 +1781,26 @@ int intel_guc_reset(struct drm_i915_private *dev_priv)
return ret;
}
+/*
+ * On gen8+ a reset request has to be issued via the reset control register
+ * before a GPU engine can be reset in order to stop the command streamer
+ * and idle the engine. This replaces the legacy way of stopping an engine
+ * by writing to the stop ring bit in the MI_MODE register.
+ */
+int intel_request_reset_engine(struct intel_engine_cs *engine)
+{
+ return gen8_request_reset_engine(engine);
+}
+
+/*
+ * It is possible to back off from a previously issued reset request by simply
+ * clearing the reset request bit in the reset control register.
+ */
+void intel_unrequest_reset_engine(struct intel_engine_cs *engine)
+{
+ gen8_unrequest_reset_engine(engine);
+}
+
bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
{
return check_for_unclaimed_mmio(dev_priv);