@@ -1809,21 +1809,68 @@ error:
* Returns zero on successful reset or otherwise an error code.
*
* Procedure is fairly simple:
- * - force engine to idle
- * - save current state which includes head and current request
- * - reset engine
- * - restore saved state and resubmit context
+ * - identify the request that caused the hang and drop it
+ * - force engine to idle: this is done by issuing a reset request
+ * - reset engine
+ * - restart submissions to the engine
*/
int i915_reset_engine(struct intel_engine_cs *engine)
{
int ret;
struct drm_i915_private *dev_priv = engine->i915;
- /* FIXME: replace me with engine reset sequence */
- ret = -ENODEV;
+ /*
+ * We first need to idle the engine by issuing a reset request, then
+ * perform the soft reset and re-initialize the hw state. The GT must
+ * stay powered for all of this, so hold forcewake for the whole
+ * sequence; it is released again on both the success path and the
+ * error path below.
+ */
+ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+ /*
+ * The request that caused the hang is stuck on the ELSP: identify the
+ * active request and drop it, adjusting head to skip the offending
+ * request so that the remaining requests in the queue can resume
+ * executing.
+ */
+ i915_gem_reset_engine(engine);
+
+ ret = intel_engine_reset_begin(engine);
+ if (ret) {
+ DRM_ERROR("Failed to disable %s\n", engine->name);
+ goto error;
+ }
+
+ ret = intel_gpu_reset(dev_priv, intel_engine_flag(engine));
+ if (ret) {
+ DRM_ERROR("Failed to reset %s, ret=%d\n", engine->name, ret);
+ intel_engine_reset_cancel(engine);
+ goto error;
+ }
+
+ ret = engine->init_hw(engine);
+ if (ret)
+ goto error;
+
+ intel_engine_reset_cancel(engine);
+ intel_execlists_restart_submission(engine);
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ return 0;
+
+error:
+ /* Use a full GPU reset to recover on error. */
 set_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags);
+ /*
+ * Engine reset is performed without taking struct_mutex. Since it
+ * failed we now fall back to a full GPU reset: wake up any waiters,
+ * which should now see reset_in_progress and release struct_mutex
+ * so that recovery can continue.
+ *
+ * NOTE(review): the init_hw() failure path gets here without calling
+ * intel_engine_reset_cancel(), unlike the intel_gpu_reset() failure
+ * path. Confirm that leaving the reset request in place is intended.
+ */
+ rcu_read_lock();
+ intel_engine_wakeup(engine);
+ rcu_read_unlock();
+
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 return ret;
}
@@ -2869,6 +2869,8 @@ extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
extern int i915_reset(struct drm_i915_private *dev_priv);
extern bool intel_has_engine_reset(struct drm_i915_private *dev_priv);
+extern int intel_engine_reset_begin(struct intel_engine_cs *engine);
+extern int intel_engine_reset_cancel(struct intel_engine_cs *engine);
extern int i915_reset_engine(struct intel_engine_cs *engine);
extern int intel_guc_reset(struct drm_i915_private *dev_priv);
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
@@ -3230,6 +3232,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
void i915_gem_reset(struct drm_device *dev);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
+void i915_gem_reset_engine(struct intel_engine_cs *engine);
bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
int __must_check i915_gem_init(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -2402,7 +2402,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
return NULL;
}
-static void i915_gem_reset_engine(struct intel_engine_cs *engine)
+void i915_gem_reset_engine(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request;
struct i915_gem_context *incomplete_ctx;
@@ -543,6 +543,16 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
spin_unlock_bh(&engine->execlist_lock);
}
+/*
+ * intel_execlists_restart_submission - kick request submission on an engine
+ * @engine: engine whose irq tasklet may need to be scheduled
+ *
+ * With execlist_lock held (bottom halves disabled), schedule the engine's
+ * irq tasklet on the high-priority list when execlists_elsp_idle() reports
+ * true. Nothing is scheduled here otherwise.
+ */
+void intel_execlists_restart_submission(struct intel_engine_cs *engine)
+{
+	bool elsp_idle;
+
+	spin_lock_bh(&engine->execlist_lock);
+	elsp_idle = execlists_elsp_idle(engine);
+	if (elsp_idle)
+		tasklet_hi_schedule(&engine->irq_tasklet);
+	spin_unlock_bh(&engine->execlist_lock);
+}
+
int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
{
struct intel_engine_cs *engine = request->engine;
@@ -96,5 +96,6 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
int enable_execlists);
void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
+void intel_execlists_restart_submission(struct intel_engine_cs *engine);
#endif /* _INTEL_LRC_H_ */
@@ -1683,7 +1683,7 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv,
return ret;
}
-static int gen8_request_engine_reset(struct intel_engine_cs *engine)
+static int gen8_engine_reset_begin(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
int ret;
@@ -1702,7 +1702,7 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine)
return ret;
}
-static void gen8_unrequest_engine_reset(struct intel_engine_cs *engine)
+static void gen8_engine_reset_cancel(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -1716,14 +1716,14 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv,
struct intel_engine_cs *engine;
for_each_engine_masked(engine, dev_priv, engine_mask)
- if (gen8_request_engine_reset(engine))
+ if (gen8_engine_reset_begin(engine))
goto not_ready;
return gen6_reset_engines(dev_priv, engine_mask);
not_ready:
for_each_engine_masked(engine, dev_priv, engine_mask)
- gen8_unrequest_engine_reset(engine);
+ gen8_engine_reset_cancel(engine);
return -EIO;
}
@@ -1799,6 +1799,39 @@ int intel_guc_reset(struct drm_i915_private *dev_priv)
return ret;
}
+/*
+ * intel_engine_reset_begin - issue a reset request for a single engine
+ * @engine: engine to prepare for reset
+ *
+ * On gen8+ an engine has to be asked to reset through the reset control
+ * register before it can actually be reset; this stops the command streamer
+ * and idles the engine. It replaces the legacy approach of setting the stop
+ * ring bit in the MI_MODE register.
+ *
+ * Returns the result of gen8_engine_reset_begin(), or -EINVAL when
+ * intel_has_engine_reset() reports that engine reset is unavailable.
+ */
+int intel_engine_reset_begin(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (intel_has_engine_reset(i915))
+		return gen8_engine_reset_begin(engine);
+
+	DRM_ERROR("Engine Reset not supported on Gen%d\n",
+		  INTEL_INFO(i915)->gen);
+	return -EINVAL;
+}
+
+/*
+ * intel_engine_reset_cancel - back off from a previously issued reset request
+ * @engine: engine whose reset request should be withdrawn
+ *
+ * A pending reset request can be withdrawn simply by clearing the reset
+ * request bit in the reset control register.
+ *
+ * Returns 0 once gen8_engine_reset_cancel() has been called, or -EINVAL when
+ * intel_has_engine_reset() reports that engine reset is unavailable.
+ */
+int intel_engine_reset_cancel(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+
+	if (intel_has_engine_reset(i915)) {
+		gen8_engine_reset_cancel(engine);
+		return 0;
+	}
+
+	DRM_ERROR("Request to clear reset not supported on Gen%d\n",
+		  INTEL_INFO(i915)->gen);
+	return -EINVAL;
+}
+
bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
{
return check_for_unclaimed_mmio(dev_priv);