@@ -1832,11 +1832,76 @@ void i915_reset_chip(struct drm_i915_private *dev_priv)
*
* Reset a specific GPU engine. Useful if a hang is detected.
* Returns zero on successful reset or otherwise an error code.
+ *
+ * Caller must hold the struct_mutex.
+ *
+ * The procedure is:
+ * - identify the request that caused the hang and drop it
+ * - force the engine to idle by issuing a reset request
+ * - reset the engine
+ * - restart submissions to the engine
*/
int i915_reset_engine(struct intel_engine_cs *engine)
{
- /* FIXME: replace me with engine reset sequence */
- return -ENODEV;
+ int ret;
+ struct drm_i915_private *dev_priv = engine->i915;
+ struct i915_gpu_error *error = &dev_priv->gpu_error;
+
+ lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+ if (!test_and_clear_bit(I915_RESET_IN_PROGRESS, &error->flags))
+ return 0;
+
+ DRM_DEBUG_DRIVER("resetting %s\n", engine->name);
+
+ /*
+ * We need to first idle the engine by issuing a reset request, then
+ * perform a soft reset and re-initialize the hw state. The GT power
+ * well must remain awake for all of this, so hold forcewake
+ * throughout the process.
+ */
+ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+ /*
+ * The request that caused the hang is stuck on the ELSP; identify
+ * the active request and drop it, adjusting the ring head to skip
+ * the offending request so the remaining requests in the queue can
+ * resume execution.
+ */
+ disable_engines_irq(dev_priv);
+ i915_gem_reset_engine(engine);
+ enable_engines_irq(dev_priv);
+
+ /* force the engine to idle */
+ ret = intel_reset_engine_begin(engine);
+ if (ret) {
+ DRM_ERROR("Failed to disable %s\n", engine->name);
+ goto error;
+ }
+
+ /* finally, reset the engine */
+ ret = intel_gpu_reset(dev_priv, intel_engine_flag(engine));
+ if (ret) {
+ DRM_ERROR("Failed to reset %s, ret=%d\n", engine->name, ret);
+ intel_reset_engine_cancel(engine);
+ goto error;
+ }
+
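+ /* clear the reset request now that the engine has been reset */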
+ ret = intel_reset_engine_cancel(engine);
+ if (ret)
+ goto error;
+
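+ /* re-initialize hw state and restart submissions to the engine */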
+ ret = engine->init_hw(engine);
+ if (ret)
+ goto error;
+
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ return 0;
+
+error:
+ /* fall back to a full GPU reset to recover on error */
+ set_bit(I915_RESET_IN_PROGRESS, &error->flags);
+ intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+ return ret;
}
/**
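
For illustration, a minimal sketch (hypothetical, not part of this patch) of how a hang-detection path might drive the new per-engine reset, honouring the struct_mutex requirement and the I915_RESET_IN_PROGRESS handshake documented above. The helper name handle_engine_hang is an assumption; the fallback to i915_reset() mirrors the error path in i915_reset_engine().

/*
 * Hypothetical caller sketch -- illustrative only, not part of this patch.
 * Assumes a hang-detection path that has already identified a stuck engine.
 */
static void handle_engine_hang(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* flag the reset before calling in, as i915_reset_engine() expects */
	set_bit(I915_RESET_IN_PROGRESS, &dev_priv->gpu_error.flags);

	mutex_lock(&dev_priv->drm.struct_mutex);
	if (i915_reset_engine(engine))
		/*
		 * On failure i915_reset_engine() re-sets
		 * I915_RESET_IN_PROGRESS, so escalate to a full GPU reset.
		 */
		i915_reset(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);
}
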
@@ -2938,6 +2938,8 @@ extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask);
extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
extern void i915_reset(struct drm_i915_private *dev_priv);
extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
+extern int intel_reset_engine_begin(struct intel_engine_cs *engine);
+extern int intel_reset_engine_cancel(struct intel_engine_cs *engine);
extern int intel_guc_reset(struct drm_i915_private *dev_priv);
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
@@ -3338,6 +3340,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
void i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
+void i915_gem_reset_engine(struct intel_engine_cs *engine);
void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);
@@ -2635,7 +2635,7 @@ void i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
i915_gem_revoke_fences(dev_priv);
}
-static void i915_gem_reset_engine(struct intel_engine_cs *engine)
+void i915_gem_reset_engine(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request;
struct i915_gem_context *hung_ctx;
@@ -1758,7 +1758,7 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv,
return ret;
}
-static int gen8_request_engine_reset(struct intel_engine_cs *engine)
+static int gen8_reset_engine_begin(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
int ret;
@@ -1777,7 +1777,7 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine)
return ret;
}
-static void gen8_unrequest_engine_reset(struct intel_engine_cs *engine)
+static void gen8_reset_engine_cancel(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
@@ -1792,14 +1792,14 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv,
unsigned int tmp;
for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
- if (gen8_request_engine_reset(engine))
+ if (gen8_reset_engine_begin(engine))
goto not_ready;
return gen6_reset_engines(dev_priv, engine_mask);
not_ready:
for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
- gen8_unrequest_engine_reset(engine);
+ gen8_reset_engine_cancel(engine);
return -EIO;
}
@@ -1881,6 +1881,39 @@ int intel_guc_reset(struct drm_i915_private *dev_priv)
return ret;
}
+/*
+ * On gen8+ a reset request has to be issued via the reset control register
+ * before a GPU engine can be reset in order to stop the command streamer
+ * and idle the engine. This replaces the legacy way of stopping an engine
+ * by writing to the stop ring bit in the MI_MODE register.
+ */
+int intel_reset_engine_begin(struct intel_engine_cs *engine)
+{
+ if (!intel_has_reset_engine(engine->i915)) {
+ DRM_ERROR("Engine Reset not supported on Gen%d\n",
+ INTEL_INFO(engine->i915)->gen);
+ return -EINVAL;
+ }
+
+ return gen8_reset_engine_begin(engine);
+}
+
+/*
+ * It is possible to back off from a previously issued reset request by simply
+ * clearing the reset request bit in the reset control register.
+ */
+int intel_reset_engine_cancel(struct intel_engine_cs *engine)
+{
+ if (!intel_has_reset_engine(engine->i915)) {
+ DRM_ERROR("Request to clear reset not supported on Gen%d\n",
+ INTEL_INFO(engine->i915)->gen);
+ return -EINVAL;
+ }
+
+ gen8_reset_engine_cancel(engine);
+ return 0;
+}
+
bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
{
return check_for_unclaimed_mmio(dev_priv);
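
For reference, a minimal sketch of the gen8+ reset-control handshake that intel_reset_engine_begin()/intel_reset_engine_cancel() wrap via gen8_reset_engine_begin()/gen8_reset_engine_cancel(). The function names below are hypothetical; RING_RESET_CTL, RESET_CTL_REQUEST_RESET and RESET_CTL_READY_TO_RESET refer to the existing i915_reg.h definitions, and the 700 ms wait mirrors the current gen8 request path (an assumption, not a transcription of this patch).

/*
 * Illustrative only. Forcewake is assumed to be held by the caller,
 * as it is in i915_reset_engine().
 */
static int sketch_reset_engine_begin(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* ask the command streamer to stop and idle the engine */
	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
		      _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));

	/* wait for the engine to ack that it is ready to be reset */
	return intel_wait_for_register_fw(dev_priv,
					  RING_RESET_CTL(engine->mmio_base),
					  RESET_CTL_READY_TO_RESET,
					  RESET_CTL_READY_TO_RESET,
					  700);
}

static void sketch_reset_engine_cancel(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;

	/* back off: clear the reset request bit again */
	I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base),
		      _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET));
}
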