@@ -1883,11 +1883,65 @@ void i915_reset(struct drm_i915_private *dev_priv)
*
* Reset a specific GPU engine. Useful if a hang is detected.
* Returns zero on successful reset or otherwise an error code.
+ *
+ * Procedure is:
+ * - identify the request that caused the hang and drop it
+ * - force the engine to idle by issuing a reset request
+ * - reset the engine
+ * - re-init/configure the engine
*/
int i915_reset_engine(struct intel_engine_cs *engine)
{
- /* FIXME: replace me with engine reset sequence */
- return -ENODEV;
+ int ret;
+ struct drm_i915_private *dev_priv = engine->i915;
+ struct i915_gpu_error *error = &dev_priv->gpu_error;
+
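+ /* Caller must have marked this engine's reset as in progress */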
+ GEM_BUG_ON(!test_bit(I915_RESET_ENGINE_IN_PROGRESS, &error->flags));
+
+ DRM_DEBUG_DRIVER("resetting %s\n", engine->name);
+
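+ /*
+ * Quiesce the engine: park the signaler thread, disable the irq
+ * tasklet and check whether a previous reset already failed for the
+ * request that is still active on this engine.
+ */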
+ ret = i915_gem_reset_prepare_engine(engine);
+ if (ret) {
+ DRM_ERROR("Previous reset failed - promote to full reset\n");
+ goto out;
+ }
+
+ /*
+ * The request that caused the hang is stuck on the ELSP. Identify the
+ * active request and drop it, then adjust the ring head to skip the
+ * offending request so the remaining requests in the queue can resume
+ * execution.
+ */
+ i915_gem_reset_engine(engine);
+
+ /* Force the engine to idle by issuing a reset request */
+ ret = intel_reset_engine_start(engine);
+ if (ret) {
+ DRM_ERROR("Failed to disable %s\n", engine->name);
+ goto out;
+ }
+
+ /* Finally, reset the engine */
+ ret = intel_gpu_reset(dev_priv, intel_engine_flag(engine));
+ if (ret) {
+ DRM_ERROR("Failed to reset %s, ret=%d\n", engine->name, ret);
+ intel_reset_engine_cancel(engine);
+ goto out;
+ }
+
+ /* Make sure the reset request bit is cleared once the reset is done */
+ intel_reset_engine_cancel(engine);
+
+ i915_gem_reset_finish_engine(engine);
+
+ /*
+ * The engine and its registers (and workarounds, in the case of the
+ * render engine) have been reset to their default values. Follow the
+ * init_ring process to program RING_MODE, HWSP and re-enable
+ * submission.
+ */
+ ret = engine->init_hw(engine);
+
+out:
+ return ret;
}
static int i915_pm_suspend(struct device *kdev)
@@ -2997,6 +2997,8 @@ extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
extern void i915_reset(struct drm_i915_private *dev_priv);
extern int i915_reset_engine(struct intel_engine_cs *engine);
extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv);
+extern int intel_reset_engine_start(struct intel_engine_cs *engine);
+extern void intel_reset_engine_cancel(struct intel_engine_cs *engine);
extern int intel_guc_reset(struct drm_i915_private *dev_priv);
extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
@@ -3368,11 +3370,14 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
return READ_ONCE(error->reset_count);
}
+int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
void i915_gem_reset(struct drm_i915_private *dev_priv);
+void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
+void i915_gem_reset_engine(struct intel_engine_cs *engine);
void i915_gem_init_mmio(struct drm_i915_private *i915);
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
@@ -2793,48 +2793,56 @@ static bool engine_stalled(struct intel_engine_cs *engine)
return true;
}
-int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+/* Ensure the irq handler finishes, and does not run again. */
+int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
{
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
+ struct drm_i915_gem_request *request;
int err = 0;
- /* Ensure irq handler finishes, and not run again. */
- for_each_engine(engine, dev_priv, id) {
- struct drm_i915_gem_request *request;
-
- /* Prevent the signaler thread from updating the request
- * state (by calling dma_fence_signal) as we are processing
- * the reset. The write from the GPU of the seqno is
- * asynchronous and the signaler thread may see a different
- * value to us and declare the request complete, even though
- * the reset routine have picked that request as the active
- * (incomplete) request. This conflict is not handled
- * gracefully!
- */
- kthread_park(engine->breadcrumbs.signaler);
-
- /* Prevent request submission to the hardware until we have
- * completed the reset in i915_gem_reset_finish(). If a request
- * is completed by one engine, it may then queue a request
- * to a second via its engine->irq_tasklet *just* as we are
- * calling engine->init_hw() and also writing the ELSP.
- * Turning off the engine->irq_tasklet until the reset is over
- * prevents the race.
- */
- tasklet_kill(&engine->irq_tasklet);
- tasklet_disable(&engine->irq_tasklet);
- if (engine->irq_seqno_barrier)
- engine->irq_seqno_barrier(engine);
+ /* Prevent the signaler thread from updating the request
+ * state (by calling dma_fence_signal) as we are processing
+ * the reset. The write from the GPU of the seqno is
+ * asynchronous and the signaler thread may see a different
+ * value to us and declare the request complete, even though
+ * the reset routine has picked that request as the active
+ * (incomplete) request. This conflict is not handled
+ * gracefully!
+ */
+ kthread_park(engine->breadcrumbs.signaler);
+
+ /* Prevent request submission to the hardware until we have
+ * completed the reset in i915_gem_reset_finish(). If a request
+ * is completed by one engine, it may then queue a request
+ * to a second via its engine->irq_tasklet *just* as we are
+ * calling engine->init_hw() and also writing the ELSP.
+ * Turning off the engine->irq_tasklet until the reset is over
+ * prevents the race.
+ */
+ tasklet_kill(&engine->irq_tasklet);
+ tasklet_disable(&engine->irq_tasklet);
- if (engine_stalled(engine)) {
- request = i915_gem_find_active_request(engine);
- if (request && request->fence.error == -EIO)
- err = -EIO; /* Previous reset failed! */
- }
+ if (engine->irq_seqno_barrier)
+ engine->irq_seqno_barrier(engine);
+
+ if (engine_stalled(engine)) {
+ request = i915_gem_find_active_request(engine);
+ if (request && request->fence.error == -EIO)
+ err = -EIO; /* Previous reset failed! */
}
+ return err;
+}
+
+int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ int err = 0;
+
+ /* Do not let a later engine clear an earlier engine's error */
+ for_each_engine(engine, dev_priv, id)
+ if (i915_gem_reset_prepare_engine(engine))
+ err = -EIO;
+
i915_gem_revoke_fences(dev_priv);
return err;
@@ -2920,7 +2928,7 @@ static bool i915_gem_reset_request(struct drm_i915_gem_request *request)
return guilty;
}
-static void i915_gem_reset_engine(struct intel_engine_cs *engine)
+void i915_gem_reset_engine(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *request;
@@ -2966,6 +2974,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
}
}
+void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
+{
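+ /*
+ * Undo i915_gem_reset_prepare_engine(): re-enable request submission
+ * and breadcrumb signaling for this engine.
+ */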
+ tasklet_enable(&engine->irq_tasklet);
+ kthread_unpark(engine->breadcrumbs.signaler);
+}
+
void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
@@ -2973,10 +2987,8 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
lockdep_assert_held(&dev_priv->drm.struct_mutex);
- for_each_engine(engine, dev_priv, id) {
- tasklet_enable(&engine->irq_tasklet);
- kthread_unpark(engine->breadcrumbs.signaler);
- }
+ for_each_engine(engine, dev_priv, id)
+ i915_gem_reset_finish_engine(engine);
}
static void nop_submit_request(struct drm_i915_gem_request *request)
@@ -1801,6 +1801,26 @@ int intel_guc_reset(struct drm_i915_private *dev_priv)
return ret;
}
+/*
+ * On gen8+ a reset request has to be issued via the reset control register
+ * before a GPU engine can be reset in order to stop the command streamer
+ * and idle the engine. This replaces the legacy way of stopping an engine
+ * by writing to the stop ring bit in the MI_MODE register.
+ */
+int intel_reset_engine_start(struct intel_engine_cs *engine)
+{
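+ /*
+ * gen8_reset_engine_start() requests the reset via the engine's
+ * reset control register and then waits for the hardware to report
+ * that the engine is idle and ready to be reset.
+ */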
+ return gen8_reset_engine_start(engine);
+}
+
+/*
+ * It is possible to back off from a previously issued reset request by simply
+ * clearing the reset request bit in the reset control register.
+ */
+void intel_reset_engine_cancel(struct intel_engine_cs *engine)
+{
+ gen8_reset_engine_cancel(engine);
+}
+
bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv)
{
return check_for_unclaimed_mmio(dev_priv);