@@ -1895,23 +1895,25 @@ int i915_reset_engine(struct intel_engine_cs *engine)
int ret;
struct drm_i915_private *dev_priv = engine->i915;
struct i915_gpu_error *error = &dev_priv->gpu_error;
+ struct drm_i915_gem_request *active_request;
GEM_BUG_ON(!test_bit(I915_RESET_ENGINE_IN_PROGRESS, &error->flags));
DRM_DEBUG_DRIVER("resetting %s\n", engine->name);
- ret = i915_gem_reset_prepare_engine(engine);
- if (ret) {
- DRM_ERROR("Previous reset failed - promote to full reset\n");
+ active_request = i915_gem_reset_prepare_engine(engine);
+ if (IS_ERR(active_request)) {
+ DRM_DEBUG_DRIVER("Previous reset failed, promote to full reset\n");
+ ret = PTR_ERR(active_request);
goto out;
}
/*
- * the request that caused the hang is stuck on elsp, identify the
- * active request and drop it, adjust head to skip the offending
+ * the request that caused the hang is stuck on elsp, we know the
+ * active request and can drop it, adjust head to skip the offending
* request to resume executing remaining requests in the queue.
*/
- i915_gem_reset_engine(engine);
+ i915_gem_reset_engine(engine, active_request);
/* forcing engine to idle */
ret = intel_reset_engine_start(engine);
@@ -3370,14 +3370,16 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
return READ_ONCE(error->reset_count);
}
-int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
+struct drm_i915_gem_request *
+i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
void i915_gem_reset(struct drm_i915_private *dev_priv);
void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_reset_engine(struct intel_engine_cs *engine);
+void i915_gem_reset_engine(struct intel_engine_cs *engine,
+ struct drm_i915_gem_request *request);
void i915_gem_init_mmio(struct drm_i915_private *i915);
int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
@@ -2793,12 +2793,14 @@ static bool engine_stalled(struct intel_engine_cs *engine)
return true;
}
-/* Ensure irq handler finishes, and not run again. */
-int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
+/*
+ * Ensure irq handler finishes, and not run again.
+ * Also return the active request so that callers only search for it once.
+ */
+struct drm_i915_gem_request *
+i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
{
- struct drm_i915_gem_request *request;
- int err = 0;
-
+ struct drm_i915_gem_request *request = NULL;
/* Prevent the signaler thread from updating the request
* state (by calling dma_fence_signal) as we are processing
@@ -2828,20 +2830,28 @@ int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
if (engine_stalled(engine)) {
request = i915_gem_find_active_request(engine);
if (request && request->fence.error == -EIO)
- err = -EIO; /* Previous reset failed! */
+ request = ERR_PTR(-EIO); /* Previous reset failed! */
}
- return err;
+ return request;
}
int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
{
struct intel_engine_cs *engine;
+ struct drm_i915_gem_request *request;
enum intel_engine_id id;
int err = 0;
- for_each_engine(engine, dev_priv, id)
- err = i915_gem_reset_prepare_engine(engine);
+ for_each_engine(engine, dev_priv, id) {
+ request = i915_gem_reset_prepare_engine(engine);
+ if (IS_ERR(request)) {
+ err = PTR_ERR(request);
+ break;
+ }
+
+ engine->hangcheck.active_request = request;
+ }
i915_gem_revoke_fences(dev_priv);
@@ -2894,7 +2904,7 @@ static void engine_skip_context(struct drm_i915_gem_request *request)
static bool i915_gem_reset_request(struct drm_i915_gem_request *request)
{
/* Read once and return the resolution */
- const bool guilty = engine_stalled(request->engine);
+ const bool guilty = !i915_gem_request_completed(request);
/* The guilty request will get skipped on a hung engine.
*
@@ -2928,11 +2938,9 @@ static bool i915_gem_reset_request(struct drm_i915_gem_request *request)
return guilty;
}
-void i915_gem_reset_engine(struct intel_engine_cs *engine)
+void i915_gem_reset_engine(struct intel_engine_cs *engine,
+ struct drm_i915_gem_request *request)
{
- struct drm_i915_gem_request *request;
-
- request = i915_gem_find_active_request(engine);
if (request && i915_gem_reset_request(request)) {
DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
engine->name, request->global_seqno);
@@ -2958,7 +2966,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
for_each_engine(engine, dev_priv, id) {
struct i915_gem_context *ctx;
- i915_gem_reset_engine(engine);
+ i915_gem_reset_engine(engine, engine->hangcheck.active_request);
ctx = fetch_and_zero(&engine->last_retired_context);
if (ctx)
engine->context_unpin(engine, ctx);
@@ -121,6 +121,7 @@ struct intel_engine_hangcheck {
unsigned long action_timestamp;
int deadlock;
struct intel_instdone instdone;
+ struct drm_i915_gem_request *active_request;
bool stalled;
};
And store the active request so that we only search for it once; this applies to both reset-engine and full reset. v2: Check for request completion inside _prepare_engine, don't use ECANCELED, remove unnecessary null checks (Chris). v3: Capture the active request of each engine during reset_prepare and store it in the engine hangcheck obj. v4: Rename commit, change i915_gem_reset_request to just confirm the active_request is still incomplete, instead of engine_stalled (Chris). v5: With style; pass the active request to gem_reset_engine, keep single return in reset_prepare_engine (Chris). Suggested-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Michel Thierry <michel.thierry@intel.com> --- drivers/gpu/drm/i915/i915_drv.c | 14 ++++++------ drivers/gpu/drm/i915/i915_drv.h | 6 ++++-- drivers/gpu/drm/i915/i915_gem.c | 38 ++++++++++++++++++++------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 36 insertions(+), 23 deletions(-)