Message ID | 20170518182257.23241-1-michel.thierry@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 5/18/2017 11:22 AM, Michel Thierry wrote: > fixes > > Signed-off-by: Michel Thierry <michel.thierry@intel.com> rebase mistake
On Thu, May 18, 2017 at 11:22:57AM -0700, Michel Thierry wrote: > And store the active request so that we only search for it once; this > applies for reset-engine and full reset. > > v2: Check for request completion inside _prepare_engine, don't use > ECANCELED, remove unnecessary null checks (Chris). > > v3: Capture active requests during reset_prepare and store it the > engine hangcheck obj. > > v4: Rename commit, change i915_gem_reset_request to just confirm the > active_request is still incomplete, instead of engine_stalled (Chris). > > Suggested-by: Chris Wilson <chris@chris-wilson.co.uk> > Signed-off-by: Michel Thierry <michel.thierry@intel.com> > > fixes > > Signed-off-by: Michel Thierry <michel.thierry@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.c | 11 ++++++----- > drivers/gpu/drm/i915/i915_drv.h | 3 ++- > drivers/gpu/drm/i915/i915_gem.c | 34 +++++++++++++++++++++------------ > drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + > 4 files changed, 31 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index d62793805794..ec719376fc24 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -1900,15 +1900,16 @@ int i915_reset_engine(struct intel_engine_cs *engine) > > DRM_DEBUG_DRIVER("resetting %s\n", engine->name); > > - ret = i915_gem_reset_prepare_engine(engine); > - if (ret) { > - DRM_ERROR("Previous reset failed - promote to full reset\n"); > + engine->hangcheck.active_request = i915_gem_reset_prepare_engine(engine); Whilst this is not wrong (since we are serialising the per-engine and global resets), I would suggest we avoid storing the request in the hangcheck here and just pass the request along to i915_gem_request_engine. No strong reason, just less magic state passing between functions. > + if (IS_ERR(engine->hangcheck.active_request)) { > + DRM_DEBUG_DRIVER("Previous reset failed, promote to full reset\n"); > + ret = PTR_ERR(engine->hangcheck.active_request); > goto out; > } > > index b5dc073a5ddc..c9f139b322d2 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2793,12 +2793,14 @@ static bool engine_stalled(struct intel_engine_cs *engine) > return true; > } > > -/* Ensure irq handler finishes, and not run again. */ > -int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) > +/* > + * Ensure irq handler finishes, and not run again. > + * Also store the active request so that we only search for it once. > + */ > +struct drm_i915_gem_request * > +i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) > { > - struct drm_i915_gem_request *request; > - int err = 0; > - > + struct drm_i915_gem_request *request = NULL; > > /* Prevent the signaler thread from updating the request > * state (by calling dma_fence_signal) as we are processing > @@ -2827,21 +2829,30 @@ int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) > > if (engine_stalled(engine)) { > request = i915_gem_find_active_request(engine); > + If we neuter the return beneath the if, this blank line can also go. > if (request && request->fence.error == -EIO) > - err = -EIO; /* Previous reset failed! */ > + return ERR_PTR(-EIO); /* Previous reset failed! */ request = ERR_PTR(-EIO); and then keep the single return. -Chris
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d62793805794..ec719376fc24 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1900,15 +1900,16 @@ int i915_reset_engine(struct intel_engine_cs *engine) DRM_DEBUG_DRIVER("resetting %s\n", engine->name); - ret = i915_gem_reset_prepare_engine(engine); - if (ret) { - DRM_ERROR("Previous reset failed - promote to full reset\n"); + engine->hangcheck.active_request = i915_gem_reset_prepare_engine(engine); + if (IS_ERR(engine->hangcheck.active_request)) { + DRM_DEBUG_DRIVER("Previous reset failed, promote to full reset\n"); + ret = PTR_ERR(engine->hangcheck.active_request); goto out; } /* - * the request that caused the hang is stuck on elsp, identify the - * active request and drop it, adjust head to skip the offending + * the request that caused the hang is stuck on elsp, we know the + * active request and can drop it, adjust head to skip the offending * request to resume executing remaining requests in the queue. */ i915_gem_reset_engine(engine); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a5b9c666b3bf..6cbfeaa02246 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3370,7 +3370,8 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) return READ_ONCE(error->reset_count); } -int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); +struct drm_i915_gem_request * +i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish_engine(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b5dc073a5ddc..c9f139b322d2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2793,12 +2793,14 @@ static bool engine_stalled(struct intel_engine_cs *engine) return true; } -/* Ensure irq handler finishes, and not run again. */ -int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) +/* + * Ensure irq handler finishes, and not run again. + * Also store the active request so that we only search for it once. + */ +struct drm_i915_gem_request * +i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; - int err = 0; - + struct drm_i915_gem_request *request = NULL; /* Prevent the signaler thread from updating the request * state (by calling dma_fence_signal) as we are processing @@ -2827,21 +2829,30 @@ int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine) if (engine_stalled(engine)) { request = i915_gem_find_active_request(engine); + if (request && request->fence.error == -EIO) - err = -EIO; /* Previous reset failed! */ + return ERR_PTR(-EIO); /* Previous reset failed! */ } - return err; + return request; } int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; + struct drm_i915_gem_request *request; enum intel_engine_id id; int err = 0; - for_each_engine(engine, dev_priv, id) - err = i915_gem_reset_prepare_engine(engine); + for_each_engine(engine, dev_priv, id) { + request = i915_gem_reset_prepare_engine(engine); + if (IS_ERR(request)) { + err = PTR_ERR(request); + break; + } + + engine->hangcheck.active_request = request; + } i915_gem_revoke_fences(dev_priv); @@ -2894,7 +2905,7 @@ static void engine_skip_context(struct drm_i915_gem_request *request) static bool i915_gem_reset_request(struct drm_i915_gem_request *request) { /* Read once and return the resolution */ - const bool guilty = engine_stalled(request->engine); + const bool guilty = !i915_gem_request_completed(request); /* The guilty request will get skipped on a hung engine. * @@ -2930,9 +2941,8 @@ static bool i915_gem_reset_request(struct drm_i915_gem_request *request) void i915_gem_reset_engine(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; + struct drm_i915_gem_request *request = engine->hangcheck.active_request; - request = i915_gem_find_active_request(engine); if (request && i915_gem_reset_request(request)) { DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", engine->name, request->global_seqno); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index ec16fb6fde62..f850c4b12337 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -121,6 +121,7 @@ struct intel_engine_hangcheck { unsigned long action_timestamp; int deadlock; struct intel_instdone instdone; + struct drm_i915_gem_request *active_request; bool stalled; };