Message ID | 1469551257-26803-4-git-send-email-arun.siluvery@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Jul 26, 2016 at 05:40:49PM +0100, Arun Siluvery wrote: > Now that we track reset progress using separate set of flags, update it to > account for engine reset as well. > > A bit corresponding engine->id is set if reset is in progress for that > engine. Bit0 is for full gpu reset. > > Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 16 +++++++++++++++- > 1 file changed, 15 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 11436e7..125fafa 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1403,6 +1403,8 @@ struct i915_gpu_error { > #define I915_RESET_IN_PROGRESS 0 > #define I915_WEDGED (BITS_PER_LONG-1) > > + unsigned long engine_reset_count[I915_NUM_ENGINES]; > + > /** > * Waitqueue to signal when a hang is detected. Used to for waiters > * to release the struct_mutex for the reset to procede. > @@ -3194,9 +3196,10 @@ i915_gem_find_active_request(struct intel_engine_cs *engine); > void i915_gem_retire_requests(struct drm_i915_private *dev_priv); > void i915_gem_retire_requests_ring(struct intel_engine_cs *engine); > > +/* indicates the progress of engine reset or full gpu reset */ > static inline bool i915_reset_in_progress(struct i915_gpu_error *error) > { > - return test_bit(I915_RESET_IN_PROGRESS, &error->flags); > + return unlikely(READ_ONCE(error->flags) & ~BIT(I915_WEDGED)); > } > > static inline bool i915_terminally_wedged(struct i915_gpu_error *error) > @@ -3214,6 +3217,17 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) > return READ_ONCE(error->reset_count); > } > > +static inline bool i915_full_gpu_reset_in_progress(struct i915_gpu_error *error) > +{ > + return test_bit(I915_RESET_IN_PROGRESS, &error->flags); > +} > + > +static inline bool i915_engine_reset_in_progress(struct i915_gpu_error *error, > + struct intel_engine_cs *engine) > +{ > + return test_bit(engine->id + 1, &error->flags); > +} ? A totally unexplained change. If it is because you think to want to break waiters on struct_mutex, try again. -Chris
On 26/07/2016 22:52, Chris Wilson wrote: > On Tue, Jul 26, 2016 at 05:40:49PM +0100, Arun Siluvery wrote: >> Now that we track reset progress using separate set of flags, update it to >> account for engine reset as well. >> >> A bit corresponding engine->id is set if reset is in progress for that >> engine. Bit0 is for full gpu reset. >> >> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 16 +++++++++++++++- >> 1 file changed, 15 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index 11436e7..125fafa 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1403,6 +1403,8 @@ struct i915_gpu_error { >> #define I915_RESET_IN_PROGRESS 0 >> #define I915_WEDGED (BITS_PER_LONG-1) >> >> + unsigned long engine_reset_count[I915_NUM_ENGINES]; >> + >> /** >> * Waitqueue to signal when a hang is detected. Used to for waiters >> * to release the struct_mutex for the reset to procede. >> @@ -3194,9 +3196,10 @@ i915_gem_find_active_request(struct intel_engine_cs *engine); >> void i915_gem_retire_requests(struct drm_i915_private *dev_priv); >> void i915_gem_retire_requests_ring(struct intel_engine_cs *engine); >> >> +/* indicates the progress of engine reset or full gpu reset */ >> static inline bool i915_reset_in_progress(struct i915_gpu_error *error) >> { >> - return test_bit(I915_RESET_IN_PROGRESS, &error->flags); >> + return unlikely(READ_ONCE(error->flags) & ~BIT(I915_WEDGED)); >> } >> >> static inline bool i915_terminally_wedged(struct i915_gpu_error *error) >> @@ -3214,6 +3217,17 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) >> return READ_ONCE(error->reset_count); >> } >> >> +static inline bool i915_full_gpu_reset_in_progress(struct i915_gpu_error *error) >> +{ >> + return test_bit(I915_RESET_IN_PROGRESS, &error->flags); >> +} >> + >> +static inline bool i915_engine_reset_in_progress(struct i915_gpu_error *error, >> + struct intel_engine_cs *engine) >> +{ >> + return test_bit(engine->id + 1, &error->flags); >> +} > > ? > > A totally unexplained change. If it is because you think to want to break > waiters on struct_mutex, try again. So you don't want error->flags to include engine reset bits? ok, it should be possible to use engine_mask itself. Next patch separates engine reset and full gpu reset in separate functions, for branching purposes i915_full_gpu_reset_in_progress() is added, is this ok or directly use test_bit() ? regards Arun > -Chris >
On Wed, Jul 27, 2016 at 12:16:04PM +0100, Arun Siluvery wrote: > On 26/07/2016 22:52, Chris Wilson wrote: > >A totally unexplained change. If it is because you think to want to break > >waiters on struct_mutex, try again. > So you don't want error->flags to include engine reset bits? > ok, it should be possible to use engine_mask itself. > > Next patch separates engine reset and full gpu reset in separate > functions, for branching purposes i915_full_gpu_reset_in_progress() > is added, is this ok or directly use test_bit() ? The bit serves 2 functions: serialise error handling, and waking up waiters on the struct_mutex. That second function is exposed through the i915_reset_in_progress(), which is being altered here without explaining how the changed semantics impacts the current users or why it is necessary. imo we can do engine resets without struct_mutex. -Chris
On 27/07/2016 12:41, Chris Wilson wrote: > On Wed, Jul 27, 2016 at 12:16:04PM +0100, Arun Siluvery wrote: >> On 26/07/2016 22:52, Chris Wilson wrote: >>> A totally unexplained change. If it is because you think to want to break >>> waiters on struct_mutex, try again. >> So you don't want error->flags to include engine reset bits? >> ok, it should be possible to use engine_mask itself. >> >> Next patch separates engine reset and full gpu reset in separate >> functions, for branching purposes i915_full_gpu_reset_in_progress() >> is added, is this ok or directly use test_bit() ? > > The bit serves 2 functions: serialise error handling, and waking up > waiters on the struct_mutex. That second function is exposed through the > i915_reset_in_progress(), which is being altered here without explaining > how the changed semantics impacts the current users or why it is > necessary. imo we can do engine resets without struct_mutex. Yes, as you suggested I am not taking struct_mutex now but adding engine reset bits to error->flags was breaking the waiters. I will try to use engine_mask itself and keep error->flags unchanged. regards Arun > -Chris >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 11436e7..125fafa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1403,6 +1403,8 @@ struct i915_gpu_error { #define I915_RESET_IN_PROGRESS 0 #define I915_WEDGED (BITS_PER_LONG-1) + unsigned long engine_reset_count[I915_NUM_ENGINES]; + /** * Waitqueue to signal when a hang is detected. Used to for waiters * to release the struct_mutex for the reset to procede. @@ -3194,9 +3196,10 @@ i915_gem_find_active_request(struct intel_engine_cs *engine); void i915_gem_retire_requests(struct drm_i915_private *dev_priv); void i915_gem_retire_requests_ring(struct intel_engine_cs *engine); +/* indicates the progress of engine reset or full gpu reset */ static inline bool i915_reset_in_progress(struct i915_gpu_error *error) { - return test_bit(I915_RESET_IN_PROGRESS, &error->flags); + return unlikely(READ_ONCE(error->flags) & ~BIT(I915_WEDGED)); } static inline bool i915_terminally_wedged(struct i915_gpu_error *error) @@ -3214,6 +3217,17 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) return READ_ONCE(error->reset_count); } +static inline bool i915_full_gpu_reset_in_progress(struct i915_gpu_error *error) +{ + return test_bit(I915_RESET_IN_PROGRESS, &error->flags); +} + +static inline bool i915_engine_reset_in_progress(struct i915_gpu_error *error, + struct intel_engine_cs *engine) +{ + return test_bit(engine->id + 1, &error->flags); +} + void i915_gem_reset(struct drm_device *dev); bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); int __must_check i915_gem_init(struct drm_device *dev);
Now that we track reset progress using separate set of flags, update it to account for engine reset as well. A bit corresponding engine->id is set if reset is in progress for that engine. Bit0 is for full gpu reset. Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-)