diff mbox

[03/11] drm/i915/tdr: Update reset_in_progress to account for engine reset

Message ID 1469551257-26803-4-git-send-email-arun.siluvery@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

arun.siluvery@linux.intel.com July 26, 2016, 4:40 p.m. UTC
Now that we track reset progress using separate set of flags, update it to
account for engine reset as well.

A bit corresponding engine->id is set if reset is in progress for that
engine. Bit0 is for full gpu reset.

Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

Comments

Chris Wilson July 26, 2016, 9:52 p.m. UTC | #1
On Tue, Jul 26, 2016 at 05:40:49PM +0100, Arun Siluvery wrote:
> Now that we track reset progress using separate set of flags, update it to
> account for engine reset as well.
> 
> A bit corresponding engine->id is set if reset is in progress for that
> engine. Bit0 is for full gpu reset.
> 
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h | 16 +++++++++++++++-
>  1 file changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 11436e7..125fafa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1403,6 +1403,8 @@ struct i915_gpu_error {
>  #define I915_RESET_IN_PROGRESS	0
>  #define I915_WEDGED		(BITS_PER_LONG-1)
>  
> +	unsigned long engine_reset_count[I915_NUM_ENGINES];
> +
>  	/**
>  	 * Waitqueue to signal when a hang is detected. Used to for waiters
>  	 * to release the struct_mutex for the reset to procede.
> @@ -3194,9 +3196,10 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);
>  void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
>  void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
>  
> +/* indicates the progress of engine reset or full gpu reset */
>  static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
>  {
> -	return test_bit(I915_RESET_IN_PROGRESS, &error->flags);
> +	return unlikely(READ_ONCE(error->flags) & ~BIT(I915_WEDGED));
>  }
>  
>  static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
> @@ -3214,6 +3217,17 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
>  	return READ_ONCE(error->reset_count);
>  }
>  
> +static inline bool i915_full_gpu_reset_in_progress(struct i915_gpu_error *error)
> +{
> +	return test_bit(I915_RESET_IN_PROGRESS, &error->flags);
> +}
> +
> +static inline bool i915_engine_reset_in_progress(struct i915_gpu_error *error,
> +						 struct intel_engine_cs *engine)
> +{
> +	return test_bit(engine->id + 1, &error->flags);
> +}

?

A totally unexplained change. If it is because you think to want to break
waiters on struct_mutex, try again.
-Chris
arun.siluvery@linux.intel.com July 27, 2016, 11:16 a.m. UTC | #2
On 26/07/2016 22:52, Chris Wilson wrote:
> On Tue, Jul 26, 2016 at 05:40:49PM +0100, Arun Siluvery wrote:
>> Now that we track reset progress using separate set of flags, update it to
>> account for engine reset as well.
>>
>> A bit corresponding engine->id is set if reset is in progress for that
>> engine. Bit0 is for full gpu reset.
>>
>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h | 16 +++++++++++++++-
>>   1 file changed, 15 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 11436e7..125fafa 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1403,6 +1403,8 @@ struct i915_gpu_error {
>>   #define I915_RESET_IN_PROGRESS	0
>>   #define I915_WEDGED		(BITS_PER_LONG-1)
>>
>> +	unsigned long engine_reset_count[I915_NUM_ENGINES];
>> +
>>   	/**
>>   	 * Waitqueue to signal when a hang is detected. Used to for waiters
>>   	 * to release the struct_mutex for the reset to procede.
>> @@ -3194,9 +3196,10 @@ i915_gem_find_active_request(struct intel_engine_cs *engine);
>>   void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
>>   void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
>>
>> +/* indicates the progress of engine reset or full gpu reset */
>>   static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
>>   {
>> -	return test_bit(I915_RESET_IN_PROGRESS, &error->flags);
>> +	return unlikely(READ_ONCE(error->flags) & ~BIT(I915_WEDGED));
>>   }
>>
>>   static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
>> @@ -3214,6 +3217,17 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
>>   	return READ_ONCE(error->reset_count);
>>   }
>>
>> +static inline bool i915_full_gpu_reset_in_progress(struct i915_gpu_error *error)
>> +{
>> +	return test_bit(I915_RESET_IN_PROGRESS, &error->flags);
>> +}
>> +
>> +static inline bool i915_engine_reset_in_progress(struct i915_gpu_error *error,
>> +						 struct intel_engine_cs *engine)
>> +{
>> +	return test_bit(engine->id + 1, &error->flags);
>> +}
>
> ?
>
> A totally unexplained change. If it is because you think to want to break
> waiters on struct_mutex, try again.
So you don't want error->flags to include engine reset bits?
ok, it should be possible to use engine_mask itself.

Next patch separates engine reset and full gpu reset in separate 
functions, for branching purposes i915_full_gpu_reset_in_progress() is 
added, is this ok or directly use test_bit() ?

regards
Arun


> -Chris
>
Chris Wilson July 27, 2016, 11:41 a.m. UTC | #3
On Wed, Jul 27, 2016 at 12:16:04PM +0100, Arun Siluvery wrote:
> On 26/07/2016 22:52, Chris Wilson wrote:
> >A totally unexplained change. If it is because you think to want to break
> >waiters on struct_mutex, try again.
> So you don't want error->flags to include engine reset bits?
> ok, it should be possible to use engine_mask itself.
> 
> Next patch separates engine reset and full gpu reset in separate
> functions, for branching purposes i915_full_gpu_reset_in_progress()
> is added, is this ok or directly use test_bit() ?

The bit serves 2 functions: serialise error handling, and waking up
waiters on the struct_mutex. That second function is exposed through the
i915_reset_in_progress(), which is being altered here without explaining
how the changed semantics impacts the current users or why it is
necessary. imo we can do engine resets without struct_mutex.
-Chris
arun.siluvery@linux.intel.com July 27, 2016, 11:52 a.m. UTC | #4
On 27/07/2016 12:41, Chris Wilson wrote:
> On Wed, Jul 27, 2016 at 12:16:04PM +0100, Arun Siluvery wrote:
>> On 26/07/2016 22:52, Chris Wilson wrote:
>>> A totally unexplained change. If it is because you think to want to break
>>> waiters on struct_mutex, try again.
>> So you don't want error->flags to include engine reset bits?
>> ok, it should be possible to use engine_mask itself.
>>
>> Next patch separates engine reset and full gpu reset in separate
>> functions, for branching purposes i915_full_gpu_reset_in_progress()
>> is added, is this ok or directly use test_bit() ?
>
> The bit serves 2 functions: serialise error handling, and waking up
> waiters on the struct_mutex. That second function is exposed through the
> i915_reset_in_progress(), which is being altered here without explaining
> how the changed semantics impacts the current users or why it is
> necessary. imo we can do engine resets without struct_mutex.
Yes, as you suggested I am not taking struct_mutex now but adding engine 
reset bits to error->flags was breaking the waiters. I will try to use 
engine_mask itself and keep error->flags unchanged.

regards
Arun

> -Chris
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 11436e7..125fafa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1403,6 +1403,8 @@  struct i915_gpu_error {
 #define I915_RESET_IN_PROGRESS	0
 #define I915_WEDGED		(BITS_PER_LONG-1)
 
+	unsigned long engine_reset_count[I915_NUM_ENGINES];
+
 	/**
 	 * Waitqueue to signal when a hang is detected. Used to for waiters
 	 * to release the struct_mutex for the reset to procede.
@@ -3194,9 +3196,10 @@  i915_gem_find_active_request(struct intel_engine_cs *engine);
 void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
 void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
 
+/* indicates the progress of engine reset or full gpu reset */
 static inline bool i915_reset_in_progress(struct i915_gpu_error *error)
 {
-	return test_bit(I915_RESET_IN_PROGRESS, &error->flags);
+	return unlikely(READ_ONCE(error->flags) & ~BIT(I915_WEDGED));
 }
 
 static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
@@ -3214,6 +3217,17 @@  static inline u32 i915_reset_count(struct i915_gpu_error *error)
 	return READ_ONCE(error->reset_count);
 }
 
+static inline bool i915_full_gpu_reset_in_progress(struct i915_gpu_error *error)
+{
+	return test_bit(I915_RESET_IN_PROGRESS, &error->flags);
+}
+
+static inline bool i915_engine_reset_in_progress(struct i915_gpu_error *error,
+						 struct intel_engine_cs *engine)
+{
+	return test_bit(engine->id + 1, &error->flags);
+}
+
 void i915_gem_reset(struct drm_device *dev);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_init(struct drm_device *dev);