diff mbox series

[1/2] drm/i915/guc: Improve clean up of busyness stats worker

Message ID 20230112015447.2430224-2-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show
Series Clean up some GuC related failure paths | expand

Commit Message

John Harrison Jan. 12, 2023, 1:54 a.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

The stats worker thread management was mis-matched between
enable/disable call sites. Fix those up. Also, abstract the cancel
code into a helper function rather than replicating in multiple places.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

Comments

Daniele Ceraolo Spurio Jan. 25, 2023, 12:55 a.m. UTC | #1
On 1/11/2023 5:54 PM, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> The stats worker thread management was mis-matched between
> enable/disable call sites. Fix those up. Also, abstract the cancel
> code into a helper function rather than replicating in multiple places.
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 ++++++++++++-------
>   1 file changed, 14 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index b436dd7f12e42..982364777d0c6 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1435,19 +1435,25 @@ static void guc_init_engine_stats(struct intel_guc *guc)
>   {
>   	struct intel_gt *gt = guc_to_gt(guc);
>   	intel_wakeref_t wakeref;
> +	int ret;
>   
>   	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
>   			 guc->timestamp.ping_delay);
>   
> -	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
> -		int ret = guc_action_enable_usage_stats(guc);
> +	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
> +		ret = guc_action_enable_usage_stats(guc);
>   
> -		if (ret)
> -			drm_err(&gt->i915->drm,
> -				"Failed to enable usage stats: %d!\n", ret);
> +	if (ret) {
> +		cancel_delayed_work_sync(&guc->timestamp.work);

Wouldn't it be easier to just call mod_delayed_work after the H2G if 
ret==0, instead of having it before and cancelling if we get a failure?

> +		drm_err(&gt->i915->drm, "Failed to enable usage stats: %d!\n", ret);
>   	}
>   }
>   
> +static void guc_park_engine_stats(struct intel_guc *guc)
> +{
> +	cancel_delayed_work_sync(&guc->timestamp.work);
> +}
> +

Now you're asymmetric with the park/unpark, because on the park side you 
have this wrapper, while on the unpark side you directly call 
mod_delayed_work.

Daniele

>   void intel_guc_busyness_park(struct intel_gt *gt)
>   {
>   	struct intel_guc *guc = &gt->uc.guc;
> @@ -1460,7 +1466,7 @@ void intel_guc_busyness_park(struct intel_gt *gt)
>   	 * and causes an unclaimed register access warning. Cancel the worker
>   	 * synchronously here.
>   	 */
> -	cancel_delayed_work_sync(&guc->timestamp.work);
> +	guc_park_engine_stats(guc);
>   
>   	/*
>   	 * Before parking, we should sample engine busyness stats if we need to.
> @@ -4409,11 +4415,11 @@ void intel_guc_submission_enable(struct intel_guc *guc)
>   	guc_init_global_schedule_policy(guc);
>   }
>   
> +/* Note: By the time we're here, GuC may have already been reset */
>   void intel_guc_submission_disable(struct intel_guc *guc)
>   {
>   	struct intel_gt *gt = guc_to_gt(guc);
> -
> -	/* Note: By the time we're here, GuC may have already been reset */
> +	guc_park_engine_stats(guc);
>   
>   	/* Disable and route to host */
>   	if (GRAPHICS_VER(gt->i915) >= 12)
John Harrison Feb. 17, 2023, 8:13 p.m. UTC | #2
On 1/24/2023 16:55, Ceraolo Spurio, Daniele wrote:
> On 1/11/2023 5:54 PM, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> The stats worker thread management was mis-matched between
>> enable/disable call sites. Fix those up. Also, abstract the cancel
>> code into a helper function rather than replicating in multiple places.
>>
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> ---
>>   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 ++++++++++++-------
>>   1 file changed, 14 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> index b436dd7f12e42..982364777d0c6 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> @@ -1435,19 +1435,25 @@ static void guc_init_engine_stats(struct 
>> intel_guc *guc)
>>   {
>>       struct intel_gt *gt = guc_to_gt(guc);
>>       intel_wakeref_t wakeref;
>> +    int ret;
>>         mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
>>                guc->timestamp.ping_delay);
>>   -    with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
>> -        int ret = guc_action_enable_usage_stats(guc);
>> +    with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
>> +        ret = guc_action_enable_usage_stats(guc);
>>   -        if (ret)
>> -            drm_err(&gt->i915->drm,
>> -                "Failed to enable usage stats: %d!\n", ret);
>> +    if (ret) {
>> +        cancel_delayed_work_sync(&guc->timestamp.work);
>
> Wouldn't it be easier to just call mod_delayed_work after the H2G if 
> ret==0, instead of having it before and cancelling if we get a failure?
>
>> +        drm_err(&gt->i915->drm, "Failed to enable usage stats: 
>> %d!\n", ret);
>>       }
>>   }
>>   +static void guc_park_engine_stats(struct intel_guc *guc)
>> +{
>> +    cancel_delayed_work_sync(&guc->timestamp.work);
>> +}
>> +
>
> Now you're asymmetric with the park/unpark, because on the park side 
> you have this wrapper, while on the unpark side you directly call 
> mod_delayed_work.
The point is that submission disable needs to also cancel the worker. 
But calling the actual busyness park function seems excessive - no need 
to do all the updating if we are about to reset the GuC or unload the 
driver.

Thinking about it more, calling this park_engine_stats is actually wrong 
given that engine stats and busyness are the same thing, so basically we 
would have two functions with the same name where one is a subset of the 
other. Is it simpler (and safe?) to just call the full busyness park 
from submission_disable? Or is it better to have a 
cancel/enable_busyness_worker() pair for all instances of turning the 
worker on or off?

John.


>
> Daniele
>
>>   void intel_guc_busyness_park(struct intel_gt *gt)
>>   {
>>       struct intel_guc *guc = &gt->uc.guc;
>> @@ -1460,7 +1466,7 @@ void intel_guc_busyness_park(struct intel_gt *gt)
>>        * and causes an unclaimed register access warning. Cancel the 
>> worker
>>        * synchronously here.
>>        */
>> -    cancel_delayed_work_sync(&guc->timestamp.work);
>> +    guc_park_engine_stats(guc);
>>         /*
>>        * Before parking, we should sample engine busyness stats if we 
>> need to.
>> @@ -4409,11 +4415,11 @@ void intel_guc_submission_enable(struct 
>> intel_guc *guc)
>>       guc_init_global_schedule_policy(guc);
>>   }
>>   +/* Note: By the time we're here, GuC may have already been reset */
>>   void intel_guc_submission_disable(struct intel_guc *guc)
>>   {
>>       struct intel_gt *gt = guc_to_gt(guc);
>> -
>> -    /* Note: By the time we're here, GuC may have already been reset */
>> +    guc_park_engine_stats(guc);
>>         /* Disable and route to host */
>>       if (GRAPHICS_VER(gt->i915) >= 12)
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index b436dd7f12e42..982364777d0c6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1435,19 +1435,25 @@  static void guc_init_engine_stats(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
 	intel_wakeref_t wakeref;
+	int ret;
 
 	mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
 			 guc->timestamp.ping_delay);
 
-	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
-		int ret = guc_action_enable_usage_stats(guc);
+	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+		ret = guc_action_enable_usage_stats(guc);
 
-		if (ret)
-			drm_err(&gt->i915->drm,
-				"Failed to enable usage stats: %d!\n", ret);
+	if (ret) {
+		cancel_delayed_work_sync(&guc->timestamp.work);
+		drm_err(&gt->i915->drm, "Failed to enable usage stats: %d!\n", ret);
 	}
 }
 
+static void guc_park_engine_stats(struct intel_guc *guc)
+{
+	cancel_delayed_work_sync(&guc->timestamp.work);
+}
+
 void intel_guc_busyness_park(struct intel_gt *gt)
 {
 	struct intel_guc *guc = &gt->uc.guc;
@@ -1460,7 +1466,7 @@  void intel_guc_busyness_park(struct intel_gt *gt)
 	 * and causes an unclaimed register access warning. Cancel the worker
 	 * synchronously here.
 	 */
-	cancel_delayed_work_sync(&guc->timestamp.work);
+	guc_park_engine_stats(guc);
 
 	/*
 	 * Before parking, we should sample engine busyness stats if we need to.
@@ -4409,11 +4415,11 @@  void intel_guc_submission_enable(struct intel_guc *guc)
 	guc_init_global_schedule_policy(guc);
 }
 
+/* Note: By the time we're here, GuC may have already been reset */
 void intel_guc_submission_disable(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
-
-	/* Note: By the time we're here, GuC may have already been reset */
+	guc_park_engine_stats(guc);
 
 	/* Disable and route to host */
 	if (GRAPHICS_VER(gt->i915) >= 12)