diff mbox series

remoteproc: Use unbounded/high priority workqueue for recovery work

Message ID 1642620644-19297-1-git-send-email-quic_mojha@quicinc.com (mailing list archive)
State Changes Requested
Headers show
Series remoteproc: Use unbounded/high priority workqueue for recovery work | expand

Commit Message

Mukesh Ojha Jan. 19, 2022, 7:30 p.m. UTC
There can be a scenario where the core on which rproc recovery is
queued is heavily loaded (n tasks affined to it). As a result, the
recovery takes a number of seconds to complete.

If we make this queue unbound and move it to the high priority worker
pool, then this work is more likely to finish in less time.

Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
---
 drivers/remoteproc/remoteproc_core.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

Comments

Mukesh Ojha Jan. 24, 2022, 2:04 p.m. UTC | #1
+linux-arm-msm

Thanks,
-Mukesh

On 1/20/2022 1:00 AM, Mukesh Ojha wrote:
> There could be a scenario where there is too much load(n number
> of tasks which is affined) on a core on which rproc recovery
> is queued. Due to which, it takes number of seconds to complete
> the recovery.
>
> If we make this queue unbounded and move it to high priority worker
> pool then this work can be attempted to finished in less time.
>
> Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
> ---
>   drivers/remoteproc/remoteproc_core.c | 14 ++++++++++++--
>   1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index 69f51ac..efb6316 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -59,6 +59,7 @@ static int rproc_release_carveout(struct rproc *rproc,
>   
>   /* Unique indices for remoteproc devices */
>   static DEFINE_IDA(rproc_dev_index);
> +static struct workqueue_struct *rproc_recovery_wq;
>   
>   static const char * const rproc_crash_names[] = {
>   	[RPROC_MMUFAULT]	= "mmufault",
> @@ -2752,8 +2753,10 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
>   	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>   		rproc->name, rproc_crash_to_string(type));
>   
> -	/* Have a worker handle the error; ensure system is not suspended */
> -	queue_work(system_freezable_wq, &rproc->crash_handler);
> +	if (rproc_recovery_wq)
> +		queue_work(rproc_recovery_wq, &rproc->crash_handler);
> +	else
> +		queue_work(system_freezable_wq, &rproc->crash_handler);
>   }
>   EXPORT_SYMBOL(rproc_report_crash);
>   
> @@ -2802,6 +2805,11 @@ static void __exit rproc_exit_panic(void)
>   
>   static int __init remoteproc_init(void)
>   {
> +	rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq", WQ_UNBOUND |
> +				WQ_HIGHPRI | WQ_FREEZABLE, 0);
> +	if (!rproc_recovery_wq)
> +		pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
> +
>   	rproc_init_sysfs();
>   	rproc_init_debugfs();
>   	rproc_init_cdev();
> @@ -2818,6 +2826,8 @@ static void __exit remoteproc_exit(void)
>   	rproc_exit_panic();
>   	rproc_exit_debugfs();
>   	rproc_exit_sysfs();
> +	if (rproc_recovery_wq)
> +		destroy_workqueue(rproc_recovery_wq);
>   }
>   module_exit(remoteproc_exit);
>
Bjorn Andersson March 11, 2022, 9:01 p.m. UTC | #2
On Wed 19 Jan 13:30 CST 2022, Mukesh Ojha wrote:

> There could be a scenario where there is too much load(n number
> of tasks which is affined) on a core on which rproc recovery
> is queued. Due to which, it takes number of seconds to complete
> the recovery.
> 
> If we make this queue unbounded and move it to high priority worker
> pool then this work can be attempted to finished in less time.

I unfortunately find this reasoning for adding WQ_HIGHPRI rather
speculative. Please describe a concrete case that warrants the new
work queue to be high priority.

What is "number of seconds", what is "less time" and why is it more
important to recover some remote processor than whatever else the system
is busy doing?

Thanks,
Bjorn

> 
> Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
> ---
>  drivers/remoteproc/remoteproc_core.c | 14 ++++++++++++--
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index 69f51ac..efb6316 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -59,6 +59,7 @@ static int rproc_release_carveout(struct rproc *rproc,
>  
>  /* Unique indices for remoteproc devices */
>  static DEFINE_IDA(rproc_dev_index);
> +static struct workqueue_struct *rproc_recovery_wq;
>  
>  static const char * const rproc_crash_names[] = {
>  	[RPROC_MMUFAULT]	= "mmufault",
> @@ -2752,8 +2753,10 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
>  	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>  		rproc->name, rproc_crash_to_string(type));
>  
> -	/* Have a worker handle the error; ensure system is not suspended */
> -	queue_work(system_freezable_wq, &rproc->crash_handler);
> +	if (rproc_recovery_wq)
> +		queue_work(rproc_recovery_wq, &rproc->crash_handler);
> +	else
> +		queue_work(system_freezable_wq, &rproc->crash_handler);
>  }
>  EXPORT_SYMBOL(rproc_report_crash);
>  
> @@ -2802,6 +2805,11 @@ static void __exit rproc_exit_panic(void)
>  
>  static int __init remoteproc_init(void)
>  {
> +	rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq", WQ_UNBOUND |
> +				WQ_HIGHPRI | WQ_FREEZABLE, 0);
> +	if (!rproc_recovery_wq)
> +		pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
> +
>  	rproc_init_sysfs();
>  	rproc_init_debugfs();
>  	rproc_init_cdev();
> @@ -2818,6 +2826,8 @@ static void __exit remoteproc_exit(void)
>  	rproc_exit_panic();
>  	rproc_exit_debugfs();
>  	rproc_exit_sysfs();
> +	if (rproc_recovery_wq)
> +		destroy_workqueue(rproc_recovery_wq);
>  }
>  module_exit(remoteproc_exit);
>  
> -- 
> 2.7.4
>
Mukesh Ojha March 29, 2022, 8:23 a.m. UTC | #3
On 3/12/2022 2:31 AM, Bjorn Andersson wrote:
> On Wed 19 Jan 13:30 CST 2022, Mukesh Ojha wrote:
>
>> There could be a scenario where there is too much load(n number
>> of tasks which is affined) on a core on which rproc recovery
>> is queued. Due to which, it takes number of seconds to complete
>> the recovery.
>>
>> If we make this queue unbounded and move it to high priority worker
>> pool then this work can be attempted to finished in less time.
> I unfortunately find this reasoning for adding WQ_HIGHPRI rather
> speculative. Please describe a concrete case that warrants the new
> work queue to be high priority.
>
> What is "number of seconds", what is "less time" and why is it more
> important to recover some remote processor than whatever else the system
> is busy doing?

Meanwhile, I will check whether making it unbound alone helps us in our
low latency use cases.
So, does it make sense to make it WQ_UNBOUND | WQ_FREEZABLE?

-Mukesh

> Thanks,
> Bjorn
>
>> Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
>> ---
>>   drivers/remoteproc/remoteproc_core.c | 14 ++++++++++++--
>>   1 file changed, 12 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
>> index 69f51ac..efb6316 100644
>> --- a/drivers/remoteproc/remoteproc_core.c
>> +++ b/drivers/remoteproc/remoteproc_core.c
>> @@ -59,6 +59,7 @@ static int rproc_release_carveout(struct rproc *rproc,
>>   
>>   /* Unique indices for remoteproc devices */
>>   static DEFINE_IDA(rproc_dev_index);
>> +static struct workqueue_struct *rproc_recovery_wq;
>>   
>>   static const char * const rproc_crash_names[] = {
>>   	[RPROC_MMUFAULT]	= "mmufault",
>> @@ -2752,8 +2753,10 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
>>   	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>>   		rproc->name, rproc_crash_to_string(type));
>>   
>> -	/* Have a worker handle the error; ensure system is not suspended */
>> -	queue_work(system_freezable_wq, &rproc->crash_handler);
>> +	if (rproc_recovery_wq)
>> +		queue_work(rproc_recovery_wq, &rproc->crash_handler);
>> +	else
>> +		queue_work(system_freezable_wq, &rproc->crash_handler);
>>   }
>>   EXPORT_SYMBOL(rproc_report_crash);
>>   
>> @@ -2802,6 +2805,11 @@ static void __exit rproc_exit_panic(void)
>>   
>>   static int __init remoteproc_init(void)
>>   {
>> +	rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq", WQ_UNBOUND |
>> +				WQ_HIGHPRI | WQ_FREEZABLE, 0);
>> +	if (!rproc_recovery_wq)
>> +		pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
>> +
>>   	rproc_init_sysfs();
>>   	rproc_init_debugfs();
>>   	rproc_init_cdev();
>> @@ -2818,6 +2826,8 @@ static void __exit remoteproc_exit(void)
>>   	rproc_exit_panic();
>>   	rproc_exit_debugfs();
>>   	rproc_exit_sysfs();
>> +	if (rproc_recovery_wq)
>> +		destroy_workqueue(rproc_recovery_wq);
>>   }
>>   module_exit(remoteproc_exit);
>>   
>> -- 
>> 2.7.4
>>
diff mbox series

Patch

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 69f51ac..efb6316 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -59,6 +59,7 @@  static int rproc_release_carveout(struct rproc *rproc,
 
 /* Unique indices for remoteproc devices */
 static DEFINE_IDA(rproc_dev_index);
+static struct workqueue_struct *rproc_recovery_wq;
 
 static const char * const rproc_crash_names[] = {
 	[RPROC_MMUFAULT]	= "mmufault",
@@ -2752,8 +2753,10 @@  void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
 	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
 		rproc->name, rproc_crash_to_string(type));
 
-	/* Have a worker handle the error; ensure system is not suspended */
-	queue_work(system_freezable_wq, &rproc->crash_handler);
+	if (rproc_recovery_wq)
+		queue_work(rproc_recovery_wq, &rproc->crash_handler);
+	else
+		queue_work(system_freezable_wq, &rproc->crash_handler);
 }
 EXPORT_SYMBOL(rproc_report_crash);
 
@@ -2802,6 +2805,11 @@  static void __exit rproc_exit_panic(void)
 
 static int __init remoteproc_init(void)
 {
+	rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq", WQ_UNBOUND |
+				WQ_HIGHPRI | WQ_FREEZABLE, 0);
+	if (!rproc_recovery_wq)
+		pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
+
 	rproc_init_sysfs();
 	rproc_init_debugfs();
 	rproc_init_cdev();
@@ -2818,6 +2826,8 @@  static void __exit remoteproc_exit(void)
 	rproc_exit_panic();
 	rproc_exit_debugfs();
 	rproc_exit_sysfs();
+	if (rproc_recovery_wq)
+		destroy_workqueue(rproc_recovery_wq);
 }
 module_exit(remoteproc_exit);