diff mbox series

[v3] remoteproc: Use unbounded workqueue for recovery work

Message ID 1649694620-10070-1-git-send-email-quic_mojha@quicinc.com (mailing list archive)
State Superseded
Headers show
Series [v3] remoteproc: Use unbounded workqueue for recovery work | expand

Commit Message

Mukesh Ojha April 11, 2022, 4:30 p.m. UTC
There could be a scenario where a core is heavily loaded (for
example, many tasks are affined to it), or where multiple rproc
subsystems go into recovery and their recovery work is queued to
the same core. In that case, even though the subsystems are
independent, their recovery is delayed if the recovery work of
one subsystem takes a long time to complete.

If we make this queue unbounded, the recovery work can be picked
up on any CPU. This patch tries to address this.

Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
---
Changes in v3:
  - Add a fallback to the earlier path in case creation of the
    recovery wq fails.

Changes in v2:
  - Removed WQ_HIGHPRI.
  - Updated commit text.

 drivers/remoteproc/remoteproc_core.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

Comments

Mukesh Ojha April 12, 2022, 1:55 p.m. UTC | #1
Hi,

Could this patch be considered? Please advise.

Thanks,
-Mukesh

On 4/11/2022 10:00 PM, Mukesh Ojha wrote:
> There could be a scenario where there is too much load on a core
> (n number of tasks which is affined) or in a case when multiple
> rproc subsystem is going for a recovery and they queued recovery
> work to one core so even though subsystem are independent there
> recovery will be delayed if one of the subsystem recovery work
> is taking more time in completing.
>
> If we make this queue unbounded, the recovery work could be picked
> on any cpu. This patch try to address this.
>
> Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
> ---
> Changes in v3:
>    - Add fallback option to go back to earlier path incase recovery wq
>      creation fails.
>
> Changes in v2:
>    - Removed WQ_HIGHPRI.
>    - Updated commit text.
>
>   drivers/remoteproc/remoteproc_core.c | 15 +++++++++++++--
>   1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index c510125..6446c84 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -59,6 +59,7 @@ static int rproc_release_carveout(struct rproc *rproc,
>   
>   /* Unique indices for remoteproc devices */
>   static DEFINE_IDA(rproc_dev_index);
> +static struct workqueue_struct *rproc_recovery_wq;
>   
>   static const char * const rproc_crash_names[] = {
>   	[RPROC_MMUFAULT]	= "mmufault",
> @@ -2755,8 +2756,11 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
>   	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>   		rproc->name, rproc_crash_to_string(type));
>   
> -	/* Have a worker handle the error; ensure system is not suspended */
> -	queue_work(system_freezable_wq, &rproc->crash_handler);
> +	if (rproc_recovery_wq)
> +		queue_work(rproc_recovery_wq, &rproc->crash_handler);
> +	else
> +		queue_work(system_freezable_wq, &rproc->crash_handler);
> +
>   }
>   EXPORT_SYMBOL(rproc_report_crash);
>   
> @@ -2805,6 +2809,11 @@ static void __exit rproc_exit_panic(void)
>   
>   static int __init remoteproc_init(void)
>   {
> +	rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq",
> +						WQ_UNBOUND | WQ_FREEZABLE, 0);
> +	if (!rproc_recovery_wq)
> +		pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
> +
>   	rproc_init_sysfs();
>   	rproc_init_debugfs();
>   	rproc_init_cdev();
> @@ -2821,6 +2830,8 @@ static void __exit remoteproc_exit(void)
>   	rproc_exit_panic();
>   	rproc_exit_debugfs();
>   	rproc_exit_sysfs();
> +	if (rproc_recovery_wq)
> +		destroy_workqueue(rproc_recovery_wq);
>   }
>   module_exit(remoteproc_exit);
>
Saravana Kannan April 15, 2022, 12:44 a.m. UTC | #2
Mukesh Ojha <quic_mojha@quicinc.com> wrote:
> There could be a scenario where there is too much load on a core
> (n number of tasks which is affined) or in a case when multiple
> rproc subsystem is going for a recovery and they queued recovery
> work to one core so even though subsystem are independent there
> recovery will be delayed if one of the subsystem recovery work
> is taking more time in completing.
> 
> If we make this queue unbounded, the recovery work could be picked
> on any cpu. This patch try to address this.
> 
> Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
> ---
> Changes in v3:
>   - Add fallback option to go back to earlier path incase recovery wq
>     creation fails.
> 
> Changes in v2:
>   - Removed WQ_HIGHPRI.
>   - Updated commit text.
> 
>  drivers/remoteproc/remoteproc_core.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
> index c510125..6446c84 100644
> --- a/drivers/remoteproc/remoteproc_core.c
> +++ b/drivers/remoteproc/remoteproc_core.c
> @@ -59,6 +59,7 @@ static int rproc_release_carveout(struct rproc *rproc,
>  
>  /* Unique indices for remoteproc devices */
>  static DEFINE_IDA(rproc_dev_index);
> +static struct workqueue_struct *rproc_recovery_wq;
>  
>  static const char * const rproc_crash_names[] = {
>  	[RPROC_MMUFAULT]	= "mmufault",
> @@ -2755,8 +2756,11 @@ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
>  	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
>  		rproc->name, rproc_crash_to_string(type));
>  
> -	/* Have a worker handle the error; ensure system is not suspended */
> -	queue_work(system_freezable_wq, &rproc->crash_handler);
> +	if (rproc_recovery_wq)
> +		queue_work(rproc_recovery_wq, &rproc->crash_handler);
> +	else
> +		queue_work(system_freezable_wq, &rproc->crash_handler);

This is unnecessarily complicated. If you can't create a workqueue you
have bigger problems with the system. Just stick with the new rproc
workqueue.

> +
>  }
>  EXPORT_SYMBOL(rproc_report_crash);
>  
> @@ -2805,6 +2809,11 @@ static void __exit rproc_exit_panic(void)
>  
>  static int __init remoteproc_init(void)
>  {
> +	rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq",
> +						WQ_UNBOUND | WQ_FREEZABLE, 0);
> +	if (!rproc_recovery_wq)
> +		pr_err("remoteproc: creation of rproc_recovery_wq failed\n");

Fail the init if you can't create a workqueue.

> +
>  	rproc_init_sysfs();
>  	rproc_init_debugfs();
>  	rproc_init_cdev();
> @@ -2821,6 +2830,8 @@ static void __exit remoteproc_exit(void)
>  	rproc_exit_panic();
>  	rproc_exit_debugfs();
>  	rproc_exit_sysfs();
> +	if (rproc_recovery_wq)
> +		destroy_workqueue(rproc_recovery_wq);

Will need a fix here too.

-Saravana

>  }
>  module_exit(remoteproc_exit);
>  
> -- 
> 2.7.4
diff mbox series

Patch

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index c510125..6446c84 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -59,6 +59,7 @@  static int rproc_release_carveout(struct rproc *rproc,
 
 /* Unique indices for remoteproc devices */
 static DEFINE_IDA(rproc_dev_index);
+static struct workqueue_struct *rproc_recovery_wq;
 
 static const char * const rproc_crash_names[] = {
 	[RPROC_MMUFAULT]	= "mmufault",
@@ -2755,8 +2756,11 @@  void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
 	dev_err(&rproc->dev, "crash detected in %s: type %s\n",
 		rproc->name, rproc_crash_to_string(type));
 
-	/* Have a worker handle the error; ensure system is not suspended */
-	queue_work(system_freezable_wq, &rproc->crash_handler);
+	if (rproc_recovery_wq)
+		queue_work(rproc_recovery_wq, &rproc->crash_handler);
+	else
+		queue_work(system_freezable_wq, &rproc->crash_handler);
+
 }
 EXPORT_SYMBOL(rproc_report_crash);
 
@@ -2805,6 +2809,11 @@  static void __exit rproc_exit_panic(void)
 
 static int __init remoteproc_init(void)
 {
+	rproc_recovery_wq = alloc_workqueue("rproc_recovery_wq",
+						WQ_UNBOUND | WQ_FREEZABLE, 0);
+	if (!rproc_recovery_wq)
+		pr_err("remoteproc: creation of rproc_recovery_wq failed\n");
+
 	rproc_init_sysfs();
 	rproc_init_debugfs();
 	rproc_init_cdev();
@@ -2821,6 +2830,8 @@  static void __exit remoteproc_exit(void)
 	rproc_exit_panic();
 	rproc_exit_debugfs();
 	rproc_exit_sysfs();
+	if (rproc_recovery_wq)
+		destroy_workqueue(rproc_recovery_wq);
 }
 module_exit(remoteproc_exit);