diff mbox series

[v4,4/4] rcu: Support direct wake-up of synchronize_rcu() users

Message ID 20240104162510.72773-5-urezki@gmail.com (mailing list archive)
State New, archived
Headers show
Series Reduce synchronize_rcu() latency(v4) | expand

Commit Message

Uladzislau Rezki Jan. 4, 2024, 4:25 p.m. UTC
This patch introduces a small enhancement which allows a
direct wake-up of synchronize_rcu() callers. It occurs after
the completion of a grace period, thus by the gp-kthread.

The number of clients is limited by a hard-coded maximum
allowed threshold. The remaining part, if it still exists,
is deferred to the main worker.

Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 kernel/rcu/tree.c | 31 +++++++++++++++++++++++++++++--
 kernel/rcu/tree.h |  6 ++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

Comments

Zqiang Jan. 13, 2024, 9:19 a.m. UTC | #1
>
> This patch introduces a small enhancement which allows to do a
> direct wake-up of synchronize_rcu() callers. It occurs after a
> completion of grace period, thus by the gp-kthread.
>
> Number of clients is limited by the hard-coded maximum allowed
> threshold. The remaining part, if still exists is deferred to
> a main worker.
>
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  kernel/rcu/tree.c | 31 +++++++++++++++++++++++++++++--
>  kernel/rcu/tree.h |  6 ++++++
>  2 files changed, 35 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 88a47a6a658e..96fe9cc122ca 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1642,7 +1642,8 @@ static DECLARE_WORK(sr_normal_gp_cleanup, rcu_sr_normal_gp_cleanup_work);
>   */
>  static void rcu_sr_normal_gp_cleanup(void)
>  {
> -       struct llist_node *wait_tail;
> +       struct llist_node *wait_tail, *next, *rcu;
> +       int done = 0;
>
>         wait_tail = rcu_state.srs_wait_tail;
>         if (wait_tail == NULL)
> @@ -1650,12 +1651,38 @@ static void rcu_sr_normal_gp_cleanup(void)
>
>         rcu_state.srs_wait_tail = NULL;
>         ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_wait_tail);
> +       WARN_ON_ONCE(!rcu_sr_is_wait_head(wait_tail));
> +
> +       /*
> +        * Process (a) and (d) cases. See an illustration. Apart of
> +        * that it handles the scenario when all clients are done,
> +        * wait-head is released if last. The worker is not kicked.
> +        */
> +       llist_for_each_safe(rcu, next, wait_tail->next) {
> +               if (rcu_sr_is_wait_head(rcu)) {
> +                       if (!rcu->next) {
> +                               rcu_sr_put_wait_head(rcu);
> +                               wait_tail->next = NULL;
> +                       } else {
> +                               wait_tail->next = rcu;
> +                       }
> +
> +                       break;
> +               }
> +
> +               rcu_sr_normal_complete(rcu);
> +               // It can be last, update a next on this step.
> +               wait_tail->next = next;
> +
> +               if (++done == SR_MAX_USERS_WAKE_FROM_GP)
> +                       break;
> +       }
>
>         // concurrent sr_normal_gp_cleanup work might observe this update.
>         smp_store_release(&rcu_state.srs_done_tail, wait_tail);
>         ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
>
> -       if (wait_tail)
> +       if (wait_tail->next)
>                 queue_work(system_highpri_wq, &sr_normal_gp_cleanup);
>

I'm testing these patches :) , one question is as follows:
Can we use a (WQ_MEM_RECLAIM | WQ_HIGHPRI) type of workqueue to perform
the wake-up actions? This would avoid kworker creation failure under memory
pressure, which could cause the wake-up action to be delayed.

Thanks
Zqiang



>  }
>
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index 4c35d7d37647..5d8b71a1caec 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -316,6 +316,12 @@ do {                                                                       \
>         __set_current_state(TASK_RUNNING);                              \
>  } while (0)
>
> +/*
> + * A max threshold for synchronize_rcu() users which are
> + * awaken directly by the rcu_gp_kthread(). Left part is
> + * deferred to the main worker.
> + */
> +#define SR_MAX_USERS_WAKE_FROM_GP 5
>  #define SR_NORMAL_GP_WAIT_HEAD_MAX 5
>
>  struct sr_wait_node {
> --
> 2.39.2
>
>
Uladzislau Rezki Jan. 15, 2024, 10:46 a.m. UTC | #2
Hello, Zqiang.

> >
> >         // concurrent sr_normal_gp_cleanup work might observe this update.
> >         smp_store_release(&rcu_state.srs_done_tail, wait_tail);
> >         ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
> >
> > -       if (wait_tail)
> > +       if (wait_tail->next)
> >                 queue_work(system_highpri_wq, &sr_normal_gp_cleanup);
> >
> 
> I'm testing these patches :) , one question is as follows:
> Can we use (WQ_MEM_RECLAIM | WQ_HIGHPR)type of workqueue to perform
> wake-up actions? avoid kworker creation failure under memory pressure, causing
> the wake-up action to be delayed.
> 
I do not have any objections to doing that, so we can add it.

Thanks for testing this!

--
Uladzislau Rezki
Uladzislau Rezki Jan. 15, 2024, 10:57 a.m. UTC | #3
> Hello, Zqiang.
> 
> > >
> > >         // concurrent sr_normal_gp_cleanup work might observe this update.
> > >         smp_store_release(&rcu_state.srs_done_tail, wait_tail);
> > >         ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
> > >
> > > -       if (wait_tail)
> > > +       if (wait_tail->next)
> > >                 queue_work(system_highpri_wq, &sr_normal_gp_cleanup);
> > >
> > 
> > I'm testing these patches :) , one question is as follows:
> > Can we use (WQ_MEM_RECLAIM | WQ_HIGHPR)type of workqueue to perform
> > wake-up actions? avoid kworker creation failure under memory pressure, causing
> > the wake-up action to be delayed.
> > 
> I do not have any objections in not doing that, so we can add.
> 
> Thank for testing this!
> 
I forgot to ask: does your testing simulate a low-memory condition, so
that you see the failure you refer to? Or is it just a possible scenario?

Thanks!

--
Uladzislau Rezki
Zqiang Jan. 16, 2024, 6:19 a.m. UTC | #4
>
> > Hello, Zqiang.
> >
> > > >
> > > >         // concurrent sr_normal_gp_cleanup work might observe this update.
> > > >         smp_store_release(&rcu_state.srs_done_tail, wait_tail);
> > > >         ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
> > > >
> > > > -       if (wait_tail)
> > > > +       if (wait_tail->next)
> > > >                 queue_work(system_highpri_wq, &sr_normal_gp_cleanup);
> > > >
> > >
> > > I'm testing these patches :) , one question is as follows:
> > > Can we use (WQ_MEM_RECLAIM | WQ_HIGHPR)type of workqueue to perform
> > > wake-up actions? avoid kworker creation failure under memory pressure, causing
> > > the wake-up action to be delayed.
> > >
> > I do not have any objections in not doing that, so we can add.
> >
> > Thank for testing this!
> >
> I forgot to ask, is your testing simulates a low memory condition so
> you see the failure you refer to? Or it is just a possible scenario?
>

I'm not currently testing this feature in low-memory scenarios; I just
thought of it as a possible scenario. I will test it in a low-memory
scenario later and let you know if it happens :) .

Thanks
Zqiang

>
> Thanks!

>
> --
> Uladzislau Rezki
>
diff mbox series

Patch

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 88a47a6a658e..96fe9cc122ca 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1642,7 +1642,8 @@  static DECLARE_WORK(sr_normal_gp_cleanup, rcu_sr_normal_gp_cleanup_work);
  */
 static void rcu_sr_normal_gp_cleanup(void)
 {
-	struct llist_node *wait_tail;
+	struct llist_node *wait_tail, *next, *rcu;
+	int done = 0;
 
 	wait_tail = rcu_state.srs_wait_tail;
 	if (wait_tail == NULL)
@@ -1650,12 +1651,38 @@  static void rcu_sr_normal_gp_cleanup(void)
 
 	rcu_state.srs_wait_tail = NULL;
 	ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_wait_tail);
+	WARN_ON_ONCE(!rcu_sr_is_wait_head(wait_tail));
+
+	/*
+	 * Process (a) and (d) cases. See an illustration. Apart of
+	 * that it handles the scenario when all clients are done,
+	 * wait-head is released if last. The worker is not kicked.
+	 */
+	llist_for_each_safe(rcu, next, wait_tail->next) {
+		if (rcu_sr_is_wait_head(rcu)) {
+			if (!rcu->next) {
+				rcu_sr_put_wait_head(rcu);
+				wait_tail->next = NULL;
+			} else {
+				wait_tail->next = rcu;
+			}
+
+			break;
+		}
+
+		rcu_sr_normal_complete(rcu);
+		// It can be last, update a next on this step.
+		wait_tail->next = next;
+
+		if (++done == SR_MAX_USERS_WAKE_FROM_GP)
+			break;
+	}
 
 	// concurrent sr_normal_gp_cleanup work might observe this update.
 	smp_store_release(&rcu_state.srs_done_tail, wait_tail);
 	ASSERT_EXCLUSIVE_WRITER(rcu_state.srs_done_tail);
 
-	if (wait_tail)
+	if (wait_tail->next)
 		queue_work(system_highpri_wq, &sr_normal_gp_cleanup);
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4c35d7d37647..5d8b71a1caec 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -316,6 +316,12 @@  do {									\
 	__set_current_state(TASK_RUNNING);				\
 } while (0)
 
+/*
+ * A max threshold for synchronize_rcu() users which are
+ * awaken directly by the rcu_gp_kthread(). Left part is
+ * deferred to the main worker.
+ */
+#define SR_MAX_USERS_WAKE_FROM_GP 5
 #define SR_NORMAL_GP_WAIT_HEAD_MAX 5
 
 struct sr_wait_node {