diff mbox series

[v2,3/3] rcu: Use _full() API to debug synchronize_rcu()

Message ID 20250224133659.879074-3-urezki@gmail.com (mailing list archive)
State Superseded
Headers show
Series [v2,1/3] rcutorture: Allow a negative value for nfakewriters | expand

Commit Message

Uladzislau Rezki Feb. 24, 2025, 1:36 p.m. UTC
Switch to using the get_state_synchronize_rcu_full() and
poll_state_synchronize_rcu_full() pair to debug a normal
synchronize_rcu() call.

Using only the non-_full() APIs to check whether a grace period
has passed might lead to a false kernel splat.

Link: https://lore.kernel.org/lkml/Z5ikQeVmVdsWQrdD@pc636/T/
Fixes: 988f569ae041 ("rcu: Reduce synchronize_rcu() latency")
Reported-by: cheung wall <zzqq0103.hey@gmail.com>
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
---
 include/linux/rcupdate_wait.h | 4 ++++
 kernel/rcu/tree.c             | 8 +++-----
 2 files changed, 7 insertions(+), 5 deletions(-)

Comments

Paul E. McKenney Feb. 24, 2025, 7:06 p.m. UTC | #1
On Mon, Feb 24, 2025 at 02:36:59PM +0100, Uladzislau Rezki (Sony) wrote:
> Switch for using of get_state_synchronize_rcu_full() and
> poll_state_synchronize_rcu_full() pair for debug a normal
> synchronize_rcu() call.
> 
> Just using "not" full APIs to identify if a grace period
> is passed or not might lead to a false kernel splat.
> 
> Link: https://lore.kernel.org/lkml/Z5ikQeVmVdsWQrdD@pc636/T/
> Fixes: 988f569ae041 ("rcu: Reduce synchronize_rcu() latency")
> Reported-by: cheung wall <zzqq0103.hey@gmail.com>
> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> ---
>  include/linux/rcupdate_wait.h | 4 ++++
>  kernel/rcu/tree.c             | 8 +++-----
>  2 files changed, 7 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
> index f9bed3d3f78d..a16fc2a9a7d7 100644
> --- a/include/linux/rcupdate_wait.h
> +++ b/include/linux/rcupdate_wait.h
> @@ -16,6 +16,10 @@
>  struct rcu_synchronize {
>  	struct rcu_head head;
>  	struct completion completion;
> +#ifdef CONFIG_PROVE_RCU
> +	/* This is for testing. */
> +	struct rcu_gp_oldstate oldstate;
> +#endif

This causes the build to fail on TREE01.  One way to make the build
succeed is to remove the #ifdefs above.  Another way would be to add
#ifdefs to the WARN_ONCE() below.  I suspect that removing the #ifdefs
is best, at least until such time as people start passing many tens
of SRCU instances to synchronize_rcu_mult() or some such (which seems
quite unlikely).

Thoughts?

							Thanx, Paul

>  };
>  void wakeme_after_rcu(struct rcu_head *head);
>  
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index 8625f616c65a..48384fa2eaeb 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -1632,12 +1632,10 @@ static void rcu_sr_normal_complete(struct llist_node *node)
>  {
>  	struct rcu_synchronize *rs = container_of(
>  		(struct rcu_head *) node, struct rcu_synchronize, head);
> -	unsigned long oldstate = (unsigned long) rs->head.func;
>  
>  	WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&
> -		!poll_state_synchronize_rcu(oldstate),
> -		"A full grace period is not passed yet: %lu",
> -		rcu_seq_diff(get_state_synchronize_rcu(), oldstate));
> +		!poll_state_synchronize_rcu_full(&rs->oldstate),
> +		"A full grace period is not passed yet!\n");
>  
>  	/* Finally. */
>  	complete(&rs->completion);
> @@ -3247,7 +3245,7 @@ static void synchronize_rcu_normal(void)
>  	 * snapshot before adding a request.
>  	 */
>  	if (IS_ENABLED(CONFIG_PROVE_RCU))
> -		rs.head.func = (void *) get_state_synchronize_rcu();
> +		get_state_synchronize_rcu_full(&rs.oldstate);
>  
>  	rcu_sr_normal_add_req(&rs);
>  
> -- 
> 2.39.5
>
Uladzislau Rezki Feb. 25, 2025, 10:54 a.m. UTC | #2
On Mon, Feb 24, 2025 at 11:06:01AM -0800, Paul E. McKenney wrote:
> On Mon, Feb 24, 2025 at 02:36:59PM +0100, Uladzislau Rezki (Sony) wrote:
> > Switch for using of get_state_synchronize_rcu_full() and
> > poll_state_synchronize_rcu_full() pair for debug a normal
> > synchronize_rcu() call.
> > 
> > Just using "not" full APIs to identify if a grace period
> > is passed or not might lead to a false kernel splat.
> > 
> > Link: https://lore.kernel.org/lkml/Z5ikQeVmVdsWQrdD@pc636/T/
> > Fixes: 988f569ae041 ("rcu: Reduce synchronize_rcu() latency")
> > Reported-by: cheung wall <zzqq0103.hey@gmail.com>
> > Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
> > ---
> >  include/linux/rcupdate_wait.h | 4 ++++
> >  kernel/rcu/tree.c             | 8 +++-----
> >  2 files changed, 7 insertions(+), 5 deletions(-)
> > 
> > diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
> > index f9bed3d3f78d..a16fc2a9a7d7 100644
> > --- a/include/linux/rcupdate_wait.h
> > +++ b/include/linux/rcupdate_wait.h
> > @@ -16,6 +16,10 @@
> >  struct rcu_synchronize {
> >  	struct rcu_head head;
> >  	struct completion completion;
> > +#ifdef CONFIG_PROVE_RCU
> > +	/* This is for testing. */
> > +	struct rcu_gp_oldstate oldstate;
> > +#endif
> 
> This causes the build to fail on TREE01.  One way to make the build
> succeed is to remove the #ifdefs above.  Another way would be to add
> #ifdefs to the WARN_ONCE() below.  I suspect that removing the #ifdefs
> is best, at least until such time as people start passing many tens
> of SRCU instances to synchronize_rcu_mult() or some such (which seems
> quite unlikely).
> 
> Thoughts?
> 
Right, I agree. I will repost this series.

Thank you for checking and testing :)

--
Uladzislau Rezki
diff mbox series

Patch

diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index f9bed3d3f78d..a16fc2a9a7d7 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -16,6 +16,10 @@ 
 struct rcu_synchronize {
 	struct rcu_head head;
 	struct completion completion;
+#ifdef CONFIG_PROVE_RCU
+	/* This is for testing. */
+	struct rcu_gp_oldstate oldstate;
+#endif
 };
 void wakeme_after_rcu(struct rcu_head *head);
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8625f616c65a..48384fa2eaeb 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1632,12 +1632,10 @@  static void rcu_sr_normal_complete(struct llist_node *node)
 {
 	struct rcu_synchronize *rs = container_of(
 		(struct rcu_head *) node, struct rcu_synchronize, head);
-	unsigned long oldstate = (unsigned long) rs->head.func;
 
 	WARN_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) &&
-		!poll_state_synchronize_rcu(oldstate),
-		"A full grace period is not passed yet: %lu",
-		rcu_seq_diff(get_state_synchronize_rcu(), oldstate));
+		!poll_state_synchronize_rcu_full(&rs->oldstate),
+		"A full grace period is not passed yet!\n");
 
 	/* Finally. */
 	complete(&rs->completion);
@@ -3247,7 +3245,7 @@  static void synchronize_rcu_normal(void)
 	 * snapshot before adding a request.
 	 */
 	if (IS_ENABLED(CONFIG_PROVE_RCU))
-		rs.head.func = (void *) get_state_synchronize_rcu();
+		get_state_synchronize_rcu_full(&rs.oldstate);
 
 	rcu_sr_normal_add_req(&rs);