diff mbox series

rcu-tasks: Avoid pr_info() with spin lock in cblist_init_generic()

Message ID 20220802162205.817796-1-syoshida@redhat.com (mailing list archive)
State New, archived
Headers show
Series rcu-tasks: Avoid pr_info() with spin lock in cblist_init_generic() | expand

Commit Message

Shigeru Yoshida Aug. 2, 2022, 4:22 p.m. UTC
pr_info() is called with rtp->cbs_gbl_lock spin lock locked.  Because
pr_info() calls printk() that might sleep, this will result in BUG
like below:

[    0.206455] cblist_init_generic: Setting adjustable number of callback queues.
[    0.206463]
[    0.206464] =============================
[    0.206464] [ BUG: Invalid wait context ]
[    0.206465] 5.19.0-00428-g9de1f9c8ca51 #5 Not tainted
[    0.206466] -----------------------------
[    0.206466] swapper/0/1 is trying to lock:
[    0.206467] ffffffffa0167a58 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x327/0x4a0
[    0.206473] other info that might help us debug this:
[    0.206473] context-{5:5}
[    0.206474] 3 locks held by swapper/0/1:
[    0.206474]  #0: ffffffff9eb597e0 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic.constprop.0+0x14/0x1f0
[    0.206478]  #1: ffffffff9eb579c0 (console_lock){+.+.}-{0:0}, at: _printk+0x63/0x7e
[    0.206482]  #2: ffffffff9ea77780 (console_owner){....}-{0:0}, at: console_emit_next_record.constprop.0+0x111/0x330
[    0.206485] stack backtrace:
[    0.206486] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-00428-g9de1f9c8ca51 #5
[    0.206488] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014
[    0.206489] Call Trace:
[    0.206490]  <TASK>
[    0.206491]  dump_stack_lvl+0x6a/0x9f
[    0.206493]  __lock_acquire.cold+0x2d7/0x2fe
[    0.206496]  ? stack_trace_save+0x46/0x70
[    0.206497]  lock_acquire+0xd1/0x2f0
[    0.206499]  ? serial8250_console_write+0x327/0x4a0
[    0.206500]  ? __lock_acquire+0x5c7/0x2720
[    0.206502]  _raw_spin_lock_irqsave+0x3d/0x90
[    0.206504]  ? serial8250_console_write+0x327/0x4a0
[    0.206506]  serial8250_console_write+0x327/0x4a0
[    0.206508]  console_emit_next_record.constprop.0+0x180/0x330
[    0.206511]  console_unlock+0xf7/0x1f0
[    0.206512]  vprintk_emit+0xf7/0x330
[    0.206514]  _printk+0x63/0x7e
[    0.206516]  cblist_init_generic.constprop.0.cold+0x24/0x32
[    0.206518]  rcu_init_tasks_generic+0x5/0xd9
[    0.206522]  kernel_init_freeable+0x15b/0x2a2
[    0.206523]  ? rest_init+0x160/0x160
[    0.206526]  kernel_init+0x11/0x120
[    0.206527]  ret_from_fork+0x1f/0x30
[    0.206530]  </TASK>
[    0.207018] cblist_init_generic: Setting shift to 1 and lim to 1.

This patch moves pr_info() so that it is called without
rtp->cbs_gbl_lock locked.

Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
---
 kernel/rcu/tasks.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

Comments

Paul E. McKenney Aug. 4, 2022, 12:02 a.m. UTC | #1
On Wed, Aug 03, 2022 at 01:22:05AM +0900, Shigeru Yoshida wrote:
> pr_info() is called with rtp->cbs_gbl_lock spin lock locked.  Because
> pr_info() calls printk() that might sleep, this will result in BUG
> like below:
> 
> [    0.206455] cblist_init_generic: Setting adjustable number of callback queues.
> [    0.206463]
> [    0.206464] =============================
> [    0.206464] [ BUG: Invalid wait context ]
> [    0.206465] 5.19.0-00428-g9de1f9c8ca51 #5 Not tainted
> [    0.206466] -----------------------------
> [    0.206466] swapper/0/1 is trying to lock:
> [    0.206467] ffffffffa0167a58 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x327/0x4a0
> [    0.206473] other info that might help us debug this:
> [    0.206473] context-{5:5}
> [    0.206474] 3 locks held by swapper/0/1:
> [    0.206474]  #0: ffffffff9eb597e0 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic.constprop.0+0x14/0x1f0
> [    0.206478]  #1: ffffffff9eb579c0 (console_lock){+.+.}-{0:0}, at: _printk+0x63/0x7e
> [    0.206482]  #2: ffffffff9ea77780 (console_owner){....}-{0:0}, at: console_emit_next_record.constprop.0+0x111/0x330
> [    0.206485] stack backtrace:
> [    0.206486] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-00428-g9de1f9c8ca51 #5
> [    0.206488] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014
> [    0.206489] Call Trace:
> [    0.206490]  <TASK>
> [    0.206491]  dump_stack_lvl+0x6a/0x9f
> [    0.206493]  __lock_acquire.cold+0x2d7/0x2fe
> [    0.206496]  ? stack_trace_save+0x46/0x70
> [    0.206497]  lock_acquire+0xd1/0x2f0
> [    0.206499]  ? serial8250_console_write+0x327/0x4a0
> [    0.206500]  ? __lock_acquire+0x5c7/0x2720
> [    0.206502]  _raw_spin_lock_irqsave+0x3d/0x90
> [    0.206504]  ? serial8250_console_write+0x327/0x4a0
> [    0.206506]  serial8250_console_write+0x327/0x4a0
> [    0.206508]  console_emit_next_record.constprop.0+0x180/0x330
> [    0.206511]  console_unlock+0xf7/0x1f0
> [    0.206512]  vprintk_emit+0xf7/0x330
> [    0.206514]  _printk+0x63/0x7e
> [    0.206516]  cblist_init_generic.constprop.0.cold+0x24/0x32
> [    0.206518]  rcu_init_tasks_generic+0x5/0xd9
> [    0.206522]  kernel_init_freeable+0x15b/0x2a2
> [    0.206523]  ? rest_init+0x160/0x160
> [    0.206526]  kernel_init+0x11/0x120
> [    0.206527]  ret_from_fork+0x1f/0x30
> [    0.206530]  </TASK>
> [    0.207018] cblist_init_generic: Setting shift to 1 and lim to 1.
> 
> This patch moves pr_info() so that it is called without
> rtp->cbs_gbl_lock locked.
> 
> Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>

This patch looks good, except that wasn't this supposed to be fixed
in printk()?  Or am I suffering from wishful thinking?

							Thanx, Paul

> ---
>  kernel/rcu/tasks.h | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> index 3925e32159b5..d46dd970bf22 100644
> --- a/kernel/rcu/tasks.h
> +++ b/kernel/rcu/tasks.h
> @@ -227,7 +227,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
>  	if (rcu_task_enqueue_lim < 0) {
>  		rcu_task_enqueue_lim = 1;
>  		rcu_task_cb_adjust = true;
> -		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
>  	} else if (rcu_task_enqueue_lim == 0) {
>  		rcu_task_enqueue_lim = 1;
>  	}
> @@ -256,6 +255,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
>  		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
>  	}
>  	raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
> +
> +	if (rcu_task_cb_adjust)
> +		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> +
>  	pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
>  }
>  
> -- 
> 2.37.1
>
Paul E. McKenney March 21, 2023, 3:59 p.m. UTC | #2
On Wed, Aug 03, 2022 at 05:02:52PM -0700, Paul E. McKenney wrote:
> On Wed, Aug 03, 2022 at 01:22:05AM +0900, Shigeru Yoshida wrote:
> > pr_info() is called with rtp->cbs_gbl_lock spin lock locked.  Because
> > pr_info() calls printk() that might sleep, this will result in BUG
> > like below:
> > 
> > [    0.206455] cblist_init_generic: Setting adjustable number of callback queues.
> > [    0.206463]
> > [    0.206464] =============================
> > [    0.206464] [ BUG: Invalid wait context ]
> > [    0.206465] 5.19.0-00428-g9de1f9c8ca51 #5 Not tainted
> > [    0.206466] -----------------------------
> > [    0.206466] swapper/0/1 is trying to lock:
> > [    0.206467] ffffffffa0167a58 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x327/0x4a0
> > [    0.206473] other info that might help us debug this:
> > [    0.206473] context-{5:5}
> > [    0.206474] 3 locks held by swapper/0/1:
> > [    0.206474]  #0: ffffffff9eb597e0 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic.constprop.0+0x14/0x1f0
> > [    0.206478]  #1: ffffffff9eb579c0 (console_lock){+.+.}-{0:0}, at: _printk+0x63/0x7e
> > [    0.206482]  #2: ffffffff9ea77780 (console_owner){....}-{0:0}, at: console_emit_next_record.constprop.0+0x111/0x330
> > [    0.206485] stack backtrace:
> > [    0.206486] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-00428-g9de1f9c8ca51 #5
> > [    0.206488] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014
> > [    0.206489] Call Trace:
> > [    0.206490]  <TASK>
> > [    0.206491]  dump_stack_lvl+0x6a/0x9f
> > [    0.206493]  __lock_acquire.cold+0x2d7/0x2fe
> > [    0.206496]  ? stack_trace_save+0x46/0x70
> > [    0.206497]  lock_acquire+0xd1/0x2f0
> > [    0.206499]  ? serial8250_console_write+0x327/0x4a0
> > [    0.206500]  ? __lock_acquire+0x5c7/0x2720
> > [    0.206502]  _raw_spin_lock_irqsave+0x3d/0x90
> > [    0.206504]  ? serial8250_console_write+0x327/0x4a0
> > [    0.206506]  serial8250_console_write+0x327/0x4a0
> > [    0.206508]  console_emit_next_record.constprop.0+0x180/0x330
> > [    0.206511]  console_unlock+0xf7/0x1f0
> > [    0.206512]  vprintk_emit+0xf7/0x330
> > [    0.206514]  _printk+0x63/0x7e
> > [    0.206516]  cblist_init_generic.constprop.0.cold+0x24/0x32
> > [    0.206518]  rcu_init_tasks_generic+0x5/0xd9
> > [    0.206522]  kernel_init_freeable+0x15b/0x2a2
> > [    0.206523]  ? rest_init+0x160/0x160
> > [    0.206526]  kernel_init+0x11/0x120
> > [    0.206527]  ret_from_fork+0x1f/0x30
> > [    0.206530]  </TASK>
> > [    0.207018] cblist_init_generic: Setting shift to 1 and lim to 1.
> > 
> > This patch moves pr_info() so that it is called without
> > rtp->cbs_gbl_lock locked.
> > 
> > Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
> 
> This patch looks good, except that wasn't this supposed to be fixed
> in printk()?  Or am I suffering from wishful thinking?

And I was indeed suffering from wishful thinking, so I have queued this
patch.  A big "thank you!" to all for your patience, and to Shigeru for
this fix!

							Thanx, Paul

> > ---
> >  kernel/rcu/tasks.h | 5 ++++-
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> > 
> > diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> > index 3925e32159b5..d46dd970bf22 100644
> > --- a/kernel/rcu/tasks.h
> > +++ b/kernel/rcu/tasks.h
> > @@ -227,7 +227,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
> >  	if (rcu_task_enqueue_lim < 0) {
> >  		rcu_task_enqueue_lim = 1;
> >  		rcu_task_cb_adjust = true;
> > -		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> >  	} else if (rcu_task_enqueue_lim == 0) {
> >  		rcu_task_enqueue_lim = 1;
> >  	}
> > @@ -256,6 +255,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
> >  		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
> >  	}
> >  	raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
> > +
> > +	if (rcu_task_cb_adjust)
> > +		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> > +
> >  	pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
> >  }
> >  
> > -- 
> > 2.37.1
> >
Zqiang March 22, 2023, 6:52 a.m. UTC | #3
> On Wed, Aug 03, 2022 at 01:22:05AM +0900, Shigeru Yoshida wrote:
> > pr_info() is called with rtp->cbs_gbl_lock spin lock locked.  Because
> > pr_info() calls printk() that might sleep, this will result in BUG
> > like below:
> > 
> > [    0.206455] cblist_init_generic: Setting adjustable number of callback queues.
> > [    0.206463]
> > [    0.206464] =============================
> > [    0.206464] [ BUG: Invalid wait context ]
> > [    0.206465] 5.19.0-00428-g9de1f9c8ca51 #5 Not tainted
> > [    0.206466] -----------------------------
> > [    0.206466] swapper/0/1 is trying to lock:
> > [    0.206467] ffffffffa0167a58 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x327/0x4a0
> > [    0.206473] other info that might help us debug this:
> > [    0.206473] context-{5:5}
> > [    0.206474] 3 locks held by swapper/0/1:
> > [    0.206474]  #0: ffffffff9eb597e0 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic.constprop.0+0x14/0x1f0
> > [    0.206478]  #1: ffffffff9eb579c0 (console_lock){+.+.}-{0:0}, at: _printk+0x63/0x7e
> > [    0.206482]  #2: ffffffff9ea77780 (console_owner){....}-{0:0}, at: console_emit_next_record.constprop.0+0x111/0x330
> > [    0.206485] stack backtrace:
> > [    0.206486] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-00428-g9de1f9c8ca51 #5
> > [    0.206488] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014
> > [    0.206489] Call Trace:
> > [    0.206490]  <TASK>
> > [    0.206491]  dump_stack_lvl+0x6a/0x9f
> > [    0.206493]  __lock_acquire.cold+0x2d7/0x2fe
> > [    0.206496]  ? stack_trace_save+0x46/0x70
> > [    0.206497]  lock_acquire+0xd1/0x2f0
> > [    0.206499]  ? serial8250_console_write+0x327/0x4a0
> > [    0.206500]  ? __lock_acquire+0x5c7/0x2720
> > [    0.206502]  _raw_spin_lock_irqsave+0x3d/0x90
> > [    0.206504]  ? serial8250_console_write+0x327/0x4a0
> > [    0.206506]  serial8250_console_write+0x327/0x4a0
> > [    0.206508]  console_emit_next_record.constprop.0+0x180/0x330
> > [    0.206511]  console_unlock+0xf7/0x1f0
> > [    0.206512]  vprintk_emit+0xf7/0x330
> > [    0.206514]  _printk+0x63/0x7e
> > [    0.206516]  cblist_init_generic.constprop.0.cold+0x24/0x32
> > [    0.206518]  rcu_init_tasks_generic+0x5/0xd9
> > [    0.206522]  kernel_init_freeable+0x15b/0x2a2
> > [    0.206523]  ? rest_init+0x160/0x160
> > [    0.206526]  kernel_init+0x11/0x120
> > [    0.206527]  ret_from_fork+0x1f/0x30
> > [    0.206530]  </TASK>
> > [    0.207018] cblist_init_generic: Setting shift to 1 and lim to 1.
> > 
> > This patch moves pr_info() so that it is called without
> > rtp->cbs_gbl_lock locked.
> > 
> > Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
> 
> This patch looks good, except that wasn't this supposed to be fixed
> in printk()?  Or am I suffering from wishful thinking?
>
>And I was indeed suffering from wishful thinking, so I have queued this
>patch.  A big "thank you!" to all for your patience, and to Shigeru for
>this fix!
>

Test the patch

[    0.178892] cblist_init_generic: Setting adjustable number of callback queues.
[    0.179366] cblist_init_generic: Setting shift to 2 and lim to 1.
[    0.179488] cblist_init_generic: Setting adjustable number of callback queues.
[    0.179961] cblist_init_generic: Setting shift to 2 and lim to 1.
[    0.180470] cblist_init_generic: Setting adjustable number of callback queues.
[    0.180949] cblist_init_generic: Setting shift to 2 and lim to 1.


It looks more clearly

--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -270,10 +270,9 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
        }
        raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);

-       if (rcu_task_cb_adjust)
-               pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
-
-       pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
+       pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n",
+                       rtp->name, data_race(rtp->percpu_enqueue_shift),
+                       data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
 }


[    0.178746] RCU Tasks: Setting shift to 2 and lim to 1 rcu_task_cb_adjust=1.
[    0.179322] RCU Tasks Rude: Setting shift to 2 and lim to 1 rcu_task_cb_adjust=1.
[    0.180076] RCU Tasks Trace: Setting shift to 2 and lim to 1 rcu_task_cb_adjust=1.


Thanks
Zqiang

>							Thanx, Paul
>
> > ---
> >  kernel/rcu/tasks.h | 5 ++++-
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> > 
> > diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> > index 3925e32159b5..d46dd970bf22 100644
> > --- a/kernel/rcu/tasks.h
> > +++ b/kernel/rcu/tasks.h
> > @@ -227,7 +227,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
> >  	if (rcu_task_enqueue_lim < 0) {
> >  		rcu_task_enqueue_lim = 1;
> >  		rcu_task_cb_adjust = true;
> > -		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> >  	} else if (rcu_task_enqueue_lim == 0) {
> >  		rcu_task_enqueue_lim = 1;
> >  	}
> > @@ -256,6 +255,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
> >  		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
> >  	}
> >  	raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
> > +
> > +	if (rcu_task_cb_adjust)
> > +		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> > +
> >  	pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
> >  }
> >  
> > -- 
> > 2.37.1
> >
Paul E. McKenney March 22, 2023, 9:24 p.m. UTC | #4
On Wed, Mar 22, 2023 at 06:52:45AM +0000, Zhang, Qiang1 wrote:
> > On Wed, Aug 03, 2022 at 01:22:05AM +0900, Shigeru Yoshida wrote:
> > > pr_info() is called with rtp->cbs_gbl_lock spin lock locked.  Because
> > > pr_info() calls printk() that might sleep, this will result in BUG
> > > like below:
> > > 
> > > [    0.206455] cblist_init_generic: Setting adjustable number of callback queues.
> > > [    0.206463]
> > > [    0.206464] =============================
> > > [    0.206464] [ BUG: Invalid wait context ]
> > > [    0.206465] 5.19.0-00428-g9de1f9c8ca51 #5 Not tainted
> > > [    0.206466] -----------------------------
> > > [    0.206466] swapper/0/1 is trying to lock:
> > > [    0.206467] ffffffffa0167a58 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x327/0x4a0
> > > [    0.206473] other info that might help us debug this:
> > > [    0.206473] context-{5:5}
> > > [    0.206474] 3 locks held by swapper/0/1:
> > > [    0.206474]  #0: ffffffff9eb597e0 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic.constprop.0+0x14/0x1f0
> > > [    0.206478]  #1: ffffffff9eb579c0 (console_lock){+.+.}-{0:0}, at: _printk+0x63/0x7e
> > > [    0.206482]  #2: ffffffff9ea77780 (console_owner){....}-{0:0}, at: console_emit_next_record.constprop.0+0x111/0x330
> > > [    0.206485] stack backtrace:
> > > [    0.206486] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-00428-g9de1f9c8ca51 #5
> > > [    0.206488] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014
> > > [    0.206489] Call Trace:
> > > [    0.206490]  <TASK>
> > > [    0.206491]  dump_stack_lvl+0x6a/0x9f
> > > [    0.206493]  __lock_acquire.cold+0x2d7/0x2fe
> > > [    0.206496]  ? stack_trace_save+0x46/0x70
> > > [    0.206497]  lock_acquire+0xd1/0x2f0
> > > [    0.206499]  ? serial8250_console_write+0x327/0x4a0
> > > [    0.206500]  ? __lock_acquire+0x5c7/0x2720
> > > [    0.206502]  _raw_spin_lock_irqsave+0x3d/0x90
> > > [    0.206504]  ? serial8250_console_write+0x327/0x4a0
> > > [    0.206506]  serial8250_console_write+0x327/0x4a0
> > > [    0.206508]  console_emit_next_record.constprop.0+0x180/0x330
> > > [    0.206511]  console_unlock+0xf7/0x1f0
> > > [    0.206512]  vprintk_emit+0xf7/0x330
> > > [    0.206514]  _printk+0x63/0x7e
> > > [    0.206516]  cblist_init_generic.constprop.0.cold+0x24/0x32
> > > [    0.206518]  rcu_init_tasks_generic+0x5/0xd9
> > > [    0.206522]  kernel_init_freeable+0x15b/0x2a2
> > > [    0.206523]  ? rest_init+0x160/0x160
> > > [    0.206526]  kernel_init+0x11/0x120
> > > [    0.206527]  ret_from_fork+0x1f/0x30
> > > [    0.206530]  </TASK>
> > > [    0.207018] cblist_init_generic: Setting shift to 1 and lim to 1.
> > > 
> > > This patch moves pr_info() so that it is called without
> > > rtp->cbs_gbl_lock locked.
> > > 
> > > Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
> > 
> > This patch looks good, except that wasn't this supposed to be fixed
> > in printk()?  Or am I suffering from wishful thinking?
> >
> >And I was indeed suffering from wishful thinking, so I have queued this
> >patch.  A big "thank you!" to all for your patience, and to Shigeru for
> >this fix!
> >
> 
> Test the patch
> 
> [    0.178892] cblist_init_generic: Setting adjustable number of callback queues.
> [    0.179366] cblist_init_generic: Setting shift to 2 and lim to 1.
> [    0.179488] cblist_init_generic: Setting adjustable number of callback queues.
> [    0.179961] cblist_init_generic: Setting shift to 2 and lim to 1.
> [    0.180470] cblist_init_generic: Setting adjustable number of callback queues.
> [    0.180949] cblist_init_generic: Setting shift to 2 and lim to 1.

Nice!  Can you see the problem without that patch?  If so, may I apply
your Tested-by?

> It looks more clearly
> 
> --- a/kernel/rcu/tasks.h
> +++ b/kernel/rcu/tasks.h
> @@ -270,10 +270,9 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
>         }
>         raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
> 
> -       if (rcu_task_cb_adjust)
> -               pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> -
> -       pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
> +       pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n",
> +                       rtp->name, data_race(rtp->percpu_enqueue_shift),
> +                       data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
>  }
> 
> 
> [    0.178746] RCU Tasks: Setting shift to 2 and lim to 1 rcu_task_cb_adjust=1.
> [    0.179322] RCU Tasks Rude: Setting shift to 2 and lim to 1 rcu_task_cb_adjust=1.
> [    0.180076] RCU Tasks Trace: Setting shift to 2 and lim to 1 rcu_task_cb_adjust=1.

If you make that "Setting shift to 2, lim to 1, and rcu_task_cb_adjust=1."
looks like an improvement to me!

Please send a proper patch for it.  There might be objections if someone
has scripting depending on the old message format, but that is after
all one of the reasons for putting patches out for review.

							Thanx, Paul

> Thanks
> Zqiang
> 
> >							Thanx, Paul
> >
> > > ---
> > >  kernel/rcu/tasks.h | 5 ++++-
> > >  1 file changed, 4 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> > > index 3925e32159b5..d46dd970bf22 100644
> > > --- a/kernel/rcu/tasks.h
> > > +++ b/kernel/rcu/tasks.h
> > > @@ -227,7 +227,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
> > >  	if (rcu_task_enqueue_lim < 0) {
> > >  		rcu_task_enqueue_lim = 1;
> > >  		rcu_task_cb_adjust = true;
> > > -		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> > >  	} else if (rcu_task_enqueue_lim == 0) {
> > >  		rcu_task_enqueue_lim = 1;
> > >  	}
> > > @@ -256,6 +255,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
> > >  		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
> > >  	}
> > >  	raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
> > > +
> > > +	if (rcu_task_cb_adjust)
> > > +		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> > > +
> > >  	pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
> > >  }
> > >  
> > > -- 
> > > 2.37.1
> > >
Zqiang March 23, 2023, 3:32 a.m. UTC | #5
> > On Wed, Aug 03, 2022 at 01:22:05AM +0900, Shigeru Yoshida wrote:
> > > pr_info() is called with rtp->cbs_gbl_lock spin lock locked.  Because
> > > pr_info() calls printk() that might sleep, this will result in BUG
> > > like below:
> > > 
> > > [    0.206455] cblist_init_generic: Setting adjustable number of callback queues.
> > > [    0.206463]
> > > [    0.206464] =============================
> > > [    0.206464] [ BUG: Invalid wait context ]
> > > [    0.206465] 5.19.0-00428-g9de1f9c8ca51 #5 Not tainted
> > > [    0.206466] -----------------------------
> > > [    0.206466] swapper/0/1 is trying to lock:
> > > [    0.206467] ffffffffa0167a58 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x327/0x4a0
> > > [    0.206473] other info that might help us debug this:
> > > [    0.206473] context-{5:5}
> > > [    0.206474] 3 locks held by swapper/0/1:
> > > [    0.206474]  #0: ffffffff9eb597e0 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic.constprop.0+0x14/0x1f0
> > > [    0.206478]  #1: ffffffff9eb579c0 (console_lock){+.+.}-{0:0}, at: _printk+0x63/0x7e
> > > [    0.206482]  #2: ffffffff9ea77780 (console_owner){....}-{0:0}, at: console_emit_next_record.constprop.0+0x111/0x330
> > > [    0.206485] stack backtrace:
> > > [    0.206486] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-00428-g9de1f9c8ca51 #5
> > > [    0.206488] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014
> > > [    0.206489] Call Trace:
> > > [    0.206490]  <TASK>
> > > [    0.206491]  dump_stack_lvl+0x6a/0x9f
> > > [    0.206493]  __lock_acquire.cold+0x2d7/0x2fe
> > > [    0.206496]  ? stack_trace_save+0x46/0x70
> > > [    0.206497]  lock_acquire+0xd1/0x2f0
> > > [    0.206499]  ? serial8250_console_write+0x327/0x4a0
> > > [    0.206500]  ? __lock_acquire+0x5c7/0x2720
> > > [    0.206502]  _raw_spin_lock_irqsave+0x3d/0x90
> > > [    0.206504]  ? serial8250_console_write+0x327/0x4a0
> > > [    0.206506]  serial8250_console_write+0x327/0x4a0
> > > [    0.206508]  console_emit_next_record.constprop.0+0x180/0x330
> > > [    0.206511]  console_unlock+0xf7/0x1f0
> > > [    0.206512]  vprintk_emit+0xf7/0x330
> > > [    0.206514]  _printk+0x63/0x7e
> > > [    0.206516]  cblist_init_generic.constprop.0.cold+0x24/0x32
> > > [    0.206518]  rcu_init_tasks_generic+0x5/0xd9
> > > [    0.206522]  kernel_init_freeable+0x15b/0x2a2
> > > [    0.206523]  ? rest_init+0x160/0x160
> > > [    0.206526]  kernel_init+0x11/0x120
> > > [    0.206527]  ret_from_fork+0x1f/0x30
> > > [    0.206530]  </TASK>
> > > [    0.207018] cblist_init_generic: Setting shift to 1 and lim to 1.
> > > 
> > > This patch moves pr_info() so that it is called without
> > > rtp->cbs_gbl_lock locked.
> > > 
> > > Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
> > 
> > This patch looks good, except that wasn't this supposed to be fixed
> > in printk()?  Or am I suffering from wishful thinking?
> >
> >And I was indeed suffering from wishful thinking, so I have queued this
> >patch.  A big "thank you!" to all for your patience, and to Shigeru for
> >this fix!
> >
> 
> Test the patch
> 
> [    0.178892] cblist_init_generic: Setting adjustable number of callback queues.
> [    0.179366] cblist_init_generic: Setting shift to 2 and lim to 1.
> [    0.179488] cblist_init_generic: Setting adjustable number of callback queues.
> [    0.179961] cblist_init_generic: Setting shift to 2 and lim to 1.
> [    0.180470] cblist_init_generic: Setting adjustable number of callback queues.
> [    0.180949] cblist_init_generic: Setting shift to 2 and lim to 1.
> 
>
>Nice!  Can you see the problem without that patch?  If so, may I apply
>your Tested-by?


Yes,  after enable lockdep, I also saw the consent question.

CONFIG_LOCKDEP_SUPPORT=y
CONFIG_LOCKDEP=y

[    0.190643] cblist_init_generic: Setting adjustable number of callback queues.
[    0.190656]
[    0.190657] =============================
[    0.190658] [ BUG: Invalid wait context ]
[    0.190659] 6.3.0-rc1-yocto-standard+ #724 Not tainted
[    0.190660] -----------------------------
[    0.190661] swapper/0/1 is trying to lock:
[    0.190662] ffffffff86235698 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x37b/0x7d0
[    0.190669] other info that might help us debug this:
[    0.190670] context-{5:5}
[    0.190671] 4 locks held by swapper/0/1:
[    0.190672]  #0: ffffffff840374e0 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic+0x29/0x3c0
[    0.190678]  #1: ffffffff84022320 (console_lock){+.+.}-{0:0}, at: vprintk_emit+0xd6/0x2f0
[    0.190684]  #2: ffffffff84022390 (console_srcu){....}-{0:0}, at: console_flush_all.constprop.0+0xbf/0x750
[    0.190689]  #3: ffffffff83fb1ca0 (console_owner){....}-{0:0}, at: console_flush_all.constprop.0+0x277/0x750
[    0.190693] stack backtrace:
[    0.190694] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.3.0-rc1-yocto-standard+ #724
[    0.190696] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/014
[    0.190698] Call Trace:
[    0.190699]  <TASK>
[    0.190700]  dump_stack_lvl+0x64/0xb0
[    0.190705]  dump_stack+0x10/0x20
[    0.190708]  __lock_acquire+0x6c4/0x3b40
[    0.190710]  ? __pfx_stack_trace_save+0x10/0x10
[    0.190713]  ? __pfx_check_noncircular+0x10/0x10
[    0.190719]  ? __pfx___lock_acquire+0x10/0x10
[    0.190722]  ? rcu_read_lock_sched_held+0xb0/0xe0
[    0.190724]  ? __kasan_check_read+0x11/0x20
[    0.190728]  lock_acquire+0x188/0x460
[    0.190730]  ? serial8250_console_write+0x37b/0x7d0
[    0.190734]  ? __pfx_lock_acquire+0x10/0x10
[    0.190738]  ? __pfx___lock_acquire+0x10/0x10
[    0.190741]  _raw_spin_lock_irqsave+0x45/0x90
[    0.190744]  ? serial8250_console_write+0x37b/0x7d0
[    0.190746]  serial8250_console_write+0x37b/0x7d0
[    0.190749]  ? console_flush_all.constprop.0+0x277/0x750
[    0.190752]  ? __pfx_serial8250_console_write+0x10/0x10
[    0.190755]  ? __pfx_lock_acquire+0x10/0x10
[    0.190757]  ? __pfx_lock_release+0x10/0x10
[    0.190759]  ? __pfx_lock_acquired+0x10/0x10
[    0.190763]  univ8250_console_write+0x4b/0x70
[    0.190766]  console_flush_all.constprop.0+0x2ea/0x750
[    0.190768]  ? console_flush_all.constprop.0+0x277/0x750
[    0.190772]  ? __pfx_console_flush_all.constprop.0+0x10/0x10
[    0.190776]  ? __pfx_lock_acquire+0x10/0x10
[    0.190780]  console_unlock+0xbd/0x140
[    0.190782]  ? __pfx_console_unlock+0x10/0x10
[    0.190784]  ? __pfx_vprintk_emit+0x10/0x10
[    0.190787]  ? __down_trylock_console_sem+0x80/0xd0
[    0.190789]  ? vprintk_emit+0xd6/0x2f0
[    0.190792]  vprintk_emit+0xdf/0x2f0
[    0.190796]  vprintk_default+0x1d/0x30
[    0.190798]  vprintk+0x50/0x60
[    0.190800]  _printk+0xb3/0xf0
[    0.190802]  ? __pfx__printk+0x10/0x10
[    0.190804]  ? __pfx_lock_acquired+0x10/0x10
[    0.190806]  ? trace_hardirqs_off+0x23/0x30
[    0.190809]  ? _raw_spin_lock_irqsave+0x76/0x90
[    0.190812]  ? lock_acquired+0x327/0x660
[    0.190814]  ? lock_acquired+0x2d6/0x660
[    0.190816]  cblist_init_generic+0x395/0x3c0
[    0.190820]  rcu_init_tasks_generic+0x15/0x1b0
[    0.190824]  kernel_init_freeable+0xc7/0x640
[    0.190827]  ? __pfx_kernel_init+0x10/0x10
[    0.190830]  kernel_init+0x21/0x180
[    0.190831]  ? __pfx_kernel_init+0x10/0x10
[    0.190833]  ? __pfx_kernel_init+0x10/0x10
[    0.190835]  ret_from_fork+0x2c/0x50
[    0.190841]  </TASK>
[    0.191635] cblist_init_generic: Setting shift to 2 and lim to 1.
[    0.192106] cblist_init_generic: Setting shift to 2 and lim to 1.
[    0.192685] cblist_init_generic: Setting shift to 2 and lim to 1.




>
> It looks more clearly
> 
> --- a/kernel/rcu/tasks.h
> +++ b/kernel/rcu/tasks.h
> @@ -270,10 +270,9 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
>         }
>         raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
> 
> -       if (rcu_task_cb_adjust)
> -               pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> -
> -       pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
> +       pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n",
> +                       rtp->name, data_race(rtp->percpu_enqueue_shift),
> +                       data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
>  }
> 
> 
> [    0.178746] RCU Tasks: Setting shift to 2 and lim to 1 rcu_task_cb_adjust=1.
> [    0.179322] RCU Tasks Rude: Setting shift to 2 and lim to 1 rcu_task_cb_adjust=1.
> [    0.180076] RCU Tasks Trace: Setting shift to 2 and lim to 1 rcu_task_cb_adjust=1.
>
>If you make that "Setting shift to 2, lim to 1, and rcu_task_cb_adjust=1."
>looks like an improvement to me!
>
>Please send a proper patch for it.  There might be objections if someone
>has scripting depending on the old message format, but that is after
>all one of the reasons for putting patches out for review.


I will send patch

Thanks
Zqiang

>
>							Thanx, Paul
>
> Thanks
> Zqiang
> 
> >							Thanx, Paul
> >
> > > ---
> > >  kernel/rcu/tasks.h | 5 ++++-
> > >  1 file changed, 4 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
> > > index 3925e32159b5..d46dd970bf22 100644
> > > --- a/kernel/rcu/tasks.h
> > > +++ b/kernel/rcu/tasks.h
> > > @@ -227,7 +227,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
> > >  	if (rcu_task_enqueue_lim < 0) {
> > >  		rcu_task_enqueue_lim = 1;
> > >  		rcu_task_cb_adjust = true;
> > > -		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> > >  	} else if (rcu_task_enqueue_lim == 0) {
> > >  		rcu_task_enqueue_lim = 1;
> > >  	}
> > > @@ -256,6 +255,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
> > >  		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
> > >  	}
> > >  	raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
> > > +
> > > +	if (rcu_task_cb_adjust)
> > > +		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
> > > +
> > >  	pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
> > >  }
> > >  
> > > -- 
> > > 2.37.1
> > >
diff mbox series

Patch

diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 3925e32159b5..d46dd970bf22 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -227,7 +227,6 @@  static void cblist_init_generic(struct rcu_tasks *rtp)
 	if (rcu_task_enqueue_lim < 0) {
 		rcu_task_enqueue_lim = 1;
 		rcu_task_cb_adjust = true;
-		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
 	} else if (rcu_task_enqueue_lim == 0) {
 		rcu_task_enqueue_lim = 1;
 	}
@@ -256,6 +255,10 @@  static void cblist_init_generic(struct rcu_tasks *rtp)
 		raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
 	}
 	raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
+
+	if (rcu_task_cb_adjust)
+		pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
+
 	pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
 }