diff mbox series

[net,v2] xfrm: Reinject transport-mode packets through workqueue

Message ID 20220924080157.247678-1-liujian56@huawei.com (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series [net,v2] xfrm: Reinject transport-mode packets through workqueue | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5 this patch: 5
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 5 this patch: 5
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 5 this patch: 5
netdev/checkpatch warning CHECK: spinlock_t definition without comment WARNING: Possible repeated word: 'Add' WARNING: line length of 95 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

liujian (CE) Sept. 24, 2022, 8:01 a.m. UTC
The following warning is displayed when the tcp6-multi-diffip11 stress
test case of the LTP test suite is tested:

watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [ns-tcpserver:48198]
CPU: 0 PID: 48198 Comm: ns-tcpserver Kdump: loaded Not tainted 6.0.0-rc6+ #39
Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : des3_ede_encrypt+0x27c/0x460 [libdes]
lr : 0x3f
sp : ffff80000ceaa1b0
x29: ffff80000ceaa1b0 x28: ffff0000df056100 x27: ffff0000e51e5280
x26: ffff80004df75030 x25: ffff0000e51e4600 x24: 000000000000003b
x23: 0000000000802080 x22: 000000000000003d x21: 0000000000000038
x20: 0000000080000020 x19: 000000000000000a x18: 0000000000000033
x17: ffff0000e51e4780 x16: ffff80004e2d1448 x15: ffff80004e2d1248
x14: ffff0000e51e4680 x13: ffff80004e2d1348 x12: ffff80004e2d1548
x11: ffff80004e2d1848 x10: ffff80004e2d1648 x9 : ffff80004e2d1748
x8 : ffff80004e2d1948 x7 : 000000000bcaf83d x6 : 000000000000001b
x5 : ffff80004e2d1048 x4 : 00000000761bf3bf x3 : 000000007f1dd0a3
x2 : ffff0000e51e4780 x1 : ffff0000e3b9a2f8 x0 : 00000000db44e872
Call trace:
 des3_ede_encrypt+0x27c/0x460 [libdes]
 crypto_des3_ede_encrypt+0x1c/0x30 [des_generic]
 crypto_cbc_encrypt+0x148/0x190
 crypto_skcipher_encrypt+0x2c/0x40
 crypto_authenc_encrypt+0xc8/0xfc [authenc]
 crypto_aead_encrypt+0x2c/0x40
 echainiv_encrypt+0x144/0x1a0 [echainiv]
 crypto_aead_encrypt+0x2c/0x40
 esp6_output_tail+0x1c8/0x5d0 [esp6]
 esp6_output+0x120/0x278 [esp6]
 xfrm_output_one+0x458/0x4ec
 xfrm_output_resume+0x6c/0x1f0
 xfrm_output+0xac/0x4ac
 __xfrm6_output+0x130/0x270
 xfrm6_output+0x60/0xec
 ip6_xmit+0x2ec/0x5bc
 inet6_csk_xmit+0xbc/0x10c
 __tcp_transmit_skb+0x460/0x8c0
 tcp_write_xmit+0x348/0x890
 __tcp_push_pending_frames+0x44/0x110
 tcp_rcv_established+0x3c8/0x720
 tcp_v6_do_rcv+0xdc/0x4a0
 tcp_v6_rcv+0xc24/0xcb0
 ip6_protocol_deliver_rcu+0xf0/0x574
 ip6_input_finish+0x48/0x7c
 ip6_input+0x48/0xc0
 ip6_rcv_finish+0x80/0x9c
 xfrm_trans_reinject+0xb0/0xf4
 tasklet_action_common.constprop.0+0xf8/0x134
 tasklet_action+0x30/0x3c
 __do_softirq+0x128/0x368
 do_softirq+0xb4/0xc0
 __local_bh_enable_ip+0xb0/0xb4
 put_cpu_fpsimd_context+0x40/0x70
 kernel_neon_end+0x20/0x40
 sha1_base_do_update.constprop.0.isra.0+0x11c/0x140 [sha1_ce]
 sha1_ce_finup+0x94/0x110 [sha1_ce]
 crypto_shash_finup+0x34/0xc0
 hmac_finup+0x48/0xe0
 crypto_shash_finup+0x34/0xc0
 shash_digest_unaligned+0x74/0x90
 crypto_shash_digest+0x4c/0x9c
 shash_ahash_digest+0xc8/0xf0
 shash_async_digest+0x28/0x34
 crypto_ahash_digest+0x48/0xcc
 crypto_authenc_genicv+0x88/0xcc [authenc]
 crypto_authenc_encrypt+0xd8/0xfc [authenc]
 crypto_aead_encrypt+0x2c/0x40
 echainiv_encrypt+0x144/0x1a0 [echainiv]
 crypto_aead_encrypt+0x2c/0x40
 esp6_output_tail+0x1c8/0x5d0 [esp6]
 esp6_output+0x120/0x278 [esp6]
 xfrm_output_one+0x458/0x4ec
 xfrm_output_resume+0x6c/0x1f0
 xfrm_output+0xac/0x4ac
 __xfrm6_output+0x130/0x270
 xfrm6_output+0x60/0xec
 ip6_xmit+0x2ec/0x5bc
 inet6_csk_xmit+0xbc/0x10c
 __tcp_transmit_skb+0x460/0x8c0
 tcp_write_xmit+0x348/0x890
 __tcp_push_pending_frames+0x44/0x110
 tcp_push+0xb4/0x14c
 tcp_sendmsg_locked+0x71c/0xb64
 tcp_sendmsg+0x40/0x6c
 inet6_sendmsg+0x4c/0x80
 sock_sendmsg+0x5c/0x6c
 __sys_sendto+0x128/0x15c
 __arm64_sys_sendto+0x30/0x40
 invoke_syscall+0x50/0x120
 el0_svc_common.constprop.0+0x170/0x194
 do_el0_svc+0x38/0x4c
 el0_svc+0x28/0xe0
 el0t_64_sync_handler+0xbc/0x13c
 el0t_64_sync+0x180/0x184

Get softirq info by bcc tool:
./softirqs -NT 10
Tracing soft irq event time... Hit Ctrl-C to end.

15:34:34
SOFTIRQ          TOTAL_nsecs
block                 158990
timer               20030920
sched               46577080
net_rx             676746820
tasklet           9906067650

15:34:45
SOFTIRQ          TOTAL_nsecs
block                  86100
sched               38849790
net_rx             676532470
timer             1163848790
tasklet           9409019620

15:34:55
SOFTIRQ          TOTAL_nsecs
sched               58078450
net_rx             475156720
timer              533832410
tasklet           9431333300

The tasklet software interrupt takes too much time. Therefore, the
xfrm_trans_reinject executor is changed from tasklet to workqueue. Add add
spin lock to protect the queue. This reduces the processing flow of the
tcp_sendmsg function in this scenario.

Fixes: acf568ee859f0 ("xfrm: Reinject transport-mode packets through tasklet")
Signed-off-by: Liu Jian <liujian56@huawei.com>
---
v1->v2:
after change to workqueue, should add queue_lock to protect queue.
 net/xfrm/xfrm_input.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

Comments

Steffen Klassert Sept. 27, 2022, 7:53 a.m. UTC | #1
On Sat, Sep 24, 2022 at 04:01:57PM +0800, Liu Jian wrote:
> The following warning is displayed when the tcp6-multi-diffip11 stress
> test case of the LTP test suite is tested:
> 
> watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [ns-tcpserver:48198]
> CPU: 0 PID: 48198 Comm: ns-tcpserver Kdump: loaded Not tainted 6.0.0-rc6+ #39
> Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
> pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : des3_ede_encrypt+0x27c/0x460 [libdes]
> lr : 0x3f
> sp : ffff80000ceaa1b0
> x29: ffff80000ceaa1b0 x28: ffff0000df056100 x27: ffff0000e51e5280
> x26: ffff80004df75030 x25: ffff0000e51e4600 x24: 000000000000003b
> x23: 0000000000802080 x22: 000000000000003d x21: 0000000000000038
> x20: 0000000080000020 x19: 000000000000000a x18: 0000000000000033
> x17: ffff0000e51e4780 x16: ffff80004e2d1448 x15: ffff80004e2d1248
> x14: ffff0000e51e4680 x13: ffff80004e2d1348 x12: ffff80004e2d1548
> x11: ffff80004e2d1848 x10: ffff80004e2d1648 x9 : ffff80004e2d1748
> x8 : ffff80004e2d1948 x7 : 000000000bcaf83d x6 : 000000000000001b
> x5 : ffff80004e2d1048 x4 : 00000000761bf3bf x3 : 000000007f1dd0a3
> x2 : ffff0000e51e4780 x1 : ffff0000e3b9a2f8 x0 : 00000000db44e872
> Call trace:
>  des3_ede_encrypt+0x27c/0x460 [libdes]
>  crypto_des3_ede_encrypt+0x1c/0x30 [des_generic]
>  crypto_cbc_encrypt+0x148/0x190
>  crypto_skcipher_encrypt+0x2c/0x40
>  crypto_authenc_encrypt+0xc8/0xfc [authenc]
>  crypto_aead_encrypt+0x2c/0x40
>  echainiv_encrypt+0x144/0x1a0 [echainiv]
>  crypto_aead_encrypt+0x2c/0x40
>  esp6_output_tail+0x1c8/0x5d0 [esp6]
>  esp6_output+0x120/0x278 [esp6]
>  xfrm_output_one+0x458/0x4ec
>  xfrm_output_resume+0x6c/0x1f0
>  xfrm_output+0xac/0x4ac
>  __xfrm6_output+0x130/0x270
>  xfrm6_output+0x60/0xec
>  ip6_xmit+0x2ec/0x5bc
>  inet6_csk_xmit+0xbc/0x10c
>  __tcp_transmit_skb+0x460/0x8c0
>  tcp_write_xmit+0x348/0x890
>  __tcp_push_pending_frames+0x44/0x110
>  tcp_rcv_established+0x3c8/0x720
>  tcp_v6_do_rcv+0xdc/0x4a0
>  tcp_v6_rcv+0xc24/0xcb0
>  ip6_protocol_deliver_rcu+0xf0/0x574
>  ip6_input_finish+0x48/0x7c
>  ip6_input+0x48/0xc0
>  ip6_rcv_finish+0x80/0x9c
>  xfrm_trans_reinject+0xb0/0xf4
>  tasklet_action_common.constprop.0+0xf8/0x134
>  tasklet_action+0x30/0x3c
>  __do_softirq+0x128/0x368
>  do_softirq+0xb4/0xc0

...

> The tasklet software interrupt takes too much time.

Do you know why this tasklet takes so long? Whatever happens in that
tasklet, it should not need more than 22s.

> Therefore, the
> xfrm_trans_reinject executor is changed from tasklet to workqueue.

Why should do a workqueue with BHs off better than a tasklet?
liujian (CE) Sept. 27, 2022, 9:23 a.m. UTC | #2
> -----Original Message-----
> From: Steffen Klassert [mailto:steffen.klassert@secunet.com]
> Sent: Tuesday, September 27, 2022 3:53 PM
> To: liujian (CE) <liujian56@huawei.com>
> Cc: herbert@gondor.apana.org.au; davem@davemloft.net;
> edumazet@google.com; kuba@kernel.org; pabeni@redhat.com;
> netdev@vger.kernel.org
> Subject: Re: [PATCH net v2] xfrm: Reinject transport-mode packets through
> workqueue
> 
> On Sat, Sep 24, 2022 at 04:01:57PM +0800, Liu Jian wrote:
> > The following warning is displayed when the tcp6-multi-diffip11 stress
> > test case of the LTP test suite is tested:
> >
> > watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [ns-tcpserver:48198]
> > CPU: 0 PID: 48198 Comm: ns-tcpserver Kdump: loaded Not tainted
> > 6.0.0-rc6+ #39 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0
> > 02/06/2015
> > pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc :
> > des3_ede_encrypt+0x27c/0x460 [libdes] lr : 0x3f sp : ffff80000ceaa1b0
> > x29: ffff80000ceaa1b0 x28: ffff0000df056100 x27: ffff0000e51e5280
> > x26: ffff80004df75030 x25: ffff0000e51e4600 x24: 000000000000003b
> > x23: 0000000000802080 x22: 000000000000003d x21: 0000000000000038
> > x20: 0000000080000020 x19: 000000000000000a x18: 0000000000000033
> > x17: ffff0000e51e4780 x16: ffff80004e2d1448 x15: ffff80004e2d1248
> > x14: ffff0000e51e4680 x13: ffff80004e2d1348 x12: ffff80004e2d1548
> > x11: ffff80004e2d1848 x10: ffff80004e2d1648 x9 : ffff80004e2d1748
> > x8 : ffff80004e2d1948 x7 : 000000000bcaf83d x6 : 000000000000001b
> > x5 : ffff80004e2d1048 x4 : 00000000761bf3bf x3 : 000000007f1dd0a3
> > x2 : ffff0000e51e4780 x1 : ffff0000e3b9a2f8 x0 : 00000000db44e872 Call
> > trace:
> >  des3_ede_encrypt+0x27c/0x460 [libdes]
> >  crypto_des3_ede_encrypt+0x1c/0x30 [des_generic]
> >  crypto_cbc_encrypt+0x148/0x190
> >  crypto_skcipher_encrypt+0x2c/0x40
> >  crypto_authenc_encrypt+0xc8/0xfc [authenc]
> >  crypto_aead_encrypt+0x2c/0x40
> >  echainiv_encrypt+0x144/0x1a0 [echainiv]
> >  crypto_aead_encrypt+0x2c/0x40
> >  esp6_output_tail+0x1c8/0x5d0 [esp6]
> >  esp6_output+0x120/0x278 [esp6]
> >  xfrm_output_one+0x458/0x4ec
> >  xfrm_output_resume+0x6c/0x1f0
> >  xfrm_output+0xac/0x4ac
> >  __xfrm6_output+0x130/0x270
> >  xfrm6_output+0x60/0xec
> >  ip6_xmit+0x2ec/0x5bc
> >  inet6_csk_xmit+0xbc/0x10c
> >  __tcp_transmit_skb+0x460/0x8c0
> >  tcp_write_xmit+0x348/0x890
> >  __tcp_push_pending_frames+0x44/0x110
> >  tcp_rcv_established+0x3c8/0x720
> >  tcp_v6_do_rcv+0xdc/0x4a0
> >  tcp_v6_rcv+0xc24/0xcb0
> >  ip6_protocol_deliver_rcu+0xf0/0x574
> >  ip6_input_finish+0x48/0x7c
> >  ip6_input+0x48/0xc0
> >  ip6_rcv_finish+0x80/0x9c
> >  xfrm_trans_reinject+0xb0/0xf4
> >  tasklet_action_common.constprop.0+0xf8/0x134
> >  tasklet_action+0x30/0x3c
> >  __do_softirq+0x128/0x368
> >  do_softirq+0xb4/0xc0
> 
> ...
> 
> > The tasklet software interrupt takes too much time.
> 
> Do you know why this tasklet takes so long? Whatever happens in that
> tasklet, it should not need more than 22s.
> 
Something like this:
1. In the tcp6-multi-diffip11 test case, data is sent based on the sendbuf size of the sock (1638400 is set in the current environment), and the IPSec ESP/transport mode is used. In the TCP protocol stack, the size of each SKB is 1500. (did not coalesce? If each SKB is 1500, there is a 1638400 / 1500 loop in tcp_sendmsg.)
2. In the tcp6-multi-diffip11 test case, 100 sockets are used to perform the pressure test described in [1]. When xfrm_trans_reinject is triggered, the length of trans->queue is 64 to 2000.
3. The callback function in xfrm_trans_reinject is ip6_rcv_finish. In this function, __tcp_push_pending_frames is called to trigger data transmission of the other 99 sockets.
4. In my environment, __local_bh_enable_ip sometimes takes nearly 1 second (the time consumed by using ktime_get_raw_fast_ns).

And sorry, the tested VM environment is released due to timeout and logs are lost. If necessary, I may need to test it again later.

> > Therefore, the
> > xfrm_trans_reinject executor is changed from tasklet to workqueue.
> 
> Why should do a workqueue with BHs off better than a tasklet?
In this case, the path of tcp_sendmsg can be reduced. As shown in the preceding call trace, the software interrupt xfrm_trans_reinject is inserted in tcp_sendmsg. Change xfrm_trans_reinject from tasklet to workqueue. The xfrm_trans_reinject process is deleted from the sendmsg process.
Steffen Klassert Sept. 29, 2022, 4:39 a.m. UTC | #3
On Sat, Sep 24, 2022 at 04:01:57PM +0800, Liu Jian wrote:
> The following warning is displayed when the tcp6-multi-diffip11 stress
> test case of the LTP test suite is tested:
> 
> watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [ns-tcpserver:48198]
> CPU: 0 PID: 48198 Comm: ns-tcpserver Kdump: loaded Not tainted 6.0.0-rc6+ #39
> Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015
> pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
> pc : des3_ede_encrypt+0x27c/0x460 [libdes]
> lr : 0x3f
> sp : ffff80000ceaa1b0
> x29: ffff80000ceaa1b0 x28: ffff0000df056100 x27: ffff0000e51e5280
> x26: ffff80004df75030 x25: ffff0000e51e4600 x24: 000000000000003b
> x23: 0000000000802080 x22: 000000000000003d x21: 0000000000000038
> x20: 0000000080000020 x19: 000000000000000a x18: 0000000000000033
> x17: ffff0000e51e4780 x16: ffff80004e2d1448 x15: ffff80004e2d1248
> x14: ffff0000e51e4680 x13: ffff80004e2d1348 x12: ffff80004e2d1548
> x11: ffff80004e2d1848 x10: ffff80004e2d1648 x9 : ffff80004e2d1748
> x8 : ffff80004e2d1948 x7 : 000000000bcaf83d x6 : 000000000000001b
> x5 : ffff80004e2d1048 x4 : 00000000761bf3bf x3 : 000000007f1dd0a3
> x2 : ffff0000e51e4780 x1 : ffff0000e3b9a2f8 x0 : 00000000db44e872
> Call trace:

...

> The tasklet software interrupt takes too much time. Therefore, the
> xfrm_trans_reinject executor is changed from tasklet to workqueue. Add add
> spin lock to protect the queue. This reduces the processing flow of the
> tcp_sendmsg function in this scenario.
> 
> Fixes: acf568ee859f0 ("xfrm: Reinject transport-mode packets through tasklet")
> Signed-off-by: Liu Jian <liujian56@huawei.com>

Applied, thanks!
diff mbox series

Patch

diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index b2f4ec9c537f..aa5220565763 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -24,7 +24,8 @@ 
 #include "xfrm_inout.h"
 
 struct xfrm_trans_tasklet {
-	struct tasklet_struct tasklet;
+	struct work_struct work;
+	spinlock_t queue_lock;
 	struct sk_buff_head queue;
 };
 
@@ -760,18 +761,22 @@  int xfrm_input_resume(struct sk_buff *skb, int nexthdr)
 }
 EXPORT_SYMBOL(xfrm_input_resume);
 
-static void xfrm_trans_reinject(struct tasklet_struct *t)
+static void xfrm_trans_reinject(struct work_struct *work)
 {
-	struct xfrm_trans_tasklet *trans = from_tasklet(trans, t, tasklet);
+	struct xfrm_trans_tasklet *trans = container_of(work, struct xfrm_trans_tasklet, work);
 	struct sk_buff_head queue;
 	struct sk_buff *skb;
 
 	__skb_queue_head_init(&queue);
+	spin_lock_bh(&trans->queue_lock);
 	skb_queue_splice_init(&trans->queue, &queue);
+	spin_unlock_bh(&trans->queue_lock);
 
+	local_bh_disable();
 	while ((skb = __skb_dequeue(&queue)))
 		XFRM_TRANS_SKB_CB(skb)->finish(XFRM_TRANS_SKB_CB(skb)->net,
 					       NULL, skb);
+	local_bh_enable();
 }
 
 int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
@@ -789,8 +794,10 @@  int xfrm_trans_queue_net(struct net *net, struct sk_buff *skb,
 
 	XFRM_TRANS_SKB_CB(skb)->finish = finish;
 	XFRM_TRANS_SKB_CB(skb)->net = net;
+	spin_lock_bh(&trans->queue_lock);
 	__skb_queue_tail(&trans->queue, skb);
-	tasklet_schedule(&trans->tasklet);
+	spin_unlock_bh(&trans->queue_lock);
+	schedule_work(&trans->work);
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_trans_queue_net);
@@ -817,7 +824,8 @@  void __init xfrm_input_init(void)
 		struct xfrm_trans_tasklet *trans;
 
 		trans = &per_cpu(xfrm_trans_tasklet, i);
+		spin_lock_init(&trans->queue_lock);
 		__skb_queue_head_init(&trans->queue);
-		tasklet_setup(&trans->tasklet, xfrm_trans_reinject);
+		INIT_WORK(&trans->work, xfrm_trans_reinject);
 	}
 }