Message ID | 20231009230722.76268-9-dima@arista.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [v14,net-next,01/23] net/tcp: Prepare tcp_md5sig_pool for TCP-AO | expand |
On Tue, Oct 10, 2023 at 1:07 AM Dmitry Safonov <dima@arista.com> wrote: > > Wire up sending resets to TCP-AO hashing. > > Co-developed-by: Francesco Ruggeri <fruggeri@arista.com> > Signed-off-by: Francesco Ruggeri <fruggeri@arista.com> > Co-developed-by: Salam Noureddine <noureddine@arista.com> > Signed-off-by: Salam Noureddine <noureddine@arista.com> > Signed-off-by: Dmitry Safonov <dima@arista.com> > Acked-by: David Ahern <dsahern@kernel.org> > --- > include/net/tcp.h | 7 ++- > include/net/tcp_ao.h | 12 +++++ > net/ipv4/tcp_ao.c | 104 ++++++++++++++++++++++++++++++++++++++++++- > net/ipv4/tcp_ipv4.c | 69 ++++++++++++++++++++++------ > net/ipv6/tcp_ipv6.c | 96 ++++++++++++++++++++++++++++----------- > 5 files changed, 245 insertions(+), 43 deletions(-) > > diff --git a/include/net/tcp.h b/include/net/tcp.h > index a619c429a8bd..dc74908ffa5a 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -2220,7 +2220,12 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, > > struct tcp_key { > union { > - struct tcp_ao_key *ao_key; > + struct { > + struct tcp_ao_key *ao_key; > + u32 sne; > + char *traffic_key; Move sne after traffic_key to avoid a hole on 64bit arches. > + u8 rcv_next; > + }; > struct tcp_md5sig_key *md5_key; > }; > enum { > diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h > index fdd2f5091b98..629ab0365b83 100644 > --- a/include/net/tcp_ao.h > +++ b/include/net/tcp_ao.h > @@ -120,12 +120,24 @@ int tcp_ao_hash_skb(unsigned short int family, > const u8 *tkey, int hash_offset, u32 sne); > int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, > sockptr_t optval, int optlen); > +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, > + int sndid, int rcvid); > int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, > unsigned int len, struct tcp_sigpool *hp); > void tcp_ao_destroy_sock(struct sock *sk); > struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, > const union tcp_ao_addr *addr, > int family, int sndid, int rcvid); > +int tcp_ao_hash_hdr(unsigned short family, char *ao_hash, > + struct tcp_ao_key *key, const u8 *tkey, > + const union tcp_ao_addr *daddr, > + const union tcp_ao_addr *saddr, > + const struct tcphdr *th, u32 sne); > +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, > + const struct tcp_ao_hdr *aoh, int l3index, > + struct tcp_ao_key **key, char **traffic_key, > + bool *allocated_traffic_key, u8 *keyid, u32 *sne); > + > /* ipv4 specific functions */ > int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); > struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, > diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c > index 6eb9241d14a3..df59924c3828 100644 > --- a/net/ipv4/tcp_ao.c > +++ b/net/ipv4/tcp_ao.c > @@ -48,8 +48,8 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, > * it's known that the keys in ao_info are matching peer's > * family/address/VRF/etc. > */ > -static struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, > - int sndid, int rcvid) > +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, > + int sndid, int rcvid) > { > struct tcp_ao_key *key; > > @@ -369,6 +369,66 @@ static int tcp_ao_hash_header(struct tcp_sigpool *hp, > return err; > } > > +int tcp_ao_hash_hdr(unsigned short int family, char *ao_hash, > + struct tcp_ao_key *key, const u8 *tkey, > + const union tcp_ao_addr *daddr, > + const union tcp_ao_addr *saddr, > + const struct tcphdr *th, u32 sne) > +{ > + int tkey_len = tcp_ao_digest_size(key); > + int hash_offset = ao_hash - (char *)th; > + struct tcp_sigpool hp; > + void *hash_buf = NULL; > + > + hash_buf = kmalloc(tkey_len, GFP_ATOMIC); > + if (!hash_buf) > + goto clear_hash_noput; > + > + if (tcp_sigpool_start(key->tcp_sigpool_id, &hp)) > + goto clear_hash_noput; > + > + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), tkey, tkey_len)) > + goto clear_hash; > + > + if (crypto_ahash_init(hp.req)) > + goto clear_hash; > + > + if (tcp_ao_hash_sne(&hp, sne)) > + goto clear_hash; > + if (family == AF_INET) { > + if (tcp_v4_ao_hash_pseudoheader(&hp, daddr->a4.s_addr, > + saddr->a4.s_addr, th->doff * 4)) > + goto clear_hash; > +#if IS_ENABLED(CONFIG_IPV6) > + } else if (family == AF_INET6) { > + if (tcp_v6_ao_hash_pseudoheader(&hp, &daddr->a6, > + &saddr->a6, th->doff * 4)) > + goto clear_hash; > +#endif > + } else { > + WARN_ON_ONCE(1); > + goto clear_hash; > + } > + if (tcp_ao_hash_header(&hp, th, false, > + ao_hash, hash_offset, tcp_ao_maclen(key))) > + goto clear_hash; > + ahash_request_set_crypt(hp.req, NULL, hash_buf, 0); > + if (crypto_ahash_final(hp.req)) > + goto clear_hash; > + > + memcpy(ao_hash, hash_buf, tcp_ao_maclen(key)); > + tcp_sigpool_end(&hp); > + kfree(hash_buf); > + return 0; > + > +clear_hash: > + tcp_sigpool_end(&hp); > +clear_hash_noput: > + memset(ao_hash, 0, tcp_ao_maclen(key)); > + kfree(hash_buf); > + return 1; > +} > + > int tcp_ao_hash_skb(unsigned short int family, > char *ao_hash, struct tcp_ao_key *key, > const struct sock *sk, const struct sk_buff *skb, > @@ -435,6 +495,46 @@ struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, > return tcp_ao_do_lookup(sk, addr, AF_INET, sndid, rcvid); > } > > +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, > + const struct tcp_ao_hdr *aoh, int l3index, > + struct tcp_ao_key **key, char **traffic_key, > + bool *allocated_traffic_key, u8 *keyid, u32 *sne) > +{ > + struct tcp_ao_info *ao_info; > + > + *allocated_traffic_key = false; > + /* If there's no socket - than initial sisn/disn are unknown. > + * Drop the segment. RFC5925 (7.7) advises to require graceful > + * restart [RFC4724]. Alternatively, the RFC5925 advises to > + * save/restore traffic keys before/after reboot. > + * Linux TCP-AO support provides TCP_AO_ADD_KEY and TCP_AO_REPAIR > + * options to restore a socket post-reboot. > + */ > + if (!sk) > + return -ENOTCONN; > + > + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { > + return -1; > + } else { > + struct tcp_ao_key *rnext_key; > + > + if (sk->sk_state == TCP_TIME_WAIT) Why not adding TCPF_TIME_WAIT in the prior test ? > + return -1; > + ao_info = rcu_dereference(tcp_sk(sk)->ao_info); > + if (!ao_info) > + return -ENOENT; > + > + *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); > + if (!*key) > + return -ENOENT; > + *traffic_key = snd_other_key(*key); > + rnext_key = READ_ONCE(ao_info->rnext_key); > + *keyid = rnext_key->rcvid; > + *sne = 0; > + } > + return 0; > +} > + >
On 10/11/23 19:04, Eric Dumazet wrote: > On Tue, Oct 10, 2023 at 1:07 AM Dmitry Safonov <dima@arista.com> wrote: [..] >> struct tcp_key { >> union { >> - struct tcp_ao_key *ao_key; >> + struct { >> + struct tcp_ao_key *ao_key; >> + u32 sne; >> + char *traffic_key; > > Move sne after traffic_key to avoid a hole on 64bit arches. Sure, thanks! [..] >> @@ -435,6 +495,46 @@ struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, >> return tcp_ao_do_lookup(sk, addr, AF_INET, sndid, rcvid); >> } >> >> +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, >> + const struct tcp_ao_hdr *aoh, int l3index, >> + struct tcp_ao_key **key, char **traffic_key, >> + bool *allocated_traffic_key, u8 *keyid, u32 *sne) >> +{ >> + struct tcp_ao_info *ao_info; >> + >> + *allocated_traffic_key = false; >> + /* If there's no socket - than initial sisn/disn are unknown. >> + * Drop the segment. RFC5925 (7.7) advises to require graceful >> + * restart [RFC4724]. Alternatively, the RFC5925 advises to >> + * save/restore traffic keys before/after reboot. >> + * Linux TCP-AO support provides TCP_AO_ADD_KEY and TCP_AO_REPAIR >> + * options to restore a socket post-reboot. >> + */ >> + if (!sk) >> + return -ENOTCONN; >> + >> + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { >> + return -1; >> + } else { >> + struct tcp_ao_key *rnext_key; >> + >> + if (sk->sk_state == TCP_TIME_WAIT) > > Why not adding TCPF_TIME_WAIT in the prior test ? Well, twsk is wired up here in the next patch "net/tcp: Add TCP-AO sign to twsk", while the condition for request sockets is the patch after that. I probably can move it to the same condition just to polish the code in the middle of patch set, I guess it felt not critical when I split it by patches. > >> + return -1; [..] Thanks, Dmitry
Hello, kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_net/core/sock.c" on: commit: df13d11e6a2a3cc5f973aca36f68f880fa42d55f ("[PATCH v14 net-next 08/23] net/tcp: Add AO sign to RST packets") url: https://github.com/intel-lab-lkp/linux/commits/Dmitry-Safonov/net-tcp-Prepare-tcp_md5sig_pool-for-TCP-AO/20231010-071412 patch link: https://lore.kernel.org/all/20231009230722.76268-9-dima@arista.com/ patch subject: [PATCH v14 net-next 08/23] net/tcp: Add AO sign to RST packets in testcase: trinity version: trinity-i386-abe9de86-1_20230429 with following parameters: runtime: 300s group: group-02 nr_groups: 5 test-description: Trinity is a linux system call fuzz tester. test-url: http://codemonkey.org.uk/projects/trinity/ compiler: gcc-12 test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G (please refer to attached dmesg/kmsg for entire log/backtrace) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202310171606.30e15ebe-oliver.sang@intel.com [ 221.348247][ T7133] BUG: sleeping function called from invalid context at net/core/sock.c:2978 [ 221.349875][ T7133] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 7133, name: trinity-c4 [ 221.351666][ T7133] preempt_count: 0, expected: 0 [ 221.352614][ T7133] RCU nest depth: 1, expected: 0 [ 221.353518][ T7133] 2 locks held by trinity-c4/7133: [ 221.354530][ T7133] #0: ed8b5660 (sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_sendmsg (net/ipv4/tcp.c:1336) [ 221.374314][ T7133] #1: c27dbb18 (rcu_read_lock){....}-{1:2}, at: inet6_csk_xmit (include/linux/rcupdate.h:747 net/ipv6/inet6_connection_sock.c:129) [ 221.375906][ T7133] CPU: 1 PID: 7133 Comm: trinity-c4 Tainted: G W TN 6.6.0-rc4-01105-gdf13d11e6a2a #1 [ 221.377820][ T7133] Call Trace: [ 221.378447][ T7133] dump_stack_lvl (lib/dump_stack.c:107) [ 221.379373][ T7133] dump_stack (lib/dump_stack.c:114) [ 221.380186][ T7133] __might_resched (kernel/sched/core.c:10188) [ 221.381100][ T7133] __release_sock (include/linux/sched.h:2097 net/core/sock.c:2978) [ 221.381960][ T7133] release_sock (net/core/sock.c:3520) [ 221.382784][ T7133] inet_wait_for_connect (net/ipv4/af_inet.c:609) [ 221.383763][ T7133] ? autoremove_wake_function (kernel/sched/wait.c:479) [ 221.384757][ T7133] __inet_stream_connect (net/ipv4/af_inet.c:701 (discriminator 1)) [ 221.385741][ T7133] ? kmalloc_node_trace (mm/slab_common.c:1133) [ 221.386702][ T7133] tcp_sendmsg_fastopen (net/ipv4/tcp.c:1026) [ 221.387685][ T7133] tcp_sendmsg_locked (net/ipv4/tcp.c:1073) [ 221.388642][ T7133] ? find_held_lock (kernel/locking/lockdep.c:5243) [ 221.389536][ T7133] ? mark_held_locks (kernel/locking/lockdep.c:4273) [ 221.390437][ T7133] ? lock_sock_nested (net/core/sock.c:3511) [ 221.391359][ T7133] ? lock_sock_nested (net/core/sock.c:3511) [ 221.392335][ T7133] tcp_sendmsg (net/ipv4/tcp.c:1336) [ 221.393153][ T7133] inet6_sendmsg (net/ipv6/af_inet6.c:658 (discriminator 2)) [ 221.394010][ T7133] ____sys_sendmsg (net/socket.c:730 net/socket.c:745 net/socket.c:2558) [ 221.394927][ T7133] ___sys_sendmsg (net/socket.c:2612) [ 221.395844][ T7133] __sys_sendmsg (net/socket.c:2641) [ 221.396671][ T7133] __ia32_sys_sendmsg (net/socket.c:2648) [ 221.397562][ T7133] __do_fast_syscall_32 (arch/x86/entry/common.c:112 arch/x86/entry/common.c:178) [ 221.398485][ T7133] do_fast_syscall_32 (arch/x86/entry/common.c:203) [ 221.399401][ T7133] do_SYSENTER_32 (arch/x86/entry/common.c:247) [ 221.404363][ T7133] entry_SYSENTER_32 (arch/x86/entry/entry_32.S:840) [ 221.405255][ T7133] EIP: 0xb7f59579 [ 221.405931][ T7133] Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 All code ======== 0: b8 01 10 06 03 mov $0x3061001,%eax 5: 74 b4 je 0xffffffffffffffbb 7: 01 10 add %edx,(%rax) 9: 07 (bad) a: 03 74 b0 01 add 0x1(%rax,%rsi,4),%esi e: 10 08 adc %cl,(%rax) 10: 03 74 d8 01 add 0x1(%rax,%rbx,8),%esi ... 20: 00 51 52 add %dl,0x52(%rcx) 23: 55 push %rbp 24:* 89 e5 mov %esp,%ebp <-- trapping instruction 26: 0f 34 sysenter 28: cd 80 int $0x80 2a: 5d pop %rbp 2b: 5a pop %rdx 2c: 59 pop %rcx 2d: c3 ret 2e: 90 nop 2f: 90 nop 30: 90 nop 31: 90 nop 32: 8d 76 00 lea 0x0(%rsi),%esi 35: 58 pop %rax 36: b8 77 00 00 00 mov $0x77,%eax 3b: cd 80 int $0x80 3d: 90 nop 3e: 8d .byte 0x8d 3f: 76 .byte 0x76 Code starting with the faulting instruction =========================================== 0: 5d pop %rbp 1: 5a pop %rdx 2: 59 pop %rcx 3: c3 ret 4: 90 nop 5: 90 nop 6: 90 nop 7: 90 nop 8: 8d 76 00 lea 0x0(%rsi),%esi b: 58 pop %rax c: b8 77 00 00 00 mov $0x77,%eax 11: cd 80 int $0x80 13: 90 nop 14: 8d .byte 0x8d 15: 76 .byte 0x76 [ 221.409527][ T7133] EAX: ffffffda EBX: 00000137 ECX: 01ce1580 EDX: 240449b4 [ 221.410805][ T7133] ESI: 000000b1 EDI: 8b8b8b8b EBP: 08080808 ESP: bfc205fc [ 221.412147][ T7133] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000296 [ 221.481648][ T7133] [ 221.482194][ T7133] ================================================ [ 221.483377][ T7133] WARNING: lock held when returning to user space! [ 221.484574][ T7133] 6.6.0-rc4-01105-gdf13d11e6a2a #1 Tainted: G W TN [ 221.485904][ T7133] ------------------------------------------------ [ 221.487044][ T7133] trinity-c4/7133 is leaving the kernel with locks still held! [ 221.488448][ T7133] 1 lock held by trinity-c4/7133: [ 221.489401][ T7133] #0: c27dbb18 (rcu_read_lock){....}-{1:2}, at: inet6_csk_xmit (include/linux/rcupdate.h:747 net/ipv6/inet6_connection_sock.c:129) [ 221.491125][ T7133] ------------[ cut here ]------------ [ 221.501170][ T7133] Voluntary context switch within RCU read-side critical section! [ 221.501214][ T7133] WARNING: CPU: 1 PID: 7133 at kernel/rcu/tree_plugin.h:320 rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) [ 221.504458][ T7133] Modules linked in: ipmi_msghandler uio_pdrv_genirq uio rtc_cmos processor fuse drm drm_panel_orientation_quirks configfs [ 221.506701][ T7133] CPU: 1 PID: 7133 Comm: trinity-c4 Tainted: G W TN 6.6.0-rc4-01105-gdf13d11e6a2a #1 [ 221.508634][ T7133] EIP: rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) [ 221.509684][ T7133] Code: e9 87 fe ff ff 8d 74 26 00 8b 41 2c 89 45 ec e9 16 ff ff ff 8d 74 26 00 90 c6 05 09 88 94 c2 01 68 04 84 32 c2 e8 47 14 f5 ff <0f> 0b 5a e9 b0 fd ff ff 8d b4 26 00 00 00 00 81 e2 ff ff ff 7f 0f All code ======== 0: e9 87 fe ff ff jmp 0xfffffffffffffe8c 5: 8d 74 26 00 lea 0x0(%rsi,%riz,1),%esi 9: 8b 41 2c mov 0x2c(%rcx),%eax c: 89 45 ec mov %eax,-0x14(%rbp) f: e9 16 ff ff ff jmp 0xffffffffffffff2a 14: 8d 74 26 00 lea 0x0(%rsi,%riz,1),%esi 18: 90 nop 19: c6 05 09 88 94 c2 01 movb $0x1,-0x3d6b77f7(%rip) # 0xffffffffc2948829 20: 68 04 84 32 c2 push $0xffffffffc2328404 25: e8 47 14 f5 ff call 0xfffffffffff51471 2a:* 0f 0b ud2 <-- trapping instruction 2c: 5a pop %rdx 2d: e9 b0 fd ff ff jmp 0xfffffffffffffde2 32: 8d b4 26 00 00 00 00 lea 0x0(%rsi,%riz,1),%esi 39: 81 e2 ff ff ff 7f and $0x7fffffff,%edx 3f: 0f .byte 0xf Code starting with the faulting instruction =========================================== 0: 0f 0b ud2 2: 5a pop %rdx 3: e9 b0 fd ff ff jmp 0xfffffffffffffdb8 8: 8d b4 26 00 00 00 00 lea 0x0(%rsi,%riz,1),%esi f: 81 e2 ff ff ff 7f and $0x7fffffff,%edx 15: 0f .byte 0xf [ 221.513148][ T7133] EAX: 00000000 EBX: e52d7c40 ECX: 00000000 EDX: 00000000 [ 221.514443][ T7133] ESI: 00000000 EDI: eadb5640 EBP: ead7df28 ESP: ead7df10 [ 221.515747][ T7133] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 EFLAGS: 00010046 [ 221.517184][ T7133] CR0: 80050033 CR2: 01ce68cc CR3: 2ae6f000 CR4: 00000690 [ 221.518449][ T7133] Call Trace: [ 221.519074][ T7133] ? show_regs (arch/x86/kernel/dumpstack.c:478) [ 221.519826][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) [ 221.520887][ T7133] ? __warn (kernel/panic.c:673) [ 221.521663][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) [ 221.522722][ T7133] ? report_bug (lib/bug.c:201 lib/bug.c:219) [ 221.523615][ T7133] ? exc_overflow (arch/x86/kernel/traps.c:250) [ 221.524517][ T7133] ? handle_bug (arch/x86/kernel/traps.c:216) [ 221.525240][ T7133] ? exc_invalid_op (arch/x86/kernel/traps.c:258 (discriminator 1)) [ 221.526115][ T7133] ? handle_exception (arch/x86/entry/entry_32.S:1056) [ 221.527035][ T7133] ? exc_overflow (arch/x86/kernel/traps.c:250) [ 221.527882][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) [ 221.528945][ T7133] ? exc_overflow (arch/x86/kernel/traps.c:250) [ 221.529743][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) [ 221.530784][ T7133] __schedule (arch/x86/include/asm/preempt.h:80 (discriminator 3) kernel/sched/core.c:556 (discriminator 3) kernel/sched/sched.h:1372 (discriminator 3) kernel/sched/sched.h:1681 (discriminator 3) kernel/sched/core.c:6612 (discriminator 3)) [ 221.531573][ T7133] ? exit_to_user_mode_prepare (kernel/entry/common.c:158 kernel/entry/common.c:204) [ 221.532627][ T7133] schedule (arch/x86/include/asm/preempt.h:85 (discriminator 1) kernel/sched/core.c:6772 (discriminator 1)) [ 221.533358][ T7133] exit_to_user_mode_prepare (kernel/entry/common.c:161 kernel/entry/common.c:204) [ 221.534382][ T7133] ? sysvec_reboot (arch/x86/kernel/smp.c:269) [ 221.535255][ T7133] irqentry_exit_to_user_mode (kernel/entry/common.c:130 kernel/entry/common.c:311) [ 221.536302][ T7133] irqentry_exit (kernel/entry/common.c:445) [ 221.537101][ T7133] sysvec_reschedule_ipi (arch/x86/kernel/smp.c:269) [ 221.538079][ T7133] handle_exception (arch/x86/entry/entry_32.S:1056) [ 221.538990][ T7133] EIP: 0xb7f59579 [ 221.539674][ T7133] Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 All code ======== 0: b8 01 10 06 03 mov $0x3061001,%eax 5: 74 b4 je 0xffffffffffffffbb 7: 01 10 add %edx,(%rax) 9: 07 (bad) a: 03 74 b0 01 add 0x1(%rax,%rsi,4),%esi e: 10 08 adc %cl,(%rax) 10: 03 74 d8 01 add 0x1(%rax,%rbx,8),%esi ... 20: 00 51 52 add %dl,0x52(%rcx) 23: 55 push %rbp 24:* 89 e5 mov %esp,%ebp <-- trapping instruction 26: 0f 34 sysenter 28: cd 80 int $0x80 2a: 5d pop %rbp 2b: 5a pop %rdx 2c: 59 pop %rcx 2d: c3 ret 2e: 90 nop 2f: 90 nop 30: 90 nop 31: 90 nop 32: 8d 76 00 lea 0x0(%rsi),%esi 35: 58 pop %rax 36: b8 77 00 00 00 mov $0x77,%eax 3b: cd 80 int $0x80 3d: 90 nop 3e: 8d .byte 0x8d 3f: 76 .byte 0x76 Code starting with the faulting instruction =========================================== 0: 5d pop %rbp 1: 5a pop %rdx 2: 59 pop %rcx 3: c3 ret 4: 90 nop 5: 90 nop 6: 90 nop 7: 90 nop 8: 8d 76 00 lea 0x0(%rsi),%esi b: 58 pop %rax c: b8 77 00 00 00 mov $0x77,%eax 11: cd 80 int $0x80 13: 90 nop 14: 8d .byte 0x8d 15: 76 .byte 0x76 [ 221.543249][ T7133] EAX: ffffff91 EBX: 00000137 ECX: bfc205fc EDX: b7f59579 [ 221.544585][ T7133] ESI: 000000b1 EDI: 8b8b8b8b EBP: 08080808 ESP: bfc205fc [ 221.545872][ T7133] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000296 [ 221.547230][ T7133] ? sysvec_reboot (arch/x86/kernel/smp.c:269) [ 221.548133][ T7133] irq event stamp: 10576 [ 221.548909][ T7133] hardirqs last enabled at (10575): _raw_spin_unlock_irq (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 include/linux/spinlock_api_smp.h:159 kernel/locking/spinlock.c:202) [ 221.550576][ T7133] hardirqs last disabled at (10576): exit_to_user_mode_prepare (include/linux/entry-common.h:191 kernel/entry/common.c:181 kernel/entry/common.c:204) [ 221.552434][ T7133] softirqs last enabled at (10570): release_sock (net/core/sock.c:3528) [ 221.553958][ T7133] softirqs last disabled at (10568): release_sock (net/core/sock.c:3517) [ 221.555538][ T7133] ---[ end trace 0000000000000000 ]--- The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20231017/202310171606.30e15ebe-oliver.sang@intel.com
On 10/17/23 09:37, kernel test robot wrote: > > > Hello, > > kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_net/core/sock.c" on: > > commit: df13d11e6a2a3cc5f973aca36f68f880fa42d55f ("[PATCH v14 net-next 08/23] net/tcp: Add AO sign to RST packets") > url: https://github.com/intel-lab-lkp/linux/commits/Dmitry-Safonov/net-tcp-Prepare-tcp_md5sig_pool-for-TCP-AO/20231010-071412 > patch link: https://lore.kernel.org/all/20231009230722.76268-9-dima@arista.com/ > patch subject: [PATCH v14 net-next 08/23] net/tcp: Add AO sign to RST packets > > in testcase: trinity > version: trinity-i386-abe9de86-1_20230429 > with following parameters: > > runtime: 300s > group: group-02 > nr_groups: 5 > > test-description: Trinity is a linux system call fuzz tester. > test-url: http://codemonkey.org.uk/projects/trinity/ > > > compiler: gcc-12 > test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G > > (please refer to attached dmesg/kmsg for entire log/backtrace) > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <oliver.sang@intel.com> > | Closes: https://lore.kernel.org/oe-lkp/202310171606.30e15ebe-oliver.sang@intel.com > > > [ 221.348247][ T7133] BUG: sleeping function called from invalid context at net/core/sock.c:2978 > [ 221.349875][ T7133] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 7133, name: trinity-c4 > [ 221.351666][ T7133] preempt_count: 0, expected: 0 > [ 221.352614][ T7133] RCU nest depth: 1, expected: 0 > [ 221.353518][ T7133] 2 locks held by trinity-c4/7133: > [ 221.354530][ T7133] #0: ed8b5660 (sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_sendmsg (net/ipv4/tcp.c:1336) > [ 221.374314][ T7133] #1: c27dbb18 (rcu_read_lock){....}-{1:2}, at: inet6_csk_xmit (include/linux/rcupdate.h:747 net/ipv6/inet6_connection_sock.c:129) > [ 221.375906][ T7133] CPU: 1 PID: 7133 Comm: trinity-c4 Tainted: G W TN 6.6.0-rc4-01105-gdf13d11e6a2a #1 Quite puzzling. It doesn't seem that it can happen to inet6_csk_xmit(): : rcu_read_lock(); : skb_dst_set_noref(skb, dst); : : /* Restore final destination back after routing done */ : fl6.daddr = sk->sk_v6_daddr; : : res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), : np->tclass, sk->sk_priority); : rcu_read_unlock(); So, I presumed the calltrace was for nested rcu_read_lock() case. Then I've looked at all return/goto cases, I couldn't find any unbalanced rcu_read_{,un}lock(). Is this reproducible by any chance? > [ 221.377820][ T7133] Call Trace: > [ 221.378447][ T7133] dump_stack_lvl (lib/dump_stack.c:107) > [ 221.379373][ T7133] dump_stack (lib/dump_stack.c:114) > [ 221.380186][ T7133] __might_resched (kernel/sched/core.c:10188) > [ 221.381100][ T7133] __release_sock (include/linux/sched.h:2097 net/core/sock.c:2978) > [ 221.381960][ T7133] release_sock (net/core/sock.c:3520) > [ 221.382784][ T7133] inet_wait_for_connect (net/ipv4/af_inet.c:609) > [ 221.383763][ T7133] ? autoremove_wake_function (kernel/sched/wait.c:479) > [ 221.384757][ T7133] __inet_stream_connect (net/ipv4/af_inet.c:701 (discriminator 1)) > [ 221.385741][ T7133] ? kmalloc_node_trace (mm/slab_common.c:1133) > [ 221.386702][ T7133] tcp_sendmsg_fastopen (net/ipv4/tcp.c:1026) > [ 221.387685][ T7133] tcp_sendmsg_locked (net/ipv4/tcp.c:1073) > [ 221.388642][ T7133] ? find_held_lock (kernel/locking/lockdep.c:5243) > [ 221.389536][ T7133] ? mark_held_locks (kernel/locking/lockdep.c:4273) > [ 221.390437][ T7133] ? lock_sock_nested (net/core/sock.c:3511) > [ 221.391359][ T7133] ? lock_sock_nested (net/core/sock.c:3511) > [ 221.392335][ T7133] tcp_sendmsg (net/ipv4/tcp.c:1336) > [ 221.393153][ T7133] inet6_sendmsg (net/ipv6/af_inet6.c:658 (discriminator 2)) > [ 221.394010][ T7133] ____sys_sendmsg (net/socket.c:730 net/socket.c:745 net/socket.c:2558) > [ 221.394927][ T7133] ___sys_sendmsg (net/socket.c:2612) > [ 221.395844][ T7133] __sys_sendmsg (net/socket.c:2641) > [ 221.396671][ T7133] __ia32_sys_sendmsg (net/socket.c:2648) > [ 221.397562][ T7133] __do_fast_syscall_32 (arch/x86/entry/common.c:112 arch/x86/entry/common.c:178) > [ 221.398485][ T7133] do_fast_syscall_32 (arch/x86/entry/common.c:203) > [ 221.399401][ T7133] do_SYSENTER_32 (arch/x86/entry/common.c:247) > [ 221.404363][ T7133] entry_SYSENTER_32 (arch/x86/entry/entry_32.S:840) > [ 221.405255][ T7133] EIP: 0xb7f59579 > [ 221.405931][ T7133] Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 > All code > ======== > 0: b8 01 10 06 03 mov $0x3061001,%eax > 5: 74 b4 je 0xffffffffffffffbb > 7: 01 10 add %edx,(%rax) > 9: 07 (bad) > a: 03 74 b0 01 add 0x1(%rax,%rsi,4),%esi > e: 10 08 adc %cl,(%rax) > 10: 03 74 d8 01 add 0x1(%rax,%rbx,8),%esi > ... > 20: 00 51 52 add %dl,0x52(%rcx) > 23: 55 push %rbp > 24:* 89 e5 mov %esp,%ebp <-- trapping instruction > 26: 0f 34 sysenter > 28: cd 80 int $0x80 > 2a: 5d pop %rbp > 2b: 5a pop %rdx > 2c: 59 pop %rcx > 2d: c3 ret > 2e: 90 nop > 2f: 90 nop > 30: 90 nop > 31: 90 nop > 32: 8d 76 00 lea 0x0(%rsi),%esi > 35: 58 pop %rax > 36: b8 77 00 00 00 mov $0x77,%eax > 3b: cd 80 int $0x80 > 3d: 90 nop > 3e: 8d .byte 0x8d > 3f: 76 .byte 0x76 > > Code starting with the faulting instruction > =========================================== > 0: 5d pop %rbp > 1: 5a pop %rdx > 2: 59 pop %rcx > 3: c3 ret > 4: 90 nop > 5: 90 nop > 6: 90 nop > 7: 90 nop > 8: 8d 76 00 lea 0x0(%rsi),%esi > b: 58 pop %rax > c: b8 77 00 00 00 mov $0x77,%eax > 11: cd 80 int $0x80 > 13: 90 nop > 14: 8d .byte 0x8d > 15: 76 .byte 0x76 > [ 221.409527][ T7133] EAX: ffffffda EBX: 00000137 ECX: 01ce1580 EDX: 240449b4 > [ 221.410805][ T7133] ESI: 000000b1 EDI: 8b8b8b8b EBP: 08080808 ESP: bfc205fc > [ 221.412147][ T7133] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000296 > [ 221.481648][ T7133] > [ 221.482194][ T7133] ================================================ > [ 221.483377][ T7133] WARNING: lock held when returning to user space! > [ 221.484574][ T7133] 6.6.0-rc4-01105-gdf13d11e6a2a #1 Tainted: G W TN > [ 221.485904][ T7133] ------------------------------------------------ > [ 221.487044][ T7133] trinity-c4/7133 is leaving the kernel with locks still held! > [ 221.488448][ T7133] 1 lock held by trinity-c4/7133: > [ 221.489401][ T7133] #0: c27dbb18 (rcu_read_lock){....}-{1:2}, at: inet6_csk_xmit (include/linux/rcupdate.h:747 net/ipv6/inet6_connection_sock.c:129) > [ 221.491125][ T7133] ------------[ cut here ]------------ > [ 221.501170][ T7133] Voluntary context switch within RCU read-side critical section! > [ 221.501214][ T7133] WARNING: CPU: 1 PID: 7133 at kernel/rcu/tree_plugin.h:320 rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > [ 221.504458][ T7133] Modules linked in: ipmi_msghandler uio_pdrv_genirq uio rtc_cmos processor fuse drm drm_panel_orientation_quirks configfs > [ 221.506701][ T7133] CPU: 1 PID: 7133 Comm: trinity-c4 Tainted: G W TN 6.6.0-rc4-01105-gdf13d11e6a2a #1 > [ 221.508634][ T7133] EIP: rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > [ 221.509684][ T7133] Code: e9 87 fe ff ff 8d 74 26 00 8b 41 2c 89 45 ec e9 16 ff ff ff 8d 74 26 00 90 c6 05 09 88 94 c2 01 68 04 84 32 c2 e8 47 14 f5 ff <0f> 0b 5a e9 b0 fd ff ff 8d b4 26 00 00 00 00 81 e2 ff ff ff 7f 0f > All code > ======== > 0: e9 87 fe ff ff jmp 0xfffffffffffffe8c > 5: 8d 74 26 00 lea 0x0(%rsi,%riz,1),%esi > 9: 8b 41 2c mov 0x2c(%rcx),%eax > c: 89 45 ec mov %eax,-0x14(%rbp) > f: e9 16 ff ff ff jmp 0xffffffffffffff2a > 14: 8d 74 26 00 lea 0x0(%rsi,%riz,1),%esi > 18: 90 nop > 19: c6 05 09 88 94 c2 01 movb $0x1,-0x3d6b77f7(%rip) # 0xffffffffc2948829 > 20: 68 04 84 32 c2 push $0xffffffffc2328404 > 25: e8 47 14 f5 ff call 0xfffffffffff51471 > 2a:* 0f 0b ud2 <-- trapping instruction > 2c: 5a pop %rdx > 2d: e9 b0 fd ff ff jmp 0xfffffffffffffde2 > 32: 8d b4 26 00 00 00 00 lea 0x0(%rsi,%riz,1),%esi > 39: 81 e2 ff ff ff 7f and $0x7fffffff,%edx > 3f: 0f .byte 0xf > > Code starting with the faulting instruction > =========================================== > 0: 0f 0b ud2 > 2: 5a pop %rdx > 3: e9 b0 fd ff ff jmp 0xfffffffffffffdb8 > 8: 8d b4 26 00 00 00 00 lea 0x0(%rsi,%riz,1),%esi > f: 81 e2 ff ff ff 7f and $0x7fffffff,%edx > 15: 0f .byte 0xf > [ 221.513148][ T7133] EAX: 00000000 EBX: e52d7c40 ECX: 00000000 EDX: 00000000 > [ 221.514443][ T7133] ESI: 00000000 EDI: eadb5640 EBP: ead7df28 ESP: ead7df10 > [ 221.515747][ T7133] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 EFLAGS: 00010046 > [ 221.517184][ T7133] CR0: 80050033 CR2: 01ce68cc CR3: 2ae6f000 CR4: 00000690 > [ 221.518449][ T7133] Call Trace: > [ 221.519074][ T7133] ? show_regs (arch/x86/kernel/dumpstack.c:478) > [ 221.519826][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > [ 221.520887][ T7133] ? __warn (kernel/panic.c:673) > [ 221.521663][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > [ 221.522722][ T7133] ? report_bug (lib/bug.c:201 lib/bug.c:219) > [ 221.523615][ T7133] ? exc_overflow (arch/x86/kernel/traps.c:250) > [ 221.524517][ T7133] ? handle_bug (arch/x86/kernel/traps.c:216) > [ 221.525240][ T7133] ? exc_invalid_op (arch/x86/kernel/traps.c:258 (discriminator 1)) > [ 221.526115][ T7133] ? handle_exception (arch/x86/entry/entry_32.S:1056) > [ 221.527035][ T7133] ? exc_overflow (arch/x86/kernel/traps.c:250) > [ 221.527882][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > [ 221.528945][ T7133] ? exc_overflow (arch/x86/kernel/traps.c:250) > [ 221.529743][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > [ 221.530784][ T7133] __schedule (arch/x86/include/asm/preempt.h:80 (discriminator 3) kernel/sched/core.c:556 (discriminator 3) kernel/sched/sched.h:1372 (discriminator 3) kernel/sched/sched.h:1681 (discriminator 3) kernel/sched/core.c:6612 (discriminator 3)) > [ 221.531573][ T7133] ? exit_to_user_mode_prepare (kernel/entry/common.c:158 kernel/entry/common.c:204) > [ 221.532627][ T7133] schedule (arch/x86/include/asm/preempt.h:85 (discriminator 1) kernel/sched/core.c:6772 (discriminator 1)) > [ 221.533358][ T7133] exit_to_user_mode_prepare (kernel/entry/common.c:161 kernel/entry/common.c:204) > [ 221.534382][ T7133] ? sysvec_reboot (arch/x86/kernel/smp.c:269) > [ 221.535255][ T7133] irqentry_exit_to_user_mode (kernel/entry/common.c:130 kernel/entry/common.c:311) > [ 221.536302][ T7133] irqentry_exit (kernel/entry/common.c:445) > [ 221.537101][ T7133] sysvec_reschedule_ipi (arch/x86/kernel/smp.c:269) > [ 221.538079][ T7133] handle_exception (arch/x86/entry/entry_32.S:1056) > [ 221.538990][ T7133] EIP: 0xb7f59579 > [ 221.539674][ T7133] Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 > All code > ======== > 0: b8 01 10 06 03 mov $0x3061001,%eax > 5: 74 b4 je 0xffffffffffffffbb > 7: 01 10 add %edx,(%rax) > 9: 07 (bad) > a: 03 74 b0 01 add 0x1(%rax,%rsi,4),%esi > e: 10 08 adc %cl,(%rax) > 10: 03 74 d8 01 add 0x1(%rax,%rbx,8),%esi > ... > 20: 00 51 52 add %dl,0x52(%rcx) > 23: 55 push %rbp > 24:* 89 e5 mov %esp,%ebp <-- trapping instruction > 26: 0f 34 sysenter > 28: cd 80 int $0x80 > 2a: 5d pop %rbp > 2b: 5a pop %rdx > 2c: 59 pop %rcx > 2d: c3 ret > 2e: 90 nop > 2f: 90 nop > 30: 90 nop > 31: 90 nop > 32: 8d 76 00 lea 0x0(%rsi),%esi > 35: 58 pop %rax > 36: b8 77 00 00 00 mov $0x77,%eax > 3b: cd 80 int $0x80 > 3d: 90 nop > 3e: 8d .byte 0x8d > 3f: 76 .byte 0x76 > > Code starting with the faulting instruction > =========================================== > 0: 5d pop %rbp > 1: 5a pop %rdx > 2: 59 pop %rcx > 3: c3 ret > 4: 90 nop > 5: 90 nop > 6: 90 nop > 7: 90 nop > 8: 8d 76 00 lea 0x0(%rsi),%esi > b: 58 pop %rax > c: b8 77 00 00 00 mov $0x77,%eax > 11: cd 80 int $0x80 > 13: 90 nop > 14: 8d .byte 0x8d > 15: 76 .byte 0x76 > [ 221.543249][ T7133] EAX: ffffff91 EBX: 00000137 ECX: bfc205fc EDX: b7f59579 > [ 221.544585][ T7133] ESI: 000000b1 EDI: 8b8b8b8b EBP: 08080808 ESP: bfc205fc > [ 221.545872][ T7133] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000296 > [ 221.547230][ T7133] ? sysvec_reboot (arch/x86/kernel/smp.c:269) > [ 221.548133][ T7133] irq event stamp: 10576 > [ 221.548909][ T7133] hardirqs last enabled at (10575): _raw_spin_unlock_irq (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 include/linux/spinlock_api_smp.h:159 kernel/locking/spinlock.c:202) > [ 221.550576][ T7133] hardirqs last disabled at (10576): exit_to_user_mode_prepare (include/linux/entry-common.h:191 kernel/entry/common.c:181 kernel/entry/common.c:204) > [ 221.552434][ T7133] softirqs last enabled at (10570): release_sock (net/core/sock.c:3528) > [ 221.553958][ T7133] softirqs last disabled at (10568): release_sock (net/core/sock.c:3517) > [ 221.555538][ T7133] ---[ end trace 0000000000000000 ]--- > > > The kernel config and materials to reproduce are available at: > https://download.01.org/0day-ci/archive/20231017/202310171606.30e15ebe-oliver.sang@intel.com > > > Thanks, Dmitry
hi, Dmitry Safonov, On Wed, Oct 18, 2023 at 12:19:56AM +0100, Dmitry Safonov wrote: > On 10/17/23 09:37, kernel test robot wrote: > > > > > > Hello, > > > > kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_net/core/sock.c" on: > > > > commit: df13d11e6a2a3cc5f973aca36f68f880fa42d55f ("[PATCH v14 net-next 08/23] net/tcp: Add AO sign to RST packets") > > url: https://github.com/intel-lab-lkp/linux/commits/Dmitry-Safonov/net-tcp-Prepare-tcp_md5sig_pool-for-TCP-AO/20231010-071412 > > patch link: https://lore.kernel.org/all/20231009230722.76268-9-dima@arista.com/ > > patch subject: [PATCH v14 net-next 08/23] net/tcp: Add AO sign to RST packets > > > > in testcase: trinity > > version: trinity-i386-abe9de86-1_20230429 > > with following parameters: > > > > runtime: 300s > > group: group-02 > > nr_groups: 5 > > > > test-description: Trinity is a linux system call fuzz tester. > > test-url: http://codemonkey.org.uk/projects/trinity/ > > > > > > compiler: gcc-12 > > test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G > > > > (please refer to attached dmesg/kmsg for entire log/backtrace) > > > > > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > > the same patch/commit), kindly add following tags > > | Reported-by: kernel test robot <oliver.sang@intel.com> > > | Closes: https://lore.kernel.org/oe-lkp/202310171606.30e15ebe-oliver.sang@intel.com > > > > > > [ 221.348247][ T7133] BUG: sleeping function called from invalid context at net/core/sock.c:2978 > > [ 221.349875][ T7133] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 7133, name: trinity-c4 > > [ 221.351666][ T7133] preempt_count: 0, expected: 0 > > [ 221.352614][ T7133] RCU nest depth: 1, expected: 0 > > [ 221.353518][ T7133] 2 locks held by trinity-c4/7133: > > [ 221.354530][ T7133] #0: ed8b5660 (sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_sendmsg (net/ipv4/tcp.c:1336) > > [ 221.374314][ T7133] #1: c27dbb18 (rcu_read_lock){....}-{1:2}, at: inet6_csk_xmit (include/linux/rcupdate.h:747 net/ipv6/inet6_connection_sock.c:129) > > [ 221.375906][ T7133] CPU: 1 PID: 7133 Comm: trinity-c4 Tainted: G W TN 6.6.0-rc4-01105-gdf13d11e6a2a #1 > > Quite puzzling. It doesn't seem that it can happen to inet6_csk_xmit(): > > : rcu_read_lock(); > : skb_dst_set_noref(skb, dst); > : > : /* Restore final destination back after routing done */ > : fl6.daddr = sk->sk_v6_daddr; > : > : res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), > : np->tclass, sk->sk_priority); > : rcu_read_unlock(); > > So, I presumed the calltrace was for nested rcu_read_lock() case. > Then I've looked at all return/goto cases, I couldn't find any > unbalanced rcu_read_{,un}lock(). > > Is this reproducible by any chance? do you mean how often it could be reproduced? we run the tests upon this commit and parent both up to 15 times. from below, parent shows quite clean, but there are various issues while running tests upon this patch. 8468a6f4f3143 net/tcp: Add tcp_parse_auth_options() df13d11e6a2a3 net/tcp: Add AO sign to RST packets 8468a6f4f3143ba2 df13d11e6a2a3cc5f973aca36f6 ---------------- --------------------------- fail:runs %reproduction fail:runs | | | :15 87% 13:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_arch/x86/entry/common.c :15 33% 5:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_fs/dcache.c :15 13% 2:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_include/linux/percpu-rwsem.h :15 60% 9:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_include/linux/sched/mm.h :15 7% 1:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_kernel/locking/mutex.c :15 7% 1:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_kernel/locking/rwsem.c :15 7% 1:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_kernel/sched/completion.c :15 40% 6:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_kernel/task_work.c :15 33% 5:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_lib/iov_iter.c :15 33% 5:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_lib/strncpy_from_user.c :15 20% 3:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_lib/usercopy.c :15 13% 2:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_mm/vmscan.c :15 100% 15:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_net/core/sock.c :15 100% 15:15 dmesg.EIP:rcu_note_context_switch :15 100% 15:15 dmesg.WARNING:at_kernel/rcu/tree_plugin.h:#rcu_note_context_switch :15 100% 15:15 dmesg.WARNING:lock_held_when_returning_to_user_space :15 100% 15:15 dmesg.is_leaving_the_kernel_with_locks_still_held > > > > [ 221.377820][ T7133] Call Trace: > > [ 221.378447][ T7133] dump_stack_lvl (lib/dump_stack.c:107) > > [ 221.379373][ T7133] dump_stack (lib/dump_stack.c:114) > > [ 221.380186][ T7133] __might_resched (kernel/sched/core.c:10188) > > [ 221.381100][ T7133] __release_sock (include/linux/sched.h:2097 net/core/sock.c:2978) > > [ 221.381960][ T7133] release_sock (net/core/sock.c:3520) > > [ 221.382784][ T7133] inet_wait_for_connect (net/ipv4/af_inet.c:609) > > [ 221.383763][ T7133] ? autoremove_wake_function (kernel/sched/wait.c:479) > > [ 221.384757][ T7133] __inet_stream_connect (net/ipv4/af_inet.c:701 (discriminator 1)) > > [ 221.385741][ T7133] ? kmalloc_node_trace (mm/slab_common.c:1133) > > [ 221.386702][ T7133] tcp_sendmsg_fastopen (net/ipv4/tcp.c:1026) > > [ 221.387685][ T7133] tcp_sendmsg_locked (net/ipv4/tcp.c:1073) > > [ 221.388642][ T7133] ? find_held_lock (kernel/locking/lockdep.c:5243) > > [ 221.389536][ T7133] ? mark_held_locks (kernel/locking/lockdep.c:4273) > > [ 221.390437][ T7133] ? lock_sock_nested (net/core/sock.c:3511) > > [ 221.391359][ T7133] ? lock_sock_nested (net/core/sock.c:3511) > > [ 221.392335][ T7133] tcp_sendmsg (net/ipv4/tcp.c:1336) > > [ 221.393153][ T7133] inet6_sendmsg (net/ipv6/af_inet6.c:658 (discriminator 2)) > > [ 221.394010][ T7133] ____sys_sendmsg (net/socket.c:730 net/socket.c:745 net/socket.c:2558) > > [ 221.394927][ T7133] ___sys_sendmsg (net/socket.c:2612) > > [ 221.395844][ T7133] __sys_sendmsg (net/socket.c:2641) > > [ 221.396671][ T7133] __ia32_sys_sendmsg (net/socket.c:2648) > > [ 221.397562][ T7133] __do_fast_syscall_32 (arch/x86/entry/common.c:112 arch/x86/entry/common.c:178) > > [ 221.398485][ T7133] do_fast_syscall_32 (arch/x86/entry/common.c:203) > > [ 221.399401][ T7133] do_SYSENTER_32 (arch/x86/entry/common.c:247) > > [ 221.404363][ T7133] entry_SYSENTER_32 (arch/x86/entry/entry_32.S:840) > > [ 221.405255][ T7133] EIP: 0xb7f59579 > > [ 221.405931][ T7133] Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 > > All code > > ======== > > 0: b8 01 10 06 03 mov $0x3061001,%eax > > 5: 74 b4 je 0xffffffffffffffbb > > 7: 01 10 add %edx,(%rax) > > 9: 07 (bad) > > a: 03 74 b0 01 add 0x1(%rax,%rsi,4),%esi > > e: 10 08 adc %cl,(%rax) > > 10: 03 74 d8 01 add 0x1(%rax,%rbx,8),%esi > > ... > > 20: 00 51 52 add %dl,0x52(%rcx) > > 23: 55 push %rbp > > 24:* 89 e5 mov %esp,%ebp <-- trapping instruction > > 26: 0f 34 sysenter > > 28: cd 80 int $0x80 > > 2a: 5d pop %rbp > > 2b: 5a pop %rdx > > 2c: 59 pop %rcx > > 2d: c3 ret > > 2e: 90 nop > > 2f: 90 nop > > 30: 90 nop > > 31: 90 nop > > 32: 8d 76 00 lea 0x0(%rsi),%esi > > 35: 58 pop %rax > > 36: b8 77 00 00 00 mov $0x77,%eax > > 3b: cd 80 int $0x80 > > 3d: 90 nop > > 3e: 8d .byte 0x8d > > 3f: 76 .byte 0x76 > > > > Code starting with the faulting instruction > > =========================================== > > 0: 5d pop %rbp > > 1: 5a pop %rdx > > 2: 59 pop %rcx > > 3: c3 ret > > 4: 90 nop > > 5: 90 nop > > 6: 90 nop > > 7: 90 nop > > 8: 8d 76 00 lea 0x0(%rsi),%esi > > b: 58 pop %rax > > c: b8 77 00 00 00 mov $0x77,%eax > > 11: cd 80 int $0x80 > > 13: 90 nop > > 14: 8d .byte 0x8d > > 15: 76 .byte 0x76 > > [ 221.409527][ T7133] EAX: ffffffda EBX: 00000137 ECX: 01ce1580 EDX: 240449b4 > > [ 221.410805][ T7133] ESI: 000000b1 EDI: 8b8b8b8b EBP: 08080808 ESP: bfc205fc > > [ 221.412147][ T7133] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000296 > > [ 221.481648][ T7133] > > [ 221.482194][ T7133] ================================================ > > [ 221.483377][ T7133] WARNING: lock held when returning to user space! > > [ 221.484574][ T7133] 6.6.0-rc4-01105-gdf13d11e6a2a #1 Tainted: G W TN > > [ 221.485904][ T7133] ------------------------------------------------ > > [ 221.487044][ T7133] trinity-c4/7133 is leaving the kernel with locks still held! > > [ 221.488448][ T7133] 1 lock held by trinity-c4/7133: > > [ 221.489401][ T7133] #0: c27dbb18 (rcu_read_lock){....}-{1:2}, at: inet6_csk_xmit (include/linux/rcupdate.h:747 net/ipv6/inet6_connection_sock.c:129) > > [ 221.491125][ T7133] ------------[ cut here ]------------ > > [ 221.501170][ T7133] Voluntary context switch within RCU read-side critical section! > > [ 221.501214][ T7133] WARNING: CPU: 1 PID: 7133 at kernel/rcu/tree_plugin.h:320 rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > > [ 221.504458][ T7133] Modules linked in: ipmi_msghandler uio_pdrv_genirq uio rtc_cmos processor fuse drm drm_panel_orientation_quirks configfs > > [ 221.506701][ T7133] CPU: 1 PID: 7133 Comm: trinity-c4 Tainted: G W TN 6.6.0-rc4-01105-gdf13d11e6a2a #1 > > [ 221.508634][ T7133] EIP: rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > > [ 221.509684][ T7133] Code: e9 87 fe ff ff 8d 74 26 00 8b 41 2c 89 45 ec e9 16 ff ff ff 8d 74 26 00 90 c6 05 09 88 94 c2 01 68 04 84 32 c2 e8 47 14 f5 ff <0f> 0b 5a e9 b0 fd ff ff 8d b4 26 00 00 00 00 81 e2 ff ff ff 7f 0f > > All code > > ======== > > 0: e9 87 fe ff ff jmp 0xfffffffffffffe8c > > 5: 8d 74 26 00 lea 0x0(%rsi,%riz,1),%esi > > 9: 8b 41 2c mov 0x2c(%rcx),%eax > > c: 89 45 ec mov %eax,-0x14(%rbp) > > f: e9 16 ff ff ff jmp 0xffffffffffffff2a > > 14: 8d 74 26 00 lea 0x0(%rsi,%riz,1),%esi > > 18: 90 nop > > 19: c6 05 09 88 94 c2 01 movb $0x1,-0x3d6b77f7(%rip) # 0xffffffffc2948829 > > 20: 68 04 84 32 c2 push $0xffffffffc2328404 > > 25: e8 47 14 f5 ff call 0xfffffffffff51471 > > 2a:* 0f 0b ud2 <-- trapping instruction > > 2c: 5a pop %rdx > > 2d: e9 b0 fd ff ff jmp 0xfffffffffffffde2 > > 32: 8d b4 26 00 00 00 00 lea 0x0(%rsi,%riz,1),%esi > > 39: 81 e2 ff ff ff 7f and $0x7fffffff,%edx > > 3f: 0f .byte 0xf > > > > Code starting with the faulting instruction > > =========================================== > > 0: 0f 0b ud2 > > 2: 5a pop %rdx > > 3: e9 b0 fd ff ff jmp 0xfffffffffffffdb8 > > 8: 8d b4 26 00 00 00 00 lea 0x0(%rsi,%riz,1),%esi > > f: 81 e2 ff ff ff 7f and $0x7fffffff,%edx > > 15: 0f .byte 0xf > > [ 221.513148][ T7133] EAX: 00000000 EBX: e52d7c40 ECX: 00000000 EDX: 00000000 > > [ 221.514443][ T7133] ESI: 00000000 EDI: eadb5640 EBP: ead7df28 ESP: ead7df10 > > [ 221.515747][ T7133] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 EFLAGS: 00010046 > > [ 221.517184][ T7133] CR0: 80050033 CR2: 01ce68cc CR3: 2ae6f000 CR4: 00000690 > > [ 221.518449][ T7133] Call Trace: > > [ 221.519074][ T7133] ? show_regs (arch/x86/kernel/dumpstack.c:478) > > [ 221.519826][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > > [ 221.520887][ T7133] ? __warn (kernel/panic.c:673) > > [ 221.521663][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > > [ 221.522722][ T7133] ? report_bug (lib/bug.c:201 lib/bug.c:219) > > [ 221.523615][ T7133] ? exc_overflow (arch/x86/kernel/traps.c:250) > > [ 221.524517][ T7133] ? handle_bug (arch/x86/kernel/traps.c:216) > > [ 221.525240][ T7133] ? exc_invalid_op (arch/x86/kernel/traps.c:258 (discriminator 1)) > > [ 221.526115][ T7133] ? handle_exception (arch/x86/entry/entry_32.S:1056) > > [ 221.527035][ T7133] ? exc_overflow (arch/x86/kernel/traps.c:250) > > [ 221.527882][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > > [ 221.528945][ T7133] ? exc_overflow (arch/x86/kernel/traps.c:250) > > [ 221.529743][ T7133] ? rcu_note_context_switch (kernel/rcu/tree_plugin.h:320 (discriminator 11)) > > [ 221.530784][ T7133] __schedule (arch/x86/include/asm/preempt.h:80 (discriminator 3) kernel/sched/core.c:556 (discriminator 3) kernel/sched/sched.h:1372 (discriminator 3) kernel/sched/sched.h:1681 (discriminator 3) kernel/sched/core.c:6612 (discriminator 3)) > > [ 221.531573][ T7133] ? exit_to_user_mode_prepare (kernel/entry/common.c:158 kernel/entry/common.c:204) > > [ 221.532627][ T7133] schedule (arch/x86/include/asm/preempt.h:85 (discriminator 1) kernel/sched/core.c:6772 (discriminator 1)) > > [ 221.533358][ T7133] exit_to_user_mode_prepare (kernel/entry/common.c:161 kernel/entry/common.c:204) > > [ 221.534382][ T7133] ? sysvec_reboot (arch/x86/kernel/smp.c:269) > > [ 221.535255][ T7133] irqentry_exit_to_user_mode (kernel/entry/common.c:130 kernel/entry/common.c:311) > > [ 221.536302][ T7133] irqentry_exit (kernel/entry/common.c:445) > > [ 221.537101][ T7133] sysvec_reschedule_ipi (arch/x86/kernel/smp.c:269) > > [ 221.538079][ T7133] handle_exception (arch/x86/entry/entry_32.S:1056) > > [ 221.538990][ T7133] EIP: 0xb7f59579 > > [ 221.539674][ T7133] Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 > > All code > > ======== > > 0: b8 01 10 06 03 mov $0x3061001,%eax > > 5: 74 b4 je 0xffffffffffffffbb > > 7: 01 10 add %edx,(%rax) > > 9: 07 (bad) > > a: 03 74 b0 01 add 0x1(%rax,%rsi,4),%esi > > e: 10 08 adc %cl,(%rax) > > 10: 03 74 d8 01 add 0x1(%rax,%rbx,8),%esi > > ... > > 20: 00 51 52 add %dl,0x52(%rcx) > > 23: 55 push %rbp > > 24:* 89 e5 mov %esp,%ebp <-- trapping instruction > > 26: 0f 34 sysenter > > 28: cd 80 int $0x80 > > 2a: 5d pop %rbp > > 2b: 5a pop %rdx > > 2c: 59 pop %rcx > > 2d: c3 ret > > 2e: 90 nop > > 2f: 90 nop > > 30: 90 nop > > 31: 90 nop > > 32: 8d 76 00 lea 0x0(%rsi),%esi > > 35: 58 pop %rax > > 36: b8 77 00 00 00 mov $0x77,%eax > > 3b: cd 80 int $0x80 > > 3d: 90 nop > > 3e: 8d .byte 0x8d > > 3f: 76 .byte 0x76 > > > > Code starting with the faulting instruction > > =========================================== > > 0: 5d pop %rbp > > 1: 5a pop %rdx > > 2: 59 pop %rcx > > 3: c3 ret > > 4: 90 nop > > 5: 90 nop > > 6: 90 nop > > 7: 90 nop > > 8: 8d 76 00 lea 0x0(%rsi),%esi > > b: 58 pop %rax > > c: b8 77 00 00 00 mov $0x77,%eax > > 11: cd 80 int $0x80 > > 13: 90 nop > > 14: 8d .byte 0x8d > > 15: 76 .byte 0x76 > > [ 221.543249][ T7133] EAX: ffffff91 EBX: 00000137 ECX: bfc205fc EDX: b7f59579 > > [ 221.544585][ T7133] ESI: 000000b1 EDI: 8b8b8b8b EBP: 08080808 ESP: bfc205fc > > [ 221.545872][ T7133] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000296 > > [ 221.547230][ T7133] ? sysvec_reboot (arch/x86/kernel/smp.c:269) > > [ 221.548133][ T7133] irq event stamp: 10576 > > [ 221.548909][ T7133] hardirqs last enabled at (10575): _raw_spin_unlock_irq (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:77 include/linux/spinlock_api_smp.h:159 kernel/locking/spinlock.c:202) > > [ 221.550576][ T7133] hardirqs last disabled at (10576): exit_to_user_mode_prepare (include/linux/entry-common.h:191 kernel/entry/common.c:181 kernel/entry/common.c:204) > > [ 221.552434][ T7133] softirqs last enabled at (10570): release_sock (net/core/sock.c:3528) > > [ 221.553958][ T7133] softirqs last disabled at (10568): release_sock (net/core/sock.c:3517) > > [ 221.555538][ T7133] ---[ end trace 0000000000000000 ]--- > > > > > > The kernel config and materials to reproduce are available at: > > https://download.01.org/0day-ci/archive/20231017/202310171606.30e15ebe-oliver.sang@intel.com > > > > > > > > Thanks, > Dmitry >
Hi Oliver, On 10/18/23 08:32, Oliver Sang wrote: > hi, Dmitry Safonov, > > On Wed, Oct 18, 2023 at 12:19:56AM +0100, Dmitry Safonov wrote: >> On 10/17/23 09:37, kernel test robot wrote: >>> >>> >>> Hello, >>> >>> kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_net/core/sock.c" on: >>> >>> commit: df13d11e6a2a3cc5f973aca36f68f880fa42d55f ("[PATCH v14 net-next 08/23] net/tcp: Add AO sign to RST packets") [..] >>> >>> [ 221.348247][ T7133] BUG: sleeping function called from invalid context at net/core/sock.c:2978 >>> [ 221.349875][ T7133] in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 7133, name: trinity-c4 >>> [ 221.351666][ T7133] preempt_count: 0, expected: 0 >>> [ 221.352614][ T7133] RCU nest depth: 1, expected: 0 >>> [ 221.353518][ T7133] 2 locks held by trinity-c4/7133: >>> [ 221.354530][ T7133] #0: ed8b5660 (sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_sendmsg (net/ipv4/tcp.c:1336) >>> [ 221.374314][ T7133] #1: c27dbb18 (rcu_read_lock){....}-{1:2}, at: inet6_csk_xmit (include/linux/rcupdate.h:747 net/ipv6/inet6_connection_sock.c:129) >>> [ 221.375906][ T7133] CPU: 1 PID: 7133 Comm: trinity-c4 Tainted: G W TN 6.6.0-rc4-01105-gdf13d11e6a2a #1 >> >> Quite puzzling. It doesn't seem that it can happen to inet6_csk_xmit(): >> >> : rcu_read_lock(); >> : skb_dst_set_noref(skb, dst); >> : >> : /* Restore final destination back after routing done */ >> : fl6.daddr = sk->sk_v6_daddr; >> : >> : res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), >> : np->tclass, sk->sk_priority); >> : rcu_read_unlock(); >> >> So, I presumed the calltrace was for nested rcu_read_lock() case. >> Then I've looked at all return/goto cases, I couldn't find any >> unbalanced rcu_read_{,un}lock(). >> >> Is this reproducible by any chance? > > do you mean how often it could be reproduced? > > we run the tests upon this commit and parent both up to 15 times. > from below, parent shows quite clean, but there are various issues while > running tests upon this patch. Thanks a lot for running your tests over it! That's what a fresh brain does: I see that tcp_v6_send_reset() with neither CONFIG_TCP_MD5SIG nor CONFIG_TCP_AO set does grab rcu_read_lock() [as I wasn't sure at that moment that nothing but md5 key does rely on that being held], but the exit is covered with ifdefs: : #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) : out: : if (allocated_traffic_key) : kfree(key.traffic_key); : rcu_read_unlock(); : #endif Seems to have been correct versions ago (version 6), but on refactoring when I've moved ifdeffs to tcp_parse_auth_options() declaration to pollute less code - missed this. Thanks again for your testing and reporting! > 8468a6f4f3143 net/tcp: Add tcp_parse_auth_options() > df13d11e6a2a3 net/tcp: Add AO sign to RST packets > > 8468a6f4f3143ba2 df13d11e6a2a3cc5f973aca36f6 > ---------------- --------------------------- > fail:runs %reproduction fail:runs > | | | > :15 87% 13:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_arch/x86/entry/common.c > :15 33% 5:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_fs/dcache.c > :15 13% 2:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_include/linux/percpu-rwsem.h > :15 60% 9:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_include/linux/sched/mm.h > :15 7% 1:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_kernel/locking/mutex.c > :15 7% 1:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_kernel/locking/rwsem.c > :15 7% 1:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_kernel/sched/completion.c > :15 40% 6:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_kernel/task_work.c > :15 33% 5:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_lib/iov_iter.c > :15 33% 5:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_lib/strncpy_from_user.c > :15 20% 3:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_lib/usercopy.c > :15 13% 2:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_mm/vmscan.c > :15 100% 15:15 dmesg.BUG:sleeping_function_called_from_invalid_context_at_net/core/sock.c > :15 100% 15:15 dmesg.EIP:rcu_note_context_switch > :15 100% 15:15 dmesg.WARNING:at_kernel/rcu/tree_plugin.h:#rcu_note_context_switch > :15 100% 15:15 dmesg.WARNING:lock_held_when_returning_to_user_space > :15 100% 15:15 dmesg.is_leaving_the_kernel_with_locks_still_held > [..] Thanks, Dmitry
diff --git a/include/net/tcp.h b/include/net/tcp.h index a619c429a8bd..dc74908ffa5a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2220,7 +2220,12 @@ static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops, struct tcp_key { union { - struct tcp_ao_key *ao_key; + struct { + struct tcp_ao_key *ao_key; + u32 sne; + char *traffic_key; + u8 rcv_next; + }; struct tcp_md5sig_key *md5_key; }; enum { diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index fdd2f5091b98..629ab0365b83 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -120,12 +120,24 @@ int tcp_ao_hash_skb(unsigned short int family, const u8 *tkey, int hash_offset, u32 sne); int tcp_parse_ao(struct sock *sk, int cmd, unsigned short int family, sockptr_t optval, int optlen); +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, + int sndid, int rcvid); int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, unsigned int len, struct tcp_sigpool *hp); void tcp_ao_destroy_sock(struct sock *sk); struct tcp_ao_key *tcp_ao_do_lookup(const struct sock *sk, const union tcp_ao_addr *addr, int family, int sndid, int rcvid); +int tcp_ao_hash_hdr(unsigned short family, char *ao_hash, + struct tcp_ao_key *key, const u8 *tkey, + const union tcp_ao_addr *daddr, + const union tcp_ao_addr *saddr, + const struct tcphdr *th, u32 sne); +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, int l3index, + struct tcp_ao_key **key, char **traffic_key, + bool *allocated_traffic_key, u8 *keyid, u32 *sne); + /* ipv4 specific functions */ int tcp_v4_parse_ao(struct sock *sk, int cmd, sockptr_t optval, int optlen); struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 6eb9241d14a3..df59924c3828 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -48,8 +48,8 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx, * it's known that the keys in ao_info are matching peer's * family/address/VRF/etc. */ -static struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, - int sndid, int rcvid) +struct tcp_ao_key *tcp_ao_established_key(struct tcp_ao_info *ao, + int sndid, int rcvid) { struct tcp_ao_key *key; @@ -369,6 +369,66 @@ static int tcp_ao_hash_header(struct tcp_sigpool *hp, return err; } +int tcp_ao_hash_hdr(unsigned short int family, char *ao_hash, + struct tcp_ao_key *key, const u8 *tkey, + const union tcp_ao_addr *daddr, + const union tcp_ao_addr *saddr, + const struct tcphdr *th, u32 sne) +{ + int tkey_len = tcp_ao_digest_size(key); + int hash_offset = ao_hash - (char *)th; + struct tcp_sigpool hp; + void *hash_buf = NULL; + + hash_buf = kmalloc(tkey_len, GFP_ATOMIC); + if (!hash_buf) + goto clear_hash_noput; + + if (tcp_sigpool_start(key->tcp_sigpool_id, &hp)) + goto clear_hash_noput; + + if (crypto_ahash_setkey(crypto_ahash_reqtfm(hp.req), tkey, tkey_len)) + goto clear_hash; + + if (crypto_ahash_init(hp.req)) + goto clear_hash; + + if (tcp_ao_hash_sne(&hp, sne)) + goto clear_hash; + if (family == AF_INET) { + if (tcp_v4_ao_hash_pseudoheader(&hp, daddr->a4.s_addr, + saddr->a4.s_addr, th->doff * 4)) + goto clear_hash; +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + if (tcp_v6_ao_hash_pseudoheader(&hp, &daddr->a6, + &saddr->a6, th->doff * 4)) + goto clear_hash; +#endif + } else { + WARN_ON_ONCE(1); + goto clear_hash; + } + if (tcp_ao_hash_header(&hp, th, false, + ao_hash, hash_offset, tcp_ao_maclen(key))) + goto clear_hash; + ahash_request_set_crypt(hp.req, NULL, hash_buf, 0); + if (crypto_ahash_final(hp.req)) + goto clear_hash; + + memcpy(ao_hash, hash_buf, tcp_ao_maclen(key)); + tcp_sigpool_end(&hp); + kfree(hash_buf); + return 0; + +clear_hash: + tcp_sigpool_end(&hp); +clear_hash_noput: + memset(ao_hash, 0, tcp_ao_maclen(key)); + kfree(hash_buf); + return 1; +} + int tcp_ao_hash_skb(unsigned short int family, char *ao_hash, struct tcp_ao_key *key, const struct sock *sk, const struct sk_buff *skb, @@ -435,6 +495,46 @@ struct tcp_ao_key *tcp_v4_ao_lookup(const struct sock *sk, struct sock *addr_sk, return tcp_ao_do_lookup(sk, addr, AF_INET, sndid, rcvid); } +int tcp_ao_prepare_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, int l3index, + struct tcp_ao_key **key, char **traffic_key, + bool *allocated_traffic_key, u8 *keyid, u32 *sne) +{ + struct tcp_ao_info *ao_info; + + *allocated_traffic_key = false; + /* If there's no socket - than initial sisn/disn are unknown. + * Drop the segment. RFC5925 (7.7) advises to require graceful + * restart [RFC4724]. Alternatively, the RFC5925 advises to + * save/restore traffic keys before/after reboot. + * Linux TCP-AO support provides TCP_AO_ADD_KEY and TCP_AO_REPAIR + * options to restore a socket post-reboot. + */ + if (!sk) + return -ENOTCONN; + + if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV)) { + return -1; + } else { + struct tcp_ao_key *rnext_key; + + if (sk->sk_state == TCP_TIME_WAIT) + return -1; + ao_info = rcu_dereference(tcp_sk(sk)->ao_info); + if (!ao_info) + return -ENOENT; + + *key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); + if (!*key) + return -ENOENT; + *traffic_key = snd_other_key(*key); + rnext_key = READ_ONCE(ao_info->rnext_key); + *keyid = rnext_key->rcvid; + *sne = 0; + } + return 0; +} + int tcp_ao_transmit_skb(struct sock *sk, struct sk_buff *skb, struct tcp_ao_key *key, struct tcphdr *th, __u8 *hash_location) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e731f168b1ee..6a7a7736ba6f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -656,6 +656,52 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_v4_send_check); +#define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32)) + +static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb, + const struct tcp_ao_hdr *aoh, + struct ip_reply_arg *arg, struct tcphdr *reply, + __be32 reply_options[REPLY_OPTIONS_LEN]) +{ +#ifdef CONFIG_TCP_AO + int sdif = tcp_v4_sdif(skb); + int dif = inet_iif(skb); + int l3index = sdif ? dif : 0; + bool allocated_traffic_key; + struct tcp_ao_key *key; + char *traffic_key; + bool drop = true; + u32 ao_sne = 0; + u8 keyid; + + rcu_read_lock(); + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, + &key, &traffic_key, &allocated_traffic_key, + &keyid, &ao_sne)) + goto out; + + reply_options[0] = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key) << 16) | + (aoh->rnext_keyid << 8) | keyid); + arg->iov[0].iov_len += round_up(tcp_ao_len(key), 4); + reply->doff = arg->iov[0].iov_len / 4; + + if (tcp_ao_hash_hdr(AF_INET, (char *)&reply_options[1], + key, traffic_key, + (union tcp_ao_addr *)&ip_hdr(skb)->saddr, + (union tcp_ao_addr *)&ip_hdr(skb)->daddr, + reply, ao_sne)) + goto out; + drop = false; +out: + rcu_read_unlock(); + if (allocated_traffic_key) + kfree(traffic_key); + return drop; +#else + return true; +#endif +} + /* * This routine will send an RST to the other tcp. * @@ -669,28 +715,21 @@ EXPORT_SYMBOL(tcp_v4_send_check); * Exception: precedence violation. We do not implement it in any case. */ -#ifdef CONFIG_TCP_AO -#define OPTION_BYTES MAX_TCP_OPTION_SPACE -#elif defined(CONFIG_TCP_MD5SIG) -#define OPTION_BYTES TCPOLEN_MD5SIG_ALIGNED -#else -#define OPTION_BYTES sizeof(__be32) -#endif - static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; - __be32 opt[OPTION_BYTES / sizeof(__be32)]; + __be32 opt[REPLY_OPTIONS_LEN]; } rep; + const __u8 *md5_hash_location = NULL; + const struct tcp_ao_hdr *aoh; struct ip_reply_arg arg; #ifdef CONFIG_TCP_MD5SIG - const __u8 *md5_hash_location = NULL; struct tcp_md5sig_key *key = NULL; unsigned char newhash[16]; - int genhash; struct sock *sk1 = NULL; + int genhash; #endif u64 transmit_time = 0; struct sock *ctl_sk; @@ -727,11 +766,15 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len = sizeof(rep.th); net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); -#ifdef CONFIG_TCP_MD5SIG + /* Invalid TCP option size or twice included auth */ - if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, NULL)) + if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh)) return; + if (aoh && tcp_v4_ao_sign_reset(sk, skb, aoh, &arg, &rep.th, rep.opt)) + return; + +#ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); if (sk && sk_fullsock(sk)) { const union tcp_md5_addr *addr; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 6ba1fccbfa2e..594eb074e5e5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -853,8 +853,8 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, - int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, __be32 label, u32 priority, u32 txhash) + int oif, int rst, u8 tclass, __be32 label, + u32 priority, u32 txhash, struct tcp_key *key) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -869,13 +869,13 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; -#ifdef CONFIG_TCP_MD5SIG - if (key) + if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; -#endif + if (tcp_key_is_ao(key)) + tot_len += tcp_ao_len(key->ao_key); #ifdef CONFIG_MPTCP - if (rst && !key) { + if (rst && !tcp_key_is_md5(key)) { mrst = mptcp_reset_option(skb); if (mrst) @@ -916,14 +916,28 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 *topt++ = mrst; #ifdef CONFIG_TCP_MD5SIG - if (key) { + if (tcp_key_is_md5(key)) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); - tcp_v6_md5_hash_hdr((__u8 *)topt, key, + tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, t1); } #endif +#ifdef CONFIG_TCP_AO + if (tcp_key_is_ao(key)) { + *topt++ = htonl((TCPOPT_AO << 24) | + (tcp_ao_len(key->ao_key) << 16) | + (key->ao_key->sndid << 8) | + (key->rcv_next)); + + tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, + key->traffic_key, + (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, + (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, + t1, key->sne); + } +#endif memset(&fl6, 0, sizeof(fl6)); fl6.daddr = ipv6_hdr(skb)->saddr; @@ -986,19 +1000,23 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); - u32 seq = 0, ack_seq = 0; - struct tcp_md5sig_key *key = NULL; -#ifdef CONFIG_TCP_MD5SIG const __u8 *md5_hash_location = NULL; - unsigned char newhash[16]; - int genhash; - struct sock *sk1 = NULL; +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) + bool allocated_traffic_key = false; #endif + const struct tcp_ao_hdr *aoh; + struct tcp_key key = {}; + u32 seq = 0, ack_seq = 0; __be32 label = 0; u32 priority = 0; struct net *net; u32 txhash = 0; int oif = 0; +#ifdef CONFIG_TCP_MD5SIG + unsigned char newhash[16]; + int genhash; + struct sock *sk1 = NULL; +#endif if (th->rst) return; @@ -1010,12 +1028,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) return; net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); -#ifdef CONFIG_TCP_MD5SIG /* Invalid TCP option size or twice included auth */ - if (tcp_parse_auth_options(th, &md5_hash_location, NULL)) + if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) return; - rcu_read_lock(); +#ifdef CONFIG_TCP_MD5SIG if (sk && sk_fullsock(sk)) { int l3index; @@ -1023,7 +1040,9 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) * in an L3 domain and inet_iif is set to it. */ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; - key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); + key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); + if (key.md5_key) + key.type = TCP_KEY_MD5; } else if (md5_hash_location) { int dif = tcp_v6_iif_l3_slave(skb); int sdif = tcp_v6_sdif(skb); @@ -1048,11 +1067,12 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) */ l3index = tcp_v6_sdif(skb) ? dif : 0; - key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); - if (!key) + key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); + if (!key.md5_key) goto out; + key.type = TCP_KEY_MD5; - genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); + genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } @@ -1064,6 +1084,20 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); +#ifdef CONFIG_TCP_AO + if (aoh) { + int l3index; + + l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; + if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, + &key.ao_key, &key.traffic_key, + &allocated_traffic_key, + &key.rcv_next, &key.sne)) + goto out; + key.type = TCP_KEY_AO; + } +#endif + if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { @@ -1083,22 +1117,30 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) label = ip6_flowlabel(ipv6h); } - tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, - ipv6_get_dsfield(ipv6h), label, priority, txhash); + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, + ipv6_get_dsfield(ipv6h), label, priority, txhash, + &key); -#ifdef CONFIG_TCP_MD5SIG +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) out: + if (allocated_traffic_key) + kfree(key.traffic_key); rcu_read_unlock(); #endif } static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, - struct tcp_md5sig_key *key, u8 tclass, + struct tcp_md5sig_key *md5_key, u8 tclass, __be32 label, u32 priority, u32 txhash) { - tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, - tclass, label, priority, txhash); + struct tcp_key key = { + .md5_key = md5_key, + .type = md5_key ? TCP_KEY_MD5 : TCP_KEY_NONE, + }; + + tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, + tclass, label, priority, txhash, &key); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)