Message ID | 20240215-upstream-net-20240215-misc-fixes-v1-3-8c01a55d8f6a@kernel.org (mailing list archive) |
---|---|
State | Accepted |
Commit | b8adb69a7d29c2d33eb327bca66476fb6066516b |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | mptcp: misc. fixes for v6.8 | expand |
On Thu, Feb 15, 2024 at 7:25 PM Matthieu Baerts (NGI0) <matttbe@kernel.org> wrote: > > From: Paolo Abeni <pabeni@redhat.com> > > Since the introduction of the subflow ULP diag interface, the > dump callback accessed all the subflow data with lockless. > > We need either to annotate all the read and write operation accordingly, > or acquire the subflow socket lock. Let's do latter, even if slower, to > avoid a diffstat havoc. > > Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") > Cc: stable@vger.kernel.org > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > Reviewed-by: Mat Martineau <martineau@kernel.org> > Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> > --- > Notes: > - This patch modifies the existing ULP API. No better solutions have > been found for -net, and there is some similar prior art, see > commit 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info"). > > Please also note that TLS ULP Diag has likely the same issue. > To: Boris Pismenny <borisp@nvidia.com> > To: John Fastabend <john.fastabend@gmail.com> > --- > include/net/tcp.h | 2 +- > net/mptcp/diag.c | 6 +++++- > net/tls/tls_main.c | 2 +- > 3 files changed, 7 insertions(+), 3 deletions(-) > > diff --git a/include/net/tcp.h b/include/net/tcp.h > index dd78a1181031..f6eba9652d01 100644 > --- a/include/net/tcp.h > +++ b/include/net/tcp.h > @@ -2506,7 +2506,7 @@ struct tcp_ulp_ops { > /* cleanup ulp */ > void (*release)(struct sock *sk); > /* diagnostic */ > - int (*get_info)(const struct sock *sk, struct sk_buff *skb); > + int (*get_info)(struct sock *sk, struct sk_buff *skb); > size_t (*get_info_size)(const struct sock *sk); > /* clone ulp */ > void (*clone)(const struct request_sock *req, struct sock *newsk, > diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c > index a536586742f2..e57c5f47f035 100644 > --- a/net/mptcp/diag.c > +++ b/net/mptcp/diag.c > @@ -13,17 +13,19 @@ > #include <uapi/linux/mptcp.h> > #include "protocol.h" > > -static int subflow_get_info(const 
struct sock *sk, struct sk_buff *skb) > +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) > { > struct mptcp_subflow_context *sf; > struct nlattr *start; > u32 flags = 0; > + bool slow; > int err; > > start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); > if (!start) > return -EMSGSIZE; > > + slow = lock_sock_fast(sk); > rcu_read_lock(); I am afraid lockdep is not happy with this change. Paolo, we probably need the READ_ONCE() annotations after all.
On Mon, Feb 19, 2024 at 6:21 PM Eric Dumazet <edumazet@google.com> wrote: > > On Thu, Feb 15, 2024 at 7:25 PM Matthieu Baerts (NGI0) > <matttbe@kernel.org> wrote: > > > > From: Paolo Abeni <pabeni@redhat.com> > > > > Since the introduction of the subflow ULP diag interface, the > > dump callback accessed all the subflow data with lockless. > > > > We need either to annotate all the read and write operation accordingly, > > or acquire the subflow socket lock. Let's do latter, even if slower, to > > avoid a diffstat havoc. > > > > Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") > > Cc: stable@vger.kernel.org > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > Reviewed-by: Mat Martineau <martineau@kernel.org> > > Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> > > --- > > Notes: > > - This patch modifies the existing ULP API. No better solutions have > > been found for -net, and there is some similar prior art, see > > commit 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info"). > > > > Please also note that TLS ULP Diag has likely the same issue. 
> > To: Boris Pismenny <borisp@nvidia.com> > > To: John Fastabend <john.fastabend@gmail.com> > > --- > > include/net/tcp.h | 2 +- > > net/mptcp/diag.c | 6 +++++- > > net/tls/tls_main.c | 2 +- > > 3 files changed, 7 insertions(+), 3 deletions(-) > > > > diff --git a/include/net/tcp.h b/include/net/tcp.h > > index dd78a1181031..f6eba9652d01 100644 > > --- a/include/net/tcp.h > > +++ b/include/net/tcp.h > > @@ -2506,7 +2506,7 @@ struct tcp_ulp_ops { > > /* cleanup ulp */ > > void (*release)(struct sock *sk); > > /* diagnostic */ > > - int (*get_info)(const struct sock *sk, struct sk_buff *skb); > > + int (*get_info)(struct sock *sk, struct sk_buff *skb); > > size_t (*get_info_size)(const struct sock *sk); > > /* clone ulp */ > > void (*clone)(const struct request_sock *req, struct sock *newsk, > > diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c > > index a536586742f2..e57c5f47f035 100644 > > --- a/net/mptcp/diag.c > > +++ b/net/mptcp/diag.c > > @@ -13,17 +13,19 @@ > > #include <uapi/linux/mptcp.h> > > #include "protocol.h" > > > > -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) > > +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) > > { > > struct mptcp_subflow_context *sf; > > struct nlattr *start; > > u32 flags = 0; > > + bool slow; > > int err; > > > > start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); > > if (!start) > > return -EMSGSIZE; > > > > + slow = lock_sock_fast(sk); > > rcu_read_lock(); > > I am afraid lockdep is not happy with this change. > > Paolo, we probably need the READ_ONCE() annotations after all. Or perhaps something like the following would be enough. 
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 6ff6f14674aa2941bc04c680bacd9f79fc65060d..7017dd60659dc7133318c1c82e3f429bea3a5d57 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct sk_buff *skb) bool slow; int err; + if (inet_sk_state_load(sk) == TCP_LISTEN) + return 0; + start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE;
On Mon, 2024-02-19 at 18:35 +0100, Eric Dumazet wrote: > On Mon, Feb 19, 2024 at 6:21 PM Eric Dumazet <edumazet@google.com> wrote: > > > > On Thu, Feb 15, 2024 at 7:25 PM Matthieu Baerts (NGI0) > > <matttbe@kernel.org> wrote: > > > > > > From: Paolo Abeni <pabeni@redhat.com> > > > > > > Since the introduction of the subflow ULP diag interface, the > > > dump callback accessed all the subflow data with lockless. > > > > > > We need either to annotate all the read and write operation accordingly, > > > or acquire the subflow socket lock. Let's do latter, even if slower, to > > > avoid a diffstat havoc. > > > > > > Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") > > > Cc: stable@vger.kernel.org > > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > > Reviewed-by: Mat Martineau <martineau@kernel.org> > > > Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> > > > --- > > > Notes: > > > - This patch modifies the existing ULP API. No better solutions have > > > been found for -net, and there is some similar prior art, see > > > commit 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info"). > > > > > > Please also note that TLS ULP Diag has likely the same issue. 
> > > To: Boris Pismenny <borisp@nvidia.com> > > > To: John Fastabend <john.fastabend@gmail.com> > > > --- > > > include/net/tcp.h | 2 +- > > > net/mptcp/diag.c | 6 +++++- > > > net/tls/tls_main.c | 2 +- > > > 3 files changed, 7 insertions(+), 3 deletions(-) > > > > > > diff --git a/include/net/tcp.h b/include/net/tcp.h > > > index dd78a1181031..f6eba9652d01 100644 > > > --- a/include/net/tcp.h > > > +++ b/include/net/tcp.h > > > @@ -2506,7 +2506,7 @@ struct tcp_ulp_ops { > > > /* cleanup ulp */ > > > void (*release)(struct sock *sk); > > > /* diagnostic */ > > > - int (*get_info)(const struct sock *sk, struct sk_buff *skb); > > > + int (*get_info)(struct sock *sk, struct sk_buff *skb); > > > size_t (*get_info_size)(const struct sock *sk); > > > /* clone ulp */ > > > void (*clone)(const struct request_sock *req, struct sock *newsk, > > > diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c > > > index a536586742f2..e57c5f47f035 100644 > > > --- a/net/mptcp/diag.c > > > +++ b/net/mptcp/diag.c > > > @@ -13,17 +13,19 @@ > > > #include <uapi/linux/mptcp.h> > > > #include "protocol.h" > > > > > > -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) > > > +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) > > > { > > > struct mptcp_subflow_context *sf; > > > struct nlattr *start; > > > u32 flags = 0; > > > + bool slow; > > > int err; > > > > > > start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); > > > if (!start) > > > return -EMSGSIZE; > > > > > > + slow = lock_sock_fast(sk); > > > rcu_read_lock(); > > > > I am afraid lockdep is not happy with this change. > > > > Paolo, we probably need the READ_ONCE() annotations after all. > > Or perhaps something like the following would be enough. 
> > diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c > index 6ff6f14674aa2941bc04c680bacd9f79fc65060d..7017dd60659dc7133318c1c82e3f429bea3a5d57 > 100644 > --- a/net/mptcp/diag.c > +++ b/net/mptcp/diag.c > @@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct > sk_buff *skb) > bool slow; > int err; > > + if (inet_sk_state_load(sk) == TCP_LISTEN) > + return 0; > + > start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); > if (!start) > return -EMSGSIZE; Thanks for the heads-up. This latter option looks preferable, to avoid quite a bit of noise with _ONCE annotations. Is there a syzkaller splat I could look at? If it landed on the ML, I missed it. Thanks! Paolo
On Mon, Feb 19, 2024 at 7:04 PM Paolo Abeni <pabeni@redhat.com> wrote: > > On Mon, 2024-02-19 at 18:35 +0100, Eric Dumazet wrote: > > On Mon, Feb 19, 2024 at 6:21 PM Eric Dumazet <edumazet@google.com> wrote: > > > > > > On Thu, Feb 15, 2024 at 7:25 PM Matthieu Baerts (NGI0) > > > <matttbe@kernel.org> wrote: > > > > > > > > From: Paolo Abeni <pabeni@redhat.com> > > > > > > > > Since the introduction of the subflow ULP diag interface, the > > > > dump callback accessed all the subflow data with lockless. > > > > > > > > We need either to annotate all the read and write operation accordingly, > > > > or acquire the subflow socket lock. Let's do latter, even if slower, to > > > > avoid a diffstat havoc. > > > > > > > > Fixes: 5147dfb50832 ("mptcp: allow dumping subflow context to userspace") > > > > Cc: stable@vger.kernel.org > > > > Signed-off-by: Paolo Abeni <pabeni@redhat.com> > > > > Reviewed-by: Mat Martineau <martineau@kernel.org> > > > > Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org> > > > > --- > > > > Notes: > > > > - This patch modifies the existing ULP API. No better solutions have > > > > been found for -net, and there is some similar prior art, see > > > > commit 0df48c26d841 ("tcp: add tcpi_bytes_acked to tcp_info"). > > > > > > > > Please also note that TLS ULP Diag has likely the same issue. 
> > > > To: Boris Pismenny <borisp@nvidia.com> > > > > To: John Fastabend <john.fastabend@gmail.com> > > > > --- > > > > include/net/tcp.h | 2 +- > > > > net/mptcp/diag.c | 6 +++++- > > > > net/tls/tls_main.c | 2 +- > > > > 3 files changed, 7 insertions(+), 3 deletions(-) > > > > > > > > diff --git a/include/net/tcp.h b/include/net/tcp.h > > > > index dd78a1181031..f6eba9652d01 100644 > > > > --- a/include/net/tcp.h > > > > +++ b/include/net/tcp.h > > > > @@ -2506,7 +2506,7 @@ struct tcp_ulp_ops { > > > > /* cleanup ulp */ > > > > void (*release)(struct sock *sk); > > > > /* diagnostic */ > > > > - int (*get_info)(const struct sock *sk, struct sk_buff *skb); > > > > + int (*get_info)(struct sock *sk, struct sk_buff *skb); > > > > size_t (*get_info_size)(const struct sock *sk); > > > > /* clone ulp */ > > > > void (*clone)(const struct request_sock *req, struct sock *newsk, > > > > diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c > > > > index a536586742f2..e57c5f47f035 100644 > > > > --- a/net/mptcp/diag.c > > > > +++ b/net/mptcp/diag.c > > > > @@ -13,17 +13,19 @@ > > > > #include <uapi/linux/mptcp.h> > > > > #include "protocol.h" > > > > > > > > -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) > > > > +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) > > > > { > > > > struct mptcp_subflow_context *sf; > > > > struct nlattr *start; > > > > u32 flags = 0; > > > > + bool slow; > > > > int err; > > > > > > > > start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); > > > > if (!start) > > > > return -EMSGSIZE; > > > > > > > > + slow = lock_sock_fast(sk); > > > > rcu_read_lock(); > > > > > > I am afraid lockdep is not happy with this change. > > > > > > Paolo, we probably need the READ_ONCE() annotations after all. > > > > Or perhaps something like the following would be enough. 
> > > > diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c > > index 6ff6f14674aa2941bc04c680bacd9f79fc65060d..7017dd60659dc7133318c1c82e3f429bea3a5d57 > > 100644 > > --- a/net/mptcp/diag.c > > +++ b/net/mptcp/diag.c > > @@ -21,6 +21,9 @@ static int subflow_get_info(struct sock *sk, struct > > sk_buff *skb) > > bool slow; > > int err; > > > > + if (inet_sk_state_load(sk) == TCP_LISTEN) > > + return 0; > > + > > start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); > > if (!start) > > return -EMSGSIZE; > > Thanks for the head-up. This later option looks preferable, to avoid > quit a bit of noise with _ONCE annotation. Is there a syzkaller splat I > could look at? if it landed on the ML, I missed it. > Not landed yet, here is the splat : ====================================================== WARNING: possible circular locking dependency detected 6.8.0-rc4-syzkaller-00212-g40b9385dd8e6 #0 Not tainted ------------------------------------------------------ syz-executor.2/24141 is trying to acquire lock: ffff888045870130 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_diag_put_ulp net/ipv4/tcp_diag.c:100 [inline] ffff888045870130 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: tcp_diag_get_aux+0x738/0x830 net/ipv4/tcp_diag.c:137 but task is already holding lock: ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: inet_diag_dump_icsk+0x39f/0x1f80 net/ipv4/inet_diag.c:1038 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&h->lhash2[i].lock){+.+.}-{2:2}: lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline] _raw_spin_lock+0x2e/0x40 kernel/locking/spinlock.c:154 spin_lock include/linux/spinlock.h:351 [inline] __inet_hash+0x335/0xbe0 net/ipv4/inet_hashtables.c:743 inet_csk_listen_start+0x23a/0x320 net/ipv4/inet_connection_sock.c:1261 __inet_listen_sk+0x2a2/0x770 net/ipv4/af_inet.c:217 inet_listen+0xa3/0x110 net/ipv4/af_inet.c:239 rds_tcp_listen_init+0x3fd/0x5a0 net/rds/tcp_listen.c:316 rds_tcp_init_net+0x141/0x320 net/rds/tcp.c:577 ops_init+0x352/0x610 net/core/net_namespace.c:136 __register_pernet_operations net/core/net_namespace.c:1214 [inline] register_pernet_operations+0x2cb/0x660 net/core/net_namespace.c:1283 register_pernet_device+0x33/0x80 net/core/net_namespace.c:1370 rds_tcp_init+0x62/0xd0 net/rds/tcp.c:735 do_one_initcall+0x238/0x830 init/main.c:1236 do_initcall_level+0x157/0x210 init/main.c:1298 do_initcalls+0x3f/0x80 init/main.c:1314 kernel_init_freeable+0x42f/0x5d0 init/main.c:1551 kernel_init+0x1d/0x2a0 init/main.c:1441 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 -> #0 (k-sk_lock-AF_INET6){+.+.}-{0:0}: check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 lock_sock_fast include/net/sock.h:1723 [inline] subflow_get_info+0x166/0xd20 net/mptcp/diag.c:28 tcp_diag_put_ulp net/ipv4/tcp_diag.c:100 [inline] tcp_diag_get_aux+0x738/0x830 net/ipv4/tcp_diag.c:137 inet_sk_diag_fill+0x10ed/0x1e00 net/ipv4/inet_diag.c:345 inet_diag_dump_icsk+0x55b/0x1f80 net/ipv4/inet_diag.c:1061 __inet_diag_dump+0x211/0x3a0 net/ipv4/inet_diag.c:1263 inet_diag_dump_compat+0x1c1/0x2d0 
net/ipv4/inet_diag.c:1371 netlink_dump+0x59b/0xc80 net/netlink/af_netlink.c:2264 __netlink_dump_start+0x5df/0x790 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:338 [inline] inet_diag_rcv_msg_compat+0x209/0x4c0 net/ipv4/inet_diag.c:1405 sock_diag_rcv_msg+0xe7/0x410 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:280 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&h->lhash2[i].lock); lock(k-sk_lock-AF_INET6); lock(&h->lhash2[i].lock); lock(k-sk_lock-AF_INET6); *** DEADLOCK *** 5 locks held by syz-executor.2/24141: #0: ffffffff8f380bc8 (sock_diag_mutex){+.+.}-{3:3}, at: sock_diag_rcv+0x1b/0x40 net/core/sock_diag.c:279 #1: ffffffff8f380a28 (sock_diag_table_mutex){+.+.}-{3:3}, at: sock_diag_rcv_msg+0xc6/0x410 net/core/sock_diag.c:259 #2: ffff8880586f5680 (nlk_cb_mutex-SOCK_DIAG){+.+.}-{3:3}, at: netlink_dump+0xde/0xc80 net/netlink/af_netlink.c:2211 #3: ffffffff8f464568 (inet_diag_table_mutex){+.+.}-{3:3}, at: inet_diag_lock_handler net/ipv4/inet_diag.c:63 [inline] #3: ffffffff8f464568 (inet_diag_table_mutex){+.+.}-{3:3}, at: __inet_diag_dump+0x191/0x3a0 net/ipv4/inet_diag.c:1261 #4: ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: spin_lock include/linux/spinlock.h:351 [inline] #4: ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: inet_diag_dump_icsk+0x39f/0x1f80 net/ipv4/inet_diag.c:1038 stack backtrace: CPU: 0 PID: 24141 Comm: syz-executor.2 Not tainted 
6.8.0-rc4-syzkaller-00212-g40b9385dd8e6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: <TASK> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2187 check_prev_add kernel/locking/lockdep.c:3134 [inline] check_prevs_add kernel/locking/lockdep.c:3253 [inline] validate_chain+0x18ca/0x58e0 kernel/locking/lockdep.c:3869 __lock_acquire+0x1345/0x1fd0 kernel/locking/lockdep.c:5137 lock_acquire+0x1e3/0x530 kernel/locking/lockdep.c:5754 lock_sock_fast include/net/sock.h:1723 [inline] subflow_get_info+0x166/0xd20 net/mptcp/diag.c:28 tcp_diag_put_ulp net/ipv4/tcp_diag.c:100 [inline] tcp_diag_get_aux+0x738/0x830 net/ipv4/tcp_diag.c:137 inet_sk_diag_fill+0x10ed/0x1e00 net/ipv4/inet_diag.c:345 inet_diag_dump_icsk+0x55b/0x1f80 net/ipv4/inet_diag.c:1061 __inet_diag_dump+0x211/0x3a0 net/ipv4/inet_diag.c:1263 inet_diag_dump_compat+0x1c1/0x2d0 net/ipv4/inet_diag.c:1371 netlink_dump+0x59b/0xc80 net/netlink/af_netlink.c:2264 __netlink_dump_start+0x5df/0x790 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:338 [inline] inet_diag_rcv_msg_compat+0x209/0x4c0 net/ipv4/inet_diag.c:1405 sock_diag_rcv_msg+0xe7/0x410 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:280 netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7fbc4c07dda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 
24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fbc4ce750c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fbc4c1abf80 RCX: 00007fbc4c07dda9 RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000004 RBP: 00007fbc4c0ca47a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fbc4c1abf80 R15: 00007ffcc3d92258 </TASK> BUG: sleeping function called from invalid context at net/core/sock.c:3554 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 24141, name: syz-executor.2 preempt_count: 1, expected: 0 RCU nest depth: 0, expected: 0 INFO: lockdep is turned off. Preemption disabled at: [<0000000000000000>] 0x0 CPU: 0 PID: 24141 Comm: syz-executor.2 Not tainted 6.8.0-rc4-syzkaller-00212-g40b9385dd8e6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 Call Trace: <TASK> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1e7/0x2e0 lib/dump_stack.c:106 __might_resched+0x5d3/0x780 kernel/sched/core.c:10176 __lock_sock_fast+0x31/0xe0 net/core/sock.c:3554 lock_sock_fast include/net/sock.h:1725 [inline] subflow_get_info+0x172/0xd20 net/mptcp/diag.c:28 tcp_diag_put_ulp net/ipv4/tcp_diag.c:100 [inline] tcp_diag_get_aux+0x738/0x830 net/ipv4/tcp_diag.c:137 inet_sk_diag_fill+0x10ed/0x1e00 net/ipv4/inet_diag.c:345 inet_diag_dump_icsk+0x55b/0x1f80 net/ipv4/inet_diag.c:1061 __inet_diag_dump+0x211/0x3a0 net/ipv4/inet_diag.c:1263 inet_diag_dump_compat+0x1c1/0x2d0 net/ipv4/inet_diag.c:1371 netlink_dump+0x59b/0xc80 net/netlink/af_netlink.c:2264 __netlink_dump_start+0x5df/0x790 net/netlink/af_netlink.c:2370 netlink_dump_start include/linux/netlink.h:338 [inline] inet_diag_rcv_msg_compat+0x209/0x4c0 net/ipv4/inet_diag.c:1405 sock_diag_rcv_msg+0xe7/0x410 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2543 sock_diag_rcv+0x2a/0x40 net/core/sock_diag.c:280 
netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] netlink_unicast+0x7ea/0x980 net/netlink/af_netlink.c:1367 netlink_sendmsg+0xa3b/0xd70 net/netlink/af_netlink.c:1908 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:745 ____sys_sendmsg+0x525/0x7d0 net/socket.c:2584 ___sys_sendmsg net/socket.c:2638 [inline] __sys_sendmsg+0x2b0/0x3a0 net/socket.c:2667 do_syscall_64+0xf9/0x240 entry_SYSCALL_64_after_hwframe+0x6f/0x77 RIP: 0033:0x7fbc4c07dda9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 20 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fbc4ce750c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fbc4c1abf80 RCX: 00007fbc4c07dda9 RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000004 RBP: 00007fbc4c0ca47a R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007fbc4c1abf80 R15: 00007ffcc3d92258
On Mon, 2024-02-19 at 19:33 +0100, Eric Dumazet wrote: > On Mon, Feb 19, 2024 at 7:04 PM Paolo Abeni <pabeni@redhat.com> wrote: > > Thanks for the head-up. This later option looks preferable, to avoid > > quit a bit of noise with _ONCE annotation. Is there a syzkaller splat I > > could look at? if it landed on the ML, I missed it. > > > > Not landed yet, here is the splat : > > ====================================================== > WARNING: possible circular locking dependency detected > 6.8.0-rc4-syzkaller-00212-g40b9385dd8e6 #0 Not tainted > ------------------------------------------------------ > syz-executor.2/24141 is trying to acquire lock: > ffff888045870130 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: > tcp_diag_put_ulp net/ipv4/tcp_diag.c:100 [inline] > ffff888045870130 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: > tcp_diag_get_aux+0x738/0x830 net/ipv4/tcp_diag.c:137 > > but task is already holding lock: > ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: spin_lock > include/linux/spinlock.h:351 [inline] > ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: > inet_diag_dump_icsk+0x39f/0x1f80 net/ipv4/inet_diag.c:1038 [Sorry for the latency]. Yes, it looks like checking the listener status will work. I can test and send the formal patch - with the due credits! - or do you prefer otherwise? Thanks! Paolo
On Tue, Feb 20, 2024 at 6:33 PM Paolo Abeni <pabeni@redhat.com> wrote: > > On Mon, 2024-02-19 at 19:33 +0100, Eric Dumazet wrote: > > On Mon, Feb 19, 2024 at 7:04 PM Paolo Abeni <pabeni@redhat.com> wrote: > > > Thanks for the head-up. This later option looks preferable, to avoid > > > quit a bit of noise with _ONCE annotation. Is there a syzkaller splat I > > > could look at? if it landed on the ML, I missed it. > > > > > > > Not landed yet, here is the splat : > > > > ====================================================== > > WARNING: possible circular locking dependency detected > > 6.8.0-rc4-syzkaller-00212-g40b9385dd8e6 #0 Not tainted > > ------------------------------------------------------ > > syz-executor.2/24141 is trying to acquire lock: > > ffff888045870130 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: > > tcp_diag_put_ulp net/ipv4/tcp_diag.c:100 [inline] > > ffff888045870130 (k-sk_lock-AF_INET6){+.+.}-{0:0}, at: > > tcp_diag_get_aux+0x738/0x830 net/ipv4/tcp_diag.c:137 > > > > but task is already holding lock: > > ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: spin_lock > > include/linux/spinlock.h:351 [inline] > > ffffc9000135e488 (&h->lhash2[i].lock){+.+.}-{2:2}, at: > > inet_diag_dump_icsk+0x39f/0x1f80 net/ipv4/inet_diag.c:1038 > > [Sorry for the latency]. Yes it looks like that checking the listener > status will work. I can test and send the formal patch - with the due > credits! - or do you prefer otherwise? Sure, please send the formal patch, thank you.
diff --git a/include/net/tcp.h b/include/net/tcp.h index dd78a1181031..f6eba9652d01 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2506,7 +2506,7 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); /* diagnostic */ - int (*get_info)(const struct sock *sk, struct sk_buff *skb); + int (*get_info)(struct sock *sk, struct sk_buff *skb); size_t (*get_info_size)(const struct sock *sk); /* clone ulp */ void (*clone)(const struct request_sock *req, struct sock *newsk, diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index a536586742f2..e57c5f47f035 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -13,17 +13,19 @@ #include <uapi/linux/mptcp.h> #include "protocol.h" -static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) +static int subflow_get_info(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *sf; struct nlattr *start; u32 flags = 0; + bool slow; int err; start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP); if (!start) return -EMSGSIZE; + slow = lock_sock_fast(sk); rcu_read_lock(); sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data); if (!sf) { @@ -69,11 +71,13 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb) } rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_end(skb, start); return 0; nla_failure: rcu_read_unlock(); + unlock_sock_fast(sk, slow); nla_nest_cancel(skb, start); return err; } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 1c2c6800949d..b4674f03d71a 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -1003,7 +1003,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx) return 0; } -static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +static int tls_get_info(struct sock *sk, struct sk_buff *skb) { u16 version, cipher_type; struct tls_context *ctx;