diff mbox series

[net,v4,3/3] vsock/bpf: Fix bpf recvmsg() racing transport reassignment

Message ID 20250317-vsock-trans-signal-race-v4-3-fc8837f3f1d4@rbox.co (mailing list archive)
State New
Headers show
Series vsock/bpf: Handle races between sockmap update and connect() disconnecting | expand

Commit Message

Michal Luczaj March 17, 2025, 9:52 a.m. UTC
Signal delivery during connect() may lead to a disconnect of an already
established socket. That involves removing socket from any sockmap and
resetting state to SS_UNCONNECTED. While it correctly restores socket's
proto, a call to vsock_bpf_recvmsg() might have been already under way in
another thread. If the connect()ing thread reassigns the vsock transport to
NULL, the recvmsg()ing thread may trigger a WARN_ON_ONCE.

connect
  / state = SS_CONNECTED /
                                sock_map_update_elem
                                vsock_bpf_recvmsg
                                  psock = sk_psock_get()
  lock sk
  if signal_pending
    unhash
      sock_map_remove_links
    state = SS_UNCONNECTED
  release sk

connect
  transport = NULL
                                  lock sk
                                  WARN_ON_ONCE(!vsk->transport)

Protect recvmsg() from racing against transport reassignment. Enforce the
sockmap invariant that psock implies transport: lock socket before getting
psock.

WARNING: CPU: 9 PID: 1222 at net/vmw_vsock/vsock_bpf.c:92 vsock_bpf_recvmsg+0xb55/0xe00
CPU: 9 UID: 0 PID: 1222 Comm: a.out Not tainted 6.14.0-rc5+
RIP: 0010:vsock_bpf_recvmsg+0xb55/0xe00
 sock_recvmsg+0x1b2/0x220
 __sys_recvfrom+0x190/0x270
 __x64_sys_recvfrom+0xdc/0x1b0
 do_syscall_64+0x93/0x1b0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Fixes: 634f1a7110b4 ("vsock: support sockmap")
Signed-off-by: Michal Luczaj <mhal@rbox.co>
---
 net/vmw_vsock/vsock_bpf.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

Comments

Stefano Garzarella March 19, 2025, 9:34 a.m. UTC | #1
On Mon, Mar 17, 2025 at 10:52:25AM +0100, Michal Luczaj wrote:
>Signal delivery during connect() may lead to a disconnect of an already
>established socket. That involves removing socket from any sockmap and
>resetting state to SS_UNCONNECTED. While it correctly restores socket's
>proto, a call to vsock_bpf_recvmsg() might have been already under way in
>another thread. If the connect()ing thread reassigns the vsock transport to
>NULL, the recvmsg()ing thread may trigger a WARN_ON_ONCE.
>
>connect
>  / state = SS_CONNECTED /
>                                sock_map_update_elem
>                                vsock_bpf_recvmsg
>                                  psock = sk_psock_get()
>  lock sk
>  if signal_pending
>    unhash
>      sock_map_remove_links
>    state = SS_UNCONNECTED
>  release sk
>
>connect
>  transport = NULL
>                                  lock sk
>                                  WARN_ON_ONCE(!vsk->transport)
>
>Protect recvmsg() from racing against transport reassignment. Enforce the
>sockmap invariant that psock implies transport: lock socket before getting
>psock.
>
>WARNING: CPU: 9 PID: 1222 at net/vmw_vsock/vsock_bpf.c:92 vsock_bpf_recvmsg+0xb55/0xe00
>CPU: 9 UID: 0 PID: 1222 Comm: a.out Not tainted 6.14.0-rc5+
>RIP: 0010:vsock_bpf_recvmsg+0xb55/0xe00
> sock_recvmsg+0x1b2/0x220
> __sys_recvfrom+0x190/0x270
> __x64_sys_recvfrom+0xdc/0x1b0
> do_syscall_64+0x93/0x1b0
> entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
>Fixes: 634f1a7110b4 ("vsock: support sockmap")
>Signed-off-by: Michal Luczaj <mhal@rbox.co>
>---
> net/vmw_vsock/vsock_bpf.c | 23 +++++++++++++++--------
> 1 file changed, 15 insertions(+), 8 deletions(-)
>
>diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
>index c68fdaf09046b68254dac3ea70ffbe73dfa45cef..5138195d91fb258d4bc09b48e80e13651d62863a 100644
>--- a/net/vmw_vsock/vsock_bpf.c
>+++ b/net/vmw_vsock/vsock_bpf.c
>@@ -73,28 +73,35 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int
> 	return err;
> }
>
>-static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
>-			     size_t len, int flags, int *addr_len)
>+static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
>+			     int flags, int *addr_len)

I would avoid this change, especially in a patch with the Fixes tag then 
to be backported.

> {
> 	struct sk_psock *psock;
> 	struct vsock_sock *vsk;
> 	int copied;
>
>+	/* Since signal delivery during connect() may reset the state of socket
>+	 * that's already in a sockmap, take the lock before checking on psock.
>+	 * This serializes a possible transport reassignment, protecting this
>+	 * function from running with NULL transport.
>+	 */
>+	lock_sock(sk);
>+
> 	psock = sk_psock_get(sk);
>-	if (unlikely(!psock))
>+	if (unlikely(!psock)) {
>+		release_sock(sk);
> 		return __vsock_recvmsg(sk, msg, len, flags);
>+	}
>
>-	lock_sock(sk);
> 	vsk = vsock_sk(sk);
>-
> 	if (WARN_ON_ONCE(!vsk->transport)) {
> 		copied = -ENODEV;
> 		goto out;
> 	}
>
> 	if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) {
>-		release_sock(sk);
> 		sk_psock_put(sk, psock);
>+		release_sock(sk);

But here we release it, so can still a reset happen at this point, 
before calling __vsock_connectible_recvmsg().
In there anyway we handle the case where transport is null, so there's 
no problem, right?

The rest LTGM.

Thanks,
Stefano

> 		return __vsock_recvmsg(sk, msg, len, flags);
> 	}
>
>@@ -108,8 +115,8 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
> 		}
>
> 		if (sk_psock_queue_empty(psock)) {
>-			release_sock(sk);
> 			sk_psock_put(sk, psock);
>+			release_sock(sk);
> 			return __vsock_recvmsg(sk, msg, len, flags);
> 		}
>
>@@ -117,8 +124,8 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
> 	}
>
> out:
>-	release_sock(sk);
> 	sk_psock_put(sk, psock);
>+	release_sock(sk);
>
> 	return copied;
> }
>
>-- 
>2.48.1
>
Michal Luczaj March 19, 2025, 7:05 p.m. UTC | #2
On 3/19/25 10:34, Stefano Garzarella wrote:
> On Mon, Mar 17, 2025 at 10:52:25AM +0100, Michal Luczaj wrote:
>> ...
>> -static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
>> -			     size_t len, int flags, int *addr_len)
>> +static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
>> +			     int flags, int *addr_len)
> 
> I would avoid this change, especially in a patch with the Fixes tag then 
> to be backported.

I thought that since I've modified this function in so many places, doing
this wouldn't hurt. But ok, I'll drop this change.

>> {
>> 	struct sk_psock *psock;
>> 	struct vsock_sock *vsk;
>> 	int copied;
>>
>> +	/* Since signal delivery during connect() may reset the state of socket
>> +	 * that's already in a sockmap, take the lock before checking on psock.
>> +	 * This serializes a possible transport reassignment, protecting this
>> +	 * function from running with NULL transport.
>> +	 */
>> +	lock_sock(sk);
>> +
>> 	psock = sk_psock_get(sk);
>> -	if (unlikely(!psock))
>> +	if (unlikely(!psock)) {
>> +		release_sock(sk);
>> 		return __vsock_recvmsg(sk, msg, len, flags);
>> +	}
>>
>> -	lock_sock(sk);
>> 	vsk = vsock_sk(sk);
>> -
>> 	if (WARN_ON_ONCE(!vsk->transport)) {
>> 		copied = -ENODEV;
>> 		goto out;
>> 	}
>>
>> 	if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) {
>> -		release_sock(sk);
>> 		sk_psock_put(sk, psock);
>> +		release_sock(sk);
> 
> But here we release it, so can still a reset happen at this point, 
> before calling __vsock_connectible_recvmsg().
> In there anyway we handle the case where transport is null, so there's 
> no problem, right?

Yes, I think we're good. That function needs to gracefully handle being
called without a transport, and it does.

Thanks,
Michal
Cong Wang March 19, 2025, 10:18 p.m. UTC | #3
On Mon, Mar 17, 2025 at 10:52:25AM +0100, Michal Luczaj wrote:
> Signal delivery during connect() may lead to a disconnect of an already
> established socket. That involves removing socket from any sockmap and
> resetting state to SS_UNCONNECTED. While it correctly restores socket's
> proto, a call to vsock_bpf_recvmsg() might have been already under way in
> another thread. If the connect()ing thread reassigns the vsock transport to
> NULL, the recvmsg()ing thread may trigger a WARN_ON_ONCE.
> 
> connect
>   / state = SS_CONNECTED /
>                                 sock_map_update_elem
>                                 vsock_bpf_recvmsg
>                                   psock = sk_psock_get()
>   lock sk
>   if signal_pending
>     unhash
>       sock_map_remove_links

So vsock's ->recvmsg() should be restored after this, right? Then how is
vsock_bpf_recvmsg() called afterward?

>     state = SS_UNCONNECTED
>   release sk
> 
> connect
>   transport = NULL
>                                   lock sk
>                                   WARN_ON_ONCE(!vsk->transport)
> 

And I am wondering why we need to WARN here since we can handle this error
case correctly?

Thanks.
Michal Luczaj March 20, 2025, 12:05 p.m. UTC | #4
On 3/19/25 23:18, Cong Wang wrote:
> On Mon, Mar 17, 2025 at 10:52:25AM +0100, Michal Luczaj wrote:
>> Signal delivery during connect() may lead to a disconnect of an already
>> established socket. That involves removing socket from any sockmap and
>> resetting state to SS_UNCONNECTED. While it correctly restores socket's
>> proto, a call to vsock_bpf_recvmsg() might have been already under way in
>> another thread. If the connect()ing thread reassigns the vsock transport to
>> NULL, the recvmsg()ing thread may trigger a WARN_ON_ONCE.
>>

   *THREAD 1*                      *THREAD 2*

>> connect
>>   / state = SS_CONNECTED /
>>                                 sock_map_update_elem
>>                                 vsock_bpf_recvmsg
>>                                   psock = sk_psock_get()
>>   lock sk
>>   if signal_pending
>>     unhash
>>       sock_map_remove_links
> 
> So vsock's ->recvmsg() should be restored after this, right? Then how is
> vsock_bpf_recvmsg() called afterward?

I'm not sure I understand the question, so I've added a header above: those
are 2 parallel flows of execution. vsock_bpf_recvmsg() wasn't called
afterwards. It was called before sock_map_remove_links(). Note that at the
time of sock_map_remove_links() (in T1), vsock_bpf_recvmsg() is still
executing (in T2).

>>     state = SS_UNCONNECTED
>>   release sk
>>
>> connect
>>   transport = NULL
>>                                   lock sk
>>                                   WARN_ON_ONCE(!vsk->transport)
>>
> 
> And I am wondering why we need to WARN here since we can handle this error
> case correctly?

The WARN and transport check are here for defensive measures, and to state
a contract.

But I think I get your point. If we accept for a fact of life that BPF code
should be able to handle transport disappearing - then WARN can be removed
(while keeping the check) and this patch can be dropped.

My aim, instead, was to keep things consistent. By which I mean sticking to
the conditions expressed in vsock_bpf_update_proto() as invariants; so that
vsock with a psock is guaranteed to have transport assigned.
diff mbox series

Patch

diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
index c68fdaf09046b68254dac3ea70ffbe73dfa45cef..5138195d91fb258d4bc09b48e80e13651d62863a 100644
--- a/net/vmw_vsock/vsock_bpf.c
+++ b/net/vmw_vsock/vsock_bpf.c
@@ -73,28 +73,35 @@  static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int
 	return err;
 }
 
-static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
-			     size_t len, int flags, int *addr_len)
+static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			     int flags, int *addr_len)
 {
 	struct sk_psock *psock;
 	struct vsock_sock *vsk;
 	int copied;
 
+	/* Since signal delivery during connect() may reset the state of socket
+	 * that's already in a sockmap, take the lock before checking on psock.
+	 * This serializes a possible transport reassignment, protecting this
+	 * function from running with NULL transport.
+	 */
+	lock_sock(sk);
+
 	psock = sk_psock_get(sk);
-	if (unlikely(!psock))
+	if (unlikely(!psock)) {
+		release_sock(sk);
 		return __vsock_recvmsg(sk, msg, len, flags);
+	}
 
-	lock_sock(sk);
 	vsk = vsock_sk(sk);
-
 	if (WARN_ON_ONCE(!vsk->transport)) {
 		copied = -ENODEV;
 		goto out;
 	}
 
 	if (vsock_has_data(sk, psock) && sk_psock_queue_empty(psock)) {
-		release_sock(sk);
 		sk_psock_put(sk, psock);
+		release_sock(sk);
 		return __vsock_recvmsg(sk, msg, len, flags);
 	}
 
@@ -108,8 +115,8 @@  static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
 		}
 
 		if (sk_psock_queue_empty(psock)) {
-			release_sock(sk);
 			sk_psock_put(sk, psock);
+			release_sock(sk);
 			return __vsock_recvmsg(sk, msg, len, flags);
 		}
 
@@ -117,8 +124,8 @@  static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
 	}
 
 out:
-	release_sock(sk);
 	sk_psock_put(sk, psock);
+	release_sock(sk);
 
 	return copied;
 }