
[net,2/2] vsock/test: Add test for SO_LINGER null ptr deref

Message ID 20250204-vsock-linger-nullderef-v1-2-6eb1760fa93e@rbox.co (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series: vsock: null-ptr-deref when SO_LINGER enabled

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success Errors and warnings before: 26 (+1) this patch: 26 (+1)
netdev/cc_maintainers warning 1 maintainers not CCed: virtualization@lists.linux.dev
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 53 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2025-02-04--15-00 (tests: 886)

Commit Message

Michal Luczaj Feb. 4, 2025, 12:29 a.m. UTC
Explicitly close() a TCP_ESTABLISHED (connectible) socket with SO_LINGER
enabled. May trigger a null pointer dereference.

Signed-off-by: Michal Luczaj <mhal@rbox.co>
---
 tools/testing/vsock/vsock_test.c | 41 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

Comments

Stefano Garzarella Feb. 4, 2025, 10:48 a.m. UTC | #1
On Tue, Feb 04, 2025 at 01:29:53AM +0100, Michal Luczaj wrote:
>Explicitly close() a TCP_ESTABLISHED (connectible) socket with SO_LINGER
>enabled. May trigger a null pointer dereference.
>
>Signed-off-by: Michal Luczaj <mhal@rbox.co>
>---
> tools/testing/vsock/vsock_test.c | 41 ++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 41 insertions(+)
>
>diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
>index dfff8b288265f96b602cb1bfa0e6dce02f114222..d0f6d253ac72d08a957cb81a3c38fcc72bec5a53 100644
>--- a/tools/testing/vsock/vsock_test.c
>+++ b/tools/testing/vsock/vsock_test.c
>@@ -1788,6 +1788,42 @@ static void test_stream_connect_retry_server(const struct test_opts *opts)
> 	close(fd);
> }
>
>+static void test_stream_linger_client(const struct test_opts *opts)
>+{
>+	struct linger optval = {
>+		.l_onoff = 1,
>+		.l_linger = 1
>+	};
>+	int fd;
>+
>+	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
>+	if (fd < 0) {
>+		perror("connect");
>+		exit(EXIT_FAILURE);
>+	}
>+
>+	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
>+		perror("setsockopt(SO_LINGER)");
>+		exit(EXIT_FAILURE);
>+	}

Since we are testing SO_LINGER, it would also be nice to check that it's 
working properly, since one of the fixes proposed could break it.

To test, we may set a small SO_VM_SOCKETS_BUFFER_SIZE on the receive 
side and try to send more than that value, obviously without reading 
anything on the receiver, and check that close() here returns only after 
the timeout we set in .l_linger.
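
A rough sketch of what the client side of that check could look like, 
reusing the vsock_test.c helpers (the buffer size, the send length and 
the timing tolerance below are just my assumptions, not something from 
the posted patch):

```
/* Client side of the suggested check: leave unsent data behind, then
 * verify that close() really lingers for about .l_linger seconds.  The
 * server is assumed to have set a small SO_VM_SOCKETS_BUFFER_SIZE and
 * to read nothing until the client has closed.
 */
static void test_stream_linger_wait_client(const struct test_opts *opts)
{
	struct linger optval = {
		.l_onoff = 1,
		.l_linger = 1
	};
	char buf[64 * 1024] = { 0 };
	uint64_t before, after;
	int fd;

	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
	if (fd < 0) {
		perror("connect");
		exit(EXIT_FAILURE);
	}

	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
		perror("setsockopt(SO_LINGER)");
		exit(EXIT_FAILURE);
	}

	/* Best effort: queue more data than the receiver will accept. */
	if (send(fd, buf, sizeof(buf), MSG_DONTWAIT) < 0)
		perror("send");

	before = current_nsec();
	close(fd);
	after = current_nsec();

	if (after - before < NSEC_PER_SEC)
		fprintf(stderr, "close() returned before the linger timeout\n");
}
```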

Of course, we could also add it later, but while we're at it, it crossed 
my mind.

WDYT?

Thanks,
Stefano

>+
>+	close(fd);
>+}
>+
>+static void test_stream_linger_server(const struct test_opts *opts)
>+{
>+	int fd;
>+
>+	fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
>+	if (fd < 0) {
>+		perror("accept");
>+		exit(EXIT_FAILURE);
>+	}
>+
>+	vsock_wait_remote_close(fd);
>+	close(fd);
>+}
>+
> static struct test_case test_cases[] = {
> 	{
> 		.name = "SOCK_STREAM connection reset",
>@@ -1943,6 +1979,11 @@ static struct test_case test_cases[] = {
> 		.run_client = test_stream_connect_retry_client,
> 		.run_server = test_stream_connect_retry_server,
> 	},
>+	{
>+		.name = "SOCK_STREAM SO_LINGER null-ptr-deref",
>+		.run_client = test_stream_linger_client,
>+		.run_server = test_stream_linger_server,
>+	},
> 	{},
> };
>
>
>-- 
>2.48.1
>
Michal Luczaj Feb. 5, 2025, 11:20 a.m. UTC | #2
On 2/4/25 11:48, Stefano Garzarella wrote:
> On Tue, Feb 04, 2025 at 01:29:53AM +0100, Michal Luczaj wrote:
>> ...
>> +static void test_stream_linger_client(const struct test_opts *opts)
>> +{
>> +	struct linger optval = {
>> +		.l_onoff = 1,
>> +		.l_linger = 1
>> +	};
>> +	int fd;
>> +
>> +	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
>> +	if (fd < 0) {
>> +		perror("connect");
>> +		exit(EXIT_FAILURE);
>> +	}
>> +
>> +	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
>> +		perror("setsockopt(SO_LINGER)");
>> +		exit(EXIT_FAILURE);
>> +	}
> 
> Since we are testing SO_LINGER, will also be nice to check if it's 
> working properly, since one of the fixes proposed could break it.
> 
> To test, we may set a small SO_VM_SOCKETS_BUFFER_SIZE on the receive 
> side and try to send more than that value, obviously without reading 
> anything into the receiver, and check that close() here, returns after 
> the timeout we set in .l_linger.

I may be doing something wrong, but (at least for loopback transport) it
seems that close() lingers until data is received, not sent (without even
touching SO_VM_SOCKETS_BUFFER_SIZE).

```
import struct, fcntl, termios, time
from socket import *

def linger(s, timeout):
	if s.family == AF_VSOCK:
		s.setsockopt(SOL_SOCKET, SO_LINGER, (timeout<<32) | 1)
	elif s.family == AF_INET:
		s.setsockopt(SOL_SOCKET, SO_LINGER, struct.pack('ii', 1, timeout))
	else:
		assert False

def unsent(s):
	SIOCOUTQ = termios.TIOCOUTQ
	return struct.unpack('I', fcntl.ioctl(s, SIOCOUTQ, bytes(4)))[0]

def check_lingering(family, addr):
	lis = socket(family, SOCK_STREAM)
	lis.bind(addr)
	lis.listen()

	s = socket(family, SOCK_STREAM)
	linger(s, 2)
	s.connect(lis.getsockname())

	for _ in range(1, 1<<8):
		s.send(b'x')

	while unsent(s) != 0:
		pass

	print("closing...")
	ts = time.time()
	s.close()
	print(f"done in %ds" % (time.time() - ts))

check_lingering(AF_INET, ('127.0.0.1', 1234))
check_lingering(AF_VSOCK, (1, 1234)) # VMADDR_CID_LOCAL
```

Gives me:
closing...
done in 0s
closing...
done in 2s
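
For reference, the same "unsent bytes" query can be done from C via the
SIOCOUTQ ioctl; a minimal sketch mirroring the Python unsent() helper
above (not part of the posted patch):

```
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>

/* Bytes queued on fd but not yet consumed by the transport, i.e. what
 * the Python unsent() helper above reads via SIOCOUTQ.
 */
static int unsent_bytes(int fd)
{
	int bytes;

	if (ioctl(fd, SIOCOUTQ, &bytes) < 0) {
		perror("ioctl(SIOCOUTQ)");
		return -1;
	}

	return bytes;
}
```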
Stefano Garzarella Feb. 10, 2025, 10:18 a.m. UTC | #3
On Wed, Feb 05, 2025 at 12:20:56PM +0100, Michal Luczaj wrote:
>On 2/4/25 11:48, Stefano Garzarella wrote:
>> On Tue, Feb 04, 2025 at 01:29:53AM +0100, Michal Luczaj wrote:
>>> ...
>>> +static void test_stream_linger_client(const struct test_opts *opts)
>>> +{
>>> +	struct linger optval = {
>>> +		.l_onoff = 1,
>>> +		.l_linger = 1
>>> +	};
>>> +	int fd;
>>> +
>>> +	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
>>> +	if (fd < 0) {
>>> +		perror("connect");
>>> +		exit(EXIT_FAILURE);
>>> +	}
>>> +
>>> +	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
>>> +		perror("setsockopt(SO_LINGER)");
>>> +		exit(EXIT_FAILURE);
>>> +	}
>>
>> Since we are testing SO_LINGER, will also be nice to check if it's
>> working properly, since one of the fixes proposed could break it.
>>
>> To test, we may set a small SO_VM_SOCKETS_BUFFER_SIZE on the receive
>> side and try to send more than that value, obviously without reading
>> anything into the receiver, and check that close() here, returns after
>> the timeout we set in .l_linger.
>
>I may be doing something wrong, but (at least for loopback transport) it

It's the same with VMs too. I think virtio_transport_wait_close() can be 
improved to check whether everything has been sent, avoiding the wait.

But this is material for another series, so this test should be fine for 
now!

Thanks,
Stefano

>seems that close() lingers until data is received, not sent (without even
>touching SO_VM_SOCKETS_BUFFER_SIZE).
>
>```
>import struct, fcntl, termios, time
>from socket import *
>
>def linger(s, timeout):
>	if s.family == AF_VSOCK:
>		s.setsockopt(SOL_SOCKET, SO_LINGER, (timeout<<32) | 1)
>	elif s.family == AF_INET:
>		s.setsockopt(SOL_SOCKET, SO_LINGER, struct.pack('ii', 1, timeout))
>	else:
>		assert False
>
>def unsent(s):
>	SIOCOUTQ = termios.TIOCOUTQ
>	return struct.unpack('I', fcntl.ioctl(s, SIOCOUTQ, bytes(4)))[0]
>
>def check_lingering(family, addr):
>	lis = socket(family, SOCK_STREAM)
>	lis.bind(addr)
>	lis.listen()
>
>	s = socket(family, SOCK_STREAM)
>	linger(s, 2)
>	s.connect(lis.getsockname())
>
>	for _ in range(1, 1<<8):
>		s.send(b'x')
>
>	while unsent(s) != 0:
>		pass
>
>	print("closing...")
>	ts = time.time()
>	s.close()
>	print(f"done in %ds" % (time.time() - ts))
>
>check_lingering(AF_INET, ('127.0.0.1', 1234))
>check_lingering(AF_VSOCK, (1, 1234)) # VMADDR_CID_LOCAL
>```
>
>Gives me:
>closing...
>done in 0s
>closing...
>done in 2s
>
Michal Luczaj March 7, 2025, 9:49 a.m. UTC | #4
On 2/10/25 11:18, Stefano Garzarella wrote:
> On Wed, Feb 05, 2025 at 12:20:56PM +0100, Michal Luczaj wrote:
>> On 2/4/25 11:48, Stefano Garzarella wrote:
>>> On Tue, Feb 04, 2025 at 01:29:53AM +0100, Michal Luczaj wrote:
>>>> ...
>>>> +static void test_stream_linger_client(const struct test_opts *opts)
>>>> +{
>>>> +	struct linger optval = {
>>>> +		.l_onoff = 1,
>>>> +		.l_linger = 1
>>>> +	};
>>>> +	int fd;
>>>> +
>>>> +	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
>>>> +	if (fd < 0) {
>>>> +		perror("connect");
>>>> +		exit(EXIT_FAILURE);
>>>> +	}
>>>> +
>>>> +	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
>>>> +		perror("setsockopt(SO_LINGER)");
>>>> +		exit(EXIT_FAILURE);
>>>> +	}
>>>
>>> Since we are testing SO_LINGER, will also be nice to check if it's
>>> working properly, since one of the fixes proposed could break it.
>>>
>>> To test, we may set a small SO_VM_SOCKETS_BUFFER_SIZE on the receive
>>> side and try to send more than that value, obviously without reading
>>> anything into the receiver, and check that close() here, returns after
>>> the timeout we set in .l_linger.
>>
>> I may be doing something wrong, but (at least for loopback transport) ...
> 
> Also with VMs is the same, I think virtio_transport_wait_close() can be 
> improved to check if everything is sent, avoiding to wait.

What kind of improvement do you have in mind?

I've tried modifying the loop to make close()/shutdown() linger until
unsent_bytes() == 0. No idea if this is acceptable:

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 9e85424c8343..bd8b88d70423 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -221,6 +221,7 @@ void vsock_for_each_connected_socket(struct vsock_transport *transport,
 				     void (*fn)(struct sock *sk));
 int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
 bool vsock_find_cid(unsigned int cid);
+void vsock_linger(struct sock *sk, long timeout);
 
 /**** TAP ****/
 
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 7e3db87ae433..2cf7571e94da 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1013,6 +1013,25 @@ static int vsock_getname(struct socket *sock,
 	return err;
 }
 
+void vsock_linger(struct sock *sk, long timeout)
+{
+	if (timeout) {
+		DEFINE_WAIT_FUNC(wait, woken_wake_function);
+		ssize_t (*unsent)(struct vsock_sock *vsk);
+		struct vsock_sock *vsk = vsock_sk(sk);
+
+		add_wait_queue(sk_sleep(sk), &wait);
+		unsent = vsk->transport->unsent_bytes;
+
+		do {
+			if (sk_wait_event(sk, &timeout, unsent(vsk) == 0, &wait))
+				break;
+		} while (!signal_pending(current) && timeout);
+
+		remove_wait_queue(sk_sleep(sk), &wait);
+	}
+}
+
 static int vsock_shutdown(struct socket *sock, int mode)
 {
 	int err;
@@ -1056,6 +1075,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
 		if (sock_type_connectible(sk->sk_type)) {
 			sock_reset_flag(sk, SOCK_DONE);
 			vsock_send_shutdown(sk, mode);
+			if (sock_flag(sk, SOCK_LINGER))
+				vsock_linger(sk, sk->sk_lingertime);
 		}
 	}
 
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 7f7de6d88096..9230b8358ef2 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1192,23 +1192,6 @@ static void virtio_transport_remove_sock(struct vsock_sock *vsk)
 	vsock_remove_sock(vsk);
 }
 
-static void virtio_transport_wait_close(struct sock *sk, long timeout)
-{
-	if (timeout) {
-		DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
-		add_wait_queue(sk_sleep(sk), &wait);
-
-		do {
-			if (sk_wait_event(sk, &timeout,
-					  sock_flag(sk, SOCK_DONE), &wait))
-				break;
-		} while (!signal_pending(current) && timeout);
-
-		remove_wait_queue(sk_sleep(sk), &wait);
-	}
-}
-
 static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
 					       bool cancel_timeout)
 {
@@ -1279,7 +1262,7 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
 		(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);
 
 	if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
-		virtio_transport_wait_close(sk, sk->sk_lingertime);
+		vsock_linger(sk, sk->sk_lingertime);
 
 	if (sock_flag(sk, SOCK_DONE)) {
 		return true;


This works, but I find it difficult to test without artificially slowing
the kernel down. It's a race against workers as they quite eagerly do
virtio_transport_consume_skb_sent(), which decrements vvs->bytes_unsent.
I've tried reducing SO_VM_SOCKETS_BUFFER_SIZE as you've suggested, but
send() would just block until peer had available space.

Thanks,
Michal
Stefano Garzarella March 10, 2025, 3:24 p.m. UTC | #5
On Fri, Mar 07, 2025 at 10:49:52AM +0100, Michal Luczaj wrote:
>On 2/10/25 11:18, Stefano Garzarella wrote:
>> On Wed, Feb 05, 2025 at 12:20:56PM +0100, Michal Luczaj wrote:
>>> On 2/4/25 11:48, Stefano Garzarella wrote:
>>>> On Tue, Feb 04, 2025 at 01:29:53AM +0100, Michal Luczaj wrote:
>>>>> ...
>>>>> +static void test_stream_linger_client(const struct test_opts *opts)
>>>>> +{
>>>>> +	struct linger optval = {
>>>>> +		.l_onoff = 1,
>>>>> +		.l_linger = 1
>>>>> +	};
>>>>> +	int fd;
>>>>> +
>>>>> +	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
>>>>> +	if (fd < 0) {
>>>>> +		perror("connect");
>>>>> +		exit(EXIT_FAILURE);
>>>>> +	}
>>>>> +
>>>>> +	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
>>>>> +		perror("setsockopt(SO_LINGER)");
>>>>> +		exit(EXIT_FAILURE);
>>>>> +	}
>>>>
>>>> Since we are testing SO_LINGER, will also be nice to check if it's
>>>> working properly, since one of the fixes proposed could break it.
>>>>
>>>> To test, we may set a small SO_VM_SOCKETS_BUFFER_SIZE on the receive
>>>> side and try to send more than that value, obviously without reading
>>>> anything into the receiver, and check that close() here, returns after
>>>> the timeout we set in .l_linger.
>>>
>>> I may be doing something wrong, but (at least for loopback transport) ...
>>
>> Also with VMs is the same, I think virtio_transport_wait_close() can be
>> improved to check if everything is sent, avoiding to wait.
>
>What kind of improvement do you have in mind?
>
>I've tried modifying the loop to make close()/shutdown() linger until
>unsent_bytes() == 0. No idea if this is acceptable:

Yes, that's a good idea, I had something similar in mind, but reusing 
unsent_bytes() sounds great to me.

The only problem I see is that in the guest driver the packets are put 
in the virtqueue, and the variable is decremented only when the host 
sends us an interrupt to say it has copied the packets, at which point 
the guest can free the buffer. Is it okay to consider this as sending?

I think so, though honestly it's not clear to me whether by "sending" 
we should instead mean the moment the driver copies the bytes into the 
virtqueue, even if that doesn't mean they were really sent. We should 
compare it to what the network devices or AF_UNIX do.

>
>diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
>index 9e85424c8343..bd8b88d70423 100644
>--- a/include/net/af_vsock.h
>+++ b/include/net/af_vsock.h
>@@ -221,6 +221,7 @@ void vsock_for_each_connected_socket(struct vsock_transport *transport,
> 				     void (*fn)(struct sock *sk));
> int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
> bool vsock_find_cid(unsigned int cid);
>+void vsock_linger(struct sock *sk, long timeout);
>
> /**** TAP ****/
>
>diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>index 7e3db87ae433..2cf7571e94da 100644
>--- a/net/vmw_vsock/af_vsock.c
>+++ b/net/vmw_vsock/af_vsock.c
>@@ -1013,6 +1013,25 @@ static int vsock_getname(struct socket *sock,
> 	return err;
> }
>
>+void vsock_linger(struct sock *sk, long timeout)
>+{
>+	if (timeout) {
>+		DEFINE_WAIT_FUNC(wait, woken_wake_function);
>+		ssize_t (*unsent)(struct vsock_sock *vsk);
>+		struct vsock_sock *vsk = vsock_sk(sk);
>+
>+		add_wait_queue(sk_sleep(sk), &wait);
>+		unsent = vsk->transport->unsent_bytes;

This is not implemented by all transports, so we should handle it in 
some way (check the pointer or implement it in all transports).
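
For example, something along these lines (just a sketch on top of the
vsock_linger() proposed above; skipping the wait when the transport
lacks unsent_bytes() is one option, falling back to the old
SOCK_DONE-based wait would be another):

```
void vsock_linger(struct sock *sk, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	ssize_t (*unsent)(struct vsock_sock *vsk);
	struct vsock_sock *vsk = vsock_sk(sk);

	if (!timeout)
		return;

	/* Not every transport implements unsent_bytes(); don't wait if
	 * we cannot tell whether anything is still queued.
	 */
	unsent = vsk->transport->unsent_bytes;
	if (!unsent)
		return;

	add_wait_queue(sk_sleep(sk), &wait);

	do {
		if (sk_wait_event(sk, &timeout, unsent(vsk) == 0, &wait))
			break;
	} while (!signal_pending(current) && timeout);

	remove_wait_queue(sk_sleep(sk), &wait);
}
```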

>+
>+		do {
>+			if (sk_wait_event(sk, &timeout, unsent(vsk) == 0, &wait))
>+				break;
>+		} while (!signal_pending(current) && timeout);
>+
>+		remove_wait_queue(sk_sleep(sk), &wait);
>+	}
>+}
>+
> static int vsock_shutdown(struct socket *sock, int mode)
> {
> 	int err;
>@@ -1056,6 +1075,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
> 		if (sock_type_connectible(sk->sk_type)) {
> 			sock_reset_flag(sk, SOCK_DONE);
> 			vsock_send_shutdown(sk, mode);
>+			if (sock_flag(sk, SOCK_LINGER))
>+				vsock_linger(sk, sk->sk_lingertime);

mmm, great, so on shutdown we never supported SO_LINGER, right?

> 		}
> 	}
>
>diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>index 7f7de6d88096..9230b8358ef2 100644
>--- a/net/vmw_vsock/virtio_transport_common.c
>+++ b/net/vmw_vsock/virtio_transport_common.c
>@@ -1192,23 +1192,6 @@ static void virtio_transport_remove_sock(struct vsock_sock *vsk)
> 	vsock_remove_sock(vsk);
> }
>
>-static void virtio_transport_wait_close(struct sock *sk, long timeout)
>-{
>-	if (timeout) {
>-		DEFINE_WAIT_FUNC(wait, woken_wake_function);
>-
>-		add_wait_queue(sk_sleep(sk), &wait);
>-
>-		do {
>-			if (sk_wait_event(sk, &timeout,
>-					  sock_flag(sk, SOCK_DONE), &wait))
>-				break;
>-		} while (!signal_pending(current) && timeout);
>-
>-		remove_wait_queue(sk_sleep(sk), &wait);
>-	}
>-}
>-
> static void virtio_transport_cancel_close_work(struct vsock_sock *vsk,
> 					       bool cancel_timeout)
> {
>@@ -1279,7 +1262,7 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
> 		(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);
>
> 	if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
>-		virtio_transport_wait_close(sk, sk->sk_lingertime);
>+		vsock_linger(sk, sk->sk_lingertime);
>
> 	if (sock_flag(sk, SOCK_DONE)) {
> 		return true;
>
>
>This works, but I find it difficult to test without artificially slowing
>the kernel down. It's a race against workers as they quite eagerly do
>virtio_transport_consume_skb_sent(), which decrements vvs->bytes_unsent.
>I've tried reducing SO_VM_SOCKETS_BUFFER_SIZE as you've suggested, but
>send() would just block until peer had available space.

Did you test with loopback or virtio-vsock with a VM?

BTW this approach LGTM!

Thanks,
Stefano
Michal Luczaj March 14, 2025, 3:25 p.m. UTC | #6
On 3/10/25 16:24, Stefano Garzarella wrote:
> On Fri, Mar 07, 2025 at 10:49:52AM +0100, Michal Luczaj wrote:
>> ...
>> I've tried modifying the loop to make close()/shutdown() linger until
>> unsent_bytes() == 0. No idea if this is acceptable:
> 
> Yes, that's a good idea, I had something similar in mind, but reusing 
> unsent_bytes() sounds great to me.
> 
> The only problem I see is that in the driver in the guest, the packets 
> are put in the virtqueue and the variable is decremented only when the 
> host sends us an interrupt to say that it has copied the packets and 
> then the guest can free the buffer. Is this okay to consider this as 
> sending?
>
> I think so, though it's honestly not clear to me if instead by sending 
> we should consider when the driver copies the bytes into the virtqueue, 
> but that doesn't mean they were really sent. We should compare it to 
> what the network devices or AF_UNIX do.

I had a look at AF_UNIX. SO_LINGER is not supported, which makes sense:
when you send a packet, it lands directly in the receiver's queue. As for
SIOCOUTQ handling: `return sk_wmem_alloc_get(sk)`. So I guess it's more of
an "unread bytes"?

>> +void vsock_linger(struct sock *sk, long timeout)
>> +{
>> +	if (timeout) {
>> +		DEFINE_WAIT_FUNC(wait, woken_wake_function);
>> +		ssize_t (*unsent)(struct vsock_sock *vsk);
>> +		struct vsock_sock *vsk = vsock_sk(sk);
>> +
>> +		add_wait_queue(sk_sleep(sk), &wait);
>> +		unsent = vsk->transport->unsent_bytes;
> 
> This is not implemented by all transports, so we should handle it in 
> some way (check the pointer or implement in all transports).

Ah, right, I didn't think that through.

>> @@ -1056,6 +1075,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
>> 		if (sock_type_connectible(sk->sk_type)) {
>> 			sock_reset_flag(sk, SOCK_DONE);
>> 			vsock_send_shutdown(sk, mode);
>> +			if (sock_flag(sk, SOCK_LINGER))
>> +				vsock_linger(sk, sk->sk_lingertime);
> 
> mmm, great, so on shutdown we never supported SO_LINGER, right?

Yup.

>> ...
>> This works, but I find it difficult to test without artificially slowing
>> the kernel down. It's a race against workers as they quite eagerly do
>> virtio_transport_consume_skb_sent(), which decrements vvs->bytes_unsent.
>> I've tried reducing SO_VM_SOCKETS_BUFFER_SIZE as you've suggested, but
>> send() would just block until peer had available space.
> 
> Did you test with loopback or virtio-vsock with a VM?

Both, but I may be missing something. Do you see a way to stop (or not
schedule) the worker from processing the queue (and decrementing
bytes_unsent)?

Thanks,
Michal
Stefano Garzarella March 20, 2025, 11:31 a.m. UTC | #7
On Fri, Mar 14, 2025 at 04:25:16PM +0100, Michal Luczaj wrote:
>On 3/10/25 16:24, Stefano Garzarella wrote:
>> On Fri, Mar 07, 2025 at 10:49:52AM +0100, Michal Luczaj wrote:
>>> ...
>>> I've tried modifying the loop to make close()/shutdown() linger until
>>> unsent_bytes() == 0. No idea if this is acceptable:
>>
>> Yes, that's a good idea, I had something similar in mind, but reusing
>> unsent_bytes() sounds great to me.
>>
>> The only problem I see is that in the driver in the guest, the packets
>> are put in the virtqueue and the variable is decremented only when the
>> host sends us an interrupt to say that it has copied the packets and
>> then the guest can free the buffer. Is this okay to consider this as
>> sending?
>>
>> I think so, though it's honestly not clear to me if instead by sending
>> we should consider when the driver copies the bytes into the virtqueue,
>> but that doesn't mean they were really sent. We should compare it to
>> what the network devices or AF_UNIX do.
>
>I had a look at AF_UNIX. SO_LINGER is not supported. Which makes sense;
>when you send a packet, it directly lands in receiver's queue. As for
>SIOCOUTQ handling: `return sk_wmem_alloc_get(sk)`. So I guess it's more of
>an "unread bytes"?

Yes, I see, actually for AF_UNIX it is simple.
It's hard for us to tell when the user on the other peer actually read
the data, we could use the credit mechanism, but that sometimes isn't
sent unless explicitly requested, so I'd say unsent_bytes() is fine.

>
>>> +void vsock_linger(struct sock *sk, long timeout)
>>> +{
>>> +	if (timeout) {
>>> +		DEFINE_WAIT_FUNC(wait, woken_wake_function);
>>> +		ssize_t (*unsent)(struct vsock_sock *vsk);
>>> +		struct vsock_sock *vsk = vsock_sk(sk);
>>> +
>>> +		add_wait_queue(sk_sleep(sk), &wait);
>>> +		unsent = vsk->transport->unsent_bytes;
>>
>> This is not implemented by all transports, so we should handle it in
>> some way (check the pointer or implement in all transports).
>
>Ah, right, I didn't think that through.
>
>>> @@ -1056,6 +1075,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
>>> 		if (sock_type_connectible(sk->sk_type)) {
>>> 			sock_reset_flag(sk, SOCK_DONE);
>>> 			vsock_send_shutdown(sk, mode);
>>> +			if (sock_flag(sk, SOCK_LINGER))
>>> +				vsock_linger(sk, sk->sk_lingertime);
>>
>> mmm, great, so on shutdown we never supported SO_LINGER, right?
>
>Yup.
>
>>> ...
>>> This works, but I find it difficult to test without artificially slowing
>>> the kernel down. It's a race against workers as they quite eagerly do
>>> virtio_transport_consume_skb_sent(), which decrements vvs->bytes_unsent.
>>> I've tried reducing SO_VM_SOCKETS_BUFFER_SIZE as you've suggested, but
>>> send() would just block until peer had available space.
>>
>> Did you test with loopback or virtio-vsock with a VM?
>
>Both, but I may be missing something. Do you see a way to stop (or don't
>schedule) the worker from processing queue (and decrementing bytes_unsent)?

Without touching the driver (which I don't want to do) I can't think of
anything, so I'd say it's okay.

Thanks,
Stefano
Michal Luczaj March 25, 2025, 1:22 p.m. UTC | #8
On 3/20/25 12:31, Stefano Garzarella wrote:
> On Fri, Mar 14, 2025 at 04:25:16PM +0100, Michal Luczaj wrote:
>> On 3/10/25 16:24, Stefano Garzarella wrote:
>>> On Fri, Mar 07, 2025 at 10:49:52AM +0100, Michal Luczaj wrote:
>>>> ...
>>>> I've tried modifying the loop to make close()/shutdown() linger until
>>>> unsent_bytes() == 0. No idea if this is acceptable:
>>>
>>> Yes, that's a good idea, I had something similar in mind, but reusing
>>> unsent_bytes() sounds great to me.
>>>
>>> The only problem I see is that in the driver in the guest, the packets
>>> are put in the virtqueue and the variable is decremented only when the
>>> host sends us an interrupt to say that it has copied the packets and
>>> then the guest can free the buffer. Is this okay to consider this as
>>> sending?
>>>
>>> I think so, though it's honestly not clear to me if instead by sending
>>> we should consider when the driver copies the bytes into the virtqueue,
>>> but that doesn't mean they were really sent. We should compare it to
>>> what the network devices or AF_UNIX do.
>>
>> I had a look at AF_UNIX. SO_LINGER is not supported. Which makes sense;
>> when you send a packet, it directly lands in receiver's queue. As for
>> SIOCOUTQ handling: `return sk_wmem_alloc_get(sk)`. So I guess it's more of
>> an "unread bytes"?
> 
> Yes, I see, actually for AF_UNIX it is simple.
> It's hard for us to tell when the user on the other peer actually read
> the data, we could use the credit mechanism, but that sometimes isn't
> sent unless explicitly requested, so I'd say unsent_bytes() is fine.

One more option: keep the semantics as they are (not what `man 7 socket`
says) and, for completeness, add the lingering to shutdown()?

>>>> ...
>>>> This works, but I find it difficult to test without artificially slowing
>>>> the kernel down. It's a race against workers as they quite eagerly do
>>>> virtio_transport_consume_skb_sent(), which decrements vvs->bytes_unsent.
>>>> I've tried reducing SO_VM_SOCKETS_BUFFER_SIZE as you've suggested, but
>>>> send() would just block until peer had available space.
>>>
>>> Did you test with loopback or virtio-vsock with a VM?
>>
>> Both, but I may be missing something. Do you see a way to stop (or don't
>> schedule) the worker from processing queue (and decrementing bytes_unsent)?
> 
> Without touching the driver (which I don't want to do) I can't think of
> anything, so I'd say it's okay.

Turns out there's a way to purge the loopback queue before the worker
processes it (I had no success with g2h). If you win that race,
bytes_unsent stays
elevated until kingdom come. Then you can close() the socket and watch as
it lingers.

connect(s)
  lock_sock
  while (sk_state != TCP_ESTABLISHED)
    release_sock
    schedule_timeout

// virtio_transport_recv_connecting
//   sk_state = TCP_ESTABLISHED

                                       send(s, 'x')
                                         lock_sock
                                         virtio_transport_send_pkt_info
                                           virtio_transport_get_credit
                                    (!)      vvs->bytes_unsent += ret
                                           vsock_loopback_send_pkt
                                             virtio_vsock_skb_queue_tail
                                         release_sock
                                       kill()
    lock_sock
    if signal_pending
      vsock_loopback_cancel_pkt
        virtio_transport_purge_skbs (!)

That said, I may be missing a bigger picture, but is it worth supporting
this "signal disconnects TCP_ESTABLISHED" behaviour in the first place?
Removing it would make the race above (and the whole [1] series) moot.
Plus, it appears to be broken: when I hit this condition and I try to
re-connect to the same listener, I get ETIMEDOUT for loopback and
ECONNRESET for g2h virtio; see [2].

[1]: https://lore.kernel.org/netdev/20250317-vsock-trans-signal-race-v4-0-fc8837f3f1d4@rbox.co/
[2]: Inspired by Luigi's code, which I mauled terribly:
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index d0f6d253ac72..aa4a321ddd9c 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -23,6 +23,7 @@
 #include <sys/ioctl.h>
 #include <linux/sockios.h>
 #include <linux/time64.h>
+#include <pthread.h>
 
 #include "vsock_test_zerocopy.h"
 #include "timeout.h"
@@ -1824,6 +1825,104 @@ static void test_stream_linger_server(const struct test_opts *opts)
 	close(fd);
 }
 
+static void handler(int signum)
+{
+	/* nop */
+}
+
+static void *killer(void *arg)
+{
+	pid_t pid = getpid();
+
+	if ((errno = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL))) {
+		perror("pthread_setcanceltype");
+		exit(EXIT_FAILURE);
+	}
+
+	for (;;) {
+		if (kill(pid, SIGUSR1)) {
+			perror("kill");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	return NULL;
+}
+
+static void client(const struct test_opts *opts)
+{
+	struct sockaddr_vm addr = {
+		.svm_family = AF_VSOCK,
+		.svm_cid = opts->peer_cid,
+		.svm_port = opts->peer_port,
+	};
+	sighandler_t old_handler;
+	bool reconnect = false;
+	pthread_t tid;
+	time_t tout;
+	int c;
+
+	old_handler = signal(SIGUSR1, handler);
+	if (old_handler == SIG_ERR) {
+		perror("signal");
+		exit(EXIT_FAILURE);
+	}
+
+	if ((errno = pthread_create(&tid, NULL, killer, NULL))) {
+		perror("pthread_create");
+		exit(EXIT_FAILURE);
+	}
+
+	tout = current_nsec() + 2 * NSEC_PER_SEC;
+	do {
+		c = socket(AF_VSOCK, SOCK_STREAM, 0);
+		if (c < 0) {
+			perror("socket");
+			exit(EXIT_FAILURE);
+		}
+
+		if (connect(c, (struct sockaddr *)&addr, sizeof(addr)) &&
+		    errno == EINTR) {
+			reconnect = true;
+			break;
+		}
+
+		close(c);
+	} while (current_nsec() < tout);
+
+	if ((errno = pthread_cancel(tid))) {
+		perror("pthread_cancel");
+		exit(EXIT_FAILURE);
+	}
+
+	if ((errno = pthread_join(tid, NULL))) {
+		perror("pthread_join");
+		exit(EXIT_FAILURE);
+	}
+
+	if (signal(SIGUSR1, old_handler) == SIG_ERR) {
+		perror("signal");
+		exit(EXIT_FAILURE);
+	}
+
+	if (reconnect) {
+		if (connect(c, (struct sockaddr *)&addr, sizeof(addr))) {
+			perror("re-connect() after EINTR");
+			exit(EXIT_FAILURE);
+		}
+		close(c);
+	}
+
+	control_writeln("DONE");
+}
+
+static void server(const struct test_opts *opts)
+{
+	int s = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port);
+	control_expectln("DONE");
+	close(s);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -1984,6 +2083,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_stream_linger_client,
 		.run_server = test_stream_linger_server,
 	},
+	{
+		.name = "SOCK_STREAM connect -> EINTR -> connect",
+		.run_client = client,
+		.run_server = server,
+	},
 	{},
 };
Stefano Garzarella April 1, 2025, 10:32 a.m. UTC | #9
On Tue, Mar 25, 2025 at 02:22:45PM +0100, Michal Luczaj wrote:
>On 3/20/25 12:31, Stefano Garzarella wrote:
>> On Fri, Mar 14, 2025 at 04:25:16PM +0100, Michal Luczaj wrote:
>>> On 3/10/25 16:24, Stefano Garzarella wrote:
>>>> On Fri, Mar 07, 2025 at 10:49:52AM +0100, Michal Luczaj wrote:
>>>>> ...
>>>>> I've tried modifying the loop to make close()/shutdown() linger until
>>>>> unsent_bytes() == 0. No idea if this is acceptable:
>>>>
>>>> Yes, that's a good idea, I had something similar in mind, but reusing
>>>> unsent_bytes() sounds great to me.
>>>>
>>>> The only problem I see is that in the driver in the guest, the packets
>>>> are put in the virtqueue and the variable is decremented only when the
>>>> host sends us an interrupt to say that it has copied the packets and
>>>> then the guest can free the buffer. Is this okay to consider this as
>>>> sending?
>>>>
>>>> I think so, though it's honestly not clear to me if instead by sending
>>>> we should consider when the driver copies the bytes into the virtqueue,
>>>> but that doesn't mean they were really sent. We should compare it to
>>>> what the network devices or AF_UNIX do.
>>>
>>> I had a look at AF_UNIX. SO_LINGER is not supported. Which makes sense;
>>> when you send a packet, it directly lands in receiver's queue. As for
>>> SIOCOUTQ handling: `return sk_wmem_alloc_get(sk)`. So I guess it's more of
>>> an "unread bytes"?
>>
>> Yes, I see, actually for AF_UNIX it is simple.
>> It's hard for us to tell when the user on the other peer actually read
>> the data, we could use the credit mechanism, but that sometimes isn't
>> sent unless explicitly requested, so I'd say unsent_bytes() is fine.
>
>One more option: keep the semantics (in a state of not-what-`man 7 socket`-
>says) and, for completeness, add the lingering to shutdown()?

Sorry, I'm getting lost!
That's because we had a different behavior between close() and 
shutdown(), right?

If it's the case, I would say let's fix at least that for now.

>
>>>>> ...
>>>>> This works, but I find it difficult to test without artificially slowing
>>>>> the kernel down. It's a race against workers as they quite eagerly do
>>>>> virtio_transport_consume_skb_sent(), which decrements vvs->bytes_unsent.
>>>>> I've tried reducing SO_VM_SOCKETS_BUFFER_SIZE as you've suggested, but
>>>>> send() would just block until peer had available space.
>>>>
>>>> Did you test with loopback or virtio-vsock with a VM?
>>>
>>> Both, but I may be missing something. Do you see a way to stop (or don't
>>> schedule) the worker from processing queue (and decrementing bytes_unsent)?
>>
>> Without touching the driver (which I don't want to do) I can't think of
>> anything, so I'd say it's okay.
>
>Turns out there's a way to purge the loopback queue before worker processes
>it (I had no success with g2h). If you win that race, bytes_unsent stays
>elevated until kingdom come. Then you can close() the socket and watch as
>it lingers.
>
>connect(s)
>  lock_sock
>  while (sk_state != TCP_ESTABLISHED)
>    release_sock
>    schedule_timeout
>
>// virtio_transport_recv_connecting
>//   sk_state = TCP_ESTABLISHED
>
>                                       send(s, 'x')
>                                         lock_sock
>                                         virtio_transport_send_pkt_info
>                                           virtio_transport_get_credit
>                                    (!)      vvs->bytes_unsent += ret
>                                           vsock_loopback_send_pkt
>                                             virtio_vsock_skb_queue_tail
>                                         release_sock
>                                       kill()
>    lock_sock
>    if signal_pending
>      vsock_loopback_cancel_pkt
>        virtio_transport_purge_skbs (!)
>
>That said, I may be missing a bigger picture, but is it worth supporting
>this "signal disconnects TCP_ESTABLISHED" behaviour in the first place?

Can you elaborate a bit?

>Removing it would make the race above (and the whole [1] series) moot.
>Plus, it appears to be broken: when I hit this condition and I try to
>re-connect to the same listener, I get ETIMEDOUT for loopback and
>ECONNRESET for g2h virtio; see [2].

Could this be related to the fix I sent some days ago?
https://lore.kernel.org/netdev/20250328141528.420719-1-sgarzare@redhat.com/

Thanks,
Stefano
Michal Luczaj April 3, 2025, 10:06 p.m. UTC | #10
On 4/1/25 12:32, Stefano Garzarella wrote:
> On Tue, Mar 25, 2025 at 02:22:45PM +0100, Michal Luczaj wrote:
>> On 3/20/25 12:31, Stefano Garzarella wrote:
>>> On Fri, Mar 14, 2025 at 04:25:16PM +0100, Michal Luczaj wrote:
>>>> On 3/10/25 16:24, Stefano Garzarella wrote:
>>>>> On Fri, Mar 07, 2025 at 10:49:52AM +0100, Michal Luczaj wrote:
>>>>>> ...
>>>>>> I've tried modifying the loop to make close()/shutdown() linger until
>>>>>> unsent_bytes() == 0. No idea if this is acceptable:
>>>>>
>>>>> Yes, that's a good idea, I had something similar in mind, but reusing
>>>>> unsent_bytes() sounds great to me.
>>>>>
>>>>> The only problem I see is that in the driver in the guest, the packets
>>>>> are put in the virtqueue and the variable is decremented only when the
>>>>> host sends us an interrupt to say that it has copied the packets and
>>>>> then the guest can free the buffer. Is this okay to consider this as
>>>>> sending?
>>>>>
>>>>> I think so, though it's honestly not clear to me if instead by sending
>>>>> we should consider when the driver copies the bytes into the virtqueue,
>>>>> but that doesn't mean they were really sent. We should compare it to
>>>>> what the network devices or AF_UNIX do.
>>>>
>>>> I had a look at AF_UNIX. SO_LINGER is not supported. Which makes sense;
>>>> when you send a packet, it directly lands in receiver's queue. As for
>>>> SIOCOUTQ handling: `return sk_wmem_alloc_get(sk)`. So I guess it's more of
>>>> an "unread bytes"?
>>>
>>> Yes, I see, actually for AF_UNIX it is simple.
>>> It's hard for us to tell when the user on the other peer actually read
>>> the data, we could use the credit mechanism, but that sometimes isn't
>>> sent unless explicitly requested, so I'd say unsent_bytes() is fine.
>>
>> One more option: keep the semantics (in a state of not-what-`man 7 socket`-
>> says) and, for completeness, add the lingering to shutdown()?
> 
> Sorry, I'm getting lost!
> That's because we had a different behavior between close() and 
> shutdown() right?
> 
> If it's the case, I would say let's fix at least that for now.

Yeah, okay, let's keep things simple. I'll post the patch once net-next opens.

>>>>>> ...
>>>>>> This works, but I find it difficult to test without artificially slowing
>>>>>> the kernel down. It's a race against workers as they quite eagerly do
>>>>>> virtio_transport_consume_skb_sent(), which decrements vvs->bytes_unsent.
>>>>>> I've tried reducing SO_VM_SOCKETS_BUFFER_SIZE as you've suggested, but
>>>>>> send() would just block until peer had available space.
>>>>>
>>>>> Did you test with loopback or virtio-vsock with a VM?
>>>>
>>>> Both, but I may be missing something. Do you see a way to stop (or don't
>>>> schedule) the worker from processing queue (and decrementing bytes_unsent)?
>>>
>>> Without touching the driver (which I don't want to do) I can't think of
>>> anything, so I'd say it's okay.
>>
>> Turns out there's a way to purge the loopback queue before worker processes
>> it (I had no success with g2h). If you win that race, bytes_unsent stays
>> elevated until kingdom come. Then you can close() the socket and watch as
>> it lingers.
>>
>> connect(s)
>>  lock_sock
>>  while (sk_state != TCP_ESTABLISHED)
>>    release_sock
>>    schedule_timeout
>>
>> // virtio_transport_recv_connecting
>> //   sk_state = TCP_ESTABLISHED
>>
>>                                       send(s, 'x')
>>                                         lock_sock
>>                                         virtio_transport_send_pkt_info
>>                                           virtio_transport_get_credit
>>                                    (!)      vvs->bytes_unsent += ret
>>                                           vsock_loopback_send_pkt
>>                                             virtio_vsock_skb_queue_tail
>>                                         release_sock
>>                                       kill()
>>    lock_sock
>>    if signal_pending
>>      vsock_loopback_cancel_pkt
>>        virtio_transport_purge_skbs (!)
>>
>> That said, I may be missing a bigger picture, but is it worth supporting
>> this "signal disconnects TCP_ESTABLISHED" behaviour in the first place?
> 
> Can you elaborate a bit?

There isn't much to it. I just wondered whether a connect() that has
already established a connection could ignore the signal (or pretend it
came too late), to avoid carrying out this kind of disconnect.

>> Removing it would make the race above (and the whole [1] series) moot.
>> Plus, it appears to be broken: when I hit this condition and I try to
>> re-connect to the same listener, I get ETIMEDOUT for loopback and
>> ECONNRESET for g2h virtio; see [2].
> 
> Could this be related to the fix I sent some days ago?
> https://lore.kernel.org/netdev/20250328141528.420719-1-sgarzare@redhat.com/

I've tried that. I also took a hint from your other mail and attempted
flushing the listener queue, but to no avail. Crude code below. Is there
something wrong with it?

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <pthread.h>
#include <sys/socket.h>
#include <linux/vm_sockets.h>

static void die(const char *msg)
{
	perror(msg);
	exit(-1);
}

static void barrier(pthread_barrier_t *barr)
{
	errno = pthread_barrier_wait(barr);
	if (errno && errno != PTHREAD_BARRIER_SERIAL_THREAD)
		die("pthread_barrier_wait");
}

static void flush_accept(int s)
{
	int p = accept(s, NULL, NULL);
	if (p < 0) {
		if (errno != EAGAIN)
			perror("accept");
		return;
	}

	printf("accept: drained\n");
	close(p);
}

static void handler(int signum)
{
	/* nop */
}

void static set_accept_timeout(int s)
{
	struct timeval tv = { .tv_sec = 1 };
	if (setsockopt(s, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
		die("setsockopt SO_RCVTIMEO");
}

void static set_connect_timeout(int s)
{
	struct timeval tv = { .tv_sec = 1 };
	if (setsockopt(s, AF_VSOCK, SO_VM_SOCKETS_CONNECT_TIMEOUT, &tv,
		       sizeof(tv)))
		die("setsockopt SO_VM_SOCKETS_CONNECT_TIMEOUT");
}

static void *killer(void *arg)
{
	pthread_barrier_t *barr = (pthread_barrier_t *)arg;
	pid_t pid = getpid();

	if ((errno = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS,
					   NULL)))
		die("pthread_setcanceltype");

	for (;;) {
		barrier(barr);
		if (kill(pid, SIGUSR1))
			die("kill");
		barrier(barr);
	}

	return NULL;
}

int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_LOCAL,
		.svm_port = 1234
	};
	socklen_t alen = sizeof(addr);
	pthread_barrier_t barr;
	pthread_t tid;
	int s, c;

	if ((errno = pthread_barrier_init(&barr, NULL, 2)))
		die("pthread_barrier_init");

	if (signal(SIGUSR1, handler) == SIG_ERR)
		die("signal");

	s = socket(AF_VSOCK, SOCK_STREAM, 0);
	if (s < 0)
		die("socket s");
	set_accept_timeout(s);

	if (bind(s, (struct sockaddr *)&addr, alen))
		die("bind");

	if (listen(s, 64))
		die("listen");

	if ((errno = pthread_create(&tid, NULL, killer, &barr)))
		die("pthread_create");

	for (;;) {
		c = socket(AF_VSOCK, SOCK_STREAM, 0);
		if (c < 0)
			die("socket c");

		barrier(&barr);
		if (connect(c, (struct sockaddr *)&addr, sizeof(addr)) &&
		    errno == EINTR) {
		    	printf("connect: EINTR\n");
			break;
		}
		barrier(&barr);

		close(c);
		flush_accept(s);
	}

	if ((errno = pthread_cancel(tid)))
		die("pthread_cancel");

	if ((errno = pthread_join(tid, NULL)))
		die("pthread_join");

	flush_accept(s);
	set_connect_timeout(c);
	if (connect(c, (struct sockaddr *)&addr, sizeof(addr)))
		die("re-connect");

	printf("okay?\n");

	return 0;
}

Patch

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index dfff8b288265f96b602cb1bfa0e6dce02f114222..d0f6d253ac72d08a957cb81a3c38fcc72bec5a53 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -1788,6 +1788,42 @@  static void test_stream_connect_retry_server(const struct test_opts *opts)
 	close(fd);
 }
 
+static void test_stream_linger_client(const struct test_opts *opts)
+{
+	struct linger optval = {
+		.l_onoff = 1,
+		.l_linger = 1
+	};
+	int fd;
+
+	fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
+		perror("setsockopt(SO_LINGER)");
+		exit(EXIT_FAILURE);
+	}
+
+	close(fd);
+}
+
+static void test_stream_linger_server(const struct test_opts *opts)
+{
+	int fd;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	vsock_wait_remote_close(fd);
+	close(fd);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -1943,6 +1979,11 @@  static struct test_case test_cases[] = {
 		.run_client = test_stream_connect_retry_client,
 		.run_server = test_stream_connect_retry_server,
 	},
+	{
+		.name = "SOCK_STREAM SO_LINGER null-ptr-deref",
+		.run_client = test_stream_linger_client,
+		.run_server = test_stream_linger_server,
+	},
 	{},
 };