[bpf,3/4] bpf, vsock: Invoke proto::close on close()

Message ID 20241118-vsock-bpf-poll-close-v1-3-f1b9669cacdc@rbox.co (mailing list archive)
State Accepted
Commit 135ffc7becc82cfb84936ae133da7969220b43b2
Series bpf, vsock: Fix poll() and close()

Commit Message

Michal Luczaj Nov. 18, 2024, 9:03 p.m. UTC
vsock defines a BPF callback to be invoked when close() is called. However,
this callback is never actually executed. As a result, a closed vsock
socket is not automatically removed from the sockmap/sockhash.

Introduce a dummy vsock_close() and make vsock_release() call proto::close.

Note: changes in __vsock_release() look messy, but it's only due to indent
level reduction and variables xmas tree reorder.

Fixes: 634f1a7110b4 ("vsock: support sockmap")
Signed-off-by: Michal Luczaj <mhal@rbox.co>
---
 net/vmw_vsock/af_vsock.c | 67 +++++++++++++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 27 deletions(-)
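
For context, not part of the patch: when a vsock socket is added to a
sockmap, vsock_bpf_update_proto() swaps sk->sk_prot for a sockmap-aware
proto whose .close is sock_map_close(), after the original proto::close has
been stashed as psock->saved_close. Roughly, as a simplified sketch rather
than the literal net/core/sock_map.c source:

    /* sockmap's replacement for proto::close */
    static void sock_map_close(struct sock *sk, long timeout)
    {
            /* psock lookup and locking omitted */
            saved_close = psock->saved_close;

            /* drop sk from every sockmap/sockhash it sits in */
            sock_map_remove_links(sk, psock);

            /* then unconditionally chain to the original proto::close,
             * which for vsock is the new dummy vsock_close()
             */
            saved_close(sk, timeout);
    }

Before this patch vsock_release() went straight to __vsock_release() and
never called sk->sk_prot->close, so sock_map_close() was unreachable and a
closed socket lingered in the map; with the patch, close(2) on a mapped
vsock socket reaches sock_map_close() first and the entry is removed.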

Comments

Stefano Garzarella Nov. 21, 2024, 9:22 a.m. UTC | #1
On Mon, Nov 18, 2024 at 10:03:43PM +0100, Michal Luczaj wrote:
>vsock defines a BPF callback to be invoked when close() is called. However,
>this callback is never actually executed. As a result, a closed vsock
>socket is not automatically removed from the sockmap/sockhash.
>
>Introduce a dummy vsock_close() and make vsock_release() call proto::close.
>
>Note: changes in __vsock_release() look messy, but it's only due to indent
>level reduction and variables xmas tree reorder.
>
>Fixes: 634f1a7110b4 ("vsock: support sockmap")
>Signed-off-by: Michal Luczaj <mhal@rbox.co>
>---
> net/vmw_vsock/af_vsock.c | 67 +++++++++++++++++++++++++++++-------------------
> 1 file changed, 40 insertions(+), 27 deletions(-)
>
>diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>index 919da8edd03c838cbcdbf1618425da6c5ec2df1a..b52b798aa4c2926c3f233aad6cd31b4056f6fee2 100644
>--- a/net/vmw_vsock/af_vsock.c
>+++ b/net/vmw_vsock/af_vsock.c
>@@ -117,12 +117,14 @@
> static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
> static void vsock_sk_destruct(struct sock *sk);
> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
>+static void vsock_close(struct sock *sk, long timeout);
>
> /* Protocol family. */
> struct proto vsock_proto = {
> 	.name = "AF_VSOCK",
> 	.owner = THIS_MODULE,
> 	.obj_size = sizeof(struct vsock_sock),
>+	.close = vsock_close,
> #ifdef CONFIG_BPF_SYSCALL
> 	.psock_update_sk_prot = vsock_bpf_update_proto,
> #endif
>@@ -797,39 +799,37 @@ static bool sock_type_connectible(u16 type)
>
> static void __vsock_release(struct sock *sk, int level)
> {
>-	if (sk) {
>-		struct sock *pending;
>-		struct vsock_sock *vsk;
>-
>-		vsk = vsock_sk(sk);
>-		pending = NULL;	/* Compiler warning. */
>+	struct vsock_sock *vsk;
>+	struct sock *pending;
>
>-		/* When "level" is SINGLE_DEPTH_NESTING, use the nested
>-		 * version to avoid the warning "possible recursive locking
>-		 * detected". When "level" is 0, lock_sock_nested(sk, level)
>-		 * is the same as lock_sock(sk).
>-		 */
>-		lock_sock_nested(sk, level);
>+	vsk = vsock_sk(sk);
>+	pending = NULL;	/* Compiler warning. */
>
>-		if (vsk->transport)
>-			vsk->transport->release(vsk);
>-		else if (sock_type_connectible(sk->sk_type))
>-			vsock_remove_sock(vsk);
>+	/* When "level" is SINGLE_DEPTH_NESTING, use the nested
>+	 * version to avoid the warning "possible recursive locking
>+	 * detected". When "level" is 0, lock_sock_nested(sk, level)
>+	 * is the same as lock_sock(sk).
>+	 */
>+	lock_sock_nested(sk, level);
>
>-		sock_orphan(sk);
>-		sk->sk_shutdown = SHUTDOWN_MASK;
>+	if (vsk->transport)
>+		vsk->transport->release(vsk);
>+	else if (sock_type_connectible(sk->sk_type))
>+		vsock_remove_sock(vsk);
>
>-		skb_queue_purge(&sk->sk_receive_queue);
>+	sock_orphan(sk);
>+	sk->sk_shutdown = SHUTDOWN_MASK;
>
>-		/* Clean up any sockets that never were accepted. */
>-		while ((pending = vsock_dequeue_accept(sk)) != NULL) {
>-			__vsock_release(pending, SINGLE_DEPTH_NESTING);
>-			sock_put(pending);
>-		}
>+	skb_queue_purge(&sk->sk_receive_queue);
>
>-		release_sock(sk);
>-		sock_put(sk);
>+	/* Clean up any sockets that never were accepted. */
>+	while ((pending = vsock_dequeue_accept(sk)) != NULL) {
>+		__vsock_release(pending, SINGLE_DEPTH_NESTING);
>+		sock_put(pending);
> 	}
>+
>+	release_sock(sk);
>+	sock_put(sk);
> }
>
> static void vsock_sk_destruct(struct sock *sk)
>@@ -901,9 +901,22 @@ void vsock_data_ready(struct sock *sk)
> }
> EXPORT_SYMBOL_GPL(vsock_data_ready);
>
>+/* Dummy callback required by sockmap.
>+ * See unconditional call of saved_close() in sock_map_close().
>+ */
>+static void vsock_close(struct sock *sk, long timeout)
>+{
>+}
>+
> static int vsock_release(struct socket *sock)
> {
>-	__vsock_release(sock->sk, 0);
>+	struct sock *sk = sock->sk;
>+
>+	if (!sk)
>+		return 0;

Compared with before, now we return earlier and so we don't set SS_FREE, 
could it be risky?

I think no, because in theory we have already set it in a previous call, 
right?

Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>

>+
>+	sk->sk_prot->close(sk, 0);
>+	__vsock_release(sk, 0);
> 	sock->sk = NULL;
> 	sock->state = SS_FREE;
>
>
>-- 
>2.46.2
>
Michal Luczaj Nov. 21, 2024, 7:54 p.m. UTC | #2
On 11/21/24 10:22, Stefano Garzarella wrote:
> On Mon, Nov 18, 2024 at 10:03:43PM +0100, Michal Luczaj wrote:
>> vsock defines a BPF callback to be invoked when close() is called. However,
>> this callback is never actually executed. As a result, a closed vsock
>> socket is not automatically removed from the sockmap/sockhash.
>>
>> Introduce a dummy vsock_close() and make vsock_release() call proto::close.
>>
>> Note: changes in __vsock_release() look messy, but it's only due to indent
>> level reduction and variables xmas tree reorder.
>>
>> Fixes: 634f1a7110b4 ("vsock: support sockmap")
>> Signed-off-by: Michal Luczaj <mhal@rbox.co>
>> ---
>> net/vmw_vsock/af_vsock.c | 67 +++++++++++++++++++++++++++++-------------------
>> 1 file changed, 40 insertions(+), 27 deletions(-)
>>
>> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> index 919da8edd03c838cbcdbf1618425da6c5ec2df1a..b52b798aa4c2926c3f233aad6cd31b4056f6fee2 100644
>> --- a/net/vmw_vsock/af_vsock.c
>> +++ b/net/vmw_vsock/af_vsock.c
>> @@ -117,12 +117,14 @@
>> static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
>> static void vsock_sk_destruct(struct sock *sk);
>> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
>> +static void vsock_close(struct sock *sk, long timeout);
>>
>> /* Protocol family. */
>> struct proto vsock_proto = {
>> 	.name = "AF_VSOCK",
>> 	.owner = THIS_MODULE,
>> 	.obj_size = sizeof(struct vsock_sock),
>> +	.close = vsock_close,
>> #ifdef CONFIG_BPF_SYSCALL
>> 	.psock_update_sk_prot = vsock_bpf_update_proto,
>> #endif
>> @@ -797,39 +799,37 @@ static bool sock_type_connectible(u16 type)
>>
>> static void __vsock_release(struct sock *sk, int level)
>> {
>> -	if (sk) {
>> -		struct sock *pending;
>> -		struct vsock_sock *vsk;
>> -
>> -		vsk = vsock_sk(sk);
>> -		pending = NULL;	/* Compiler warning. */
>> +	struct vsock_sock *vsk;
>> +	struct sock *pending;
>>
>> -		/* When "level" is SINGLE_DEPTH_NESTING, use the nested
>> -		 * version to avoid the warning "possible recursive locking
>> -		 * detected". When "level" is 0, lock_sock_nested(sk, level)
>> -		 * is the same as lock_sock(sk).
>> -		 */
>> -		lock_sock_nested(sk, level);
>> +	vsk = vsock_sk(sk);
>> +	pending = NULL;	/* Compiler warning. */
>>
>> -		if (vsk->transport)
>> -			vsk->transport->release(vsk);
>> -		else if (sock_type_connectible(sk->sk_type))
>> -			vsock_remove_sock(vsk);
>> +	/* When "level" is SINGLE_DEPTH_NESTING, use the nested
>> +	 * version to avoid the warning "possible recursive locking
>> +	 * detected". When "level" is 0, lock_sock_nested(sk, level)
>> +	 * is the same as lock_sock(sk).
>> +	 */
>> +	lock_sock_nested(sk, level);
>>
>> -		sock_orphan(sk);
>> -		sk->sk_shutdown = SHUTDOWN_MASK;
>> +	if (vsk->transport)
>> +		vsk->transport->release(vsk);
>> +	else if (sock_type_connectible(sk->sk_type))
>> +		vsock_remove_sock(vsk);
>>
>> -		skb_queue_purge(&sk->sk_receive_queue);
>> +	sock_orphan(sk);
>> +	sk->sk_shutdown = SHUTDOWN_MASK;
>>
>> -		/* Clean up any sockets that never were accepted. */
>> -		while ((pending = vsock_dequeue_accept(sk)) != NULL) {
>> -			__vsock_release(pending, SINGLE_DEPTH_NESTING);
>> -			sock_put(pending);
>> -		}
>> +	skb_queue_purge(&sk->sk_receive_queue);
>>
>> -		release_sock(sk);
>> -		sock_put(sk);
>> +	/* Clean up any sockets that never were accepted. */
>> +	while ((pending = vsock_dequeue_accept(sk)) != NULL) {
>> +		__vsock_release(pending, SINGLE_DEPTH_NESTING);
>> +		sock_put(pending);
>> 	}
>> +
>> +	release_sock(sk);
>> +	sock_put(sk);
>> }
>>
>> static void vsock_sk_destruct(struct sock *sk)
>> @@ -901,9 +901,22 @@ void vsock_data_ready(struct sock *sk)
>> }
>> EXPORT_SYMBOL_GPL(vsock_data_ready);
>>
>> +/* Dummy callback required by sockmap.
>> + * See unconditional call of saved_close() in sock_map_close().
>> + */
>> +static void vsock_close(struct sock *sk, long timeout)
>> +{
>> +}
>> +
>> static int vsock_release(struct socket *sock)
>> {
>> -	__vsock_release(sock->sk, 0);
>> +	struct sock *sk = sock->sk;
>> +
>> +	if (!sk)
>> +		return 0;
> 
> Compared with before, now we return earlier and so we don't set SS_FREE, 
> could it be risky?
>
> I think no, because in theory we have already set it in a previous call, 
> right?

Yeah, and is there actually a way to call vsock_release() for a second
time? The only caller I see is __sock_release(), which won't allow that.

As for the sockets that never had ->sk assigned, I assume it doesn't matter.

> Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
> 
>> +
>> +	sk->sk_prot->close(sk, 0);
>> +	__vsock_release(sk, 0);
>> 	sock->sk = NULL;
>> 	sock->state = SS_FREE;
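
For reference, the caller mentioned above funnels release through sock->ops
and clears the pointer afterwards, so vsock_release() cannot be reached a
second time via that path. A rough sketch of the guard (simplified, from
net/socket.c):

    static void __sock_release(struct socket *sock, struct inode *inode)
    {
            if (sock->ops) {
                    /* ... */
                    sock->ops->release(sock);       /* vsock_release() */
                    /* ... */
                    sock->ops = NULL;
            }
            /* ... */
    }
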
Stefano Garzarella Nov. 22, 2024, 8:13 a.m. UTC | #3
On Thu, Nov 21, 2024 at 8:54 PM Michal Luczaj <mhal@rbox.co> wrote:
>
> On 11/21/24 10:22, Stefano Garzarella wrote:
> > On Mon, Nov 18, 2024 at 10:03:43PM +0100, Michal Luczaj wrote:
> >> vsock defines a BPF callback to be invoked when close() is called. However,
> >> this callback is never actually executed. As a result, a closed vsock
> >> socket is not automatically removed from the sockmap/sockhash.
> >>
> >> Introduce a dummy vsock_close() and make vsock_release() call proto::close.
> >>
> >> Note: changes in __vsock_release() look messy, but it's only due to indent
> >> level reduction and variables xmas tree reorder.
> >>
> >> Fixes: 634f1a7110b4 ("vsock: support sockmap")
> >> Signed-off-by: Michal Luczaj <mhal@rbox.co>
> >> ---
> >> net/vmw_vsock/af_vsock.c | 67 +++++++++++++++++++++++++++++-------------------
> >> 1 file changed, 40 insertions(+), 27 deletions(-)
> >>
> >> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> >> index 919da8edd03c838cbcdbf1618425da6c5ec2df1a..b52b798aa4c2926c3f233aad6cd31b4056f6fee2 100644
> >> --- a/net/vmw_vsock/af_vsock.c
> >> +++ b/net/vmw_vsock/af_vsock.c
> >> @@ -117,12 +117,14 @@
> >> static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
> >> static void vsock_sk_destruct(struct sock *sk);
> >> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
> >> +static void vsock_close(struct sock *sk, long timeout);
> >>
> >> /* Protocol family. */
> >> struct proto vsock_proto = {
> >>      .name = "AF_VSOCK",
> >>      .owner = THIS_MODULE,
> >>      .obj_size = sizeof(struct vsock_sock),
> >> +    .close = vsock_close,
> >> #ifdef CONFIG_BPF_SYSCALL
> >>      .psock_update_sk_prot = vsock_bpf_update_proto,
> >> #endif
> >> @@ -797,39 +799,37 @@ static bool sock_type_connectible(u16 type)
> >>
> >> static void __vsock_release(struct sock *sk, int level)
> >> {
> >> -    if (sk) {
> >> -            struct sock *pending;
> >> -            struct vsock_sock *vsk;
> >> -
> >> -            vsk = vsock_sk(sk);
> >> -            pending = NULL; /* Compiler warning. */
> >> +    struct vsock_sock *vsk;
> >> +    struct sock *pending;
> >>
> >> -            /* When "level" is SINGLE_DEPTH_NESTING, use the nested
> >> -             * version to avoid the warning "possible recursive locking
> >> -             * detected". When "level" is 0, lock_sock_nested(sk, level)
> >> -             * is the same as lock_sock(sk).
> >> -             */
> >> -            lock_sock_nested(sk, level);
> >> +    vsk = vsock_sk(sk);
> >> +    pending = NULL; /* Compiler warning. */
> >>
> >> -            if (vsk->transport)
> >> -                    vsk->transport->release(vsk);
> >> -            else if (sock_type_connectible(sk->sk_type))
> >> -                    vsock_remove_sock(vsk);
> >> +    /* When "level" is SINGLE_DEPTH_NESTING, use the nested
> >> +     * version to avoid the warning "possible recursive locking
> >> +     * detected". When "level" is 0, lock_sock_nested(sk, level)
> >> +     * is the same as lock_sock(sk).
> >> +     */
> >> +    lock_sock_nested(sk, level);
> >>
> >> -            sock_orphan(sk);
> >> -            sk->sk_shutdown = SHUTDOWN_MASK;
> >> +    if (vsk->transport)
> >> +            vsk->transport->release(vsk);
> >> +    else if (sock_type_connectible(sk->sk_type))
> >> +            vsock_remove_sock(vsk);
> >>
> >> -            skb_queue_purge(&sk->sk_receive_queue);
> >> +    sock_orphan(sk);
> >> +    sk->sk_shutdown = SHUTDOWN_MASK;
> >>
> >> -            /* Clean up any sockets that never were accepted. */
> >> -            while ((pending = vsock_dequeue_accept(sk)) != NULL) {
> >> -                    __vsock_release(pending, SINGLE_DEPTH_NESTING);
> >> -                    sock_put(pending);
> >> -            }
> >> +    skb_queue_purge(&sk->sk_receive_queue);
> >>
> >> -            release_sock(sk);
> >> -            sock_put(sk);
> >> +    /* Clean up any sockets that never were accepted. */
> >> +    while ((pending = vsock_dequeue_accept(sk)) != NULL) {
> >> +            __vsock_release(pending, SINGLE_DEPTH_NESTING);
> >> +            sock_put(pending);
> >>      }
> >> +
> >> +    release_sock(sk);
> >> +    sock_put(sk);
> >> }
> >>
> >> static void vsock_sk_destruct(struct sock *sk)
> >> @@ -901,9 +901,22 @@ void vsock_data_ready(struct sock *sk)
> >> }
> >> EXPORT_SYMBOL_GPL(vsock_data_ready);
> >>
> >> +/* Dummy callback required by sockmap.
> >> + * See unconditional call of saved_close() in sock_map_close().
> >> + */
> >> +static void vsock_close(struct sock *sk, long timeout)
> >> +{
> >> +}
> >> +
> >> static int vsock_release(struct socket *sock)
> >> {
> >> -    __vsock_release(sock->sk, 0);
> >> +    struct sock *sk = sock->sk;
> >> +
> >> +    if (!sk)
> >> +            return 0;
> >
> > Compared with before, now we return earlier and so we don't set SS_FREE,
> > could it be risky?
> >
> > I think no, because in theory we have already set it in a previous call,
> > right?
>
> Yeah, and is there actually a way to call vsock_release() for a second
> time? The only caller I see is __sock_release(), which won't allow that.

Maybe no, but the `sock->sk` check made me think so.

>
> As for the sockets that never had ->sk assigned, I assume it doesn't matter.

Yep, so my R-b stays here ;-)

Thanks for these great fixes,
Stefano

>
> > Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
> >
> >> +
> >> +    sk->sk_prot->close(sk, 0);
> >> +    __vsock_release(sk, 0);
> >>      sock->sk = NULL;
> >>      sock->state = SS_FREE;
>
Luigi Leonardi Nov. 22, 2024, 4:20 p.m. UTC | #4
I spent some time checking and nobody but __sock_create (net/socket.c)
and vsock_release can set sock->sk to NULL.

I also ran checkpatch, everything LGTM.
Thanks for the fix!

Reviewed-by: Luigi Leonardi <leonardi@redhat.com>
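
The fixed behaviour can also be exercised from user space. Below is a rough
standalone check (not the selftest from this series): it assumes the
vsock_loopback transport is loaded so VMADDR_CID_LOCAL works, libbpf is
available, and the process has the needed capabilities; the port is
arbitrary and most error handling is omitted.

    /* Insert a connected vsock socket into a sockmap, close it, then see
     * whether any key is left. With the fix the map ends up empty; without
     * it the stale entry would remain.
     */
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/vm_sockets.h>
    #include <bpf/bpf.h>

    int main(void)
    {
            struct sockaddr_vm srv_addr = {
                    .svm_family = AF_VSOCK,
                    .svm_cid = VMADDR_CID_ANY,
                    .svm_port = 1234,               /* arbitrary port */
            };
            struct sockaddr_vm cli_addr = srv_addr;
            int srv, cli, peer, map, key = 0, err;

            srv = socket(AF_VSOCK, SOCK_STREAM, 0);
            bind(srv, (struct sockaddr *)&srv_addr, sizeof(srv_addr));
            listen(srv, 1);

            cli_addr.svm_cid = VMADDR_CID_LOCAL;    /* loopback transport */
            cli = socket(AF_VSOCK, SOCK_STREAM, 0);
            connect(cli, (struct sockaddr *)&cli_addr, sizeof(cli_addr));
            peer = accept(srv, NULL, NULL);

            /* key_size and value_size are both 4; on update the value is
             * the fd of the socket being inserted
             */
            map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL,
                                 sizeof(int), sizeof(int), 1, NULL);

            err = bpf_map_update_elem(map, &key, &peer, BPF_ANY);
            printf("insert: %s\n", err ? strerror(errno) : "ok");

            close(peer);

            /* any key still present means the closed socket was not
             * removed from the map
             */
            err = bpf_map_get_next_key(map, NULL, &key);
            printf("after close: %s\n",
                   err ? "map is empty" : "stale entry still in map");

            close(cli);
            close(srv);
            close(map);
            return 0;
    }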

Patch

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 919da8edd03c838cbcdbf1618425da6c5ec2df1a..b52b798aa4c2926c3f233aad6cd31b4056f6fee2 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -117,12 +117,14 @@ 
 static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
 static void vsock_sk_destruct(struct sock *sk);
 static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+static void vsock_close(struct sock *sk, long timeout);
 
 /* Protocol family. */
 struct proto vsock_proto = {
 	.name = "AF_VSOCK",
 	.owner = THIS_MODULE,
 	.obj_size = sizeof(struct vsock_sock),
+	.close = vsock_close,
 #ifdef CONFIG_BPF_SYSCALL
 	.psock_update_sk_prot = vsock_bpf_update_proto,
 #endif
@@ -797,39 +799,37 @@  static bool sock_type_connectible(u16 type)
 
 static void __vsock_release(struct sock *sk, int level)
 {
-	if (sk) {
-		struct sock *pending;
-		struct vsock_sock *vsk;
-
-		vsk = vsock_sk(sk);
-		pending = NULL;	/* Compiler warning. */
+	struct vsock_sock *vsk;
+	struct sock *pending;
 
-		/* When "level" is SINGLE_DEPTH_NESTING, use the nested
-		 * version to avoid the warning "possible recursive locking
-		 * detected". When "level" is 0, lock_sock_nested(sk, level)
-		 * is the same as lock_sock(sk).
-		 */
-		lock_sock_nested(sk, level);
+	vsk = vsock_sk(sk);
+	pending = NULL;	/* Compiler warning. */
 
-		if (vsk->transport)
-			vsk->transport->release(vsk);
-		else if (sock_type_connectible(sk->sk_type))
-			vsock_remove_sock(vsk);
+	/* When "level" is SINGLE_DEPTH_NESTING, use the nested
+	 * version to avoid the warning "possible recursive locking
+	 * detected". When "level" is 0, lock_sock_nested(sk, level)
+	 * is the same as lock_sock(sk).
+	 */
+	lock_sock_nested(sk, level);
 
-		sock_orphan(sk);
-		sk->sk_shutdown = SHUTDOWN_MASK;
+	if (vsk->transport)
+		vsk->transport->release(vsk);
+	else if (sock_type_connectible(sk->sk_type))
+		vsock_remove_sock(vsk);
 
-		skb_queue_purge(&sk->sk_receive_queue);
+	sock_orphan(sk);
+	sk->sk_shutdown = SHUTDOWN_MASK;
 
-		/* Clean up any sockets that never were accepted. */
-		while ((pending = vsock_dequeue_accept(sk)) != NULL) {
-			__vsock_release(pending, SINGLE_DEPTH_NESTING);
-			sock_put(pending);
-		}
+	skb_queue_purge(&sk->sk_receive_queue);
 
-		release_sock(sk);
-		sock_put(sk);
+	/* Clean up any sockets that never were accepted. */
+	while ((pending = vsock_dequeue_accept(sk)) != NULL) {
+		__vsock_release(pending, SINGLE_DEPTH_NESTING);
+		sock_put(pending);
 	}
+
+	release_sock(sk);
+	sock_put(sk);
 }
 
 static void vsock_sk_destruct(struct sock *sk)
@@ -901,9 +901,22 @@  void vsock_data_ready(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(vsock_data_ready);
 
+/* Dummy callback required by sockmap.
+ * See unconditional call of saved_close() in sock_map_close().
+ */
+static void vsock_close(struct sock *sk, long timeout)
+{
+}
+
 static int vsock_release(struct socket *sock)
 {
-	__vsock_release(sock->sk, 0);
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		return 0;
+
+	sk->sk_prot->close(sk, 0);
+	__vsock_release(sk, 0);
 	sock->sk = NULL;
 	sock->state = SS_FREE;