Message ID | 20241118-vsock-bpf-poll-close-v1-3-f1b9669cacdc@rbox.co (mailing list archive) |
---|---|
State | Accepted |
Commit | 135ffc7becc82cfb84936ae133da7969220b43b2 |
Headers | show |
Series | bpf, vsock: Fix poll() and close() | expand |
On Mon, Nov 18, 2024 at 10:03:43PM +0100, Michal Luczaj wrote: >vsock defines a BPF callback to be invoked when close() is called. However, >this callback is never actually executed. As a result, a closed vsock >socket is not automatically removed from the sockmap/sockhash. > >Introduce a dummy vsock_close() and make vsock_release() call proto::close. > >Note: changes in __vsock_release() look messy, but it's only due to indent >level reduction and variables xmas tree reorder. > >Fixes: 634f1a7110b4 ("vsock: support sockmap") >Signed-off-by: Michal Luczaj <mhal@rbox.co> >--- > net/vmw_vsock/af_vsock.c | 67 +++++++++++++++++++++++++++++------------------- > 1 file changed, 40 insertions(+), 27 deletions(-) > >diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >index 919da8edd03c838cbcdbf1618425da6c5ec2df1a..b52b798aa4c2926c3f233aad6cd31b4056f6fee2 100644 >--- a/net/vmw_vsock/af_vsock.c >+++ b/net/vmw_vsock/af_vsock.c >@@ -117,12 +117,14 @@ > static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); > static void vsock_sk_destruct(struct sock *sk); > static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); >+static void vsock_close(struct sock *sk, long timeout); > > /* Protocol family. */ > struct proto vsock_proto = { > .name = "AF_VSOCK", > .owner = THIS_MODULE, > .obj_size = sizeof(struct vsock_sock), >+ .close = vsock_close, > #ifdef CONFIG_BPF_SYSCALL > .psock_update_sk_prot = vsock_bpf_update_proto, > #endif >@@ -797,39 +799,37 @@ static bool sock_type_connectible(u16 type) > > static void __vsock_release(struct sock *sk, int level) > { >- if (sk) { >- struct sock *pending; >- struct vsock_sock *vsk; >- >- vsk = vsock_sk(sk); >- pending = NULL; /* Compiler warning. */ >+ struct vsock_sock *vsk; >+ struct sock *pending; > >- /* When "level" is SINGLE_DEPTH_NESTING, use the nested >- * version to avoid the warning "possible recursive locking >- * detected". When "level" is 0, lock_sock_nested(sk, level) >- * is the same as lock_sock(sk). >- */ >- lock_sock_nested(sk, level); >+ vsk = vsock_sk(sk); >+ pending = NULL; /* Compiler warning. */ > >- if (vsk->transport) >- vsk->transport->release(vsk); >- else if (sock_type_connectible(sk->sk_type)) >- vsock_remove_sock(vsk); >+ /* When "level" is SINGLE_DEPTH_NESTING, use the nested >+ * version to avoid the warning "possible recursive locking >+ * detected". When "level" is 0, lock_sock_nested(sk, level) >+ * is the same as lock_sock(sk). >+ */ >+ lock_sock_nested(sk, level); > >- sock_orphan(sk); >- sk->sk_shutdown = SHUTDOWN_MASK; >+ if (vsk->transport) >+ vsk->transport->release(vsk); >+ else if (sock_type_connectible(sk->sk_type)) >+ vsock_remove_sock(vsk); > >- skb_queue_purge(&sk->sk_receive_queue); >+ sock_orphan(sk); >+ sk->sk_shutdown = SHUTDOWN_MASK; > >- /* Clean up any sockets that never were accepted. */ >- while ((pending = vsock_dequeue_accept(sk)) != NULL) { >- __vsock_release(pending, SINGLE_DEPTH_NESTING); >- sock_put(pending); >- } >+ skb_queue_purge(&sk->sk_receive_queue); > >- release_sock(sk); >- sock_put(sk); >+ /* Clean up any sockets that never were accepted. */ >+ while ((pending = vsock_dequeue_accept(sk)) != NULL) { >+ __vsock_release(pending, SINGLE_DEPTH_NESTING); >+ sock_put(pending); > } >+ >+ release_sock(sk); >+ sock_put(sk); > } > > static void vsock_sk_destruct(struct sock *sk) >@@ -901,9 +901,22 @@ void vsock_data_ready(struct sock *sk) > } > EXPORT_SYMBOL_GPL(vsock_data_ready); > >+/* Dummy callback required by sockmap. >+ * See unconditional call of saved_close() in sock_map_close(). >+ */ >+static void vsock_close(struct sock *sk, long timeout) >+{ >+} >+ > static int vsock_release(struct socket *sock) > { >- __vsock_release(sock->sk, 0); >+ struct sock *sk = sock->sk; >+ >+ if (!sk) >+ return 0; Compared with before, now we return earlier and so we don't set SS_FREE, could it be risky? I think no, because in theory we have already set it in a previous call, right? Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> >+ >+ sk->sk_prot->close(sk, 0); >+ __vsock_release(sk, 0); > sock->sk = NULL; > sock->state = SS_FREE; > > >-- >2.46.2 >
On 11/21/24 10:22, Stefano Garzarella wrote: > On Mon, Nov 18, 2024 at 10:03:43PM +0100, Michal Luczaj wrote: >> vsock defines a BPF callback to be invoked when close() is called. However, >> this callback is never actually executed. As a result, a closed vsock >> socket is not automatically removed from the sockmap/sockhash. >> >> Introduce a dummy vsock_close() and make vsock_release() call proto::close. >> >> Note: changes in __vsock_release() look messy, but it's only due to indent >> level reduction and variables xmas tree reorder. >> >> Fixes: 634f1a7110b4 ("vsock: support sockmap") >> Signed-off-by: Michal Luczaj <mhal@rbox.co> >> --- >> net/vmw_vsock/af_vsock.c | 67 +++++++++++++++++++++++++++++------------------- >> 1 file changed, 40 insertions(+), 27 deletions(-) >> >> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >> index 919da8edd03c838cbcdbf1618425da6c5ec2df1a..b52b798aa4c2926c3f233aad6cd31b4056f6fee2 100644 >> --- a/net/vmw_vsock/af_vsock.c >> +++ b/net/vmw_vsock/af_vsock.c >> @@ -117,12 +117,14 @@ >> static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); >> static void vsock_sk_destruct(struct sock *sk); >> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); >> +static void vsock_close(struct sock *sk, long timeout); >> >> /* Protocol family. */ >> struct proto vsock_proto = { >> .name = "AF_VSOCK", >> .owner = THIS_MODULE, >> .obj_size = sizeof(struct vsock_sock), >> + .close = vsock_close, >> #ifdef CONFIG_BPF_SYSCALL >> .psock_update_sk_prot = vsock_bpf_update_proto, >> #endif >> @@ -797,39 +799,37 @@ static bool sock_type_connectible(u16 type) >> >> static void __vsock_release(struct sock *sk, int level) >> { >> - if (sk) { >> - struct sock *pending; >> - struct vsock_sock *vsk; >> - >> - vsk = vsock_sk(sk); >> - pending = NULL; /* Compiler warning. */ >> + struct vsock_sock *vsk; >> + struct sock *pending; >> >> - /* When "level" is SINGLE_DEPTH_NESTING, use the nested >> - * version to avoid the warning "possible recursive locking >> - * detected". When "level" is 0, lock_sock_nested(sk, level) >> - * is the same as lock_sock(sk). >> - */ >> - lock_sock_nested(sk, level); >> + vsk = vsock_sk(sk); >> + pending = NULL; /* Compiler warning. */ >> >> - if (vsk->transport) >> - vsk->transport->release(vsk); >> - else if (sock_type_connectible(sk->sk_type)) >> - vsock_remove_sock(vsk); >> + /* When "level" is SINGLE_DEPTH_NESTING, use the nested >> + * version to avoid the warning "possible recursive locking >> + * detected". When "level" is 0, lock_sock_nested(sk, level) >> + * is the same as lock_sock(sk). >> + */ >> + lock_sock_nested(sk, level); >> >> - sock_orphan(sk); >> - sk->sk_shutdown = SHUTDOWN_MASK; >> + if (vsk->transport) >> + vsk->transport->release(vsk); >> + else if (sock_type_connectible(sk->sk_type)) >> + vsock_remove_sock(vsk); >> >> - skb_queue_purge(&sk->sk_receive_queue); >> + sock_orphan(sk); >> + sk->sk_shutdown = SHUTDOWN_MASK; >> >> - /* Clean up any sockets that never were accepted. */ >> - while ((pending = vsock_dequeue_accept(sk)) != NULL) { >> - __vsock_release(pending, SINGLE_DEPTH_NESTING); >> - sock_put(pending); >> - } >> + skb_queue_purge(&sk->sk_receive_queue); >> >> - release_sock(sk); >> - sock_put(sk); >> + /* Clean up any sockets that never were accepted. */ >> + while ((pending = vsock_dequeue_accept(sk)) != NULL) { >> + __vsock_release(pending, SINGLE_DEPTH_NESTING); >> + sock_put(pending); >> } >> + >> + release_sock(sk); >> + sock_put(sk); >> } >> >> static void vsock_sk_destruct(struct sock *sk) >> @@ -901,9 +901,22 @@ void vsock_data_ready(struct sock *sk) >> } >> EXPORT_SYMBOL_GPL(vsock_data_ready); >> >> +/* Dummy callback required by sockmap. >> + * See unconditional call of saved_close() in sock_map_close(). >> + */ >> +static void vsock_close(struct sock *sk, long timeout) >> +{ >> +} >> + >> static int vsock_release(struct socket *sock) >> { >> - __vsock_release(sock->sk, 0); >> + struct sock *sk = sock->sk; >> + >> + if (!sk) >> + return 0; > > Compared with before, now we return earlier and so we don't set SS_FREE, > could it be risky? > > I think no, because in theory we have already set it in a previous call, > right? Yeah, and is there actually a way to call vsock_release() for a second time? The only caller I see is __sock_release(), which won't allow that. As for the sockets that never had ->sk assigned, I assume it doesn't matter. > Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> > >> + >> + sk->sk_prot->close(sk, 0); >> + __vsock_release(sk, 0); >> sock->sk = NULL; >> sock->state = SS_FREE;
On Thu, Nov 21, 2024 at 8:54 PM Michal Luczaj <mhal@rbox.co> wrote: > > On 11/21/24 10:22, Stefano Garzarella wrote: > > On Mon, Nov 18, 2024 at 10:03:43PM +0100, Michal Luczaj wrote: > >> vsock defines a BPF callback to be invoked when close() is called. However, > >> this callback is never actually executed. As a result, a closed vsock > >> socket is not automatically removed from the sockmap/sockhash. > >> > >> Introduce a dummy vsock_close() and make vsock_release() call proto::close. > >> > >> Note: changes in __vsock_release() look messy, but it's only due to indent > >> level reduction and variables xmas tree reorder. > >> > >> Fixes: 634f1a7110b4 ("vsock: support sockmap") > >> Signed-off-by: Michal Luczaj <mhal@rbox.co> > >> --- > >> net/vmw_vsock/af_vsock.c | 67 +++++++++++++++++++++++++++++------------------- > >> 1 file changed, 40 insertions(+), 27 deletions(-) > >> > >> diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c > >> index 919da8edd03c838cbcdbf1618425da6c5ec2df1a..b52b798aa4c2926c3f233aad6cd31b4056f6fee2 100644 > >> --- a/net/vmw_vsock/af_vsock.c > >> +++ b/net/vmw_vsock/af_vsock.c > >> @@ -117,12 +117,14 @@ > >> static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); > >> static void vsock_sk_destruct(struct sock *sk); > >> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); > >> +static void vsock_close(struct sock *sk, long timeout); > >> > >> /* Protocol family. */ > >> struct proto vsock_proto = { > >> .name = "AF_VSOCK", > >> .owner = THIS_MODULE, > >> .obj_size = sizeof(struct vsock_sock), > >> + .close = vsock_close, > >> #ifdef CONFIG_BPF_SYSCALL > >> .psock_update_sk_prot = vsock_bpf_update_proto, > >> #endif > >> @@ -797,39 +799,37 @@ static bool sock_type_connectible(u16 type) > >> > >> static void __vsock_release(struct sock *sk, int level) > >> { > >> - if (sk) { > >> - struct sock *pending; > >> - struct vsock_sock *vsk; > >> - > >> - vsk = vsock_sk(sk); > >> - pending = NULL; /* Compiler warning. */ > >> + struct vsock_sock *vsk; > >> + struct sock *pending; > >> > >> - /* When "level" is SINGLE_DEPTH_NESTING, use the nested > >> - * version to avoid the warning "possible recursive locking > >> - * detected". When "level" is 0, lock_sock_nested(sk, level) > >> - * is the same as lock_sock(sk). > >> - */ > >> - lock_sock_nested(sk, level); > >> + vsk = vsock_sk(sk); > >> + pending = NULL; /* Compiler warning. */ > >> > >> - if (vsk->transport) > >> - vsk->transport->release(vsk); > >> - else if (sock_type_connectible(sk->sk_type)) > >> - vsock_remove_sock(vsk); > >> + /* When "level" is SINGLE_DEPTH_NESTING, use the nested > >> + * version to avoid the warning "possible recursive locking > >> + * detected". When "level" is 0, lock_sock_nested(sk, level) > >> + * is the same as lock_sock(sk). > >> + */ > >> + lock_sock_nested(sk, level); > >> > >> - sock_orphan(sk); > >> - sk->sk_shutdown = SHUTDOWN_MASK; > >> + if (vsk->transport) > >> + vsk->transport->release(vsk); > >> + else if (sock_type_connectible(sk->sk_type)) > >> + vsock_remove_sock(vsk); > >> > >> - skb_queue_purge(&sk->sk_receive_queue); > >> + sock_orphan(sk); > >> + sk->sk_shutdown = SHUTDOWN_MASK; > >> > >> - /* Clean up any sockets that never were accepted. */ > >> - while ((pending = vsock_dequeue_accept(sk)) != NULL) { > >> - __vsock_release(pending, SINGLE_DEPTH_NESTING); > >> - sock_put(pending); > >> - } > >> + skb_queue_purge(&sk->sk_receive_queue); > >> > >> - release_sock(sk); > >> - sock_put(sk); > >> + /* Clean up any sockets that never were accepted. */ > >> + while ((pending = vsock_dequeue_accept(sk)) != NULL) { > >> + __vsock_release(pending, SINGLE_DEPTH_NESTING); > >> + sock_put(pending); > >> } > >> + > >> + release_sock(sk); > >> + sock_put(sk); > >> } > >> > >> static void vsock_sk_destruct(struct sock *sk) > >> @@ -901,9 +901,22 @@ void vsock_data_ready(struct sock *sk) > >> } > >> EXPORT_SYMBOL_GPL(vsock_data_ready); > >> > >> +/* Dummy callback required by sockmap. > >> + * See unconditional call of saved_close() in sock_map_close(). > >> + */ > >> +static void vsock_close(struct sock *sk, long timeout) > >> +{ > >> +} > >> + > >> static int vsock_release(struct socket *sock) > >> { > >> - __vsock_release(sock->sk, 0); > >> + struct sock *sk = sock->sk; > >> + > >> + if (!sk) > >> + return 0; > > > > Compared with before, now we return earlier and so we don't set SS_FREE, > > could it be risky? > > > > I think no, because in theory we have already set it in a previous call, > > right? > > Yeah, and is there actually a way to call vsock_release() for a second > time? The only caller I see is __sock_release(), which won't allow that. Maybe no, but the `sock->sk` check made me think so. > > As for the sockets that never had ->sk assigned, I assume it doesn't matter. Yep, so my R-b stays here ;-) Thanks for these great fixes, Stefano > > > Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> > > > >> + > >> + sk->sk_prot->close(sk, 0); > >> + __vsock_release(sk, 0); > >> sock->sk = NULL; > >> sock->state = SS_FREE; >
I spent some time checking and nobody but __sock_create (net/socket.c)
and vsock_release can set sock->sk to NULL.
I also ran checkpatch, everything LGTM.
Thanks for the fix!
Reviewed-by: Luigi Leonardi <leonardi@redhat.com>
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 919da8edd03c838cbcdbf1618425da6c5ec2df1a..b52b798aa4c2926c3f233aad6cd31b4056f6fee2 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -117,12 +117,14 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); static void vsock_sk_destruct(struct sock *sk); static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +static void vsock_close(struct sock *sk, long timeout); /* Protocol family. */ struct proto vsock_proto = { .name = "AF_VSOCK", .owner = THIS_MODULE, .obj_size = sizeof(struct vsock_sock), + .close = vsock_close, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = vsock_bpf_update_proto, #endif @@ -797,39 +799,37 @@ static bool sock_type_connectible(u16 type) static void __vsock_release(struct sock *sk, int level) { - if (sk) { - struct sock *pending; - struct vsock_sock *vsk; - - vsk = vsock_sk(sk); - pending = NULL; /* Compiler warning. */ + struct vsock_sock *vsk; + struct sock *pending; - /* When "level" is SINGLE_DEPTH_NESTING, use the nested - * version to avoid the warning "possible recursive locking - * detected". When "level" is 0, lock_sock_nested(sk, level) - * is the same as lock_sock(sk). - */ - lock_sock_nested(sk, level); + vsk = vsock_sk(sk); + pending = NULL; /* Compiler warning. */ - if (vsk->transport) - vsk->transport->release(vsk); - else if (sock_type_connectible(sk->sk_type)) - vsock_remove_sock(vsk); + /* When "level" is SINGLE_DEPTH_NESTING, use the nested + * version to avoid the warning "possible recursive locking + * detected". When "level" is 0, lock_sock_nested(sk, level) + * is the same as lock_sock(sk). + */ + lock_sock_nested(sk, level); - sock_orphan(sk); - sk->sk_shutdown = SHUTDOWN_MASK; + if (vsk->transport) + vsk->transport->release(vsk); + else if (sock_type_connectible(sk->sk_type)) + vsock_remove_sock(vsk); - skb_queue_purge(&sk->sk_receive_queue); + sock_orphan(sk); + sk->sk_shutdown = SHUTDOWN_MASK; - /* Clean up any sockets that never were accepted. */ - while ((pending = vsock_dequeue_accept(sk)) != NULL) { - __vsock_release(pending, SINGLE_DEPTH_NESTING); - sock_put(pending); - } + skb_queue_purge(&sk->sk_receive_queue); - release_sock(sk); - sock_put(sk); + /* Clean up any sockets that never were accepted. */ + while ((pending = vsock_dequeue_accept(sk)) != NULL) { + __vsock_release(pending, SINGLE_DEPTH_NESTING); + sock_put(pending); } + + release_sock(sk); + sock_put(sk); } static void vsock_sk_destruct(struct sock *sk) @@ -901,9 +901,22 @@ void vsock_data_ready(struct sock *sk) } EXPORT_SYMBOL_GPL(vsock_data_ready); +/* Dummy callback required by sockmap. + * See unconditional call of saved_close() in sock_map_close(). + */ +static void vsock_close(struct sock *sk, long timeout) +{ +} + static int vsock_release(struct socket *sock) { - __vsock_release(sock->sk, 0); + struct sock *sk = sock->sk; + + if (!sk) + return 0; + + sk->sk_prot->close(sk, 0); + __vsock_release(sk, 0); sock->sk = NULL; sock->state = SS_FREE;
vsock defines a BPF callback to be invoked when close() is called. However, this callback is never actually executed. As a result, a closed vsock socket is not automatically removed from the sockmap/sockhash. Introduce a dummy vsock_close() and make vsock_release() call proto::close. Note: changes in __vsock_release() look messy, but it's only due to indent level reduction and variables xmas tree reorder. Fixes: 634f1a7110b4 ("vsock: support sockmap") Signed-off-by: Michal Luczaj <mhal@rbox.co> --- net/vmw_vsock/af_vsock.c | 67 +++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 27 deletions(-)