Message ID | 20241206-test-vsock-leaks-v1-4-c31e8c875797@rbox.co (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | vsock/test: Tests for memory leaks | expand |
On Fri, Dec 06, 2024 at 07:34:54PM +0100, Michal Luczaj wrote: >Exercise the ENOMEM error path by attempting to hit net.core.optmem_max >limit on send(). > >Fixed by commit 60cf6206a1f5 ("virtio/vsock: Improve MSG_ZEROCOPY error >handling"). > >Signed-off-by: Michal Luczaj <mhal@rbox.co> >--- > tools/testing/vsock/vsock_test.c | 66 ++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 66 insertions(+) > >diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c >index f92c62b25a25d35ae63a77a0122a194051719169..6973e681490b363e3b9cedcf195844ba56da6f1d 100644 >--- a/tools/testing/vsock/vsock_test.c >+++ b/tools/testing/vsock/vsock_test.c >@@ -1552,6 +1552,67 @@ static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) > close(fd); > } > >+static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) >+{ >+ char buf[1024] = { 0 }; >+ ssize_t optmem_max; >+ int fd, res; >+ FILE *f; >+ >+ f = fopen("/proc/sys/net/core/optmem_max", "r"); >+ if (!f) { >+ perror("fopen(optmem_max)"); >+ exit(EXIT_FAILURE); >+ } >+ >+ if (fscanf(f, "%zd", &optmem_max) != 1 || optmem_max > ~0U / 2) { >+ fprintf(stderr, "fscanf(optmem_max) failed\n"); >+ exit(EXIT_FAILURE); >+ } >+ >+ fclose(f); >+ >+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); >+ if (fd < 0) { >+ perror("connect"); >+ exit(EXIT_FAILURE); >+ } >+ >+ enable_so_zerocopy_check(fd); >+ >+ /* The idea is to fail virtio_transport_init_zcopy_skb() by hitting >+ * core.sysctl_optmem_max (sysctl net.core.optmem_max) limit check in >+ * sock_omalloc(). >+ */ >+ optmem_max *= 2; >+ errno = 0; >+ do { >+ res = send(fd, buf, sizeof(buf), MSG_ZEROCOPY); >+ optmem_max -= res; >+ } while (res > 0 && optmem_max > 0); >+ >+ if (errno != ENOMEM) { >+ fprintf(stderr, "expected ENOMEM on send()\n"); This test is failing in my suite with this message (I added errno in the message that maybe we can add to understand better what we expect, and what we saw): 28 - SOCK_STREAM MSG_ZEROCOPY leak completion skb...expected ENOMEM on send() - errno 0 my env: host (L0) -> vsockvm0 (L1) -> vsockvm1 (L2) L2 is a nested guest run by L1. L1 and L2 runs v6.13-rc2 plus some commits (7cb1b466315004af98f6ba6c2546bb713ca3c237) vsockvm0$ cat /proc/sys/net/core/optmem_max 81920 vsockvm1$ cat /proc/sys/net/core/optmem_max 81920 If the server is running on vsockvm0 (host for the test POV, using vhost-vsock transport), the test passes, but if the server is running on vsockvm1 (guest, virtio-vsock transport) the test fails: vsockvm1$ vsock_test --mode=server --control-port=12345 --peer-cid=2 vsockvm0$ vsock_test --mode=client --control-host=192.168.133.3 \ --control-port=12345 --peer-cid=4 ... 26 - SOCK_STREAM leak accept queue...ok 27 - SOCK_STREAM MSG_ZEROCOPY leak MSG_ERRQUEUE...ok 28 - SOCK_STREAM MSG_ZEROCOPY leak completion skb...expected ENOMEM on send() - errno 0 Maybe because virtio_transport_init_zcopy_skb() in the vhost-vsock case is called in the context of vhost kernel task created by vhost_task_create()? Thanks, Stefano >+ exit(EXIT_FAILURE); >+ } >+ >+ close(fd); >+} >+ >+static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) >+{ >+ int fd; >+ >+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); >+ if (fd < 0) { >+ perror("accept"); >+ exit(EXIT_FAILURE); >+ } >+ >+ vsock_wait_remote_close(fd); >+ close(fd); >+} >+ > static struct test_case test_cases[] = { > { > .name = "SOCK_STREAM connection reset", >@@ -1692,6 +1753,11 @@ static struct test_case test_cases[] = { > .run_client = test_stream_msgzcopy_leak_errq_client, > .run_server = test_stream_msgzcopy_leak_errq_server, > }, >+ { >+ .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb", >+ .run_client = test_stream_msgzcopy_leak_zcskb_client, >+ .run_server = test_stream_msgzcopy_leak_zcskb_server, >+ }, > {}, > }; > > >-- >2.47.1 >
On 12/10/24 16:36, Stefano Garzarella wrote: > On Fri, Dec 06, 2024 at 07:34:54PM +0100, Michal Luczaj wrote: >> [...] >> +static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) >> +{ >> + char buf[1024] = { 0 }; >> + ssize_t optmem_max; >> + int fd, res; >> + FILE *f; >> + >> + f = fopen("/proc/sys/net/core/optmem_max", "r"); >> + if (!f) { >> + perror("fopen(optmem_max)"); >> + exit(EXIT_FAILURE); >> + } >> + >> + if (fscanf(f, "%zd", &optmem_max) != 1 || optmem_max > ~0U / 2) { >> + fprintf(stderr, "fscanf(optmem_max) failed\n"); >> + exit(EXIT_FAILURE); >> + } >> + >> + fclose(f); >> + >> + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); >> + if (fd < 0) { >> + perror("connect"); >> + exit(EXIT_FAILURE); >> + } >> + >> + enable_so_zerocopy_check(fd); >> + >> + /* The idea is to fail virtio_transport_init_zcopy_skb() by hitting >> + * core.sysctl_optmem_max (sysctl net.core.optmem_max) limit check in >> + * sock_omalloc(). >> + */ >> + optmem_max *= 2; >> + errno = 0; >> + do { >> + res = send(fd, buf, sizeof(buf), MSG_ZEROCOPY); >> + optmem_max -= res; >> + } while (res > 0 && optmem_max > 0); >> + >> + if (errno != ENOMEM) { >> + fprintf(stderr, "expected ENOMEM on send()\n"); > > This test is failing in my suite with this message (I added errno > in the message that maybe we can add to understand better what we > expect, and what we saw) [...] Thanks, can reproduce. And I'm surprised it ever worked at all. What I did is completely and utterly wrong :) The idea was to exhaust sk_omem_alloc by growing some skbs. First thing I've missed: no matter the size of the buffer being send(), sock_omalloc() is always requested with size=0[*]. Sure, skb->truesize is non-zero, so it does bump the counter, but it won't work as intended. Then comes vhost transport: sk_omem_alloc won't stay incremented, because skbs are processed and consumed. That's a race between transport and us. So here's another desperate attempt to do it without elevated privileges: exhaust sk_omem_alloc by abusing sendmsg()'s `sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL)`. Code below, tested under what I hope resembles your nested setup, i.e. L1$ --mode=server --peer-cid=4 L2$ --mode=client --peer-cid=2 L1$ --mode=client --peer-cid=4 L2$ --mode=server --peer-cid=2 Please let me know if it works for you. That said, I really think this test should be scrapped. I'm afraid it will break sooner or later. And since kmemleak needs root anyway, perhaps it's better to use failslab fault injection for this? [*] It's the only caller. Should @size be dropped from sock_omalloc()? Sorry for the mess, Michal From 52cd25e49649f853e75ef82c90ff360ee0a54a50 Mon Sep 17 00:00:00 2001 From: Michal Luczaj <mhal@rbox.co> Date: Wed, 4 Dec 2024 02:38:16 +0100 Subject: [PATCH] vsock/test: Add test for MSG_ZEROCOPY completion memory leak Exercise the ENOMEM error path by attempting to hit net.core.optmem_max limit on send(). Fixed by commit 60cf6206a1f5 ("virtio/vsock: Improve MSG_ZEROCOPY error handling"). Signed-off-by: Michal Luczaj <mhal@rbox.co> --- tools/testing/vsock/vsock_test.c | 82 ++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 0a930803227a..d64e681f2904 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1552,6 +1552,83 @@ static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) close(fd); } +static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) +{ + size_t optmem_max, chunk_size; + struct msghdr msg = { 0 }; + struct iovec iov = { 0 }; + char *chunk, buf = 'x'; + int fd, res; + FILE *f; + + f = fopen("/proc/sys/net/core/optmem_max", "r"); + if (!f) { + perror("fopen(optmem_max)"); + exit(EXIT_FAILURE); + } + + if (fscanf(f, "%zu", &optmem_max) != 1) { + fprintf(stderr, "fscanf(optmem_max) failed\n"); + exit(EXIT_FAILURE); + } + + fclose(f); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + + /* The idea is to fail virtio_transport_init_zcopy_skb() by hitting + * core.sysctl_optmem_max (sysctl net.core.optmem_max) limit check in + * sock_omalloc(sk, 0, GFP_KERNEL). Stuff sk->sk_omem_alloc with cmsg, + * see net/socket.c:____sys_sendmsg(). + */ + + chunk_size = CMSG_SPACE(optmem_max - 1); + chunk = malloc(chunk_size); + if (!chunk) { + perror("malloc"); + exit(EXIT_FAILURE); + } + memset(chunk, 0, chunk_size); + + iov.iov_base = &buf; + iov.iov_len = 1; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = chunk; + msg.msg_controllen = optmem_max - 1; + + errno = 0; + res = sendmsg(fd, &msg, MSG_ZEROCOPY); + if (res >= 0 || errno != ENOMEM) { + fprintf(stderr, "expected ENOMEM, got errno=%d res=%d\n", + errno, res); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1692,6 +1769,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_msgzcopy_leak_errq_client, .run_server = test_stream_msgzcopy_leak_errq_server, }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb", + .run_client = test_stream_msgzcopy_leak_zcskb_client, + .run_server = test_stream_msgzcopy_leak_zcskb_server, + }, {}, };
On Thu, Dec 12, 2024 at 07:26:39PM +0100, Michal Luczaj wrote: >On 12/10/24 16:36, Stefano Garzarella wrote: >> On Fri, Dec 06, 2024 at 07:34:54PM +0100, Michal Luczaj wrote: >>> [...] >>> +static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) >>> +{ >>> + char buf[1024] = { 0 }; >>> + ssize_t optmem_max; >>> + int fd, res; >>> + FILE *f; >>> + >>> + f = fopen("/proc/sys/net/core/optmem_max", "r"); >>> + if (!f) { >>> + perror("fopen(optmem_max)"); >>> + exit(EXIT_FAILURE); >>> + } >>> + >>> + if (fscanf(f, "%zd", &optmem_max) != 1 || optmem_max > ~0U / 2) { >>> + fprintf(stderr, "fscanf(optmem_max) failed\n"); >>> + exit(EXIT_FAILURE); >>> + } >>> + >>> + fclose(f); >>> + >>> + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); >>> + if (fd < 0) { >>> + perror("connect"); >>> + exit(EXIT_FAILURE); >>> + } >>> + >>> + enable_so_zerocopy_check(fd); >>> + >>> + /* The idea is to fail virtio_transport_init_zcopy_skb() by hitting >>> + * core.sysctl_optmem_max (sysctl net.core.optmem_max) limit check in >>> + * sock_omalloc(). >>> + */ >>> + optmem_max *= 2; >>> + errno = 0; >>> + do { >>> + res = send(fd, buf, sizeof(buf), MSG_ZEROCOPY); >>> + optmem_max -= res; >>> + } while (res > 0 && optmem_max > 0); >>> + >>> + if (errno != ENOMEM) { >>> + fprintf(stderr, "expected ENOMEM on send()\n"); >> >> This test is failing in my suite with this message (I added errno >> in the message that maybe we can add to understand better what we >> expect, and what we saw) [...] > >Thanks, can reproduce. And I'm surprised it ever worked at all. What I did >is completely and utterly wrong :) > >The idea was to exhaust sk_omem_alloc by growing some skbs. First thing >I've missed: no matter the size of the buffer being send(), sock_omalloc() >is always requested with size=0[*]. Sure, skb->truesize is non-zero, so it >does bump the counter, but it won't work as intended. > >Then comes vhost transport: sk_omem_alloc won't stay incremented, because >skbs are processed and consumed. That's a race between transport and us. I see, thanks for the details! > >So here's another desperate attempt to do it without elevated privileges: >exhaust sk_omem_alloc by abusing sendmsg()'s `sock_kmalloc(sock->sk, >ctl_len, GFP_KERNEL)`. Code below, tested under what I hope resembles your >nested setup, i.e. > >L1$ --mode=server --peer-cid=4 >L2$ --mode=client --peer-cid=2 > >L1$ --mode=client --peer-cid=4 >L2$ --mode=server --peer-cid=2 > >Please let me know if it works for you. Yep, tested several times without failures. > >That said, I really think this test should be scrapped. I'm afraid it will >break sooner or later. And since kmemleak needs root anyway, perhaps it's >better to use failslab fault injection for this? As you prefer! I'd be for merging this new version, but I would ask you to put a comment above the function with your concerns about possible failures in the future and possibly how to implement it with more privileges. If they happen we always have time to remove the test or extend it to use more privileged things. > >[*] It's the only caller. Should @size be dropped from sock_omalloc()? Oh, I see, more a question for net maintainer, but I'd agree with you. So I think you can try sending a patch to net-next for that. > >Sorry for the mess, Don't worry at all! Really appreciated your help with vsock fixes and tests. Stefano >Michal > >From 52cd25e49649f853e75ef82c90ff360ee0a54a50 Mon Sep 17 00:00:00 2001 >From: Michal Luczaj <mhal@rbox.co> >Date: Wed, 4 Dec 2024 02:38:16 +0100 >Subject: [PATCH] vsock/test: Add test for MSG_ZEROCOPY completion memory leak > >Exercise the ENOMEM error path by attempting to hit net.core.optmem_max >limit on send(). > >Fixed by commit 60cf6206a1f5 ("virtio/vsock: Improve MSG_ZEROCOPY error >handling"). > >Signed-off-by: Michal Luczaj <mhal@rbox.co> >--- > tools/testing/vsock/vsock_test.c | 82 ++++++++++++++++++++++++++++++++ > 1 file changed, 82 insertions(+) > >diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c >index 0a930803227a..d64e681f2904 100644 >--- a/tools/testing/vsock/vsock_test.c >+++ b/tools/testing/vsock/vsock_test.c >@@ -1552,6 +1552,83 @@ static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) > close(fd); > } > >+static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) >+{ >+ size_t optmem_max, chunk_size; >+ struct msghdr msg = { 0 }; >+ struct iovec iov = { 0 }; >+ char *chunk, buf = 'x'; >+ int fd, res; >+ FILE *f; >+ >+ f = fopen("/proc/sys/net/core/optmem_max", "r"); >+ if (!f) { >+ perror("fopen(optmem_max)"); >+ exit(EXIT_FAILURE); >+ } >+ >+ if (fscanf(f, "%zu", &optmem_max) != 1) { >+ fprintf(stderr, "fscanf(optmem_max) failed\n"); >+ exit(EXIT_FAILURE); >+ } >+ >+ fclose(f); >+ >+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); >+ if (fd < 0) { >+ perror("connect"); >+ exit(EXIT_FAILURE); >+ } >+ >+ enable_so_zerocopy_check(fd); >+ >+ /* The idea is to fail virtio_transport_init_zcopy_skb() by hitting >+ * core.sysctl_optmem_max (sysctl net.core.optmem_max) limit check in >+ * sock_omalloc(sk, 0, GFP_KERNEL). Stuff sk->sk_omem_alloc with cmsg, >+ * see net/socket.c:____sys_sendmsg(). >+ */ >+ >+ chunk_size = CMSG_SPACE(optmem_max - 1); >+ chunk = malloc(chunk_size); >+ if (!chunk) { >+ perror("malloc"); >+ exit(EXIT_FAILURE); >+ } >+ memset(chunk, 0, chunk_size); >+ >+ iov.iov_base = &buf; >+ iov.iov_len = 1; >+ >+ msg.msg_iov = &iov; >+ msg.msg_iovlen = 1; >+ msg.msg_control = chunk; >+ msg.msg_controllen = optmem_max - 1; >+ >+ errno = 0; >+ res = sendmsg(fd, &msg, MSG_ZEROCOPY); >+ if (res >= 0 || errno != ENOMEM) { >+ fprintf(stderr, "expected ENOMEM, got errno=%d res=%d\n", >+ errno, res); >+ exit(EXIT_FAILURE); >+ } >+ >+ close(fd); >+} >+ >+static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) >+{ >+ int fd; >+ >+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); >+ if (fd < 0) { >+ perror("accept"); >+ exit(EXIT_FAILURE); >+ } >+ >+ vsock_wait_remote_close(fd); >+ close(fd); >+} >+ > static struct test_case test_cases[] = { > { > .name = "SOCK_STREAM connection reset", >@@ -1692,6 +1769,11 @@ static struct test_case test_cases[] = { > .run_client = test_stream_msgzcopy_leak_errq_client, > .run_server = test_stream_msgzcopy_leak_errq_server, > }, >+ { >+ .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb", >+ .run_client = test_stream_msgzcopy_leak_zcskb_client, >+ .run_server = test_stream_msgzcopy_leak_zcskb_server, >+ }, > {}, > }; > >-- >2.47.1 > >
On 12/13/24 15:33, Stefano Garzarella wrote: > On Thu, Dec 12, 2024 at 07:26:39PM +0100, Michal Luczaj wrote: >> [...] >> That said, I really think this test should be scrapped. I'm afraid it will >> break sooner or later. And since kmemleak needs root anyway, perhaps it's >> better to use failslab fault injection for this? > > As you prefer! > > I'd be for merging this new version, but I would ask you to put a > comment above the function with your concerns about possible failures > in the future and possibly how to implement it with more privileges. > If they happen we always have time to remove the test or extend it to > use more privileged things. All right, here's v2: https://lore.kernel.org/netdev/20241216-test-vsock-leaks-v2-0-55e1405742fc@rbox.co/ >> [*] It's the only caller. Should @size be dropped from sock_omalloc()? > > Oh, I see, more a question for net maintainer, but I'd agree with you. > So I think you can try sending a patch to net-next for that. I digged more and it seems this (i.e. sock_omalloc() only being called with @size=0) was the case from the very beginning[*]. I understand it was deliberate and just awaits for some other users of ancillary sbks. [*] https://lore.kernel.org/netdev/20170803202945.70750-1-willemdebruijn.kernel@gmail.com/
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index f92c62b25a25d35ae63a77a0122a194051719169..6973e681490b363e3b9cedcf195844ba56da6f1d 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1552,6 +1552,67 @@ static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) close(fd); } +static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) +{ + char buf[1024] = { 0 }; + ssize_t optmem_max; + int fd, res; + FILE *f; + + f = fopen("/proc/sys/net/core/optmem_max", "r"); + if (!f) { + perror("fopen(optmem_max)"); + exit(EXIT_FAILURE); + } + + if (fscanf(f, "%zd", &optmem_max) != 1 || optmem_max > ~0U / 2) { + fprintf(stderr, "fscanf(optmem_max) failed\n"); + exit(EXIT_FAILURE); + } + + fclose(f); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + + /* The idea is to fail virtio_transport_init_zcopy_skb() by hitting + * core.sysctl_optmem_max (sysctl net.core.optmem_max) limit check in + * sock_omalloc(). + */ + optmem_max *= 2; + errno = 0; + do { + res = send(fd, buf, sizeof(buf), MSG_ZEROCOPY); + optmem_max -= res; + } while (res > 0 && optmem_max > 0); + + if (errno != ENOMEM) { + fprintf(stderr, "expected ENOMEM on send()\n"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1692,6 +1753,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_msgzcopy_leak_errq_client, .run_server = test_stream_msgzcopy_leak_errq_server, }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb", + .run_client = test_stream_msgzcopy_leak_zcskb_client, + .run_server = test_stream_msgzcopy_leak_zcskb_server, + }, {}, };
Exercise the ENOMEM error path by attempting to hit net.core.optmem_max limit on send(). Fixed by commit 60cf6206a1f5 ("virtio/vsock: Improve MSG_ZEROCOPY error handling"). Signed-off-by: Michal Luczaj <mhal@rbox.co> --- tools/testing/vsock/vsock_test.c | 66 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+)