Message ID | 20220712235310.1935121-3-joannelkoong@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Add a second bind table hashed by port + address | expand |
On Tue, 2022-07-12 at 16:53 -0700, Joanne Koong wrote: > This test populates the bhash table for a given port with > MAX_THREADS * MAX_CONNECTIONS sockets, and then times how long > a bind request on the port takes. > > When populating the bhash table, we create the sockets and then bind > the sockets to the same address and port (SO_REUSEADDR and SO_REUSEPORT > are set). When timing how long a bind on the port takes, we bind on a > different address without SO_REUSEPORT set. We do not set SO_REUSEPORT > because we are interested in the case where the bind request does not > go through the tb->fastreuseport path, which is fragile (eg > tb->fastreuseport path does not work if binding with a different uid). > > On my local machine, I see: > ipv4: > before - 0.002317 seconds > with bhash2 - 0.000020 seconds > > ipv6: > before - 0.002431 seconds > with bhash2 - 0.000021 seconds > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > --- > tools/testing/selftests/net/.gitignore | 3 +- > tools/testing/selftests/net/Makefile | 3 + > tools/testing/selftests/net/bind_bhash.c | 119 ++++++++++++++++++++++ > tools/testing/selftests/net/bind_bhash.sh | 23 +++++ > 4 files changed, 147 insertions(+), 1 deletion(-) > create mode 100644 tools/testing/selftests/net/bind_bhash.c > create mode 100755 tools/testing/selftests/net/bind_bhash.sh > > diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore > index 1257baa79286..5b1adf6e29ae 100644 > --- a/tools/testing/selftests/net/.gitignore > +++ b/tools/testing/selftests/net/.gitignore > @@ -37,4 +37,5 @@ gro > ioam6_parser > toeplitz > cmsg_sender > -unix_connect > \ No newline at end of file > +unix_connect > +bind_bhash > diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile > index ddad703ace34..e678fc3030a2 100644 > --- a/tools/testing/selftests/net/Makefile > +++ b/tools/testing/selftests/net/Makefile > @@ -39,6 +39,7 @@ TEST_PROGS += 
vrf_strict_mode_test.sh > TEST_PROGS += arp_ndisc_evict_nocarrier.sh > TEST_PROGS += ndisc_unsolicited_na_test.sh > TEST_PROGS += stress_reuseport_listen.sh > +TEST_PROGS += bind_bhash.sh > TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh > TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh > TEST_GEN_FILES = socket nettest > @@ -59,6 +60,7 @@ TEST_GEN_FILES += toeplitz > TEST_GEN_FILES += cmsg_sender > TEST_GEN_FILES += stress_reuseport_listen > TEST_PROGS += test_vxlan_vnifiltering.sh > +TEST_GEN_FILES += bind_bhash > > TEST_FILES := settings > > @@ -70,3 +72,4 @@ include bpf/Makefile > $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma > $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread > $(OUTPUT)/tcp_inq: LDLIBS += -lpthread > +$(OUTPUT)/bind_bhash: LDLIBS += -lpthread > diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c > new file mode 100644 > index 000000000000..252e73754e76 > --- /dev/null > +++ b/tools/testing/selftests/net/bind_bhash.c > @@ -0,0 +1,119 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * This times how long it takes to bind to a port when the port already > + * has multiple sockets in its bhash table. > + * > + * In the setup(), we populate the port's bhash table with > + * MAX_THREADS * MAX_CONNECTIONS number of entries. 
> + */ > + > +#include <unistd.h> > +#include <stdio.h> > +#include <netdb.h> > +#include <pthread.h> > + > +#define MAX_THREADS 600 > +#define MAX_CONNECTIONS 40 > + > +static const char *bind_addr = "::1"; > +static const char *port; > + > +static int fd_array[MAX_THREADS][MAX_CONNECTIONS]; > + > +static int bind_socket(int opt, const char *addr) > +{ > + struct addrinfo *res, hint = {}; > + int sock_fd, reuse = 1, err; > + > + sock_fd = socket(AF_INET6, SOCK_STREAM, 0); > + if (sock_fd < 0) { > + perror("socket fd err"); > + return -1; > + } > + > + hint.ai_family = AF_INET6; > + hint.ai_socktype = SOCK_STREAM; > + > + err = getaddrinfo(addr, port, &hint, &res); > + if (err) { > + perror("getaddrinfo failed"); > + return -1; > + } > + > + if (opt) { > + err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse)); > + if (err) { > + perror("setsockopt failed"); > + return -1; > + } > + } > + > + err = bind(sock_fd, res->ai_addr, res->ai_addrlen); > + if (err) { > + perror("failed to bind to port"); > + return -1; > + } > + > + return sock_fd; > +} > + > +static void *setup(void *arg) > +{ > + int sock_fd, i; > + int *array = (int *)arg; > + > + for (i = 0; i < MAX_CONNECTIONS; i++) { > + sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr); > + if (sock_fd < 0) > + return NULL; > + array[i] = sock_fd; > + } > + > + return NULL; > +} > + > +int main(int argc, const char *argv[]) > +{ > + int listener_fd, sock_fd, i, j; > + pthread_t tid[MAX_THREADS]; > + clock_t begin, end; > + > + if (argc != 2) { > + printf("Usage: listener <port>\n"); > + return -1; > + } > + > + port = argv[1]; > + > + listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr); > + if (listen(listener_fd, 100) < 0) { > + perror("listen failed"); > + return -1; > + } > + > + /* Set up threads to populate the bhash table entry for the port */ > + for (i = 0; i < MAX_THREADS; i++) > + pthread_create(&tid[i], NULL, setup, fd_array[i]); > + > + for (i = 0; i < MAX_THREADS; 
i++) > + pthread_join(tid[i], NULL); > + > + begin = clock(); > + > + /* Bind to the same port on a different address */ > + sock_fd = bind_socket(0, "2001:0db8:0:f101::1"); I think it's better/nicer if you make this address configurable from the command line, instead of hard-coding it here. > + > + end = clock(); > + > + printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC); > + > + /* clean up */ > + close(sock_fd); > + close(listener_fd); > + for (i = 0; i < MAX_THREADS; i++) { > + for (j = 0; i < MAX_THREADS; i++) > + close(fd_array[i][j]); > + } > + > + return 0; > +} > diff --git a/tools/testing/selftests/net/bind_bhash.sh b/tools/testing/selftests/net/bind_bhash.sh > new file mode 100755 > index 000000000000..f7794d63efd2 > --- /dev/null > +++ b/tools/testing/selftests/net/bind_bhash.sh > @@ -0,0 +1,23 @@ > +#!/bin/bash > +# SPDX-License-Identifier: GPL-2.0 > + > +NR_FILES=32768 > +SAVED_NR_FILES=$(ulimit -n) > + > +setup() { > + ip addr add dev eth0 2001:0db8:0:f101::1 If you add the 'nodad' option here... > + ulimit -n $NR_FILES > + sleep 1 ... this should not be needed. Also, what about IPv4 tests? Thanks! Paolo
On Thu, Jul 14, 2022 at 2:18 AM Paolo Abeni <pabeni@redhat.com> wrote: > > On Tue, 2022-07-12 at 16:53 -0700, Joanne Koong wrote: > > This test populates the bhash table for a given port with > > MAX_THREADS * MAX_CONNECTIONS sockets, and then times how long > > a bind request on the port takes. > > > > When populating the bhash table, we create the sockets and then bind > > the sockets to the same address and port (SO_REUSEADDR and SO_REUSEPORT > > are set). When timing how long a bind on the port takes, we bind on a > > different address without SO_REUSEPORT set. We do not set SO_REUSEPORT > > because we are interested in the case where the bind request does not > > go through the tb->fastreuseport path, which is fragile (eg > > tb->fastreuseport path does not work if binding with a different uid). > > > > On my local machine, I see: > > ipv4: > > before - 0.002317 seconds > > with bhash2 - 0.000020 seconds > > > > ipv6: > > before - 0.002431 seconds > > with bhash2 - 0.000021 seconds > > > > Signed-off-by: Joanne Koong <joannelkoong@gmail.com> > > --- > > tools/testing/selftests/net/.gitignore | 3 +- > > tools/testing/selftests/net/Makefile | 3 + > > tools/testing/selftests/net/bind_bhash.c | 119 ++++++++++++++++++++++ > > tools/testing/selftests/net/bind_bhash.sh | 23 +++++ > > 4 files changed, 147 insertions(+), 1 deletion(-) > > create mode 100644 tools/testing/selftests/net/bind_bhash.c > > create mode 100755 tools/testing/selftests/net/bind_bhash.sh > > > > diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore > > index 1257baa79286..5b1adf6e29ae 100644 > > --- a/tools/testing/selftests/net/.gitignore > > +++ b/tools/testing/selftests/net/.gitignore > > @@ -37,4 +37,5 @@ gro > > ioam6_parser > > toeplitz > > cmsg_sender > > -unix_connect > > \ No newline at end of file > > +unix_connect > > +bind_bhash > > diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile > > index 
ddad703ace34..e678fc3030a2 100644 > > --- a/tools/testing/selftests/net/Makefile > > +++ b/tools/testing/selftests/net/Makefile > > @@ -39,6 +39,7 @@ TEST_PROGS += vrf_strict_mode_test.sh > > TEST_PROGS += arp_ndisc_evict_nocarrier.sh > > TEST_PROGS += ndisc_unsolicited_na_test.sh > > TEST_PROGS += stress_reuseport_listen.sh > > +TEST_PROGS += bind_bhash.sh > > TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh > > TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh > > TEST_GEN_FILES = socket nettest > > @@ -59,6 +60,7 @@ TEST_GEN_FILES += toeplitz > > TEST_GEN_FILES += cmsg_sender > > TEST_GEN_FILES += stress_reuseport_listen > > TEST_PROGS += test_vxlan_vnifiltering.sh > > +TEST_GEN_FILES += bind_bhash > > > > TEST_FILES := settings > > > > @@ -70,3 +72,4 @@ include bpf/Makefile > > $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma > > $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread > > $(OUTPUT)/tcp_inq: LDLIBS += -lpthread > > +$(OUTPUT)/bind_bhash: LDLIBS += -lpthread > > diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c > > new file mode 100644 > > index 000000000000..252e73754e76 > > --- /dev/null > > +++ b/tools/testing/selftests/net/bind_bhash.c > > @@ -0,0 +1,119 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * This times how long it takes to bind to a port when the port already > > + * has multiple sockets in its bhash table. > > + * > > + * In the setup(), we populate the port's bhash table with > > + * MAX_THREADS * MAX_CONNECTIONS number of entries. 
> > + */ > > + > > +#include <unistd.h> > > +#include <stdio.h> > > +#include <netdb.h> > > +#include <pthread.h> > > + > > +#define MAX_THREADS 600 > > +#define MAX_CONNECTIONS 40 > > + > > +static const char *bind_addr = "::1"; > > +static const char *port; > > + > > +static int fd_array[MAX_THREADS][MAX_CONNECTIONS]; > > + > > +static int bind_socket(int opt, const char *addr) > > +{ > > + struct addrinfo *res, hint = {}; > > + int sock_fd, reuse = 1, err; > > + > > + sock_fd = socket(AF_INET6, SOCK_STREAM, 0); > > + if (sock_fd < 0) { > > + perror("socket fd err"); > > + return -1; > > + } > > + > > + hint.ai_family = AF_INET6; > > + hint.ai_socktype = SOCK_STREAM; > > + > > + err = getaddrinfo(addr, port, &hint, &res); > > + if (err) { > > + perror("getaddrinfo failed"); > > + return -1; > > + } > > + > > + if (opt) { > > + err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse)); > > + if (err) { > > + perror("setsockopt failed"); > > + return -1; > > + } > > + } > > + > > + err = bind(sock_fd, res->ai_addr, res->ai_addrlen); > > + if (err) { > > + perror("failed to bind to port"); > > + return -1; > > + } > > + > > + return sock_fd; > > +} > > + > > +static void *setup(void *arg) > > +{ > > + int sock_fd, i; > > + int *array = (int *)arg; > > + > > + for (i = 0; i < MAX_CONNECTIONS; i++) { > > + sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr); > > + if (sock_fd < 0) > > + return NULL; > > + array[i] = sock_fd; > > + } > > + > > + return NULL; > > +} > > + > > +int main(int argc, const char *argv[]) > > +{ > > + int listener_fd, sock_fd, i, j; > > + pthread_t tid[MAX_THREADS]; > > + clock_t begin, end; > > + > > + if (argc != 2) { > > + printf("Usage: listener <port>\n"); > > + return -1; > > + } > > + > > + port = argv[1]; > > + > > + listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr); > > + if (listen(listener_fd, 100) < 0) { > > + perror("listen failed"); > > + return -1; > > + } > > + > > + /* Set up threads to 
populate the bhash table entry for the port */ > > + for (i = 0; i < MAX_THREADS; i++) > > + pthread_create(&tid[i], NULL, setup, fd_array[i]); > > + > > + for (i = 0; i < MAX_THREADS; i++) > > + pthread_join(tid[i], NULL); > > + > > + begin = clock(); > > + > > + /* Bind to the same port on a different address */ > > + sock_fd = bind_socket(0, "2001:0db8:0:f101::1"); > > I think it's better/nicer if you make this address configurable from > the command line, instead of hard-coding it here. I will make this change for v3. > > + > > + end = clock(); > > + > > + printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC); > > + > > + /* clean up */ > > + close(sock_fd); > > + close(listener_fd); > > + for (i = 0; i < MAX_THREADS; i++) { > > + for (j = 0; i < MAX_THREADS; i++) > > + close(fd_array[i][j]); > > + } > > + > > + return 0; > > +} > > diff --git a/tools/testing/selftests/net/bind_bhash.sh b/tools/testing/selftests/net/bind_bhash.sh > > new file mode 100755 > > index 000000000000..f7794d63efd2 > > --- /dev/null > > +++ b/tools/testing/selftests/net/bind_bhash.sh > > @@ -0,0 +1,23 @@ > > +#!/bin/bash > > +# SPDX-License-Identifier: GPL-2.0 > > + > > +NR_FILES=32768 > > +SAVED_NR_FILES=$(ulimit -n) > > + > > +setup() { > > + ip addr add dev eth0 2001:0db8:0:f101::1 > > If you add the 'nodad' option here... > > > + ulimit -n $NR_FILES > > + sleep 1 > > ... this should not be needed Awesome! Thanks, I will add this 'nodad' option. > > Also what about ipv4 tests? I will update this to also include the ipv4 version of the test in v3. > > > Thanks! > > Paolo >
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 1257baa79286..5b1adf6e29ae 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -37,4 +37,5 @@ gro ioam6_parser toeplitz cmsg_sender -unix_connect \ No newline at end of file +unix_connect +bind_bhash diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ddad703ace34..e678fc3030a2 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -39,6 +39,7 @@ TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh TEST_PROGS += stress_reuseport_listen.sh +TEST_PROGS += bind_bhash.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest @@ -59,6 +60,7 @@ TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen TEST_PROGS += test_vxlan_vnifiltering.sh +TEST_GEN_FILES += bind_bhash TEST_FILES := settings @@ -70,3 +72,4 @@ include bpf/Makefile $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread $(OUTPUT)/tcp_inq: LDLIBS += -lpthread +$(OUTPUT)/bind_bhash: LDLIBS += -lpthread diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c new file mode 100644 index 000000000000..252e73754e76 --- /dev/null +++ b/tools/testing/selftests/net/bind_bhash.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This times how long it takes to bind to a port when the port already + * has multiple sockets in its bhash table. + * + * In the setup(), we populate the port's bhash table with + * MAX_THREADS * MAX_CONNECTIONS number of entries. 
+ */ + +#include <unistd.h> +#include <stdio.h> +#include <netdb.h> +#include <pthread.h> + +#define MAX_THREADS 600 +#define MAX_CONNECTIONS 40 + +static const char *bind_addr = "::1"; +static const char *port; + +static int fd_array[MAX_THREADS][MAX_CONNECTIONS]; + +static int bind_socket(int opt, const char *addr) +{ + struct addrinfo *res, hint = {}; + int sock_fd, reuse = 1, err; + + sock_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (sock_fd < 0) { + perror("socket fd err"); + return -1; + } + + hint.ai_family = AF_INET6; + hint.ai_socktype = SOCK_STREAM; + + err = getaddrinfo(addr, port, &hint, &res); + if (err) { + perror("getaddrinfo failed"); + return -1; + } + + if (opt) { + err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse)); + if (err) { + perror("setsockopt failed"); + return -1; + } + } + + err = bind(sock_fd, res->ai_addr, res->ai_addrlen); + if (err) { + perror("failed to bind to port"); + return -1; + } + + return sock_fd; +} + +static void *setup(void *arg) +{ + int sock_fd, i; + int *array = (int *)arg; + + for (i = 0; i < MAX_CONNECTIONS; i++) { + sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr); + if (sock_fd < 0) + return NULL; + array[i] = sock_fd; + } + + return NULL; +} + +int main(int argc, const char *argv[]) +{ + int listener_fd, sock_fd, i, j; + pthread_t tid[MAX_THREADS]; + clock_t begin, end; + + if (argc != 2) { + printf("Usage: listener <port>\n"); + return -1; + } + + port = argv[1]; + + listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr); + if (listen(listener_fd, 100) < 0) { + perror("listen failed"); + return -1; + } + + /* Set up threads to populate the bhash table entry for the port */ + for (i = 0; i < MAX_THREADS; i++) + pthread_create(&tid[i], NULL, setup, fd_array[i]); + + for (i = 0; i < MAX_THREADS; i++) + pthread_join(tid[i], NULL); + + begin = clock(); + + /* Bind to the same port on a different address */ + sock_fd = bind_socket(0, "2001:0db8:0:f101::1"); + + end = clock(); + + 
printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC); + + /* clean up */ + close(sock_fd); + close(listener_fd); + for (i = 0; i < MAX_THREADS; i++) { + for (j = 0; i < MAX_THREADS; i++) + close(fd_array[i][j]); + } + + return 0; +} diff --git a/tools/testing/selftests/net/bind_bhash.sh b/tools/testing/selftests/net/bind_bhash.sh new file mode 100755 index 000000000000..f7794d63efd2 --- /dev/null +++ b/tools/testing/selftests/net/bind_bhash.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NR_FILES=32768 +SAVED_NR_FILES=$(ulimit -n) + +setup() { + ip addr add dev eth0 2001:0db8:0:f101::1 + ulimit -n $NR_FILES + sleep 1 +} + +cleanup() { + ip addr del 2001:0db8:0:f101::1 dev eth0 + ulimit -n $SAVED_NR_FILES +} + +trap cleanup EXIT + +setup +./bind_bhash 443 + +exit $EXIT_STATUS
This test populates the bhash table for a given port with MAX_THREADS * MAX_CONNECTIONS sockets, and then times how long a bind request on the port takes. When populating the bhash table, we create the sockets and then bind the sockets to the same address and port (SO_REUSEADDR and SO_REUSEPORT are set). When timing how long a bind on the port takes, we bind on a different address without SO_REUSEPORT set. We do not set SO_REUSEPORT because we are interested in the case where the bind request does not go through the tb->fastreuseport path, which is fragile (eg tb->fastreuseport path does not work if binding with a different uid). On my local machine, I see: ipv4: before - 0.002317 seconds with bhash2 - 0.000020 seconds ipv6: before - 0.002431 seconds with bhash2 - 0.000021 seconds Signed-off-by: Joanne Koong <joannelkoong@gmail.com> --- tools/testing/selftests/net/.gitignore | 3 +- tools/testing/selftests/net/Makefile | 3 + tools/testing/selftests/net/bind_bhash.c | 119 ++++++++++++++++++++++ tools/testing/selftests/net/bind_bhash.sh | 23 +++++ 4 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/net/bind_bhash.c create mode 100755 tools/testing/selftests/net/bind_bhash.sh