@@ -5,6 +5,7 @@ vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o msg_ze
vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o
vsock_perf: vsock_perf.o msg_zerocopy_common.o
+vsock_test: LDLIBS = -lpthread
vsock_uring_test: LDLIBS = -luring
vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o
@@ -23,6 +23,7 @@
#include <sys/ioctl.h>
#include <linux/sockios.h>
#include <linux/time64.h>
+#include <pthread.h>
#include "vsock_test_zerocopy.h"
#include "timeout.h"
@@ -1788,6 +1789,101 @@ static void test_stream_connect_retry_server(const struct test_opts *opts)
close(fd);
}
+/* Signal-sender thread for the transport change test.
+ *
+ * @vargp points to the pid_t of the process to signal. The thread sends
+ * SIGUSR1 to that pid in a tight loop and never leaves the loop on its own;
+ * it switches itself to asynchronous cancellation so that a cancel request
+ * takes effect immediately. Any failure terminates the whole process.
+ */
+static void *test_stream_transport_change_thread(void *vargp)
+{
+	pid_t *pid = (pid_t *)vargp;
+	int ret;
+
+	/* We want this thread to terminate as soon as possible */
+	ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
+	if (ret) {
+		/* pthread functions return the error number and do not set
+		 * errno, so perror() would print an unrelated message here.
+		 */
+		fprintf(stderr, "pthread_setcanceltype: %d\n", ret);
+		exit(EXIT_FAILURE);
+	}
+
+	while (true) {
+		/* kill() does set errno on failure, so perror() is fine. */
+		if (kill(*pid, SIGUSR1) < 0) {
+			perror("kill");
+			exit(EXIT_FAILURE);
+		}
+	}
+	return NULL;
+}
+
+/* No-op SIGUSR1 handler.
+ *
+ * Installing a handler replaces the default SIGUSR1 action, which would
+ * terminate the process; the handler itself has nothing to do.
+ */
+static void test_transport_change_signal_handler(int signal)
+{
+	(void)signal;
+}
+
+/* Client side of the "transport change" test.
+ *
+ * Installs a no-op SIGUSR1 handler, starts a helper thread that keeps
+ * sending SIGUSR1 to this process, and then, until TIMEOUT seconds have
+ * elapsed, repeatedly creates a stream socket, connects it to the peer
+ * given in @opts and connects it again to CID 0. Afterwards the helper
+ * thread is cancelled and joined and the previous SIGUSR1 handler is
+ * restored. Any setup or teardown failure terminates the process.
+ */
+static void test_stream_transport_change_client(const struct test_opts *opts)
+{
+	__sighandler_t old_handler;
+	pid_t pid = getpid();
+	pthread_t thread_id;
+	time_t tout;
+	int ret;
+
+	old_handler = signal(SIGUSR1, test_transport_change_signal_handler);
+	if (old_handler == SIG_ERR) {
+		perror("signal");
+		exit(EXIT_FAILURE);
+	}
+
+	/* pthread functions return the error number instead of setting
+	 * errno, so report the returned code rather than calling perror().
+	 */
+	ret = pthread_create(&thread_id, NULL, test_stream_transport_change_thread, &pid);
+	if (ret) {
+		fprintf(stderr, "pthread_create: %d\n", ret);
+		exit(EXIT_FAILURE);
+	}
+
+	tout = current_nsec() + TIMEOUT * NSEC_PER_SEC;
+	do {
+		struct sockaddr_vm sa = {
+			.svm_family = AF_VSOCK,
+			.svm_cid = opts->peer_cid,
+			.svm_port = opts->peer_port,
+		};
+		int s;
+
+		s = socket(AF_VSOCK, SOCK_STREAM, 0);
+		if (s < 0) {
+			perror("socket");
+			exit(EXIT_FAILURE);
+		}
+
+		/* The result is deliberately ignored: the call is expected to
+		 * be interrupted by the SIGUSR1 stream from the helper thread.
+		 */
+		connect(s, (struct sockaddr *)&sa, sizeof(sa));
+
+		/* Setting the CID to 0 causes a transport change. */
+		sa.svm_cid = 0;
+		/* Result ignored as well; only the side effect matters. */
+		connect(s, (struct sockaddr *)&sa, sizeof(sa));
+
+		close(s);
+	} while (current_nsec() < tout);
+
+	ret = pthread_cancel(thread_id);
+	if (ret) {
+		fprintf(stderr, "pthread_cancel: %d\n", ret);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Wait for the thread to terminate */
+	ret = pthread_join(thread_id, NULL);
+	if (ret) {
+		fprintf(stderr, "pthread_join: %d\n", ret);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Restore the old handler */
+	if (signal(SIGUSR1, old_handler) == SIG_ERR) {
+		perror("signal");
+		exit(EXIT_FAILURE);
+	}
+}
+
+/* Server side of the "transport change" test.
+ *
+ * Until TIMEOUT seconds have elapsed, repeatedly obtains a socket from
+ * vsock_stream_listen() for opts->peer_port on any CID and closes it
+ * straight away. No connection is ever accepted here.
+ */
+static void test_stream_transport_change_server(const struct test_opts *opts)
+{
+	time_t deadline;
+
+	deadline = current_nsec() + TIMEOUT * NSEC_PER_SEC;
+
+	for (;;) {
+		int listen_fd;
+
+		listen_fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port);
+		close(listen_fd);
+
+		/* At least one iteration always runs before the check. */
+		if (current_nsec() >= deadline)
+			break;
+	}
+}
+
static void test_stream_linger_client(const struct test_opts *opts)
{
struct linger optval = {
@@ -1984,6 +2080,11 @@ static struct test_case test_cases[] = {
.run_client = test_stream_linger_client,
.run_server = test_stream_linger_server,
},
+ {
+ .name = "SOCK_STREAM transport change null-ptr-deref",
+ .run_client = test_stream_transport_change_client,
+ .run_server = test_stream_transport_change_server,
+ },
{},
};
Add a new test to ensure that when the transport changes a null pointer dereference does not occur[1]. Note that this test does not fail, but it may hang on the client side if it triggers a kernel oops. This works by creating a socket, trying to connect to a server, and then executing a second connect operation on the same socket but to a different CID (0). This triggers a transport change. If the connect operation is interrupted by a signal, this could cause a null-ptr-deref. Since this bug is non-deterministic, we need to try several times. It is safe to assume that the bug will show up within the timeout period. If there is a G2H transport loaded in the system, the bug is not triggered and this test will always pass. [1]https://lore.kernel.org/netdev/Z2LvdTTQR7dBmPb5@v4bel-B760M-AORUS-ELITE-AX/ Suggested-by: Hyunwoo Kim <v4bel@theori.io> Suggested-by: Michal Luczaj <mhal@rbox.co> Signed-off-by: Luigi Leonardi <leonardi@redhat.com> --- This series introduces a new test that checks for a null pointer dereference that may happen when there is a transport change[1]. This bug was fixed in [2]. Note that this test *cannot* fail, it hangs if it triggers a kernel oops. The intended use-case is to run it and then check if there is any oops in the dmesg. This test is based on Hyunwoo Kim's[3] and Michal's python reproducers[4]. 
[1]https://lore.kernel.org/netdev/Z2LvdTTQR7dBmPb5@v4bel-B760M-AORUS-ELITE-AX/ [2]https://lore.kernel.org/netdev/20250110083511.30419-1-sgarzare@redhat.com/ [3]https://lore.kernel.org/netdev/Z2LvdTTQR7dBmPb5@v4bel-B760M-AORUS-ELITE-AX/#t [4]https://lore.kernel.org/netdev/2b3062e3-bdaa-4c94-a3c0-2930595b9670@rbox.co/ --- Changes in v2: - Addressed Stefano's comments: - Timeout is now using current_nsec() - Check for return values - Style issues - Added Hyunwoo Kim to Suggested-by - Link to v1: https://lore.kernel.org/r/20250306-test_vsock-v1-0-0320b5accf92@redhat.com --- tools/testing/vsock/Makefile | 1 + tools/testing/vsock/vsock_test.c | 101 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) --- base-commit: 4d872d51bc9d7b899c1f61534e3dbde72613f627 change-id: 20250306-test_vsock-3e77a9c7a245 Best regards,