diff mbox series

[net-next,3/3] selftests: add msg_zerocopy_uarg test

Message ID 20240409205300.1346681-4-zijianzhang@bytedance.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: socket sendmsg MSG_ZEROCOPY_UARG | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 17 this patch: 17
netdev/build_tools success Errors and warnings before: 5 this patch: 5
netdev/cc_maintainers warning 3 maintainers not CCed: pabeni@redhat.com linux-kselftest@vger.kernel.org shuah@kernel.org
netdev/build_clang fail Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success net selftest script(s) already in Makefile
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 17 this patch: 17
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: braces {} should be used on all arms of this statement WARNING: line length of 85 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Zijian Zhang April 9, 2024, 8:53 p.m. UTC
From: Zijian Zhang <zijianzhang@bytedance.com>

We update selftests/net/msg_zerocopy.c to accommodate the new flag.
The original selftest tries to retrieve notifications when the socket
is not writable. To allow a comparison with the new flag, we introduce
a new config, "cfg_notification_limit", which forces the application to
receive notifications after a given number of sendmsg calls finish.

Test results from selftests/net/msg_zerocopy.c with
cfg_notification_limit = 1. This is an unrealistic setting for
MSG_ZEROCOPY, but it approximately aligns with the semantics of
MSG_ZEROCOPY_UARG. In this case, the new flag has around 15% CPU savings
in TCP and 28% CPU savings in UDP. The numbers are in units of MB.
+---------------------+---------+---------+---------+---------+
| Test Type / Protocol| TCP v4  | TCP v6  | UDP v4  | UDP v6  |
+---------------------+---------+---------+---------+---------+
| Copy                | 5517    | 5345    | 9158    | 8767    |
+---------------------+---------+---------+---------+---------+
| ZCopy               | 5588    | 5439    | 8538    | 8169    |
+---------------------+---------+---------+---------+---------+
| New ZCopy           | 6517    | 6103    | 11000   | 10839   |
+---------------------+---------+---------+---------+---------+
| ZCopy / Copy        | 101.29% | 101.76% | 93.23%  | 93.18%  |
+---------------------+---------+---------+---------+---------+
| New ZCopy / Copy    | 118.13% | 114.18% | 120.11% | 123.63% |
+---------------------+---------+---------+---------+---------+

With cfg_notification_limit = 8, there is less poll + recvmsg overhead;
the new flag performs 7% better in TCP and 4% better in UDP.
The numbers are in units of MB.
+---------------------+---------+---------+---------+---------+
| Test Type / Protocol| TCP v4  | TCP v6  | UDP v4  | UDP v6  |
+---------------------+---------+---------+---------+---------+
| Copy                | 5328    | 5159    | 8581    | 8457    |
+---------------------+---------+---------+---------+---------+
| ZCopy               | 5877    | 5568    | 10314   | 10091   |
+---------------------+---------+---------+---------+---------+
| New ZCopy           | 6254    | 5901    | 10674   | 10293   |
+---------------------+---------+---------+---------+---------+
| ZCopy / Copy        | 110.30% | 107.93% | 120.20% | 119.32% |
+---------------------+---------+---------+---------+---------+
| New ZCopy / Copy    | 117.38% | 114.38% | 124.39% | 121.71% |
+---------------------+---------+---------+---------+---------+

Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com>
Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com>
---
 tools/testing/selftests/net/msg_zerocopy.c  | 132 ++++++++++++++++++--
 tools/testing/selftests/net/msg_zerocopy.sh |   1 +
 2 files changed, 122 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 8e595216a0af..0ca5e8509032 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -1,4 +1,5 @@ 
-/* Evaluate MSG_ZEROCOPY
+// SPDX-License-Identifier: GPL-2.0
+/* Evaluate MSG_ZEROCOPY and MSG_ZEROCOPY_UARG
  *
  * Send traffic between two processes over one of the supported
  * protocols and modes:
@@ -66,14 +67,29 @@ 
 #define SO_ZEROCOPY	60
 #endif
 
+#ifndef SO_ZEROCOPY_NOTIFICATION
+#define SO_ZEROCOPY_NOTIFICATION	78
+#endif
+
 #ifndef SO_EE_CODE_ZEROCOPY_COPIED
 #define SO_EE_CODE_ZEROCOPY_COPIED	1
 #endif
 
+#ifndef MSG_ZEROCOPY_UARG
+#define MSG_ZEROCOPY_UARG	0x2000000
+#endif
+
 #ifndef MSG_ZEROCOPY
 #define MSG_ZEROCOPY	0x4000000
 #endif
 
+#ifndef SOCK_USR_ZC_INFO_MAX
+#define SOCK_USR_ZC_INFO_MAX	8
+#endif
+
+#define ZEROCOPY_MSGERR_NOTIFICATION 1
+#define ZEROCOPY_USER_ARG_NOTIFICATION 2
+
 static int  cfg_cork;
 static bool cfg_cork_mixed;
 static int  cfg_cpu		= -1;		/* default: pin to last cpu */
@@ -87,7 +103,7 @@  static int  cfg_verbose;
 static int  cfg_waittime_ms	= 500;
 static bool cfg_notification_order_check;
 static int  cfg_notification_limit = 32;
-static bool cfg_zerocopy;
+static int  cfg_zerocopy;           /* 1 for MSG_ZEROCOPY, 2 for MSG_ZEROCOPY_UARG */
 
 static socklen_t cfg_alen;
 static struct sockaddr_storage cfg_dst_addr;
@@ -169,6 +185,19 @@  static int do_accept(int fd)
 	return fd;
 }
 
+static void add_zcopy_user_arg(struct msghdr *msg, void *usr_addr)
+{
+	struct cmsghdr *cm;
+
+	if (!msg->msg_control)
+		error(1, errno, "NULL user arg");
+	cm = (void *)msg->msg_control;
+	cm->cmsg_len = CMSG_LEN(sizeof(void *));
+	cm->cmsg_level = SOL_SOCKET;
+	cm->cmsg_type = SO_ZEROCOPY_NOTIFICATION;
+	memcpy(CMSG_DATA(cm), &usr_addr, sizeof(usr_addr));
+}
+
 static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
 {
 	struct cmsghdr *cm;
@@ -182,18 +211,55 @@  static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
 	memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
 }
 
-static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
+static void do_recv_completion_user_arg(void *p)
+{
+	int i;
+	__u32 hi, lo, range;
+	__u8 zerocopy;
+	struct tx_usr_zcopy_info *zc_info_p = (struct tx_usr_zcopy_info *)p;
+
+	for (i = 0; i < zc_info_p->length; ++i) {
+		struct tx_msg_zcopy_info elem = zc_info_p->info[i];
+
+		hi = elem.hi;
+		lo = elem.lo;
+		zerocopy = elem.zerocopy;
+		range = hi - lo + 1;
+
+		if (cfg_notification_order_check && lo != next_completion)
+			fprintf(stderr, "gap: %u..%u does not append to %u\n",
+				lo, hi, next_completion);
+		next_completion = hi + 1;
+
+		if (zerocopied == -1)
+			zerocopied = zerocopy;
+		else if (zerocopied != zerocopy) {
+			fprintf(stderr, "serr: inconsistent\n");
+			zerocopied = zerocopy;
+		}
+
+		completions += range;
+
+		if (cfg_verbose >= 2)
+			fprintf(stderr, "completed: %u (h=%u l=%u)\n",
+				range, hi, lo);
+	}
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, int do_zerocopy, int domain)
 {
 	int ret, len, i, flags;
 	static uint32_t cookie;
-	char ckbuf[CMSG_SPACE(sizeof(cookie))];
+	/* ckbuf is used to either hold uint32_t cookie or void *pointer */
+	char ckbuf[CMSG_SPACE(sizeof(void *))];
+	struct tx_usr_zcopy_info zc_info;
 
 	len = 0;
 	for (i = 0; i < msg->msg_iovlen; i++)
 		len += msg->msg_iov[i].iov_len;
 
 	flags = MSG_DONTWAIT;
-	if (do_zerocopy) {
+	if (do_zerocopy == ZEROCOPY_MSGERR_NOTIFICATION) {
 		flags |= MSG_ZEROCOPY;
 		if (domain == PF_RDS) {
 			memset(&msg->msg_control, 0, sizeof(msg->msg_control));
@@ -201,6 +267,12 @@  static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
 			msg->msg_control = (struct cmsghdr *)ckbuf;
 			add_zcopy_cookie(msg, ++cookie);
 		}
+	} else if (do_zerocopy == ZEROCOPY_USER_ARG_NOTIFICATION) {
+		flags |= MSG_ZEROCOPY_UARG;
+		memset(&zc_info, 0, sizeof(zc_info));
+		msg->msg_controllen = CMSG_SPACE(sizeof(void *));
+		msg->msg_control = (struct cmsghdr *)ckbuf;
+		add_zcopy_user_arg(msg, &zc_info);
 	}
 
 	ret = sendmsg(fd, msg, flags);
@@ -211,13 +283,16 @@  static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
 	if (cfg_verbose && ret != len)
 		fprintf(stderr, "send: ret=%u != %u\n", ret, len);
 
+	if (do_zerocopy == ZEROCOPY_USER_ARG_NOTIFICATION)
+		do_recv_completion_user_arg(&zc_info);
+
 	if (len) {
 		packets++;
 		bytes += ret;
 		if (do_zerocopy && ret)
 			expected_completions++;
 	}
-	if (do_zerocopy && domain == PF_RDS) {
+	if (msg->msg_control) {
 		msg->msg_control = NULL;
 		msg->msg_controllen = 0;
 	}
@@ -480,6 +555,36 @@  static void do_recv_remaining_completions(int fd, int domain)
 			completions, expected_completions);
 }
 
+static void do_new_recv_remaining_completions(int fd, struct msghdr *msg)
+{
+	int ret, flags;
+	struct tx_usr_zcopy_info zc_info;
+	int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
+	char ckbuf[CMSG_SPACE(sizeof(void *))];
+
+	flags = MSG_DONTWAIT | MSG_ZEROCOPY_UARG;
+	msg->msg_iovlen = 0;
+	msg->msg_controllen = CMSG_SPACE(sizeof(void *));
+	msg->msg_control = (struct cmsghdr *)ckbuf;
+	add_zcopy_user_arg(msg, &zc_info);
+
+	while (completions < expected_completions &&
+			gettimeofday_ms() < tstop) {
+		memset(&zc_info, 0, sizeof(zc_info));
+		ret = sendmsg(fd, msg, flags);
+		if (ret == -1 && errno == EAGAIN)
+			return;
+		if (ret == -1)
+			error(1, errno, "send");
+
+		do_recv_completion_user_arg(&zc_info);
+	}
+
+	if (completions < expected_completions)
+		fprintf(stderr, "missing notifications: %lu < %lu\n",
+			completions, expected_completions);
+}
+
 static void do_tx(int domain, int type, int protocol)
 {
 	struct iovec iov[3] = { {0} };
@@ -552,13 +657,14 @@  static void do_tx(int domain, int type, int protocol)
 			do_sendmsg(fd, &msg, cfg_zerocopy, domain);
 		sendmsg_counter++;
 
-		if (sendmsg_counter == cfg_notification_limit && cfg_zerocopy) {
+		if (sendmsg_counter == cfg_notification_limit &&
+			cfg_zerocopy == ZEROCOPY_MSGERR_NOTIFICATION) {
 			do_recv_completions(fd, domain);
 			sendmsg_counter = 0;
 		}
 
 		while (!do_poll(fd, POLLOUT)) {
-			if (cfg_zerocopy) {
+			if (cfg_zerocopy == ZEROCOPY_MSGERR_NOTIFICATION) {
 				do_recv_completions(fd, domain);
 				sendmsg_counter = 0;
 			}
@@ -566,8 +672,10 @@  static void do_tx(int domain, int type, int protocol)
 
 	} while (gettimeofday_ms() < tstop);
 
-	if (cfg_zerocopy)
+	if (cfg_zerocopy == ZEROCOPY_MSGERR_NOTIFICATION)
 		do_recv_remaining_completions(fd, domain);
+	else if (cfg_zerocopy == ZEROCOPY_USER_ARG_NOTIFICATION)
+		do_new_recv_remaining_completions(fd, &msg);
 
 	if (close(fd))
 		error(1, errno, "close");
@@ -718,7 +826,7 @@  static void parse_opts(int argc, char **argv)
 
 	cfg_payload_len = max_payload_len;
 
-	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vzol:")) != -1) {
+	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vzol:n")) != -1) {
 		switch (c) {
 		case '4':
 			if (cfg_family != PF_UNSPEC)
@@ -768,7 +876,7 @@  static void parse_opts(int argc, char **argv)
 			cfg_verbose++;
 			break;
 		case 'z':
-			cfg_zerocopy = true;
+			cfg_zerocopy = ZEROCOPY_MSGERR_NOTIFICATION;
 			break;
 		case 'o':
 			cfg_notification_order_check = true;
@@ -776,6 +884,9 @@  static void parse_opts(int argc, char **argv)
 		case 'l':
 			cfg_notification_limit = strtoul(optarg, NULL, 0);
 			break;
+		case 'n':
+			cfg_zerocopy = ZEROCOPY_USER_ARG_NOTIFICATION;
+			break;
 		}
 	}
 
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 89c22f5320e0..022a6936d86f 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -118,4 +118,5 @@  do_test() {
 
 do_test "${EXTRA_ARGS}"
 do_test "-z ${EXTRA_ARGS}"
+do_test "-n ${EXTRA_ARGS}"
 echo ok