@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* Evaluate MSG_ZEROCOPY
*
* Send traffic between two processes over one of the supported
@@ -66,6 +67,10 @@
#define SO_ZEROCOPY 60
#endif
+#ifndef SCM_ZC_NOTIFICATION
+#define SCM_ZC_NOTIFICATION 78
+#endif
+
#ifndef SO_EE_CODE_ZEROCOPY_COPIED
#define SO_EE_CODE_ZEROCOPY_COPIED 1
#endif
@@ -74,6 +79,15 @@
#define MSG_ZEROCOPY 0x4000000
#endif
+enum notification_type { /* value stored in cfg_zerocopy; it stays 0 when neither -z nor -n is given */
+ MSG_ZEROCOPY_NOTIFY_ERRQUEUE = 1, /* selected by -z; completions are collected with do_recv_completions() */
+ MSG_ZEROCOPY_NOTIFY_SENDMSG = 2, /* selected by -n; completions are read from zc_info by do_recv_completions2() */
+};
+
+#define SOCK_ZC_INFO_NUM 8 /* number of elements in the zc_info array */
+
+#define INVALID_ZEROCOPY_VAL 2 /* marker written to every zc_info[].zerocopy by add_zcopy_info(); do_recv_completions2() stops at the first slot still holding it */
+
static int cfg_cork;
static bool cfg_cork_mixed;
static int cfg_cpu = -1; /* default: pin to last cpu */
@@ -86,13 +100,16 @@ static int cfg_runtime_ms = 4200;
static int cfg_verbose;
static int cfg_waittime_ms = 500;
static int cfg_notification_limit = 32;
-static bool cfg_zerocopy;
+static enum notification_type cfg_zerocopy;
static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;
static char payload[IP_MAXPACKET];
+static char zc_ckbuf[CMSG_SPACE(sizeof(void *))]; /* control buffer: room for one cmsg whose data is a single pointer */
+static struct zc_info_elem zc_info[SOCK_ZC_INFO_NUM]; /* notification slots scanned by do_recv_completions2() */
+static struct zc_info_elem *zc_info_ptr = zc_info; /* its value (the address of zc_info) is what add_zcopy_info() copies into the cmsg data */
static long packets, bytes, completions, expected_completions;
static int zerocopied = -1;
static uint32_t next_completion;
@@ -169,6 +186,26 @@ static int do_accept(int fd)
return fd;
}
+/* Fill the control buffer of @msg with one SCM_ZC_NOTIFICATION cmsg.
+ *
+ * @msg: message whose msg_control must already point at the control
+ *       buffer; the program exits with an error if it is NULL.
+ *
+ * The cmsg data is the address of the global zc_info array (copied from
+ * zc_info_ptr), not the array contents. Before returning, every zc_info
+ * slot is marked with INVALID_ZEROCOPY_VAL so that slots still holding
+ * the marker later on can be told apart from slots that were filled in.
+ */
+static void add_zcopy_info(struct msghdr *msg)
+{
+	struct cmsghdr *cm;
+	int i;
+
+	/* No call has failed at this point, so errno is stale: pass 0 to keep
+	 * error() from appending an unrelated strerror() text.
+	 */
+	if (!msg->msg_control)
+		error(1, 0, "NULL user arg");
+	cm = (void *)msg->msg_control;
+	/* Although only the address of the array will be written into the
+	 * zc_ckbuf, we assign cmsg_len to CMSG_LEN(sizeof(zc_info)) to specify
+	 * the length of the array.
+	 */
+	cm->cmsg_len = CMSG_LEN(sizeof(zc_info));
+	cm->cmsg_level = SOL_SOCKET;
+	cm->cmsg_type = SCM_ZC_NOTIFICATION;
+	memcpy(CMSG_DATA(cm), &zc_info_ptr, sizeof(zc_info_ptr));
+	/* Reset every slot to the "unused" marker. */
+	for (i = 0; i < SOCK_ZC_INFO_NUM; i++)
+		zc_info[i].zerocopy = INVALID_ZEROCOPY_VAL;
+}
+
+
static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
{
struct cmsghdr *cm;
@@ -182,7 +219,8 @@ static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
}
-static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
+static bool do_sendmsg(int fd, struct msghdr *msg,
+ enum notification_type do_zerocopy, int domain)
{
int ret, len, i, flags;
static uint32_t cookie;
@@ -200,6 +238,15 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
msg->msg_control = (struct cmsghdr *)ckbuf;
add_zcopy_cookie(msg, ++cookie);
+ } else if (do_zerocopy == MSG_ZEROCOPY_NOTIFY_SENDMSG) {
+ memset(&msg->msg_control, 0, sizeof(msg->msg_control));
+ /* Although only the address of the array is written into zc_ckbuf,
+ * msg_controllen must be greater than or equal to every cmsg_len in
+ * it. Otherwise, we will get -EINVAL.
+ */
+ msg->msg_controllen = CMSG_SPACE(sizeof(zc_info));
+ msg->msg_control = (struct cmsghdr *)zc_ckbuf;
+ add_zcopy_info(msg);
}
}
@@ -218,7 +265,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
if (do_zerocopy && ret)
expected_completions++;
}
- if (do_zerocopy && domain == PF_RDS) {
+ if (msg->msg_control) {
msg->msg_control = NULL;
msg->msg_controllen = 0;
}
@@ -392,6 +439,42 @@ static bool do_recvmsg_completion(int fd)
return ret;
}
+/* Account for the completion ranges currently stored in zc_info.
+ *
+ * Slots are read from index 0 up to, but not including, the first slot
+ * whose zerocopy field still holds INVALID_ZEROCOPY_VAL, or until all
+ * SOCK_ZC_INFO_NUM slots have been read. Each slot describes an inclusive
+ * range [lo, hi]: the range size is added to completions, next_completion
+ * is moved to hi + 1 and zerocopied tracks the slot's zerocopy value.
+ * Finally sends_since_notify is lowered by the number of slots consumed.
+ */
+static void do_recv_completions2(void)
+{
+	int i;
+	__u32 hi, lo, range;
+	__u8 zerocopy;
+
+	/* Bound the scan by the array size: when every slot has been filled
+	 * in there is no INVALID_ZEROCOPY_VAL marker left to stop at, and an
+	 * unbounded scan would read past the end of zc_info.
+	 */
+	for (i = 0; i < SOCK_ZC_INFO_NUM &&
+		    zc_info[i].zerocopy != INVALID_ZEROCOPY_VAL; i++) {
+		struct zc_info_elem elem = zc_info[i];
+
+		hi = elem.hi;
+		lo = elem.lo;
+		zerocopy = elem.zerocopy;
+		range = hi - lo + 1;
+
+		/* A range that does not start at the expected value is only
+		 * reported, never treated as fatal.
+		 */
+		if (cfg_verbose && lo != next_completion)
+			fprintf(stderr, "gap: %u..%u does not append to %u\n",
+				lo, hi, next_completion);
+		next_completion = hi + 1;
+
+		/* The first slot seen sets the reference value; a later
+		 * mismatch is reported and the reference is updated.
+		 */
+		if (zerocopied == -1)
+			zerocopied = zerocopy;
+		else if (zerocopied != zerocopy) {
+			fprintf(stderr, "serr: inconsistent\n");
+			zerocopied = zerocopy;
+		}
+
+		completions += range;
+
+		if (cfg_verbose >= 2)
+			fprintf(stderr, "completed: %u (h=%u l=%u)\n",
+				range, hi, lo);
+	}
+
+	/* i is the number of slots consumed above. */
+	sends_since_notify -= i;
+}
+
+
static bool do_recv_completion(int fd, int domain)
{
struct sock_extended_err *serr;
@@ -553,11 +636,15 @@ static void do_tx(int domain, int type, int protocol)
else
do_sendmsg(fd, &msg, cfg_zerocopy, domain);
- if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit)
+ if (cfg_zerocopy == MSG_ZEROCOPY_NOTIFY_ERRQUEUE &&
+ sends_since_notify >= cfg_notification_limit)
do_recv_completions(fd, domain);
+ if (cfg_zerocopy == MSG_ZEROCOPY_NOTIFY_SENDMSG)
+ do_recv_completions2();
+
while (!do_poll(fd, POLLOUT)) {
- if (cfg_zerocopy)
+ if (cfg_zerocopy == MSG_ZEROCOPY_NOTIFY_ERRQUEUE)
do_recv_completions(fd, domain);
}
@@ -715,7 +802,7 @@ static void parse_opts(int argc, char **argv)
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
+ while ((c = getopt(argc, argv, "46c:C:D:i:l:mnp:rs:S:t:vz")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -749,6 +836,9 @@ static void parse_opts(int argc, char **argv)
case 'm':
cfg_cork_mixed = true;
break;
+ case 'n':
+ cfg_zerocopy = MSG_ZEROCOPY_NOTIFY_SENDMSG;
+ break;
case 'p':
cfg_port = strtoul(optarg, NULL, 0);
break;
@@ -768,7 +858,7 @@ static void parse_opts(int argc, char **argv)
cfg_verbose++;
break;
case 'z':
- cfg_zerocopy = true;
+ cfg_zerocopy = MSG_ZEROCOPY_NOTIFY_ERRQUEUE;
break;
}
}
@@ -779,6 +869,8 @@ static void parse_opts(int argc, char **argv)
error(1, 0, "-D <server addr> required for PF_RDS\n");
if (!cfg_rx && !saddr)
error(1, 0, "-S <client addr> required for PF_RDS\n");
+ if (cfg_zerocopy == MSG_ZEROCOPY_NOTIFY_SENDMSG)
+ error(1, 0, "PF_RDS does not support MSG_ZEROCOPY_NOTIFY_SENDMSG");
}
setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
@@ -118,4 +118,5 @@ do_test() {
do_test "${EXTRA_ARGS}"
do_test "-z ${EXTRA_ARGS}"
+do_test "-n ${EXTRA_ARGS}"
echo ok