From patchwork Tue Mar 21 08:12:01 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Coco Li X-Patchwork-Id: 13182357 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6F743C74A5B for ; Tue, 21 Mar 2023 08:12:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230374AbjCUIMh (ORCPT ); Tue, 21 Mar 2023 04:12:37 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58202 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230507AbjCUIMY (ORCPT ); Tue, 21 Mar 2023 04:12:24 -0400 Received: from mail-yw1-x114a.google.com (mail-yw1-x114a.google.com [IPv6:2607:f8b0:4864:20::114a]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 01A6B3B679 for ; Tue, 21 Mar 2023 01:12:11 -0700 (PDT) Received: by mail-yw1-x114a.google.com with SMTP id 00721157ae682-5419fb7d6c7so145378007b3.11 for ; Tue, 21 Mar 2023 01:12:11 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; t=1679386329; h=cc:to:from:subject:message-id:mime-version:date:from:to:cc:subject :date:message-id:reply-to; bh=939qGjHz1bGVs5oH99oCeysL5FUpJOexYpIoJoVgT20=; b=EEs74O7INa/kDcHbNe4Pb5teq4IGoSGm26aIG6Znw4CZAQMNdFe1abWyZuRPPjhKWk Z8aLAmodsq2ZbwvJ1zPeU1fmGVxuL6xjBVDAycrbFZ8IVH+8uVxpTyE1ynTsh7do//V4 9CHTSEvkUSnBSsK8yjIHKO7VVkXTbngn9F+Jqob/GIpydXQhQI3RDapx/DMXbH2GEu+l yZqTHH6yIP4+lZU3sfnXNrEPFy/25GiEdt+gq7qEHA95jV+GLtrUesV2Lt/EI13Ac84W xdUl1EvFKTIBtDMfcVOkaNri1DNFykHaJWb3mjBFlF4RKQd1wIixBmDOxjc+YmOoop27 B/Lw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; t=1679386329; h=cc:to:from:subject:message-id:mime-version:date:x-gm-message-state :from:to:cc:subject:date:message-id:reply-to; 
bh=939qGjHz1bGVs5oH99oCeysL5FUpJOexYpIoJoVgT20=; b=ggfysbTNUwzY/BCY0IUm2vllejOkLTwk47HqJ4BFcWpPrPP+ejIQDgJ5fK+22P/8Ih 7EJug29VtWVUnLKyP/uwtDadgAhQ1JWZwzNG4qYru0jXp73YDHUW7On+epZWeIWSgyVi dBXgrXz+Hzxs76QHIYOYkl8bQCMjUuTdj9q2a7wCmDMUrNkk0wqlYKNcfKyBEc65tpbJ itJqIgbQgSxys9XpSlaJLgISwQCkwfMjWeXxXpsFp9kmtwGdXIAkW6yRBKnnkggEBhsL TS5eFzs6JzwKx/NYHGZt7H6wDyyAshrHiV7xrEW7r2l8sj4HgeGMdd65sa4J1lpFAwPi eA7w== X-Gm-Message-State: AAQBX9dBhZE0JT09WMiDwbXQHM8WZxf4JrcD6yFmWtYxaLS9cV8qni/0 D5LPi0+SDRohJLtrOxkjfJJ9nSNPT11vq+8= X-Google-Smtp-Source: AKy350ZDrwtJmhLe1KZnlEa3wHtDGdJ0n4tdxp4fhnhITywPtatb3ppEznHNK9dpb2Ejfi8FPPU4bQUSZ5lxVb0= X-Received: from lixiaoyan1.bej.corp.google.com ([2401:fa00:44:10:da00:4d2e:dae2:452f]) (user=lixiaoyan job=sendgmr) by 2002:a81:ad50:0:b0:544:bce8:980f with SMTP id l16-20020a81ad50000000b00544bce8980fmr555615ywk.6.1679386329501; Tue, 21 Mar 2023 01:12:09 -0700 (PDT) Date: Tue, 21 Mar 2023 16:12:01 +0800 Mime-Version: 1.0 X-Mailer: git-send-email 2.40.0.rc1.284.g88254d51c5-goog Message-ID: <20230321081202.2370275-1-lixiaoyan@google.com> Subject: [PATCH net-next 1/2] net-zerocopy: Reduce compound page head access From: Coco Li To: "David S . Miller" , David Ahern , Eric Dumazet , Jakub Kicinski , Paolo Abeni , Shuah Khan , Pavel Begunkov , Shakeel Butt , Oliver Hartkopp , Al Viro Cc: netdev@vger.kernel.org, inux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org, Xiaoyan Li Precedence: bulk List-ID: X-Mailing-List: linux-kselftest@vger.kernel.org From: Xiaoyan Li When compound pages are enabled, although the mm layer still returns an array of page pointers, a subset (or all) of them may have the same page head since a max 180kb skb can span 2 hugepages if it is on the boundary, be a mix of pages and 1 hugepage, or fit completely in a hugepage. Instead of referencing page head on all page pointers, use page length arithmetic to only call page head when referencing a known different page head to avoid touching a cold cacheline. 
Tested: See next patch with changes to tcp_mmap Correctness: Tested on a pair of separate hosts, because send with MSG_ZEROCOPY will force a copy on tx if using loopback alone. Check that the SHA256 of the message sent is equal to the SHA256 of the message received; the current program already checks the length. echo 1024 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages ./tcp_mmap -s -z ./tcp_mmap -H $DADDR -z SHA256 is correct received 2 MB (100 % mmap'ed) in 0.005914 s, 2.83686 Gbit cpu usage user:0.001984 sys:0.000963, 1473.5 usec per MB, 10 c-switches Performance: Run neper between adjacent hosts with the same config tcp_stream -Z --skip-rx-copy -6 -T 20 -F 1000 --stime-use-proc --test-length=30 Before patch: stime_end=37.670000 After patch: stime_end=30.310000 Signed-off-by: Coco Li Reviewed-by: Eric Dumazet --- net/core/datagram.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/core/datagram.c b/net/core/datagram.c index e4ff2db40c98..5662dff3d381 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -622,12 +622,12 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, frag = skb_shinfo(skb)->nr_frags; while (length && iov_iter_count(from)) { + struct page *head, *last_head = NULL; struct page *pages[MAX_SKB_FRAGS]; - struct page *last_head = NULL; + int refs, order, n = 0; size_t start; ssize_t copied; unsigned long truesize; - int refs, n = 0; if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; @@ -650,9 +650,17 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, } else { refcount_add(truesize, &skb->sk->sk_wmem_alloc); } + + head = compound_head(pages[n]); + order = compound_order(head); + for (refs = 0; copied != 0; start = 0) { int size = min_t(int, copied, PAGE_SIZE - start); - struct page *head = compound_head(pages[n]); + + if (pages[n] - head > (1UL << order) - 1) { + head = compound_head(pages[n]); + order = compound_order(head); + } start += (pages[n] - head) << PAGE_SHIFT; 
copied -= size; From patchwork Tue Mar 21 08:12:02 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Coco Li X-Patchwork-Id: 13182358 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 93EACC6FD1D for ; Tue, 21 Mar 2023 08:12:48 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230515AbjCUIMr (ORCPT ); Tue, 21 Mar 2023 04:12:47 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:58296 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230505AbjCUIM0 (ORCPT ); Tue, 21 Mar 2023 04:12:26 -0400 Received: from mail-yw1-x1149.google.com (mail-yw1-x1149.google.com [IPv6:2607:f8b0:4864:20::1149]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 481325FCD for ; Tue, 21 Mar 2023 01:12:16 -0700 (PDT) Received: by mail-yw1-x1149.google.com with SMTP id 00721157ae682-536d63d17dbso148161297b3.22 for ; Tue, 21 Mar 2023 01:12:16 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20210112; t=1679386335; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to :date:from:to:cc:subject:date:message-id:reply-to; bh=N02MtQpElRjAOVUgTTOXBlF9aZQ2nY+ViAJXGNwfrIo=; b=eJTPTYJ+wzWxB72jt9UaCPCC04hgVtuB+eFPHiQzKD1a8O+IbC8c9LF+rtIJIg2K6d beIlg4/ZI2NWZOgBII7bGSNyYoXBu18jIIQ6NwZk4IIbdyb8M/dvriNPZQo8DyIsXRTt CxaXBej0RTl//hIsAnzhN82tj+h1BHxL2GqUTs+mjl5WPRHRbZ0NmlqHp4YQ62L7zyTE 2NS2vbXnS7FmRuGoBQbZGuTzTzHYWINGedroQoD4mEo+AgBTiVoAFwpTPBe6qOeVJGOJ w+YSjO4cjtSg41GWZZIOh572+E/gve9XSUbMP3lHvkBl6AstZTPPLfY8O3GIMDNMzZ36 ksDA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; t=1679386335; h=cc:to:from:subject:message-id:references:mime-version:in-reply-to 
:date:x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=N02MtQpElRjAOVUgTTOXBlF9aZQ2nY+ViAJXGNwfrIo=; b=aZdK1JOeiGvrQy0XBIn1RfinC+L6AEb0qdNNClM+KNMGGtmSyGFoz9El0imj8yeOSG Mj6gmGD7CiMORlPDNHBWBxAdtOGW+IRYvwHbekFwl8ZdtM6CGmE1bmu2QqX6lcYMZaXQ P5frnnBm8w7eLUS7Mh27iV6rwI6UuAhfVdeK2a7Y6u9T4O5Q6IPhwHAC+IDZWlzbcxdR vlKpFBYHE9j75B3ELwBIhhZMoIFxNpFp68vuMeqs1VCfEmu5fUNaz8k/y8m0udmK1isj 2tAUBBMm5LmO9cTBfklJAm8FLL7YU97XBjKAPo74HiYWOxMp6aRco2uQt0dPHwPvcCXd 2hNw== X-Gm-Message-State: AAQBX9eAGJTrda3lo9sNQTumMtpEN0j4SJHrCIYVJbcovuOdihyEjoZV +gA7DBZC3lWGbsAgNh82WHjlP+kWEHUHs3A= X-Google-Smtp-Source: AKy350aVd9+BQsmfwyqDYzq46/d2egfXJ6pneGnEZ6YRLhcZ4HKrl8KXeOwh4gmzDScagT4bssiXoUNSdUcYJ5w= X-Received: from lixiaoyan1.bej.corp.google.com ([2401:fa00:44:10:da00:4d2e:dae2:452f]) (user=lixiaoyan job=sendgmr) by 2002:a25:5188:0:b0:b35:91cc:9e29 with SMTP id f130-20020a255188000000b00b3591cc9e29mr624803ybb.5.1679386335445; Tue, 21 Mar 2023 01:12:15 -0700 (PDT) Date: Tue, 21 Mar 2023 16:12:02 +0800 In-Reply-To: <20230321081202.2370275-1-lixiaoyan@google.com> Mime-Version: 1.0 References: <20230321081202.2370275-1-lixiaoyan@google.com> X-Mailer: git-send-email 2.40.0.rc1.284.g88254d51c5-goog Message-ID: <20230321081202.2370275-2-lixiaoyan@google.com> Subject: [PATCH net-next 2/2] selftests/net: Add SHA256 computation over data sent in tcp_mmap From: Coco Li To: "David S . Miller" , David Ahern , Eric Dumazet , Jakub Kicinski , Paolo Abeni , Shuah Khan , Pavel Begunkov , Shakeel Butt , Oliver Hartkopp , Al Viro Cc: netdev@vger.kernel.org, inux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org, Xiaoyan Li Precedence: bulk List-ID: X-Mailing-List: linux-kselftest@vger.kernel.org From: Xiaoyan Li Add option to compute and send SHA256 over data sent (-i). This is to ensure the correctness of data received. Data is randomly populated from /dev/urandom. 
Tested: ./tcp_mmap -s -z -i ./tcp_mmap -z -H $ADDR -i SHA256 is correct ./tcp_mmap -s -i ./tcp_mmap -H $ADDR -i SHA256 is correct Signed-off-by: Coco Li --- tools/testing/selftests/net/Makefile | 2 +- tools/testing/selftests/net/tcp_mmap.c | 102 ++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index e57750e44f71..1de34ec99290 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -89,7 +89,7 @@ TEST_FILES := settings include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma -$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread +$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 46a02bbd31d0..607cc9ad8d1b 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -66,11 +66,16 @@ #include #include #include +#include #ifndef MSG_ZEROCOPY #define MSG_ZEROCOPY 0x4000000 #endif +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + #define FILE_SZ (1ULL << 35) static int cfg_family = AF_INET6; static socklen_t cfg_alen = sizeof(struct sockaddr_in6); @@ -81,12 +86,14 @@ static int sndbuf; /* Default: autotuning. Can be set with -w option static int zflg; /* zero copy option. 
(MSG_ZEROCOPY for sender, mmap() for receiver */ static int xflg; /* hash received data (simple xor) (-h option) */ static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */ +static int integrity; /* -i option: sender and receiver compute sha256 over the data.*/ static size_t chunk_size = 512*1024; static size_t map_align; unsigned long htotal; +unsigned int digest_len; static inline void prefetch(const void *x) { @@ -148,12 +155,14 @@ static void *mmap_large_buffer(size_t need, size_t *allocated) void *child_thread(void *arg) { + unsigned char digest[SHA256_DIGEST_LENGTH]; unsigned long total_mmap = 0, total = 0; struct tcp_zerocopy_receive zc; + unsigned char *buffer = NULL; unsigned long delta_usec; + EVP_MD_CTX *ctx = NULL; int flags = MAP_SHARED; struct timeval t0, t1; - char *buffer = NULL; void *raddr = NULL; void *addr = NULL; double throughput; @@ -180,6 +189,14 @@ void *child_thread(void *arg) addr = ALIGN_PTR_UP(raddr, map_align); } } + if (integrity) { + ctx = EVP_MD_CTX_new(); + if (!ctx) { + perror("cannot enable SHA computing"); + goto error; + } + EVP_DigestInit_ex(ctx, EVP_sha256(), NULL); + } while (1) { struct pollfd pfd = { .fd = fd, .events = POLLIN, }; int sub; @@ -191,7 +208,7 @@ void *child_thread(void *arg) memset(&zc, 0, sizeof(zc)); zc.address = (__u64)((unsigned long)addr); - zc.length = chunk_size; + zc.length = min(chunk_size, FILE_SZ - lu); res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len); @@ -200,6 +217,8 @@ void *child_thread(void *arg) if (zc.length) { assert(zc.length <= chunk_size); + if (integrity) + EVP_DigestUpdate(ctx, addr, zc.length); total_mmap += zc.length; if (xflg) hash_zone(addr, zc.length); @@ -211,22 +230,30 @@ void *child_thread(void *arg) } if (zc.recv_skip_hint) { assert(zc.recv_skip_hint <= chunk_size); - lu = read(fd, buffer, zc.recv_skip_hint); + lu = read(fd, buffer, min(zc.recv_skip_hint, + FILE_SZ - total)); if (lu > 0) { + if (integrity) + 
EVP_DigestUpdate(ctx, buffer, lu); if (xflg) hash_zone(buffer, lu); total += lu; } + if (lu == 0) + goto end; } continue; } sub = 0; while (sub < chunk_size) { - lu = read(fd, buffer + sub, chunk_size - sub); + lu = read(fd, buffer + sub, min(chunk_size - sub, + FILE_SZ - total)); if (lu == 0) goto end; if (lu < 0) break; + if (integrity) + EVP_DigestUpdate(ctx, buffer + sub, lu); if (xflg) hash_zone(buffer + sub, lu); total += lu; @@ -237,6 +264,20 @@ void *child_thread(void *arg) gettimeofday(&t1, NULL); delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec; + if (integrity) { + fcntl(fd, F_SETFL, 0); + EVP_DigestFinal_ex(ctx, digest, &digest_len); + lu = read(fd, buffer, SHA256_DIGEST_LENGTH); + if (lu != SHA256_DIGEST_LENGTH) + perror("Error: Cannot read SHA256\n"); + + if (memcmp(digest, buffer, + SHA256_DIGEST_LENGTH)) + fprintf(stderr, "Error: SHA256 of the data is not right\n"); + else + printf("\nSHA256 is correct\n"); + } + throughput = 0; if (delta_usec) throughput = total * 8.0 / (double)delta_usec / 1000.0; @@ -368,19 +409,38 @@ static unsigned long default_huge_page_size(void) return hps; } +static void randomize(void *target, size_t count) +{ + static int urandom = -1; + ssize_t got; + + urandom = open("/dev/urandom", O_RDONLY); + if (urandom < 0) { + perror("open /dev/urandom"); + exit(1); + } + got = read(urandom, target, count); + if (got != count) { + perror("read /dev/urandom"); + exit(1); + } +} + int main(int argc, char *argv[]) { + unsigned char digest[SHA256_DIGEST_LENGTH]; struct sockaddr_storage listenaddr, addr; unsigned int max_pacing_rate = 0; + EVP_MD_CTX *ctx = NULL; + unsigned char *buffer; uint64_t total = 0; char *host = NULL; int fd, c, on = 1; size_t buffer_sz; - char *buffer; int sflg = 0; int mss = 0; - while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) { + while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:i")) != -1) { switch (c) { case '4': cfg_family = PF_INET; @@ -426,6 +486,9 @@ int 
main(int argc, char *argv[]) case 'a': map_align = atol(optarg); break; + case 'i': + integrity = 1; + break; default: exit(1); } @@ -468,7 +531,7 @@ int main(int argc, char *argv[]) } buffer = mmap_large_buffer(chunk_size, &buffer_sz); - if (buffer == (char *)-1) { + if (buffer == (unsigned char *)-1) { perror("mmap"); exit(1); } @@ -501,17 +564,34 @@ int main(int argc, char *argv[]) perror("setsockopt SO_ZEROCOPY, (-z option disabled)"); zflg = 0; } + if (integrity) { + randomize(buffer, buffer_sz); + ctx = EVP_MD_CTX_new(); + if (!ctx) { + perror("cannot enable SHA computing"); + exit(1); + } + EVP_DigestInit_ex(ctx, EVP_sha256(), NULL); + } while (total < FILE_SZ) { + size_t offset = total % chunk_size; int64_t wr = FILE_SZ - total; - if (wr > chunk_size) - wr = chunk_size; - /* Note : we just want to fill the pipe with 0 bytes */ - wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0); + if (wr > chunk_size - offset) + wr = chunk_size - offset; + /* Note : we just want to fill the pipe with random bytes */ + wr = send(fd, buffer + offset, + (size_t)wr, zflg ? MSG_ZEROCOPY : 0); if (wr <= 0) break; + if (integrity) + EVP_DigestUpdate(ctx, buffer + offset, wr); total += wr; } + if (integrity && total == FILE_SZ) { + EVP_DigestFinal_ex(ctx, digest, &digest_len); + send(fd, digest, (size_t)SHA256_DIGEST_LENGTH, 0); + } close(fd); munmap(buffer, buffer_sz); return 0;