[RFC] net: add TCP fraglist GRO support

Message ID: 20240423094117.93206-1-nbd@nbd.name (mailing list archive)
State: RFC
Delegated to: Netdev Maintainers
Series: [RFC] net: add TCP fraglist GRO support

Checks

Context Check Description
netdev/series_format warning Single patches do not need cover letters; Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1847 this patch: 1847
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 957 this patch: 957
netdev/verify_signedoff fail author Signed-off-by missing
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1883 this patch: 1883
netdev/checkpatch warning WARNING: Non-standard signature: 'Signe-off-by:' - perhaps 'Signed-off-by:'? WARNING: line length of 85 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 2 this patch: 2
netdev/source_inline success Was 0 now: 0

Commit Message

Felix Fietkau April 23, 2024, 9:41 a.m. UTC
When forwarding TCP after GRO, software segmentation is very expensive,
especially when the checksum needs to be recalculated.
One case where that's currently unavoidable is when routing packets over
PPPoE. Performance improves significantly when using fraglist GRO
implemented in the same way as for UDP.

Here's a measurement of running 2 TCP streams through a MediaTek MT7622
device (2-core Cortex-A53), which runs NAT with flow offload enabled from
one ethernet port to PPPoE on another ethernet port + cake qdisc set to
1Gbps.

rx-gro-list off: 630 Mbit/s, CPU 35% idle
rx-gro-list on:  770 Mbit/s, CPU 40% idle
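
For reference, fraglist GRO is the opt-in rx-gro-list feature measured
above; assuming an interface named eth0, it can be toggled per device
with:

  ethtool -K eth0 rx-gro-list on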

Signe-off-by: Felix Fietkau <nbd@nbd.name>
---
 include/net/gro.h        |   1 +
 include/net/tcp.h        |   3 +-
 net/core/gro.c           |  27 ++++++++
 net/ipv4/tcp_offload.c   | 143 ++++++++++++++++++++++++++++++++++++++-
 net/ipv4/udp_offload.c   |  27 --------
 net/ipv6/tcpv6_offload.c |  58 +++++++++++++++-
 6 files changed, 228 insertions(+), 31 deletions(-)

Comments

Eric Dumazet April 23, 2024, 10:15 a.m. UTC | #1
On Tue, Apr 23, 2024 at 11:41 AM Felix Fietkau <nbd@nbd.name> wrote:
>
> When forwarding TCP after GRO, software segmentation is very expensive,
> especially when the checksum needs to be recalculated.
> One case where that's currently unavoidable is when routing packets over
> PPPoE. Performance improves significantly when using fraglist GRO
> implemented in the same way as for UDP.
>
> Here's a measurement of running 2 TCP streams through a MediaTek MT7622
> device (2-core Cortex-A53), which runs NAT with flow offload enabled from
> one ethernet port to PPPoE on another ethernet port + cake qdisc set to
> 1Gbps.
>
> rx-gro-list off: 630 Mbit/s, CPU 35% idle
> rx-gro-list on:  770 Mbit/s, CPU 40% idle

Hi Felix

changelog is a bit terse, and patch complex.

Could you elaborate why this issue
seems to be related to a specific driver ?

I think we should push hard to not use frag_list in drivers :/

And GRO itself could avoid building frag_list skbs
in hosts where forwarding is enabled.

(Note that we also can increase MAX_SKB_FRAGS to 45 these days)
Felix Fietkau April 23, 2024, 10:25 a.m. UTC | #2
On 23.04.24 12:15, Eric Dumazet wrote:
> On Tue, Apr 23, 2024 at 11:41 AM Felix Fietkau <nbd@nbd.name> wrote:
>>
>> When forwarding TCP after GRO, software segmentation is very expensive,
>> especially when the checksum needs to be recalculated.
>> One case where that's currently unavoidable is when routing packets over
>> PPPoE. Performance improves significantly when using fraglist GRO
>> implemented in the same way as for UDP.
>>
>> Here's a measurement of running 2 TCP streams through a MediaTek MT7622
>> device (2-core Cortex-A53), which runs NAT with flow offload enabled from
>> one ethernet port to PPPoE on another ethernet port + cake qdisc set to
>> 1Gbps.
>>
>> rx-gro-list off: 630 Mbit/s, CPU 35% idle
>> rx-gro-list on:  770 Mbit/s, CPU 40% idle
> 
> Hi Felix
> 
> changelog is a bit terse, and patch complex.
> 
> Could you elaborate why this issue
> seems to be related to a specific driver ?
> 
> I think we should push hard to not use frag_list in drivers :/
> 
> And GRO itself could avoid building frag_list skbs
> in hosts where forwarding is enabled.
> 
> (Note that we also can increase MAX_SKB_FRAGS to 45 these days)

The issue is not related to a specific driver at all. Here's how traffic 
flows: TCP packets are received on the SoC ethernet driver, the network 
stack performs regular GRO. The packet gets forwarded by flow offloading 
until it reaches the PPPoE device. PPPoE does not support GSO packets, 
so the packets need to be segmented again.
This is *very* expensive, since data needs to be copied and checksummed.

So in my patch, I changed the code to build fraglist GRO instead of 
regular GRO packets, whenever there is no local socket to receive the 
packets. This makes segmenting very cheap, since the original skbs are 
preserved on the trip through the stack. The only cost is an extra 
socket lookup whenever NETIF_F_GRO_FRAGLIST is enabled.
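
A condensed sketch of that heuristic (the full version is
tcp4_check_fraglist_gro() in the patch below; the helper name here is
made up, and the header pulls/error handling are omitted):

static bool use_fraglist_gro(struct sk_buff *skb, const struct iphdr *iph,
			     const struct tcphdr *th)
{
	struct net *net = dev_net(skb->dev);
	struct sock *sk;
	int iif, sdif;

	/* the device has to opt in via the rx-gro-list feature */
	if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
		return false;

	inet_get_iif_sdif(skb, &iif, &sdif);
	sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       iif, sdif);
	if (!sk)
		return true;	/* no local receiver: likely forwarded */

	sock_put(sk);
	return false;		/* local socket found: use regular GRO */
}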

PPPoE in this case is only an example. The same issue appears when 
forwarding to any netdev which does not support TSO, which in my case 
affects most wifi drivers as well.

- Felix
Eric Dumazet April 23, 2024, 11:17 a.m. UTC | #3
On Tue, Apr 23, 2024 at 12:25 PM Felix Fietkau <nbd@nbd.name> wrote:
>
> On 23.04.24 12:15, Eric Dumazet wrote:
> > On Tue, Apr 23, 2024 at 11:41 AM Felix Fietkau <nbd@nbd.name> wrote:
> >>
> >> When forwarding TCP after GRO, software segmentation is very expensive,
> >> especially when the checksum needs to be recalculated.
> >> One case where that's currently unavoidable is when routing packets over
> >> PPPoE. Performance improves significantly when using fraglist GRO
> >> implemented in the same way as for UDP.
> >>
> >> Here's a measurement of running 2 TCP streams through a MediaTek MT7622
> >> device (2-core Cortex-A53), which runs NAT with flow offload enabled from
> >> one ethernet port to PPPoE on another ethernet port + cake qdisc set to
> >> 1Gbps.
> >>
> >> rx-gro-list off: 630 Mbit/s, CPU 35% idle
> >> rx-gro-list on:  770 Mbit/s, CPU 40% idle
> >
> > Hi Felix
> >
> > changelog is a bit terse, and patch complex.
> >
> > Could you elaborate why this issue
> > seems to be related to a specific driver ?
> >
> > I think we should push hard to not use frag_list in drivers :/
> >
> > And GRO itself could avoid building frag_list skbs
> > in hosts where forwarding is enabled.
> >
> > (Note that we also can increase MAX_SKB_FRAGS to 45 these days)
>
> The issue is not related to a specific driver at all. Here's how traffic
> flows: TCP packets are received on the SoC ethernet driver, the network
> stack performs regular GRO. The packet gets forwarded by flow offloading
> until it reaches the PPPoE device. PPPoE does not support GSO packets,
> so the packets need to be segmented again.
> This is *very* expensive, since data needs to be copied and checksummed.

gso segmentation does not copy the payload, unless the device has no
SG capability.

I guess something should be done about that, regardless of your GRO work,
since most ethernet devices support SG these days.

Some drivers use header split for RX, so forwarding to  PPPoE
would require a linearization anyway, if SG is not properly handled.

>
> So in my patch, I changed the code to build fraglist GRO instead of
> regular GRO packets, whenever there is no local socket to receive the
> packets. This makes segmenting very cheap, since the original skbs are
> preserved on the trip through the stack. The only cost is an extra
> socket lookup whenever NETIF_F_GRO_FRAGLIST is enabled.

A socket lookup in a multi-net-namespace world is not going to work generically,
but I get the idea now.
Felix Fietkau April 23, 2024, 11:55 a.m. UTC | #4
On 23.04.24 13:17, Eric Dumazet wrote:
> On Tue, Apr 23, 2024 at 12:25 PM Felix Fietkau <nbd@nbd.name> wrote:
>>
>> On 23.04.24 12:15, Eric Dumazet wrote:
>> > On Tue, Apr 23, 2024 at 11:41 AM Felix Fietkau <nbd@nbd.name> wrote:
>> >>
>> >> When forwarding TCP after GRO, software segmentation is very expensive,
>> >> especially when the checksum needs to be recalculated.
>> >> One case where that's currently unavoidable is when routing packets over
>> >> PPPoE. Performance improves significantly when using fraglist GRO
>> >> implemented in the same way as for UDP.
>> >>
>> >> Here's a measurement of running 2 TCP streams through a MediaTek MT7622
>> >> device (2-core Cortex-A53), which runs NAT with flow offload enabled from
>> >> one ethernet port to PPPoE on another ethernet port + cake qdisc set to
>> >> 1Gbps.
>> >>
>> >> rx-gro-list off: 630 Mbit/s, CPU 35% idle
>> >> rx-gro-list on:  770 Mbit/s, CPU 40% idle
>> >
>> > Hi Felix
>> >
>> > changelog is a bit terse, and patch complex.
>> >
>> > Could you elaborate why this issue
>> > seems to be related to a specific driver ?
>> >
>> > I think we should push hard to not use frag_list in drivers :/
>> >
>> > And GRO itself could avoid building frag_list skbs
>> > in hosts where forwarding is enabled.
>> >
>> > (Note that we also can increase MAX_SKB_FRAGS to 45 these days)
>>
>> The issue is not related to a specific driver at all. Here's how traffic
>> flows: TCP packets are received on the SoC ethernet driver, the network
>> stack performs regular GRO. The packet gets forwarded by flow offloading
>> until it reaches the PPPoE device. PPPoE does not support GSO packets,
>> so the packets need to be segmented again.
>> This is *very* expensive, since data needs to be copied and checksummed.
> 
> gso segmentation does not copy the payload, unless the device has no
> SG capability.
> 
> I guess something should be done about that, regardless of your GRO work,
> since most ethernet devices support SG these days.
> 
> Some drivers use header split for RX, so forwarding to  PPPoE
> would require a linearization anyway, if SG is not properly handled.

In the world of consumer-grade WiFi devices, there are a lot of chipsets 
with limited or nonexistent SG support, and very limited checksum 
offload capabilities on Ethernet. The WiFi side of these devices is 
often even worse. I think fraglist GRO is a decent fallback for the 
inevitable corner cases.

>> So in my patch, I changed the code to build fraglist GRO instead of
>> regular GRO packets, whenever there is no local socket to receive the
>> packets. This makes segmenting very cheap, since the original skbs are
>> preserved on the trip through the stack. The only cost is an extra
>> socket lookup whenever NETIF_F_GRO_FRAGLIST is enabled.
> 
> A socket lookup in a multi-net-namespace world is not going to work generically,
> but I get the idea now.

Right, I can't think of a proper solution to this at the moment. 
Considering that NETIF_F_GRO_FRAGLIST is opt-in and only meant for 
rather specific configurations anyway, this should not be too much of a 
problem, right?

- Felix
Eric Dumazet April 23, 2024, 12:11 p.m. UTC | #5
On Tue, Apr 23, 2024 at 1:55 PM Felix Fietkau <nbd@nbd.name> wrote:
>
> In the world of consumer-grade WiFi devices, there are a lot of chipsets
> with limited or nonexistent SG support, and very limited checksum
> offload capabilities on Ethernet. The WiFi side of these devices is
> often even worse. I think fraglist GRO is a decent fallback for the
> inevitable corner cases.

What about netfilter and NAT ? Are they okay with NETIF_F_GRO_FRAGLIST already ?

Many of these devices are probably using NAT.
Felix Fietkau April 23, 2024, 12:23 p.m. UTC | #6
On 23.04.24 14:11, Eric Dumazet wrote:
> On Tue, Apr 23, 2024 at 1:55 PM Felix Fietkau <nbd@nbd.name> wrote:
>>
>> In the world of consumer-grade WiFi devices, there are a lot of chipsets
>> with limited or nonexistent SG support, and very limited checksum
>> offload capabilities on Ethernet. The WiFi side of these devices is
>> often even worse. I think fraglist GRO is a decent fallback for the
>> inevitable corner cases.
> 
> What about netfilter and NAT ? Are they okay with NETIF_F_GRO_FRAGLIST already ?
> 
> Many of these devices are probably using NAT.

In my tests, nftables NAT works just fine, both with and without 
flowtable offloading. I didn't see anything in netfilter that would have 
a problem with this.
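
For context, a minimal nft ruleset of the kind exercised in these tests
might look like this (interface names lan0/pppoe-wan are made up; the
flowtable block is what enables the offloaded path):

table inet nat {
	chain postrouting {
		type nat hook postrouting priority srcnat; policy accept;
		oifname "pppoe-wan" masquerade
	}
}

table inet filter {
	flowtable ft {
		hook ingress priority 0;
		devices = { lan0, pppoe-wan };
	}
	chain forward {
		type filter hook forward priority filter; policy accept;
		ip protocol tcp flow add @ft
	}
}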

- Felix
Eric Dumazet April 23, 2024, 1:07 p.m. UTC | #7
On Tue, Apr 23, 2024 at 2:23 PM Felix Fietkau <nbd@nbd.name> wrote:
>
> On 23.04.24 14:11, Eric Dumazet wrote:
> > On Tue, Apr 23, 2024 at 1:55 PM Felix Fietkau <nbd@nbd.name> wrote:
> >>
> >> In the world of consumer-grade WiFi devices, there are a lot of chipsets
> >> with limited or nonexistent SG support, and very limited checksum
> >> offload capabilities on Ethernet. The WiFi side of these devices is
> >> often even worse. I think fraglist GRO is a decent fallback for the
> >> inevitable corner cases.
> >
> > What about netfilter and NAT ? Are they okay with NETIF_F_GRO_FRAGLIST already ?
> >
> > Many of these devices are probably using NAT.
>
> In my tests, nftables NAT works just fine, both with and without
> flowtable offloading. I didn't see anything in netfilter that would have
> a problem with this.

This is great !
Paolo Abeni April 23, 2024, 2:34 p.m. UTC | #8
On Tue, 2024-04-23 at 14:23 +0200, Felix Fietkau wrote:
> On 23.04.24 14:11, Eric Dumazet wrote:
> > On Tue, Apr 23, 2024 at 1:55 PM Felix Fietkau <nbd@nbd.name> wrote:
> > > 
> > > In the world of consumer-grade WiFi devices, there are a lot of chipsets
> > > with limited or nonexistent SG support, and very limited checksum
> > > offload capabilities on Ethernet. The WiFi side of these devices is
> > > often even worse. I think fraglist GRO is a decent fallback for the
> > > inevitable corner cases.
> > 
> > What about netfilter and NAT ? Are they okay with NETIF_F_GRO_FRAGLIST already ?
> > 
> > Many of these devices are probably using NAT.
> 
> In my tests, nftables NAT works just fine, both with and without 
> flowtable offloading. I didn't see anything in netfilter that would have 
> a problem with this.

I see you explicitly handle NAT changes in __tcpv4_gso_segment_csum(),
like the current UDP code. 

The TCP header has many other fields that could be updated affecting
the TCP csum.
Handling every possible mutation looks cumbersome and will likely
reduce the performance benefits.

What is your plan WRT other TCP header fields update?

Strictly WRT the patch, I guess it deserves to be split in series,
moving UDP helpers in common code and possibly factoring out more
helpers with separate patches.

e.g. __tcpv4_gso_segment_csum() is quite similar to
__udpv4_gso_segment_csum() - even too much, as the tcp csum should
always be updated when the ports or addresses change ;)

Cheers,

Paolo
David Ahern April 23, 2024, 3:03 p.m. UTC | #9
On 4/23/24 4:15 AM, Eric Dumazet wrote:
> I think we should push hard to not use frag_list in drivers :/

why is that? I noticed significant gains for local delivery after adding
frag_list support for H/W GRO. Fewer skbs going up the stack is
essential for high throughput and reducing CPU load.

> 
> And GRO itself could avoid building frag_list skbs
> in hosts where forwarding is enabled.

But if the egress device supports SG and the driver understands
frag_list, walking the frag_list should be cheaper than multiple skbs
traversing the forwarding path.

> 
> (Note that we also can increase MAX_SKB_FRAGS to 45 these days)

Using 45 frags has other side effects and is not something that can be done
universally (hence why it is a config option).

45 frags is for Big TCP at 4kB and that is ~ 3 skbs at the default
setting of 17 which means an skb chain 2 deep. 1 skb going up the stack
vs 3 skbs - that is a big difference.

Was there a conference talk or a discussion around tests performed
comparing use of frag_list with MAX_SKB_FRAGS at 17 vs expanding
MAX_SKB_FRAGS to 45?
Eric Dumazet April 23, 2024, 3:18 p.m. UTC | #10
On Tue, Apr 23, 2024 at 5:03 PM David Ahern <dsahern@kernel.org> wrote:
>
> On 4/23/24 4:15 AM, Eric Dumazet wrote:
> > I think we should push hard to not use frag_list in drivers :/
>
> why is that? I noticed significant gains for local delivery after adding
> frag_list support for H/W GRO. Fewer skbs going up the stack is
> essential for high throughput and reducing CPU load.

Felix case is about forwarding, not local delivery (which is fine)

>
> >
> > And GRO itself could avoid building frag_list skbs
> > in hosts where forwarding is enabled.
>
> But if the egress device supports SG and the driver understands
> frag_list, walking the frag_list should be cheaper than multiple skbs
> traversing the forwarding path.

I do not see any relevant (modern) driver supporting NETIF_F_FRAGLIST

>
> >
> > (Note that we also can increase MAX_SKB_FRAGS to 45 these days)
>
> Using 45 frags has other side effects and is not something that can be done
> universally (hence why it is a config option).
>
> 45 frags is for Big TCP at 4kB and that is ~ 3 skbs at the default
> setting of 17 which means an skb chain 2 deep. 1 skb going up the stack
> vs 3 skbs - that is a big difference.

45 frags can also be for non BIG TCP, allowing ~64KB GRO packets
without frag_list.

45*1448 = 65160

Max number of frags per skb is orthogonal to (BIG) TCP.
Felix Fietkau April 23, 2024, 4:55 p.m. UTC | #11
On 23.04.24 16:34, Paolo Abeni wrote:
> On Tue, 2024-04-23 at 14:23 +0200, Felix Fietkau wrote:
>> On 23.04.24 14:11, Eric Dumazet wrote:
>> > On Tue, Apr 23, 2024 at 1:55 PM Felix Fietkau <nbd@nbd.name> wrote:
>> > > 
>> > > In the world of consumer-grade WiFi devices, there are a lot of chipsets
>> > > with limited or nonexistent SG support, and very limited checksum
>> > > offload capabilities on Ethernet. The WiFi side of these devices is
>> > > often even worse. I think fraglist GRO is a decent fallback for the
>> > > inevitable corner cases.
>> > 
>> > What about netfilter and NAT ? Are they okay with NETIF_F_GRO_FRAGLIST already ?
>> > 
>> > Many of these devices are probably using NAT.
>> 
>> In my tests, nftables NAT works just fine, both with and without 
>> flowtable offloading. I didn't see anything in netfilter that would have 
>> a problem with this.
> 
> I see you explicitly handle NAT changes in __tcpv4_gso_segment_csum(),
> like the current UDP code.
> 
> The TCP header has many other fields that could be updated affecting
> the TCP csum.
> Handling every possible mutation looks cumbersome and will likely
> reduce the performance benefits.
> 
> What is your plan WRT other TCP header fields update?

I think that should be easy enough to handle. My patch already only 
combines packets where tcp_flag_word(th) is identical. So when 
segmenting, I could handle all flags changes with a single 
inet_proto_csum_replace4 call.
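
For illustration, a hypothetical helper along those lines (the name and
calling convention are made up; 'flags' would be the current flag word
of the head skb, 'th' the header of a chained segment):

static void tcp_seg_update_flags_csum(struct sk_buff *seg, struct tcphdr *th,
				      __be32 flags)
{
	__be32 old_flags = tcp_flag_word(th);

	if (old_flags == flags)
		return;

	/* fold the rewritten flag word into the TCP checksum in one call */
	inet_proto_csum_replace4(&th->check, seg, old_flags, flags, false);
	tcp_flag_word(th) = flags;
}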

> Strictly WRT the patch, I guess it deserves to be split in series,
> moving UDP helpers in common code and possibly factoring out more
> helpers with separate patches.
Will do.

> e.g. __tcpv4_gso_segment_csum() is quite similar to
> __udpv4_gso_segment_csum() - even too much, as the tcp csum should
> always be updated when the ports or addresses change ;)

Will fix that.

Thanks,

- Felix
Willem de Bruijn April 24, 2024, 1:24 a.m. UTC | #12
Felix Fietkau wrote:
> On 23.04.24 16:34, Paolo Abeni wrote:
> > On Tue, 2024-04-23 at 14:23 +0200, Felix Fietkau wrote:
> >> On 23.04.24 14:11, Eric Dumazet wrote:
> >> > On Tue, Apr 23, 2024 at 1:55 PM Felix Fietkau <nbd@nbd.name> wrote:
> >> > > 
> >> > > In the world of consumer-grade WiFi devices, there are a lot of chipsets
> >> > > with limited or nonexistent SG support, and very limited checksum
> >> > > offload capabilities on Ethernet. The WiFi side of these devices is
> >> > > often even worse. I think fraglist GRO is a decent fallback for the
> >> > > inevitable corner cases.
> >> > 
> >> > What about netfilter and NAT ? Are they okay with NETIF_F_GRO_FRAGLIST already ?
> >> > 
> >> > Many of these devices are probably using NAT.
> >> 
> >> In my tests, nftables NAT works just fine, both with and without 
> >> flowtable offloading. I didn't see anything in netfilter that would have 
> >> a problem with this.
> > 
> > I see you explicitly handle NAT changes in __tcpv4_gso_segment_csum(),
> > like the current UDP code.
> > 
> > The TCP header has many other fields that could be updated affecting
> > the TCP csum.
> > Handling every possible mutation looks cumbersome and will likely
> > reduce the performance benefits.
> > 
> > What is your plan WRT other TCP header fields update?
> 
> I think that should be easy enough to handle. My patch already only 
> combines packets where tcp_flag_word(th) is identical. So when 
> segmenting, I could handle all flags changes with a single 
> inet_proto_csum_replace4 call.
> 
> > Strictly WRT the patch, I guess it deserves to be split in series,
> > moving UDP helpers in common code and possibly factoring out more
> > helpers with separate patches.
> Will do.

A significant chunk of the complexity is in the
tcp[46]_check_fraglist_gro sk match. Is this heuristic worth the
complexity?

It seems that the platforms that will enable NETIF_F_FRAGLIST will
be mainly forwarding planes.

If keeping, this refinement can probably be a separate follow-on patch in
the series too:

- refactor existing udp code
- add segmentation support to handle such packets on tx
- add coalescing support that starts building such packets on rx
- refine coalescing choice

> > e.g. __tcpv4_gso_segment_csum() is quite similar to
> > __udpv4_gso_segment_csum() - even too much, as the tcp csum should
> > always be updated when the ports or addresses change ;)
> 
> Will fix that.
> 
> Thanks,
> 
> - Felix
Felix Fietkau April 24, 2024, 1:50 p.m. UTC | #13
On 24.04.24 03:24, Willem de Bruijn wrote:
> Felix Fietkau wrote:
>> On 23.04.24 16:34, Paolo Abeni wrote:
>> > On Tue, 2024-04-23 at 14:23 +0200, Felix Fietkau wrote:
>> >> On 23.04.24 14:11, Eric Dumazet wrote:
>> >> > On Tue, Apr 23, 2024 at 1:55 PM Felix Fietkau <nbd@nbd.name> wrote:
>> >> > > 
>> >> > > In the world of consumer-grade WiFi devices, there are a lot of chipsets
>> >> > > with limited or nonexistent SG support, and very limited checksum
>> >> > > offload capabilities on Ethernet. The WiFi side of these devices is
>> >> > > often even worse. I think fraglist GRO is a decent fallback for the
>> >> > > inevitable corner cases.
>> >> > 
>> >> > What about netfilter and NAT ? Are they okay with NETIF_F_GRO_FRAGLIST already ?
>> >> > 
>> >> > Many of these devices are probably using NAT.
>> >> 
>> >> In my tests, nftables NAT works just fine, both with and without 
>> >> flowtable offloading. I didn't see anything in netfilter that would have 
>> >> a problem with this.
>> > 
>> > I see you explicitly handle NAT changes in __tcpv4_gso_segment_csum(),
>> > like the current UDP code.
>> > 
>> > The TCP header has many other fields that could be updated affecting
>> > the TCP csum.
>> > Handling every possible mutation looks cumbersome and will likely
>> > reduce the performance benefits.
>> > 
>> > What is your plan WRT other TCP header fields update?
>> 
>> I think that should be easy enough to handle. My patch already only 
>> combines packets where tcp_flag_word(th) is identical. So when 
>> segmenting, I could handle all flags changes with a single 
>> inet_proto_csum_replace4 call.
>> 
>> > Strictly WRT the patch, I guess it deserves to be split in series,
>> > moving UDP helpers in common code and possibly factoring out more
>> > helpers with separate patches.
>> Will do.
> 
> A significant chunk of the complexity is in the
> tcp[46]_check_fraglist_gro sk match. Is this heuristic worth the
> complexity?
> 
> It seems that the platforms that will enable NETIF_F_FRAGLIST will
> be mainly forwarding planes.

There are people using their devices as file servers and routers at the 
same time. The heuristic helps for those cases.

> If keeping, this refinement can probably be a separate follow-on patch in
> the series too:
> 
> - refactor existing udp code
> - add segmentation support to handle such packets on tx
> - add coalescing support that starts building such packets on rx
> - refine coalescing choice
I don't really understand what you're suggesting. With my patch, the GRO 
code handles coalescing of packets. Segmentation on output is also 
supported. The next version of my patch will fix the cases that were too 
similar to the UDP code, so I guess refactoring to share code doesn't 
really make sense there.
Am I missing something?

Thanks,

- Felix
Willem de Bruijn April 24, 2024, 2:30 p.m. UTC | #14
Felix Fietkau wrote:
> On 24.04.24 03:24, Willem de Bruijn wrote:
> > Felix Fietkau wrote:
> >> On 23.04.24 16:34, Paolo Abeni wrote:
> >> > On Tue, 2024-04-23 at 14:23 +0200, Felix Fietkau wrote:
> >> >> On 23.04.24 14:11, Eric Dumazet wrote:
> >> >> > On Tue, Apr 23, 2024 at 1:55 PM Felix Fietkau <nbd@nbd.name> wrote:
> >> >> > > 
> >> >> > > In the world of consumer-grade WiFi devices, there are a lot of chipsets
> >> >> > > with limited or nonexistent SG support, and very limited checksum
> >> >> > > offload capabilities on Ethernet. The WiFi side of these devices is
> >> >> > > often even worse. I think fraglist GRO is a decent fallback for the
> >> >> > > inevitable corner cases.
> >> >> > 
> >> >> > What about netfilter and NAT ? Are they okay with NETIF_F_GRO_FRAGLIST already ?
> >> >> > 
> >> >> > Many of these devices are probably using NAT.
> >> >> 
> >> >> In my tests, nftables NAT works just fine, both with and without 
> >> >> flowtable offloading. I didn't see anything in netfilter that would have 
> >> >> a problem with this.
> >> > 
> >> > I see you explicitly handle NAT changes in __tcpv4_gso_segment_csum(),
> >> > like the current UDP code.
> >> > 
> >> > The TCP header has many other fields that could be updated affecting
> >> > the TCP csum.
> >> > Handling every possible mutation looks cumbersome and will likely
> >> > reduce the performance benefits.
> >> > 
> >> > What is your plan WRT other TCP header fields update?
> >> 
> >> I think that should be easy enough to handle. My patch already only 
> >> combines packets where tcp_flag_word(th) is identical. So when 
> >> segmenting, I could handle all flags changes with a single 
> >> inet_proto_csum_replace4 call.
> >> 
> >> > Strictly WRT the patch, I guess it deserves to be split in series,
> >> > moving UDP helpers in common code and possibly factoring out more
> >> > helpers with separate patches.
> >> Will do.
> > 
> > A significant chunk of the complexity is in the
> > tcp[46]_check_fraglist_gro sk match. Is this heuristic worth the
> > complexity?
> > 
> > It seems that the platforms that will enable NETIF_F_FRAGLIST will
> > be mainly forwarding planes.
> 
> There are people using their devices as file servers and routers at the 
> same time. The heuristic helps for those cases.

Ok.

> > If keeping, this refinement can probably be a separate follow-on patch in
> > the series too:
> > 
> > - refactor existing udp code
> > - add segmentation support to handle such packets on tx
> > - add coalescing support that starts building such packets on rx
> > - refine coalescing choice
> I don't really understand what you're suggesting. With my patch, the GRO 
> code handles coalescing of packets. Segmentation on output is also 
> supported. The next version of my patch will fix the cases that were too 
> similar to the UDP code, so I guess refactoring to share code doesn't 
> really make sense there.
> Am I missing something?

I mean when breaking this up into a series. First have the
refactoring patch, which should be easy to review since it is a noop.
Then add the segmentation
code, which needs to exist before packets may arrive that depend on
it. Then add the code that produces such packets. To break up into
manageable chunks.
Felix Fietkau April 24, 2024, 4:26 p.m. UTC | #15
On 24.04.24 16:30, Willem de Bruijn wrote:
>> > If keeping, this refinement can probably be a separate follow-on patch in
>> > the series too:
>> > 
>> > - refactor existing udp code
>> > - add segmentation support to handle such packets on tx
>> > - add coalescing support that starts building such packets on rx
>> > - refine coalescing choice
>> I don't really understand what you're suggesting. With my patch, the GRO 
>> code handles coalescing of packets. Segmentation on output is also 
>> supported. The next version of my patch will fix the cases that were too 
>> similar to the UDP code, so I guess refactoring to share code doesn't 
>> really make sense there.
>> Am I missing something?
> 
> I mean when breaking this up into a series. First have the
> refactoring patch, which should be easy to review since it is a noop.
> Then add the segmentation
> code, which needs to exist before packets may arrive that depend on
> it. Then add the code that produces such packets. To break up into
> manageable chunks.

Right, that makes sense.

Thanks,

- Felix

Patch

diff --git a/include/net/gro.h b/include/net/gro.h
index 50f1e403dbbb..ca8e4b3de044 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -429,6 +429,7 @@  static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb,
 }
 
 int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb);
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb);
 
 /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
 static inline void gro_normal_list(struct napi_struct *napi)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b935e1ae4caf..875cda53a7c9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2194,7 +2194,8 @@  void tcp_v4_destroy_sock(struct sock *sk);
 
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 				netdev_features_t features);
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				bool fraglist);
 INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
 INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
diff --git a/net/core/gro.c b/net/core/gro.c
index 2459ab697f7f..268c6c826d09 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -231,6 +231,33 @@  int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
 	return 0;
 }
 
+int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
+{
+	if (unlikely(p->len + skb->len >= 65536))
+		return -E2BIG;
+
+	if (NAPI_GRO_CB(p)->last == p)
+		skb_shinfo(p)->frag_list = skb;
+	else
+		NAPI_GRO_CB(p)->last->next = skb;
+
+	skb_pull(skb, skb_gro_offset(skb));
+
+	NAPI_GRO_CB(p)->last = skb;
+	NAPI_GRO_CB(p)->count++;
+	p->data_len += skb->len;
+
+	/* sk ownership - if any - completely transferred to the aggregated packet */
+	skb->destructor = NULL;
+	skb->sk = NULL;
+	p->truesize += skb->truesize;
+	p->len += skb->len;
+
+	NAPI_GRO_CB(skb)->same_flow = 1;
+
+	return 0;
+}
+
 
 static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
 {
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index fab0973f995b..4f6e40a30b0c 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -28,6 +28,74 @@  static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
 	}
 }
 
+static void __tcpv4_gso_segment_csum(struct sk_buff *seg,
+				     __be32 *oldip, __be32 *newip,
+				     __be16 *oldport, __be16 *newport)
+{
+	struct tcphdr *th;
+	struct iphdr *iph;
+
+	if (*oldip == *newip && *oldport == *newport)
+		return;
+
+	th = tcp_hdr(seg);
+	iph = ip_hdr(seg);
+
+	if (th->check) {
+		inet_proto_csum_replace4(&th->check, seg, *oldip, *newip,
+					 true);
+		inet_proto_csum_replace2(&th->check, seg, *oldport, *newport,
+					 false);
+		if (!th->check)
+			th->check = CSUM_MANGLED_0;
+	}
+	*oldport = *newport;
+
+	csum_replace4(&iph->check, *oldip, *newip);
+	*oldip = *newip;
+}
+
+static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs)
+{
+	struct sk_buff *seg;
+	struct tcphdr *th, *th2;
+	struct iphdr *iph, *iph2;
+
+	seg = segs;
+	th = tcp_hdr(seg);
+	iph = ip_hdr(seg);
+
+	if ((tcp_hdr(seg)->dest == tcp_hdr(seg->next)->dest) &&
+	    (tcp_hdr(seg)->source == tcp_hdr(seg->next)->source) &&
+	    (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) &&
+	    (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr))
+		return segs;
+
+	while ((seg = seg->next)) {
+		th2 = tcp_hdr(seg);
+		iph2 = ip_hdr(seg);
+
+		__tcpv4_gso_segment_csum(seg,
+					 &iph2->saddr, &iph->saddr,
+					 &th2->source, &th->source);
+		__tcpv4_gso_segment_csum(seg,
+					 &iph2->daddr, &iph->daddr,
+					 &th2->dest, &th->dest);
+	}
+
+	return segs;
+}
+
+static struct sk_buff *__tcp_gso_segment_list(struct sk_buff *skb,
+					      netdev_features_t features)
+{
+	skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
+	if (IS_ERR(skb))
+		return skb;
+
+	return __tcpv4_gso_segment_list_csum(skb);
+}
+
 static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
 					netdev_features_t features)
 {
@@ -37,6 +105,9 @@  static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
 	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
 		return ERR_PTR(-EINVAL);
 
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+		return __tcp_gso_segment_list(skb, features);
+
 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 		const struct iphdr *iph = ip_hdr(skb);
 		struct tcphdr *th = tcp_hdr(skb);
@@ -178,7 +249,8 @@  struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 	return segs;
 }
 
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				bool fraglist)
 {
 	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
@@ -215,6 +287,7 @@  struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 	len = skb_gro_len(skb);
 	flags = tcp_flag_word(th);
 
+	NAPI_GRO_CB(skb)->is_flist = fraglist;
 	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
@@ -234,6 +307,7 @@  struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 found:
 	/* Include the IP ID check below from the inner most IP hdr */
 	flush = NAPI_GRO_CB(p)->flush;
+	flush |= fraglist != NAPI_GRO_CB(p)->is_flist;
 	flush |= (__force int)(flags & TCP_FLAG_CWR);
 	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
 		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
@@ -267,6 +341,19 @@  struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
 	flush |= skb_cmp_decrypted(p, skb);
 
+	if (fraglist) {
+		flush |= (__force int)(flags ^ tcp_flag_word(th2));
+		flush |= skb->ip_summed != p->ip_summed;
+		flush |= skb->csum_level != p->csum_level;
+		flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
+		flush |= NAPI_GRO_CB(p)->count >= 64;
+
+		if (flush || skb_gro_receive_list(p, skb))
+			mss = 1;
+
+		goto out_check_final;
+	}
+
 	if (flush || skb_gro_receive(p, skb)) {
 		mss = 1;
 		goto out_check_final;
@@ -314,6 +401,49 @@  void tcp_gro_complete(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(tcp_gro_complete);
 
+static bool tcp4_check_fraglist_gro(struct sk_buff *skb)
+{
+	const struct iphdr *iph = skb_gro_network_header(skb);
+	struct net *net = dev_net(skb->dev);
+	unsigned int off, hlen, thlen;
+	struct tcphdr *th;
+	struct sock *sk;
+	int iif, sdif;
+
+	if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
+		return false;
+
+	inet_get_iif_sdif(skb, &iif, &sdif);
+
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*th);
+	th = skb_gro_header(skb, hlen, off);
+	if (unlikely(!th))
+		return false;
+
+	thlen = th->doff * 4;
+	if (thlen < sizeof(*th))
+		return false;
+
+	hlen = off + thlen;
+	if (!skb_gro_may_pull(skb, hlen)) {
+		th = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!th))
+			return false;
+	}
+
+	sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+				       iph->saddr, th->source,
+				       iph->daddr, ntohs(th->dest),
+				       iif, sdif);
+	if (!sk)
+		return true;
+
+	sock_put(sk);
+
+	return false;
+}
+
 INDIRECT_CALLABLE_SCOPE
 struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
@@ -325,7 +455,7 @@  struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 		return NULL;
 	}
 
-	return tcp_gro_receive(head, skb);
+	return tcp_gro_receive(head, skb, tcp4_check_fraglist_gro(skb));
 }
 
 INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
@@ -333,6 +463,15 @@  INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
 
+	if (NAPI_GRO_CB(skb)->is_flist) {
+		skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
+		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+		__skb_incr_checksum_unnecessary(skb);
+
+		return 0;
+	}
+
 	th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
 				  iph->daddr, 0);
 
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 3498dd1d0694..a3cd546a1aea 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -433,33 +433,6 @@  static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	return segs;
 }
 
-static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
-{
-	if (unlikely(p->len + skb->len >= 65536))
-		return -E2BIG;
-
-	if (NAPI_GRO_CB(p)->last == p)
-		skb_shinfo(p)->frag_list = skb;
-	else
-		NAPI_GRO_CB(p)->last->next = skb;
-
-	skb_pull(skb, skb_gro_offset(skb));
-
-	NAPI_GRO_CB(p)->last = skb;
-	NAPI_GRO_CB(p)->count++;
-	p->data_len += skb->len;
-
-	/* sk ownership - if any - completely transferred to the aggregated packet */
-	skb->destructor = NULL;
-	skb->sk = NULL;
-	p->truesize += skb->truesize;
-	p->len += skb->len;
-
-	NAPI_GRO_CB(skb)->same_flow = 1;
-
-	return 0;
-}
-
 
 #define UDP_GRO_CNT_MAX 64
 static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 4b07d1e6c952..7c82532d8aa7 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -7,12 +7,56 @@ 
  */
 #include <linux/indirect_call_wrapper.h>
 #include <linux/skbuff.h>
+#include <net/inet6_hashtables.h>
 #include <net/gro.h>
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/ip6_checksum.h>
 #include "ip6_offload.h"
 
+static bool tcp6_check_fraglist_gro(struct sk_buff *skb)
+{
+	const struct ipv6hdr *hdr = skb_gro_network_header(skb);
+	struct net *net = dev_net(skb->dev);
+	unsigned int off, hlen, thlen;
+	struct tcphdr *th;
+	struct sock *sk;
+	int iif, sdif;
+
+	if (!(skb->dev->features & NETIF_F_GRO_FRAGLIST))
+		return false;
+
+	inet6_get_iif_sdif(skb, &iif, &sdif);
+
+	off = skb_gro_offset(skb);
+	hlen = off + sizeof(*th);
+	th = skb_gro_header(skb, hlen, off);
+	if (unlikely(!th))
+		return false;
+
+	thlen = th->doff * 4;
+	if (thlen < sizeof(*th))
+		return false;
+
+	hlen = off + thlen;
+	if (!skb_gro_may_pull(skb, hlen)) {
+		th = skb_gro_header_slow(skb, hlen, off);
+		if (unlikely(!th))
+			return false;
+	}
+
+	sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
+					&hdr->saddr, th->source,
+					&hdr->daddr, ntohs(th->dest),
+					iif, sdif);
+	if (!sk)
+		return true;
+
+	sock_put(sk);
+
+	return false;
+}
+
 INDIRECT_CALLABLE_SCOPE
 struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 {
@@ -24,7 +68,7 @@  struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 		return NULL;
 	}
 
-	return tcp_gro_receive(head, skb);
+	return tcp_gro_receive(head, skb, tcp6_check_fraglist_gro(skb));
 }
 
 INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
@@ -32,6 +76,15 @@  INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
 
+	if (NAPI_GRO_CB(skb)->is_flist) {
+		skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
+		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+		__skb_incr_checksum_unnecessary(skb);
+
+		return 0;
+	}
+
 	th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
 				  &iph->daddr, 0);
 	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
@@ -51,6 +104,9 @@  static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
 	if (!pskb_may_pull(skb, sizeof(*th)))
 		return ERR_PTR(-EINVAL);
 
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)
+		return skb_segment_list(skb, features, skb_mac_header_len(skb));
+
 	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
 		const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 		struct tcphdr *th = tcp_hdr(skb);