
[net-next,3/4] net: add code for TCP fraglist GRO

Message ID: 20240424180458.56211-4-nbd@nbd.name (mailing list archive)
State: Superseded
Delegated to: Netdev Maintainers
Series: Add TCP fraglist GRO support

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1830 this patch: 1830
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 957 this patch: 957
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1866 this patch: 1866
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 97 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 2 this patch: 2
netdev/source_inline success Was 0 now: 0

Commit Message

Felix Fietkau April 24, 2024, 6:04 p.m. UTC
This implements fraglist GRO similarly to how it is handled in UDP;
however, no functional changes are added yet. The next change adds a
heuristic for using fraglist GRO instead of regular GRO.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 include/net/tcp.h        |  3 ++-
 net/ipv4/tcp_offload.c   | 29 +++++++++++++++++++++++++++--
 net/ipv6/tcpv6_offload.c | 11 ++++++++++-
 3 files changed, 39 insertions(+), 4 deletions(-)
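
For readers unfamiliar with the mechanism, a rough sketch of how the two
merge paths differ (illustrative only, not code from this patch): regular
GRO folds the new segment's payload into the head skb, while fraglist GRO
keeps each segment intact and chains it on the head's frag_list, so the
original frames can later be reconstructed by skb_segment_list() at GSO
time.

	/* Sketch: choosing the merge strategy for a matching flow */
	if (NAPI_GRO_CB(skb)->is_flist)
		ret = skb_gro_receive_list(p, skb);	/* chain the whole segment */
	else
		ret = skb_gro_receive(p, skb);		/* merge payload into head */

In this patch the new "fraglist" parameter is always passed as false by
tcp4_gro_receive()/tcp6_gro_receive(), so behaviour is unchanged until the
follow-up patch adds the selection heuristic.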

Comments

Paolo Abeni April 26, 2024, 8:14 a.m. UTC | #1
On Wed, 2024-04-24 at 20:04 +0200, Felix Fietkau wrote:
> This implements fraglist GRO similarly to how it is handled in UDP;
> however, no functional changes are added yet. The next change adds a
> heuristic for using fraglist GRO instead of regular GRO.
> 
> Signed-off-by: Felix Fietkau <nbd@nbd.name>
> ---
>  include/net/tcp.h        |  3 ++-
>  net/ipv4/tcp_offload.c   | 29 +++++++++++++++++++++++++++--
>  net/ipv6/tcpv6_offload.c | 11 ++++++++++-
>  3 files changed, 39 insertions(+), 4 deletions(-)
> 
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index b935e1ae4caf..875cda53a7c9 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -2194,7 +2194,8 @@ void tcp_v4_destroy_sock(struct sock *sk);
>  
>  struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
>  				netdev_features_t features);
> -struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
> +struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
> +				bool fraglist);
>  INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
>  INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
>  INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
> index 06dbb2e2b2f3..6294e7a5c099 100644
> --- a/net/ipv4/tcp_offload.c
> +++ b/net/ipv4/tcp_offload.c
> @@ -252,7 +252,8 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
>  	return segs;
>  }
>  
> -struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
> +struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
> +				bool fraglist)
>  {
>  	struct sk_buff *pp = NULL;
>  	struct sk_buff *p;
> @@ -289,6 +290,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
>  	len = skb_gro_len(skb);
>  	flags = tcp_flag_word(th);
>  
> +	NAPI_GRO_CB(skb)->is_flist = fraglist;
>  	list_for_each_entry(p, head, list) {
>  		if (!NAPI_GRO_CB(p)->same_flow)
>  			continue;
> @@ -308,6 +310,7 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
>  found:
>  	/* Include the IP ID check below from the inner most IP hdr */
>  	flush = NAPI_GRO_CB(p)->flush;
> +	flush |= fraglist != NAPI_GRO_CB(p)->is_flist;
>  	flush |= (__force int)(flags & TCP_FLAG_CWR);
>  	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
>  		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
> @@ -341,6 +344,19 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
>  	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
>  	flush |= skb_cmp_decrypted(p, skb);
>  
> +	if (fraglist) {
> +		flush |= (__force int)(flags ^ tcp_flag_word(th2));

Don't we have this check already a few lines above?
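
For context, the two checks differ in their mask: the earlier one in the
diff above ignores CWR/FIN/PSH differences, while the new fraglist check
flushes on any flag mismatch.

	/* earlier, for all GRO: ignore CWR/FIN/PSH differences */
	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));

	/* fraglist path: flush on any flag difference, including FIN/PSH */
	flush |= (__force int)(flags ^ tcp_flag_word(th2));

So the new check is stricter rather than strictly redundant; whether that
extra strictness is intended here is what the question is about.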


> +		flush |= skb->ip_summed != p->ip_summed;
> +		flush |= skb->csum_level != p->csum_level;
> +		flush |= !pskb_may_pull(skb, skb_gro_offset(skb));

Why do we need this check? The earlier skb_gro_may_pull() should ensure
that, right?
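
For reference, a rough sketch of the header checks that already run near
the top of tcp_gro_receive() (paraphrased from net/ipv4/tcp_offload.c at
the time; exact code may differ slightly):

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*th);
	th = skb_gro_header(skb, hlen, off);	/* may fall back to the slow path */
	if (unlikely(!th))
		goto out;

	thlen = th->doff * 4;
	hlen = off + thlen;
	if (!skb_gro_may_pull(skb, hlen))
		th = skb_gro_header_slow(skb, hlen, off);

These make the TCP header readable for GRO purposes; the open question is
whether the extra pskb_may_pull() in the fraglist branch guards anything
beyond that (e.g. forcing the headers into the linear area of skb->head)
or is simply redundant.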

> +		flush |= NAPI_GRO_CB(p)->count >= 64;
> +
> +		if (flush || skb_gro_receive_list(p, skb))
> +			mss = 1;
> +
> +		goto out_check_final;

TCP flags processing needs some care. You need to propagate the current
packet's flags to the old one, and update the older packet's csum
accordingly.
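
For comparison, the existing non-fraglist path propagates FIN/PSH into the
head packet after a successful merge:

	tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);

A minimal sketch of what the fraglist path might do instead, assuming the
head's TCP checksum must stay valid because the chained segments are split
back out later (illustrative only, not the eventual fix):

	__be32 old_word = tcp_flag_word(th2);
	__be32 new_word = old_word | (flags & (TCP_FLAG_FIN | TCP_FLAG_PSH));

	if (old_word != new_word) {
		/* adjust the head packet's TCP checksum for the changed word */
		inet_proto_csum_replace4(&th2->check, p, old_word, new_word, false);
		tcp_flag_word(th2) = new_word;
	}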

Cheers,

Paolo

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b935e1ae4caf..875cda53a7c9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2194,7 +2194,8 @@  void tcp_v4_destroy_sock(struct sock *sk);
 
 struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 				netdev_features_t features);
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb);
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				bool fraglist);
 INDIRECT_CALLABLE_DECLARE(int tcp4_gro_complete(struct sk_buff *skb, int thoff));
 INDIRECT_CALLABLE_DECLARE(struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb));
 INDIRECT_CALLABLE_DECLARE(int tcp6_gro_complete(struct sk_buff *skb, int thoff));
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 06dbb2e2b2f3..6294e7a5c099 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -252,7 +252,8 @@  struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 	return segs;
 }
 
-struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb,
+				bool fraglist)
 {
 	struct sk_buff *pp = NULL;
 	struct sk_buff *p;
@@ -289,6 +290,7 @@  struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 	len = skb_gro_len(skb);
 	flags = tcp_flag_word(th);
 
+	NAPI_GRO_CB(skb)->is_flist = fraglist;
 	list_for_each_entry(p, head, list) {
 		if (!NAPI_GRO_CB(p)->same_flow)
 			continue;
@@ -308,6 +310,7 @@  struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 found:
 	/* Include the IP ID check below from the inner most IP hdr */
 	flush = NAPI_GRO_CB(p)->flush;
+	flush |= fraglist != NAPI_GRO_CB(p)->is_flist;
 	flush |= (__force int)(flags & TCP_FLAG_CWR);
 	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
 		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
@@ -341,6 +344,19 @@  struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
 	flush |= skb_cmp_decrypted(p, skb);
 
+	if (fraglist) {
+		flush |= (__force int)(flags ^ tcp_flag_word(th2));
+		flush |= skb->ip_summed != p->ip_summed;
+		flush |= skb->csum_level != p->csum_level;
+		flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
+		flush |= NAPI_GRO_CB(p)->count >= 64;
+
+		if (flush || skb_gro_receive_list(p, skb))
+			mss = 1;
+
+		goto out_check_final;
+	}
+
 	if (flush || skb_gro_receive(p, skb)) {
 		mss = 1;
 		goto out_check_final;
@@ -399,7 +415,7 @@  struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
 		return NULL;
 	}
 
-	return tcp_gro_receive(head, skb);
+	return tcp_gro_receive(head, skb, false);
 }
 
 INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
@@ -407,6 +423,15 @@  INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
 
+	if (NAPI_GRO_CB(skb)->is_flist) {
+		skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
+		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+		__skb_incr_checksum_unnecessary(skb);
+
+		return 0;
+	}
+
 	th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
 				  iph->daddr, 0);
 
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 12fe79cb2c10..239588557dc4 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -24,7 +24,7 @@  struct sk_buff *tcp6_gro_receive(struct list_head *head, struct sk_buff *skb)
 		return NULL;
 	}
 
-	return tcp_gro_receive(head, skb);
+	return tcp_gro_receive(head, skb, false);
 }
 
 INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
@@ -32,6 +32,15 @@  INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
 
+	if (NAPI_GRO_CB(skb)->is_flist) {
+		skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
+		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+		__skb_incr_checksum_unnecessary(skb);
+
+		return 0;
+	}
+
 	th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
 				  &iph->daddr, 0);
 	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;