diff mbox series

[v3,net-next,3/6] net: add code for TCP fraglist GRO

Message ID 20240426065143.4667-4-nbd@nbd.name (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Add TCP fraglist GRO support | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 932 this patch: 932
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 938 this patch: 938
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 943 this patch: 943
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 49 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-04-26--09-00 (tests: 993)

Commit Message

Felix Fietkau April 26, 2024, 6:51 a.m. UTC
This implements fraglist GRO similar to how it's handled in UDP, however
no functional changes are added yet. The next change adds a heuristic for
using fraglist GRO instead of regular GRO.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 net/ipv4/tcp_offload.c   | 22 ++++++++++++++++++++++
 net/ipv6/tcpv6_offload.c |  9 +++++++++
 2 files changed, 31 insertions(+)

Comments

Eric Dumazet April 26, 2024, 7:47 a.m. UTC | #1
On Fri, Apr 26, 2024 at 8:51 AM Felix Fietkau <nbd@nbd.name> wrote:
>
> This implements fraglist GRO similar to how it's handled in UDP, however
> no functional changes are added yet. The next change adds a heuristic for
> using fraglist GRO instead of regular GRO.
>
> Signed-off-by: Felix Fietkau <nbd@nbd.name>
> ---
>  net/ipv4/tcp_offload.c   | 22 ++++++++++++++++++++++
>  net/ipv6/tcpv6_offload.c |  9 +++++++++
>  2 files changed, 31 insertions(+)
>
> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
> index c493e95e09a5..ffd6b7a4163a 100644
> --- a/net/ipv4/tcp_offload.c
> +++ b/net/ipv4/tcp_offload.c
> @@ -332,6 +332,19 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
>         flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
>         flush |= skb_cmp_decrypted(p, skb);
>
> +       if (NAPI_GRO_CB(p)->is_flist) {


Please add unlikely() for all NAPI_GRO_CB(p)->is_flist checks added in
this patch.

> +               flush |= (__force int)(flags ^ tcp_flag_word(th2));
> +               flush |= skb->ip_summed != p->ip_summed;
> +               flush |= skb->csum_level != p->csum_level;
> +               flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
> +               flush |= NAPI_GRO_CB(p)->count >= 64;
> +
> +               if (flush || skb_gro_receive_list(p, skb))
> +                       mss = 1;
> +
> +               goto out_check_final;
> +       }
> +
>         if (flush || skb_gro_receive(p, skb)) {
>                 mss = 1;
>                 goto out_check_final;
> @@ -398,6 +411,15 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
>         const struct iphdr *iph = ip_hdr(skb);
>         struct tcphdr *th = tcp_hdr(skb);
>
> +       if (NAPI_GRO_CB(skb)->is_flist) {
> +               skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
> +               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> +
> +               __skb_incr_checksum_unnecessary(skb);
> +
> +               return 0;
> +       }
> +
>         th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
>                                   iph->daddr, 0);
>
> diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
> index b3b8e1f6b92a..c97d55cf036f 100644
> --- a/net/ipv6/tcpv6_offload.c
> +++ b/net/ipv6/tcpv6_offload.c
> @@ -32,6 +32,15 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
>         const struct ipv6hdr *iph = ipv6_hdr(skb);
>         struct tcphdr *th = tcp_hdr(skb);
>
> +       if (NAPI_GRO_CB(skb)->is_flist) {
> +               skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
> +               skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> +
> +               __skb_incr_checksum_unnecessary(skb);
> +
> +               return 0;
> +       }
> +
>         th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
>                                   &iph->daddr, 0);
>         skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
> --
> 2.44.0
>
Paolo Abeni April 26, 2024, 8:21 a.m. UTC | #2
On Fri, 2024-04-26 at 08:51 +0200, Felix Fietkau wrote:
> This implements fraglist GRO similar to how it's handled in UDP, however
> no functional changes are added yet. The next change adds a heuristic for
> using fraglist GRO instead of regular GRO.
> 
> Signed-off-by: Felix Fietkau <nbd@nbd.name>
> ---
>  net/ipv4/tcp_offload.c   | 22 ++++++++++++++++++++++
>  net/ipv6/tcpv6_offload.c |  9 +++++++++
>  2 files changed, 31 insertions(+)
> 
> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
> index c493e95e09a5..ffd6b7a4163a 100644
> --- a/net/ipv4/tcp_offload.c
> +++ b/net/ipv4/tcp_offload.c
> @@ -332,6 +332,19 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
>  	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
>  	flush |= skb_cmp_decrypted(p, skb);
>  
> +	if (NAPI_GRO_CB(p)->is_flist) {
> +		flush |= (__force int)(flags ^ tcp_flag_word(th2));
> +		flush |= skb->ip_summed != p->ip_summed;
> +		flush |= skb->csum_level != p->csum_level;
> +		flush |= !pskb_may_pull(skb, skb_gro_offset(skb));

I'm sorry, I'm lagging behind. I think the TCP flags handling here is
correct - preserving the original ones should work.

The question I raised WRT the 2 checks above being unnecessary/redundant:

		flush |= (__force int)(flags ^ tcp_flag_word(th2));
		flush |= !pskb_may_pull(skb, skb_gro_offset(skb));

still stands, I think.

Thanks,

Paolo
Felix Fietkau April 26, 2024, 9:41 a.m. UTC | #3
On 26.04.24 10:21, Paolo Abeni wrote:
> On Fri, 2024-04-26 at 08:51 +0200, Felix Fietkau wrote:
>> This implements fraglist GRO similar to how it's handled in UDP, however
>> no functional changes are added yet. The next change adds a heuristic for
>> using fraglist GRO instead of regular GRO.
>> 
>> Signed-off-by: Felix Fietkau <nbd@nbd.name>
>> ---
>>  net/ipv4/tcp_offload.c   | 22 ++++++++++++++++++++++
>>  net/ipv6/tcpv6_offload.c |  9 +++++++++
>>  2 files changed, 31 insertions(+)
>> 
>> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
>> index c493e95e09a5..ffd6b7a4163a 100644
>> --- a/net/ipv4/tcp_offload.c
>> +++ b/net/ipv4/tcp_offload.c
>> @@ -332,6 +332,19 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
>>  	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
>>  	flush |= skb_cmp_decrypted(p, skb);
>>  
>> +	if (NAPI_GRO_CB(p)->is_flist) {
>> +		flush |= (__force int)(flags ^ tcp_flag_word(th2));
>> +		flush |= skb->ip_summed != p->ip_summed;
>> +		flush |= skb->csum_level != p->csum_level;
>> +		flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
> 
> I'm sorry, I'm lagging behind. I think the TCP flags handling here is
> correct - preserving the original ones should work.
> 
> The question I raised WRT the 2 checks above being unnecessary/redundant:
> 
> 		flush |= (__force int)(flags ^ tcp_flag_word(th2));

This one is not redundant, because the earlier flags check includes this 
part: & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH))

> 		flush |= !pskb_may_pull(skb, skb_gro_offset(skb));

This one looks like a redundant leftover, I will remove it in the next 
version.

Thanks,

- Felix
Felix Fietkau April 26, 2024, 9:44 a.m. UTC | #4
On 26.04.24 09:47, Eric Dumazet wrote:
> On Fri, Apr 26, 2024 at 8:51 AM Felix Fietkau <nbd@nbd.name> wrote:
>>
>> This implements fraglist GRO similar to how it's handled in UDP, however
>> no functional changes are added yet. The next change adds a heuristic for
>> using fraglist GRO instead of regular GRO.
>>
>> Signed-off-by: Felix Fietkau <nbd@nbd.name>
>> ---
>>  net/ipv4/tcp_offload.c   | 22 ++++++++++++++++++++++
>>  net/ipv6/tcpv6_offload.c |  9 +++++++++
>>  2 files changed, 31 insertions(+)
>>
>> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
>> index c493e95e09a5..ffd6b7a4163a 100644
>> --- a/net/ipv4/tcp_offload.c
>> +++ b/net/ipv4/tcp_offload.c
>> @@ -332,6 +332,19 @@ struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
>>         flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
>>         flush |= skb_cmp_decrypted(p, skb);
>>
>> +       if (NAPI_GRO_CB(p)->is_flist) {
> 
> 
> Please add unlikely() for all NAPI_GRO_CB(p)->is_flist checks added in
> this patch.

Will do, thanks.

- Felix
diff mbox series

Patch

diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index c493e95e09a5..ffd6b7a4163a 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -332,6 +332,19 @@  struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
 	flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
 	flush |= skb_cmp_decrypted(p, skb);
 
+	if (NAPI_GRO_CB(p)->is_flist) {
+		flush |= (__force int)(flags ^ tcp_flag_word(th2));
+		flush |= skb->ip_summed != p->ip_summed;
+		flush |= skb->csum_level != p->csum_level;
+		flush |= !pskb_may_pull(skb, skb_gro_offset(skb));
+		flush |= NAPI_GRO_CB(p)->count >= 64;
+
+		if (flush || skb_gro_receive_list(p, skb))
+			mss = 1;
+
+		goto out_check_final;
+	}
+
 	if (flush || skb_gro_receive(p, skb)) {
 		mss = 1;
 		goto out_check_final;
@@ -398,6 +411,15 @@  INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
 
+	if (NAPI_GRO_CB(skb)->is_flist) {
+		skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV4;
+		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+		__skb_incr_checksum_unnecessary(skb);
+
+		return 0;
+	}
+
 	th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
 				  iph->daddr, 0);
 
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index b3b8e1f6b92a..c97d55cf036f 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -32,6 +32,15 @@  INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
 	struct tcphdr *th = tcp_hdr(skb);
 
+	if (NAPI_GRO_CB(skb)->is_flist) {
+		skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST | SKB_GSO_TCPV6;
+		skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
+
+		__skb_incr_checksum_unnecessary(skb);
+
+		return 0;
+	}
+
 	th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr,
 				  &iph->daddr, 0);
 	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;