Message ID | 20240424180458.56211-3-nbd@nbd.name (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Add TCP fraglist GRO support | expand |
Felix Fietkau wrote: > Preparation for adding TCP fraglist GRO support. It expects packets to be > combined in a similar way as UDP fraglist GSO packets. > One difference is the fact that this code assumes that the TCP flags of > all packets have the same value. This allows simple handling of flags > mutations. Can you clarify this some more? We expect potentially different flags on first and last packet in a TSO train. With fraglist, the segments keep their original flags, as the headers are only pulled. When do segment flags need to be replaced with those of the first segment? > For IPv4 packets, NAT is handled in the same way as UDP > fraglist GSO. > > Signed-off-by: Felix Fietkau <nbd@nbd.name> > --- > net/ipv4/tcp_offload.c | 74 ++++++++++++++++++++++++++++++++++++++++ > net/ipv6/tcpv6_offload.c | 37 ++++++++++++++++++++ > 2 files changed, 111 insertions(+) > > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c > index fab0973f995b..06dbb2e2b2f3 100644 > --- a/net/ipv4/tcp_offload.c > +++ b/net/ipv4/tcp_offload.c > @@ -28,6 +28,77 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, > } > } > > +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, > + __be32 *oldip, __be32 *newip, > + __be16 *oldport, __be16 *newport) > +{ > + struct tcphdr *th; > + struct iphdr *iph; > + > + if (*oldip == *newip && *oldport == *newport) > + return; > + > + th = tcp_hdr(seg); > + iph = ip_hdr(seg); > + > + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true); > + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false); > + *oldport = *newport; > + > + csum_replace4(&iph->check, *oldip, *newip); > + *oldip = *newip; > +} > + > +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) > +{ > + struct sk_buff *seg; > + struct tcphdr *th, *th2; > + struct iphdr *iph, *iph2; > + __be32 flags, flags2; > + > + seg = segs; > + th = tcp_hdr(seg); > + iph = ip_hdr(seg); > + flags = tcp_flag_word(th); > + flags2 
= tcp_flag_word(tcp_hdr(seg->next)); > + > + if ((tcp_hdr(seg)->dest == tcp_hdr(seg->next)->dest) && > + (tcp_hdr(seg)->source == tcp_hdr(seg->next)->source) && > + (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) && > + (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr) && > + (flags == flags2)) > + return segs; > + > + while ((seg = seg->next)) { > + th2 = tcp_hdr(seg); > + iph2 = ip_hdr(seg); > + > + __tcpv4_gso_segment_csum(seg, > + &iph2->saddr, &iph->saddr, > + &th2->source, &th->source); > + __tcpv4_gso_segment_csum(seg, > + &iph2->daddr, &iph->daddr, > + &th2->dest, &th->dest); > + if (flags == flags2) > + continue; > + > + inet_proto_csum_replace4(&th2->check, seg, flags2, flags, false); > + tcp_flag_word(th2) = flags; > + } > + > + return segs; > +} > + > +static struct sk_buff *__tcp_gso_segment_list(struct sk_buff *skb, > + netdev_features_t features) For consistency and to avoid having the same name in ipv6, add the 4/6 suffix here too. > +{ > + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); > + if (IS_ERR(skb)) > + return skb; > + > + return __tcpv4_gso_segment_list_csum(skb); > +} > + > static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, > netdev_features_t features) > { > @@ -37,6 +108,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, > if (!pskb_may_pull(skb, sizeof(struct tcphdr))) > return ERR_PTR(-EINVAL); > > + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) > + return __tcp_gso_segment_list(skb, features); > + > if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { > const struct iphdr *iph = ip_hdr(skb); > struct tcphdr *th = tcp_hdr(skb); > diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c > index 4b07d1e6c952..12fe79cb2c10 100644 > --- a/net/ipv6/tcpv6_offload.c > +++ b/net/ipv6/tcpv6_offload.c > @@ -40,6 +40,40 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) > return 0; > } > > +static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs) 
> +{ > + struct tcphdr *th, *th2; > + __be32 flags, flags2; > + struct sk_buff *seg; > + > + seg = segs; > + th = tcp_hdr(seg); > + flags = tcp_flag_word(th); > + flags2 = tcp_flag_word(tcp_hdr(seg->next)); > + > + if (flags == flags2) > + return segs; > + > + while ((seg = seg->next)) { > + th2 = tcp_hdr(seg); > + > + inet_proto_csum_replace4(&th2->check, seg, flags2, flags, false); > + tcp_flag_word(th2) = flags; > + } > + > + return segs; > +} > + > +static struct sk_buff *__tcp_gso_segment_list(struct sk_buff *skb, > + netdev_features_t features) > +{ > + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); > + if (IS_ERR(skb)) > + return skb; > + > + return __tcpv6_gso_segment_list_csum(skb); > +} > + > static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, > netdev_features_t features) > { > @@ -51,6 +85,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, > if (!pskb_may_pull(skb, sizeof(*th))) > return ERR_PTR(-EINVAL); > > + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) > + return __tcp_gso_segment_list(skb, features); > + > if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { > const struct ipv6hdr *ipv6h = ipv6_hdr(skb); > struct tcphdr *th = tcp_hdr(skb); > -- > 2.44.0 > x
On 25.04.24 05:03, Willem de Bruijn wrote: > Felix Fietkau wrote: >> Preparation for adding TCP fraglist GRO support. It expects packets to be >> combined in a similar way as UDP fraglist GSO packets. >> One difference is the fact that this code assumes that the TCP flags of >> all packets have the same value. This allows simple handling of flags >> mutations. > > Can you clarify this some more? We expect potentially different flags > on first and last packet in a TSO train. With fraglist, the segments > keep their original flags, as the headers are only pulled. When do > segment flags need to be replaced with those of the first segment? Maybe I just misunderstood a comment that Paolo made earlier regarding TCP header mutations. Will review this again and compare with regular TSO. - Felix
On Thu, 2024-04-25 at 09:51 +0200, Felix Fietkau wrote: > On 25.04.24 05:03, Willem de Bruijn wrote: > > Felix Fietkau wrote: > > > Preparation for adding TCP fraglist GRO support. It expects packets to be > > > combined in a similar way as UDP fraglist GSO packets. > > > One difference is the fact that this code assumes that the TCP flags of > > > all packets have the same value. This allows simple handling of flags > > > mutations. > > > > Can you clarify this some more? We expect potentially different flags > > on first and last packet in a TSO train. With fraglist, the segments > > keep their original flags, as the headers are only pulled. When do > > segment flags need to be replaced with those of the first segment? > > Maybe I just misunderstood a comment that Paolo made earlier regarding > TCP header mutations. Will review this again and compare with regular TSO. I likely was not clear, I'm sorry. Let me try to rephrase. After the GRO stage, and before segmentation, the stack could change other fields inside the TCP header (beyond src/dst port), e.g. nftables can clear the ECN bit or strip all the TCP options. The frag_list segmentation should catch such changes and update the individual segments' checksums accordingly. Note that even IPv6 could snat/dnat a packet! The GRO stage allows aggregating with different flags. Later on, at the segmentation stage, all the individual packets except the last one will retain the same flags as the first segment, except for the PUSH and FIN bits, which will be cleared. The last segment will have those bits preserved. Cheers, Paolo
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index fab0973f995b..06dbb2e2b2f3 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -28,6 +28,77 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, } } +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, + __be32 *oldip, __be32 *newip, + __be16 *oldport, __be16 *newport) +{ + struct tcphdr *th; + struct iphdr *iph; + + if (*oldip == *newip && *oldport == *newport) + return; + + th = tcp_hdr(seg); + iph = ip_hdr(seg); + + inet_proto_csum_replace4(&th->check, seg, *oldip, *newip, true); + inet_proto_csum_replace2(&th->check, seg, *oldport, *newport, false); + *oldport = *newport; + + csum_replace4(&iph->check, *oldip, *newip); + *oldip = *newip; +} + +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) +{ + struct sk_buff *seg; + struct tcphdr *th, *th2; + struct iphdr *iph, *iph2; + __be32 flags, flags2; + + seg = segs; + th = tcp_hdr(seg); + iph = ip_hdr(seg); + flags = tcp_flag_word(th); + flags2 = tcp_flag_word(tcp_hdr(seg->next)); + + if ((tcp_hdr(seg)->dest == tcp_hdr(seg->next)->dest) && + (tcp_hdr(seg)->source == tcp_hdr(seg->next)->source) && + (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) && + (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr) && + (flags == flags2)) + return segs; + + while ((seg = seg->next)) { + th2 = tcp_hdr(seg); + iph2 = ip_hdr(seg); + + __tcpv4_gso_segment_csum(seg, + &iph2->saddr, &iph->saddr, + &th2->source, &th->source); + __tcpv4_gso_segment_csum(seg, + &iph2->daddr, &iph->daddr, + &th2->dest, &th->dest); + if (flags == flags2) + continue; + + inet_proto_csum_replace4(&th2->check, seg, flags2, flags, false); + tcp_flag_word(th2) = flags; + } + + return segs; +} + +static struct sk_buff *__tcp_gso_segment_list(struct sk_buff *skb, + netdev_features_t features) +{ + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); + if (IS_ERR(skb)) + return skb; + + return 
__tcpv4_gso_segment_list_csum(skb); +} + static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -37,6 +108,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) + return __tcp_gso_segment_list(skb, features); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 4b07d1e6c952..12fe79cb2c10 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -40,6 +40,40 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) return 0; } +static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs) +{ + struct tcphdr *th, *th2; + __be32 flags, flags2; + struct sk_buff *seg; + + seg = segs; + th = tcp_hdr(seg); + flags = tcp_flag_word(th); + flags2 = tcp_flag_word(tcp_hdr(seg->next)); + + if (flags == flags2) + return segs; + + while ((seg = seg->next)) { + th2 = tcp_hdr(seg); + + inet_proto_csum_replace4(&th2->check, seg, flags2, flags, false); + tcp_flag_word(th2) = flags; + } + + return segs; +} + +static struct sk_buff *__tcp_gso_segment_list(struct sk_buff *skb, + netdev_features_t features) +{ + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); + if (IS_ERR(skb)) + return skb; + + return __tcpv6_gso_segment_list_csum(skb); +} + static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -51,6 +85,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) + return __tcp_gso_segment_list(skb, features); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb);
Preparation for adding TCP fraglist GRO support. It expects packets to be combined in a similar way as UDP fraglist GSO packets. One difference is the fact that this code assumes that the TCP flags of all packets have the same value. This allows simple handling of flags mutations. For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. Signed-off-by: Felix Fietkau <nbd@nbd.name> --- net/ipv4/tcp_offload.c | 74 ++++++++++++++++++++++++++++++++++++++++ net/ipv6/tcpv6_offload.c | 37 ++++++++++++++++++++ 2 files changed, 111 insertions(+)