Message ID | 20240427182305.24461-3-nbd@nbd.name (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | Add TCP fraglist GRO support | expand |
On Sat, 2024-04-27 at 20:22 +0200, Felix Fietkau wrote: > Preparation for adding TCP fraglist GRO support. It expects packets to be > combined in a similar way as UDP fraglist GSO packets. > For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. > > Signed-off-by: Felix Fietkau <nbd@nbd.name> > --- > net/ipv4/tcp_offload.c | 67 ++++++++++++++++++++++++++++++++++++++++ > net/ipv6/tcpv6_offload.c | 58 ++++++++++++++++++++++++++++++++++ > 2 files changed, 125 insertions(+) > > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c > index fab0973f995b..affd4ed28cfe 100644 > --- a/net/ipv4/tcp_offload.c > +++ b/net/ipv4/tcp_offload.c > @@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, > } > } > > +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, > + __be32 *oldip, __be32 newip, > + __be16 *oldport, __be16 newport) > +{ > + struct tcphdr *th; > + struct iphdr *iph; > + > + if (*oldip == newip && *oldport == newport) > + return; > + > + th = tcp_hdr(seg); > + iph = ip_hdr(seg); > + > + inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true); > + inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); > + *oldport = newport; > + > + csum_replace4(&iph->check, *oldip, newip); > + *oldip = newip; > +} > + > +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) > +{ > + const struct tcphdr *th; > + const struct iphdr *iph; > + struct sk_buff *seg; > + struct tcphdr *th2; > + struct iphdr *iph2; > + > + seg = segs; > + th = tcp_hdr(seg); > + iph = ip_hdr(seg); > + th2 = tcp_hdr(seg->next); > + iph2 = ip_hdr(seg->next); > + > + if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) && > + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) > + return segs; > + > + while ((seg = seg->next)) { > + th2 = tcp_hdr(seg); > + iph2 = ip_hdr(seg); > + > + __tcpv4_gso_segment_csum(seg, > + &iph2->saddr, iph->saddr, > + &th2->source, th->source); > + 
__tcpv4_gso_segment_csum(seg, > + &iph2->daddr, iph->daddr, > + &th2->dest, th->dest); > + } > + > + return segs; > +} AFAICS, all of the above is really similar to the UDP side, except for the transport header zero csum. What about renaming the UDP version of these helpers as 'tcpudpv4_...', moving them into common code, adding an explicit argument for 'zerocsum_allowed' and reusing such a helper for both TCP and UDP? The same applies to the IPv6 variant. Cheers, Paolo
On Sat, 2024-04-27 at 20:22 +0200, Felix Fietkau wrote: > @@ -37,6 +101,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, > if (!pskb_may_pull(skb, sizeof(struct tcphdr))) > return ERR_PTR(-EINVAL); > > + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) > + return __tcp4_gso_segment_list(skb, features); I'm sorry for the incremental feedback, I almost forgot. Possibly the above condition could deserve an unlikely() annotation? It is less relevant than for the GRO case, but at least we would have consistent handling of this flag. Thanks! Paolo
On 30.04.24 12:19, Paolo Abeni wrote: > On Sat, 2024-04-27 at 20:22 +0200, Felix Fietkau wrote: >> Preparation for adding TCP fraglist GRO support. It expects packets to be >> combined in a similar way as UDP fraglist GSO packets. >> For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. >> >> Signed-off-by: Felix Fietkau <nbd@nbd.name> >> --- >> net/ipv4/tcp_offload.c | 67 ++++++++++++++++++++++++++++++++++++++++ >> net/ipv6/tcpv6_offload.c | 58 ++++++++++++++++++++++++++++++++++ >> 2 files changed, 125 insertions(+) >> >> diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c >> index fab0973f995b..affd4ed28cfe 100644 >> --- a/net/ipv4/tcp_offload.c >> +++ b/net/ipv4/tcp_offload.c >> @@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, >> } >> } >> >> +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, >> + __be32 *oldip, __be32 newip, >> + __be16 *oldport, __be16 newport) >> +{ >> + struct tcphdr *th; >> + struct iphdr *iph; >> + >> + if (*oldip == newip && *oldport == newport) >> + return; >> + >> + th = tcp_hdr(seg); >> + iph = ip_hdr(seg); >> + >> + inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true); >> + inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); >> + *oldport = newport; >> + >> + csum_replace4(&iph->check, *oldip, newip); >> + *oldip = newip; >> +} >> + >> +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) >> +{ >> + const struct tcphdr *th; >> + const struct iphdr *iph; >> + struct sk_buff *seg; >> + struct tcphdr *th2; >> + struct iphdr *iph2; >> + >> + seg = segs; >> + th = tcp_hdr(seg); >> + iph = ip_hdr(seg); >> + th2 = tcp_hdr(seg->next); >> + iph2 = ip_hdr(seg->next); >> + >> + if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) && >> + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) >> + return segs; >> + >> + while ((seg = seg->next)) { >> + th2 = tcp_hdr(seg); >> + iph2 = ip_hdr(seg); >> + >> + 
__tcpv4_gso_segment_csum(seg, >> + &iph2->saddr, iph->saddr, >> + &th2->source, th->source); >> + __tcpv4_gso_segment_csum(seg, >> + &iph2->daddr, iph->daddr, >> + &th2->dest, th->dest); >> + } >> + >> + return segs; >> +} > > AFAICS, all the above is really alike the UDP side, except for the > transport header zero csum. > > What about renaming the udp version of this helpers as 'tcpudpv4_...', > move them in common code, add an explicit argument for > 'zerocsum_allowed' and reuse such helper for both tcp and udp? > > The same for the ipv6 variant. Wouldn't that make it more convoluted when taking into account that the checksum field offset is different for tcp vs udp? How would you handle that? - Felix
On Tue, 2024-04-30 at 12:27 +0200, Felix Fietkau wrote: > On 30.04.24 12:19, Paolo Abeni wrote: > > On Sat, 2024-04-27 at 20:22 +0200, Felix Fietkau wrote: > > > Preparation for adding TCP fraglist GRO support. It expects packets to be > > > combined in a similar way as UDP fraglist GSO packets. > > > For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. > > > > > > Signed-off-by: Felix Fietkau <nbd@nbd.name> > > > --- > > > net/ipv4/tcp_offload.c | 67 ++++++++++++++++++++++++++++++++++++++++ > > > net/ipv6/tcpv6_offload.c | 58 ++++++++++++++++++++++++++++++++++ > > > 2 files changed, 125 insertions(+) > > > > > > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c > > > index fab0973f995b..affd4ed28cfe 100644 > > > --- a/net/ipv4/tcp_offload.c > > > +++ b/net/ipv4/tcp_offload.c > > > @@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, > > > } > > > } > > > > > > +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, > > > + __be32 *oldip, __be32 newip, > > > + __be16 *oldport, __be16 newport) > > > +{ > > > + struct tcphdr *th; > > > + struct iphdr *iph; > > > + > > > + if (*oldip == newip && *oldport == newport) > > > + return; > > > + > > > + th = tcp_hdr(seg); > > > + iph = ip_hdr(seg); > > > + > > > + inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true); > > > + inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); > > > + *oldport = newport; > > > + > > > + csum_replace4(&iph->check, *oldip, newip); > > > + *oldip = newip; > > > +} > > > + > > > +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) > > > +{ > > > + const struct tcphdr *th; > > > + const struct iphdr *iph; > > > + struct sk_buff *seg; > > > + struct tcphdr *th2; > > > + struct iphdr *iph2; > > > + > > > + seg = segs; > > > + th = tcp_hdr(seg); > > > + iph = ip_hdr(seg); > > > + th2 = tcp_hdr(seg->next); > > > + iph2 = ip_hdr(seg->next); > > > + > > > + if (!(*(const u32 *)&th->source 
^ *(const u32 *)&th2->source) && > > > + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) > > > + return segs; > > > + > > > + while ((seg = seg->next)) { > > > + th2 = tcp_hdr(seg); > > > + iph2 = ip_hdr(seg); > > > + > > > + __tcpv4_gso_segment_csum(seg, > > > + &iph2->saddr, iph->saddr, > > > + &th2->source, th->source); > > > + __tcpv4_gso_segment_csum(seg, > > > + &iph2->daddr, iph->daddr, > > > + &th2->dest, th->dest); > > > + } > > > + > > > + return segs; > > > +} > > > > AFAICS, all the above is really alike the UDP side, except for the > > transport header zero csum. > > > > What about renaming the udp version of this helpers as 'tcpudpv4_...', > > move them in common code, add an explicit argument for > > 'zerocsum_allowed' and reuse such helper for both tcp and udp? > > > > The same for the ipv6 variant. > > Wouldn't that make it more convoluted when taking into account that the > checksum field offset is different for tcp vs udp? > How would you handle that? Probably making a common helper just for __tcpudpv{4,6}_gso_segment_csum and pass it the target l4 csum pointer as an additional argument. It would not be spectacularly nice, so no strong opinion either way. Cheers, Paolo
On 30.04.24 12:40, Paolo Abeni wrote: > On Tue, 2024-04-30 at 12:27 +0200, Felix Fietkau wrote: >> On 30.04.24 12:19, Paolo Abeni wrote: >> > On Sat, 2024-04-27 at 20:22 +0200, Felix Fietkau wrote: >> > > Preparation for adding TCP fraglist GRO support. It expects packets to be >> > > combined in a similar way as UDP fraglist GSO packets. >> > > For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. >> > > >> > > Signed-off-by: Felix Fietkau <nbd@nbd.name> >> > > --- >> > > net/ipv4/tcp_offload.c | 67 ++++++++++++++++++++++++++++++++++++++++ >> > > net/ipv6/tcpv6_offload.c | 58 ++++++++++++++++++++++++++++++++++ >> > > 2 files changed, 125 insertions(+) >> > > >> > > diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c >> > > index fab0973f995b..affd4ed28cfe 100644 >> > > --- a/net/ipv4/tcp_offload.c >> > > +++ b/net/ipv4/tcp_offload.c >> > > @@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, >> > > } >> > > } >> > > >> > > +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, >> > > + __be32 *oldip, __be32 newip, >> > > + __be16 *oldport, __be16 newport) >> > > +{ >> > > + struct tcphdr *th; >> > > + struct iphdr *iph; >> > > + >> > > + if (*oldip == newip && *oldport == newport) >> > > + return; >> > > + >> > > + th = tcp_hdr(seg); >> > > + iph = ip_hdr(seg); >> > > + >> > > + inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true); >> > > + inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); >> > > + *oldport = newport; >> > > + >> > > + csum_replace4(&iph->check, *oldip, newip); >> > > + *oldip = newip; >> > > +} >> > > + >> > > +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) >> > > +{ >> > > + const struct tcphdr *th; >> > > + const struct iphdr *iph; >> > > + struct sk_buff *seg; >> > > + struct tcphdr *th2; >> > > + struct iphdr *iph2; >> > > + >> > > + seg = segs; >> > > + th = tcp_hdr(seg); >> > > + iph = ip_hdr(seg); >> > > + th2 = 
tcp_hdr(seg->next); >> > > + iph2 = ip_hdr(seg->next); >> > > + >> > > + if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) && >> > > + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) >> > > + return segs; >> > > + >> > > + while ((seg = seg->next)) { >> > > + th2 = tcp_hdr(seg); >> > > + iph2 = ip_hdr(seg); >> > > + >> > > + __tcpv4_gso_segment_csum(seg, >> > > + &iph2->saddr, iph->saddr, >> > > + &th2->source, th->source); >> > > + __tcpv4_gso_segment_csum(seg, >> > > + &iph2->daddr, iph->daddr, >> > > + &th2->dest, th->dest); >> > > + } >> > > + >> > > + return segs; >> > > +} >> > >> > AFAICS, all the above is really alike the UDP side, except for the >> > transport header zero csum. >> > >> > What about renaming the udp version of this helpers as 'tcpudpv4_...', >> > move them in common code, add an explicit argument for >> > 'zerocsum_allowed' and reuse such helper for both tcp and udp? >> > >> > The same for the ipv6 variant. >> >> Wouldn't that make it more convoluted when taking into account that the >> checksum field offset is different for tcp vs udp? >> How would you handle that? > > Probably making a common helper just for > __tcpudpv{4,6}_gso_segment_csum and pass it the target l4 csum pointer > as an additional argument. It would not be spectacularly nice, so no > strong opinion either way. I'd rather keep it duplicated but more straightforward and easier to read. - Felix
On Sat, Apr 27, 2024 at 8:23 PM Felix Fietkau <nbd@nbd.name> wrote: > > Preparation for adding TCP fraglist GRO support. It expects packets to be > combined in a similar way as UDP fraglist GSO packets. > For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. > > Signed-off-by: Felix Fietkau <nbd@nbd.name> > --- > Reviewed-by: Eric Dumazet <edumazet@google.com>
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index fab0973f995b..affd4ed28cfe 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -28,6 +28,70 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq, } } +static void __tcpv4_gso_segment_csum(struct sk_buff *seg, + __be32 *oldip, __be32 newip, + __be16 *oldport, __be16 newport) +{ + struct tcphdr *th; + struct iphdr *iph; + + if (*oldip == newip && *oldport == newport) + return; + + th = tcp_hdr(seg); + iph = ip_hdr(seg); + + inet_proto_csum_replace4(&th->check, seg, *oldip, newip, true); + inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); + *oldport = newport; + + csum_replace4(&iph->check, *oldip, newip); + *oldip = newip; +} + +static struct sk_buff *__tcpv4_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct tcphdr *th; + const struct iphdr *iph; + struct sk_buff *seg; + struct tcphdr *th2; + struct iphdr *iph2; + + seg = segs; + th = tcp_hdr(seg); + iph = ip_hdr(seg); + th2 = tcp_hdr(seg->next); + iph2 = ip_hdr(seg->next); + + if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) && + iph->daddr == iph2->daddr && iph->saddr == iph2->saddr) + return segs; + + while ((seg = seg->next)) { + th2 = tcp_hdr(seg); + iph2 = ip_hdr(seg); + + __tcpv4_gso_segment_csum(seg, + &iph2->saddr, iph->saddr, + &th2->source, th->source); + __tcpv4_gso_segment_csum(seg, + &iph2->daddr, iph->daddr, + &th2->dest, th->dest); + } + + return segs; +} + +static struct sk_buff *__tcp4_gso_segment_list(struct sk_buff *skb, + netdev_features_t features) +{ + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); + if (IS_ERR(skb)) + return skb; + + return __tcpv4_gso_segment_list_csum(skb); +} + static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -37,6 +101,9 @@ static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, if (!pskb_may_pull(skb, sizeof(struct tcphdr))) return ERR_PTR(-EINVAL); + if 
(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) + return __tcp4_gso_segment_list(skb, features); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct iphdr *iph = ip_hdr(skb); struct tcphdr *th = tcp_hdr(skb); diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 4b07d1e6c952..7180c30dbbef 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -40,6 +40,61 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff) return 0; } +static void __tcpv6_gso_segment_csum(struct sk_buff *seg, + __be16 *oldport, __be16 newport) +{ + struct tcphdr *th; + + if (*oldport == newport) + return; + + th = tcp_hdr(seg); + inet_proto_csum_replace2(&th->check, seg, *oldport, newport, false); + *oldport = newport; +} + +static struct sk_buff *__tcpv6_gso_segment_list_csum(struct sk_buff *segs) +{ + const struct tcphdr *th; + const struct ipv6hdr *iph; + struct sk_buff *seg; + struct tcphdr *th2; + struct ipv6hdr *iph2; + + seg = segs; + th = tcp_hdr(seg); + iph = ipv6_hdr(seg); + th2 = tcp_hdr(seg->next); + iph2 = ipv6_hdr(seg->next); + + if (!(*(const u32 *)&th->source ^ *(const u32 *)&th2->source) && + ipv6_addr_equal(&iph->saddr, &iph2->saddr) && + ipv6_addr_equal(&iph->daddr, &iph2->daddr)) + return segs; + + while ((seg = seg->next)) { + th2 = tcp_hdr(seg); + iph2 = ipv6_hdr(seg); + + iph2->saddr = iph->saddr; + iph2->daddr = iph->daddr; + __tcpv6_gso_segment_csum(seg, &th2->source, th->source); + __tcpv6_gso_segment_csum(seg, &th2->dest, th->dest); + } + + return segs; +} + +static struct sk_buff *__tcp6_gso_segment_list(struct sk_buff *skb, + netdev_features_t features) +{ + skb = skb_segment_list(skb, features, skb_mac_header_len(skb)); + if (IS_ERR(skb)) + return skb; + + return __tcpv6_gso_segment_list_csum(skb); +} + static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@ -51,6 +106,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, if 
(!pskb_may_pull(skb, sizeof(*th))) return ERR_PTR(-EINVAL); + if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) + return __tcp6_gso_segment_list(skb, features); + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { const struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb);
Preparation for adding TCP fraglist GRO support. It expects packets to be combined in a similar way as UDP fraglist GSO packets. For IPv4 packets, NAT is handled in the same way as UDP fraglist GSO. Signed-off-by: Felix Fietkau <nbd@nbd.name> --- net/ipv4/tcp_offload.c | 67 ++++++++++++++++++++++++++++++++++++++++ net/ipv6/tcpv6_offload.c | 58 ++++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+)