Message ID | 20220203015140.3022854-6-eric.dumazet@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | tcp: BIG TCP implementation | expand |
On Wed, 2022-02-02 at 17:51 -0800, Eric Dumazet wrote: > From: Eric Dumazet <edumazet@google.com> > > ipv6 tcp and gro stacks will soon be able to build big TCP packets, > with an added temporary Hop By Hop header. > > If GSO is involved for these large packets, we need to remove > the temporary HBH header before segmentation happens. > > Signed-off-by: Eric Dumazet <edumazet@google.com> > --- > include/net/ipv6.h | 31 +++++++++++++++++++++++++++++++ > net/core/skbuff.c | 21 ++++++++++++++++++++- > 2 files changed, 51 insertions(+), 1 deletion(-) > > diff --git a/include/net/ipv6.h b/include/net/ipv6.h > index ea2a4351b654f8bc96503aae2b9adcd478e1f8b2..96e916fb933c3e7d4288e86790fcb2bb1353a261 100644 > --- a/include/net/ipv6.h > +++ b/include/net/ipv6.h > @@ -464,6 +464,37 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, > struct ipv6_txoptions *ipv6_update_options(struct sock *sk, > struct ipv6_txoptions *opt); > > +/* This helper is specialized for BIG TCP needs. > + * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. > + * It assumes headers are already in skb->head, thus the sk argument is only read. > + */ > +static inline bool ipv6_has_hopopt_jumbo(const struct sk_buff *skb) > +{ > + struct hop_jumbo_hdr *jhdr; > + struct ipv6hdr *nhdr; > + > + if (likely(skb->len <= GRO_MAX_SIZE)) > + return false; > + > + if (skb->protocol != htons(ETH_P_IPV6)) > + return false; > + > + if (skb_network_offset(skb) + > + sizeof(struct ipv6hdr) + > + sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) > + return false; > + > + nhdr = ipv6_hdr(skb); > + > + if (nhdr->nexthdr != NEXTHDR_HOP) > + return false; > + > + jhdr = (struct hop_jumbo_hdr *) (nhdr + 1); > + if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0) > + return false; > + return true; Rather than having to perform all of these checkes would it maybe make sense to add SKB_GSO_JUMBOGRAM as a gso_type flag? Then it would make it easier for drivers to indicate if they support the new offload or not. An added bonus is that it would probably make it easier to do something like a GSO_PARTIAL for this since then it would just be a matter of flagging it, stripping the extra hop-by-hop header, and chopping it into gso_max_size chunks. > +} > + > static inline bool ipv6_accept_ra(struct inet6_dev *idev) > { > /* If forwarding is enabled, RA are not accepted unless the special > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > index 0118f0afaa4fce8da167ddf39de4c9f3880ca05b..53f17c7392311e7123628fcab4617efc169905a1 100644 > --- a/net/core/skbuff.c > +++ b/net/core/skbuff.c > @@ -3959,8 +3959,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, > skb_frag_t *frag = skb_shinfo(head_skb)->frags; > unsigned int mss = skb_shinfo(head_skb)->gso_size; > unsigned int doffset = head_skb->data - skb_mac_header(head_skb); > + int hophdr_len = sizeof(struct hop_jumbo_hdr); > struct sk_buff *frag_skb = head_skb; > - unsigned int offset = doffset; > + unsigned int offset; > unsigned int tnl_hlen = skb_tnl_header_len(head_skb); > unsigned int partial_segs = 0; > unsigned int headroom; > @@ -3968,6 +3969,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, > __be16 proto; > bool csum, sg; > int nfrags = skb_shinfo(head_skb)->nr_frags; > + struct ipv6hdr *h6; > int err = -ENOMEM; > int i = 0; > int pos; > @@ -3992,6 +3994,23 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, > } > > __skb_push(head_skb, doffset); > + > + if (ipv6_has_hopopt_jumbo(head_skb)) { > + /* remove the HBH header. > + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] > + */ > + memmove(head_skb->data + hophdr_len, > + head_skb->data, > + ETH_HLEN + sizeof(struct ipv6hdr)); > + head_skb->data += hophdr_len; > + head_skb->len -= hophdr_len; > + head_skb->network_header += hophdr_len; > + head_skb->mac_header += hophdr_len; > + doffset -= hophdr_len; > + h6 = (struct ipv6hdr *)(head_skb->data + ETH_HLEN); > + h6->nexthdr = IPPROTO_TCP; > + } Does it really make the most sense to be doing this here, or should this be a part of the IPv6 processing? It seems like of asymmetric when compared with the change in the next patch to add the header in GRO. > + offset = doffset; > proto = skb_network_protocol(head_skb, NULL); > if (unlikely(!proto)) > return ERR_PTR(-EINVAL);
On Thu, Feb 3, 2022 at 10:53 AM Alexander H Duyck <alexander.duyck@gmail.com> wrote: > > > Rather than having to perform all of these checkes would it maybe make > sense to add SKB_GSO_JUMBOGRAM as a gso_type flag? Then it would make > it easier for drivers to indicate if they support the new offload or > not. Yes, this could be an option. > > An added bonus is that it would probably make it easier to do something > like a GSO_PARTIAL for this since then it would just be a matter of > flagging it, stripping the extra hop-by-hop header, and chopping it > into gso_max_size chunks. > > > +} > > + > > static inline bool ipv6_accept_ra(struct inet6_dev *idev) > > { > > /* If forwarding is enabled, RA are not accepted unless the special > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > index 0118f0afaa4fce8da167ddf39de4c9f3880ca05b..53f17c7392311e7123628fcab4617efc169905a1 100644 > > --- a/net/core/skbuff.c > > +++ b/net/core/skbuff.c > > @@ -3959,8 +3959,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, > > skb_frag_t *frag = skb_shinfo(head_skb)->frags; > > unsigned int mss = skb_shinfo(head_skb)->gso_size; > > unsigned int doffset = head_skb->data - skb_mac_header(head_skb); > > + int hophdr_len = sizeof(struct hop_jumbo_hdr); > > struct sk_buff *frag_skb = head_skb; > > - unsigned int offset = doffset; > > + unsigned int offset; > > unsigned int tnl_hlen = skb_tnl_header_len(head_skb); > > unsigned int partial_segs = 0; > > unsigned int headroom; > > @@ -3968,6 +3969,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, > > __be16 proto; > > bool csum, sg; > > int nfrags = skb_shinfo(head_skb)->nr_frags; > > + struct ipv6hdr *h6; > > int err = -ENOMEM; > > int i = 0; > > int pos; > > @@ -3992,6 +3994,23 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, > > } > > > > __skb_push(head_skb, doffset); > > + > > + if (ipv6_has_hopopt_jumbo(head_skb)) { > > + /* remove the HBH header. > > + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] > > + */ > > + memmove(head_skb->data + hophdr_len, > > + head_skb->data, > > + ETH_HLEN + sizeof(struct ipv6hdr)); > > + head_skb->data += hophdr_len; > > + head_skb->len -= hophdr_len; > > + head_skb->network_header += hophdr_len; > > + head_skb->mac_header += hophdr_len; > > + doffset -= hophdr_len; > > + h6 = (struct ipv6hdr *)(head_skb->data + ETH_HLEN); > > + h6->nexthdr = IPPROTO_TCP; > > + } > > Does it really make the most sense to be doing this here, or should > this be a part of the IPv6 processing? It seems like of asymmetric when > compared with the change in the next patch to add the header in GRO. > Not sure what you mean. We do have to strip the header here, I do not see where else to make this ? > > + offset = doffset; > > proto = skb_network_protocol(head_skb, NULL); > > if (unlikely(!proto)) > > return ERR_PTR(-EINVAL); > >
On Thu, Feb 3, 2022 at 11:17 AM Eric Dumazet <edumazet@google.com> wrote: > > On Thu, Feb 3, 2022 at 10:53 AM Alexander H Duyck > <alexander.duyck@gmail.com> wrote: > > > > > > Rather than having to perform all of these checkes would it maybe make > > sense to add SKB_GSO_JUMBOGRAM as a gso_type flag? Then it would make > > it easier for drivers to indicate if they support the new offload or > > not. > > Yes, this could be an option. > > > > > An added bonus is that it would probably make it easier to do something > > like a GSO_PARTIAL for this since then it would just be a matter of > > flagging it, stripping the extra hop-by-hop header, and chopping it > > into gso_max_size chunks. > > > > > +} > > > + > > > static inline bool ipv6_accept_ra(struct inet6_dev *idev) > > > { > > > /* If forwarding is enabled, RA are not accepted unless the special > > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > > > index 0118f0afaa4fce8da167ddf39de4c9f3880ca05b..53f17c7392311e7123628fcab4617efc169905a1 100644 > > > --- a/net/core/skbuff.c > > > +++ b/net/core/skbuff.c > > > @@ -3959,8 +3959,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, > > > skb_frag_t *frag = skb_shinfo(head_skb)->frags; > > > unsigned int mss = skb_shinfo(head_skb)->gso_size; > > > unsigned int doffset = head_skb->data - skb_mac_header(head_skb); > > > + int hophdr_len = sizeof(struct hop_jumbo_hdr); > > > struct sk_buff *frag_skb = head_skb; > > > - unsigned int offset = doffset; > > > + unsigned int offset; > > > unsigned int tnl_hlen = skb_tnl_header_len(head_skb); > > > unsigned int partial_segs = 0; > > > unsigned int headroom; > > > @@ -3968,6 +3969,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, > > > __be16 proto; > > > bool csum, sg; > > > int nfrags = skb_shinfo(head_skb)->nr_frags; > > > + struct ipv6hdr *h6; > > > int err = -ENOMEM; > > > int i = 0; > > > int pos; > > > @@ -3992,6 +3994,23 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, > > > } > > > > > > __skb_push(head_skb, doffset); > > > + > > > + if (ipv6_has_hopopt_jumbo(head_skb)) { > > > + /* remove the HBH header. > > > + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] > > > + */ > > > + memmove(head_skb->data + hophdr_len, > > > + head_skb->data, > > > + ETH_HLEN + sizeof(struct ipv6hdr)); > > > + head_skb->data += hophdr_len; > > > + head_skb->len -= hophdr_len; > > > + head_skb->network_header += hophdr_len; > > > + head_skb->mac_header += hophdr_len; > > > + doffset -= hophdr_len; > > > + h6 = (struct ipv6hdr *)(head_skb->data + ETH_HLEN); > > > + h6->nexthdr = IPPROTO_TCP; > > > + } > > > > Does it really make the most sense to be doing this here, or should > > this be a part of the IPv6 processing? It seems like of asymmetric when > > compared with the change in the next patch to add the header in GRO. > > > > Not sure what you mean. We do have to strip the header here, I do not > see where else to make this ? It is the fact that you are adding IPv6 specific code to the net/core/skbuff.c block here. Logically speaking if you are adding the header in ipv6_gro_receive then it really seems li:ke the logic to remove the header really belongs in ipv6_gso_segment. I suppose this is an attempt to optimize it though, since normally updates to the header are done after segmentation instead of before.
On Thu, Feb 3, 2022 at 11:45 AM Alexander Duyck <alexander.duyck@gmail.com> wrote: > It is the fact that you are adding IPv6 specific code to the > net/core/skbuff.c block here. Logically speaking if you are adding the > header in ipv6_gro_receive then it really seems li:ke the logic to > remove the header really belongs in ipv6_gso_segment. I suppose this > is an attempt to optimize it though, since normally updates to the > header are done after segmentation instead of before. Right, doing this at the top level means we do the thing once only, instead of 45 times if the skb has 45 segments.
On Thu, 2022-02-03 at 11:59 -0800, Eric Dumazet wrote: > On Thu, Feb 3, 2022 at 11:45 AM Alexander Duyck > <alexander.duyck@gmail.com> wrote: > > > It is the fact that you are adding IPv6 specific code to the > > net/core/skbuff.c block here. Logically speaking if you are adding the > > header in ipv6_gro_receive then it really seems li:ke the logic to > > remove the header really belongs in ipv6_gso_segment. I suppose this > > is an attempt to optimize it though, since normally updates to the > > header are done after segmentation instead of before. > > Right, doing this at the top level means we do the thing once only, > instead of 45 times if the skb has 45 segments. I'm just wondering if there is a way for us to do it in ipv6_gso_segment directly instead though. With this we essentially end up having to free the skb if the segmentation fails anyway since it won't be able to go out on the wire. If we assume the stack will successfully segment the frame then it might make sense to just take care of the hop-by-hop header before we start processing the L4 protocol.
On Thu, Feb 3, 2022 at 1:08 PM Alexander H Duyck <alexander.duyck@gmail.com> wrote: > > On Thu, 2022-02-03 at 11:59 -0800, Eric Dumazet wrote: > > On Thu, Feb 3, 2022 at 11:45 AM Alexander Duyck > > <alexander.duyck@gmail.com> wrote: > > > > > It is the fact that you are adding IPv6 specific code to the > > > net/core/skbuff.c block here. Logically speaking if you are adding the > > > header in ipv6_gro_receive then it really seems li:ke the logic to > > > remove the header really belongs in ipv6_gso_segment. I suppose this > > > is an attempt to optimize it though, since normally updates to the > > > header are done after segmentation instead of before. > > > > Right, doing this at the top level means we do the thing once only, > > instead of 45 times if the skb has 45 segments. > > I'm just wondering if there is a way for us to do it in > ipv6_gso_segment directly instead though. With this we essentially end > up having to free the skb if the segmentation fails anyway since it > won't be able to go out on the wire. > Having a HBH jumbo header in place while the current frame is MTU size (typically MTU < 9000) would violate the specs. A HBH jumbo header presence implies packet length > 64K. > If we assume the stack will successfully segment the frame then it > might make sense to just take care of the hop-by-hop header before we > start processing the L4 protocol.
On Thu, Feb 3, 2022 at 1:42 PM Eric Dumazet <edumazet@google.com> wrote: > > On Thu, Feb 3, 2022 at 1:08 PM Alexander H Duyck > <alexander.duyck@gmail.com> wrote: > > > > On Thu, 2022-02-03 at 11:59 -0800, Eric Dumazet wrote: > > > On Thu, Feb 3, 2022 at 11:45 AM Alexander Duyck > > > <alexander.duyck@gmail.com> wrote: > > > > > > > It is the fact that you are adding IPv6 specific code to the > > > > net/core/skbuff.c block here. Logically speaking if you are adding the > > > > header in ipv6_gro_receive then it really seems li:ke the logic to > > > > remove the header really belongs in ipv6_gso_segment. I suppose this > > > > is an attempt to optimize it though, since normally updates to the > > > > header are done after segmentation instead of before. > > > > > > Right, doing this at the top level means we do the thing once only, > > > instead of 45 times if the skb has 45 segments. > > > > I'm just wondering if there is a way for us to do it in > > ipv6_gso_segment directly instead though. With this we essentially end > > up having to free the skb if the segmentation fails anyway since it > > won't be able to go out on the wire. > > > > Having a HBH jumbo header in place while the current frame is MTU size > (typically MTU < 9000) would > violate the specs. A HBH jumbo header presence implies packet length > 64K. I get that. What I was getting at was that we might be able to process it in ipv6_gso_segment before we hand it off to either TCP or UDP gso handlers to segment. The general idea being we keep the IPv6 specific bits in the IPv6 specific code instead of having the skb_segment function now have to understand IPv6 packets. So what we would end up doing is having to do an skb_cow to replace the skb->head if any clones might be holding on it, and then just chop off the HBH jumbo header before we start the segmenting. The risk would be that we waste cycles removing the HBH header for a frame that is going to fail, but I am not sure how likely a scenario that is or if we need to optimize for that.
On Thu, Feb 3, 2022 at 4:05 PM Alexander Duyck <alexander.duyck@gmail.com> wrote: > > I get that. What I was getting at was that we might be able to process > it in ipv6_gso_segment before we hand it off to either TCP or UDP gso > handlers to segment. > > The general idea being we keep the IPv6 specific bits in the IPv6 > specific code instead of having the skb_segment function now have to > understand IPv6 packets. So what we would end up doing is having to do > an skb_cow to replace the skb->head if any clones might be holding on > it, and then just chop off the HBH jumbo header before we start the > segmenting. > > The risk would be that we waste cycles removing the HBH header for a > frame that is going to fail, but I am not sure how likely a scenario > that is or if we need to optimize for that. I guess I can try this for the next version, thanks.
On Thu, Feb 3, 2022 at 4:27 PM Eric Dumazet <edumazet@google.com> wrote: > > On Thu, Feb 3, 2022 at 4:05 PM Alexander Duyck > <alexander.duyck@gmail.com> wrote: > > > > > I get that. What I was getting at was that we might be able to process > > it in ipv6_gso_segment before we hand it off to either TCP or UDP gso > > handlers to segment. > > > > The general idea being we keep the IPv6 specific bits in the IPv6 > > specific code instead of having the skb_segment function now have to > > understand IPv6 packets. So what we would end up doing is having to do > > an skb_cow to replace the skb->head if any clones might be holding on > > it, and then just chop off the HBH jumbo header before we start the > > segmenting. > > > > The risk would be that we waste cycles removing the HBH header for a > > frame that is going to fail, but I am not sure how likely a scenario > > that is or if we need to optimize for that. > > I guess I can try this for the next version, thanks. I came up with: ommit 147f17169ccc6c2c38ea802e5728528ed54f492d Author: Eric Dumazet <edumazet@google.com> Date: Sat Nov 20 16:49:35 2021 -0800 ipv6/gso: remove temporary HBH/jumbo header ipv6 tcp and gro stacks will soon be able to build big TCP packets, with an added temporary Hop By Hop header. If GSO is involved for these large packets, we need to remove the temporary HBH header before segmentation happens. v2: perform HBH removal from ipv6_gso_segment() instead of skb_segment() (Alexander feedback) Signed-off-by: Eric Dumazet <edumazet@google.com> diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ea2a4351b654f8bc96503aae2b9adcd478e1f8b2..a850c18dae0dfedccb9d956bf1ec9fa6b0368c6b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -464,6 +464,38 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); +/* This helper is specialized for BIG TCP needs. + * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. + * It assumes headers are already in skb->head, thus the sk argument is only read. + */ +static inline bool ipv6_has_hopopt_jumbo(const struct sk_buff *skb) +{ + const struct hop_jumbo_hdr *jhdr; + const struct ipv6hdr *nhdr; + + if (likely(skb->len <= GRO_MAX_SIZE)) + return false; + + if (skb->protocol != htons(ETH_P_IPV6)) + return false; + + if (skb_network_offset(skb) + + sizeof(struct ipv6hdr) + + sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) + return false; + + nhdr = ipv6_hdr(skb); + + if (nhdr->nexthdr != NEXTHDR_HOP) + return false; + + jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1); + if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 || + jhdr->nexthdr != IPPROTO_TCP) + return false; + return true; +} + static inline bool ipv6_accept_ra(struct inet6_dev *idev) { /* If forwarding is enabled, RA are not accepted unless the special diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d37a79a8554e92a1dcaa6fd023cafe2114841ece..7f65097c8f30fa19a8c9c265eb4f027e91848021 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -87,6 +87,27 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, bool gso_partial; skb_reset_network_header(skb); + if (ipv6_has_hopopt_jumbo(skb)) { + const int hophdr_len = sizeof(struct hop_jumbo_hdr); + int err; + + err = skb_cow_head(skb, 0); + if (err < 0) + return ERR_PTR(err); + + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + memmove(skb->data + hophdr_len, + skb->data, + ETH_HLEN + sizeof(struct ipv6hdr)); + skb->data += hophdr_len; + skb->len -= hophdr_len; + skb->network_header += hophdr_len; + skb->mac_header += hophdr_len; + ipv6h = (struct ipv6hdr *)skb->data; + ipv6h->nexthdr = IPPROTO_TCP; + } nhoff = skb_network_header(skb) - skb_mac_header(skb); if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) goto out;
On Thu, Feb 3, 2022 at 5:14 PM Eric Dumazet <edumazet@google.com> wrote: > > On Thu, Feb 3, 2022 at 4:27 PM Eric Dumazet <edumazet@google.com> wrote: > > > > On Thu, Feb 3, 2022 at 4:05 PM Alexander Duyck > > <alexander.duyck@gmail.com> wrote: > > > > > > > > I get that. What I was getting at was that we might be able to process > > > it in ipv6_gso_segment before we hand it off to either TCP or UDP gso > > > handlers to segment. > > > > > > The general idea being we keep the IPv6 specific bits in the IPv6 > > > specific code instead of having the skb_segment function now have to > > > understand IPv6 packets. So what we would end up doing is having to do > > > an skb_cow to replace the skb->head if any clones might be holding on > > > it, and then just chop off the HBH jumbo header before we start the > > > segmenting. > > > > > > The risk would be that we waste cycles removing the HBH header for a > > > frame that is going to fail, but I am not sure how likely a scenario > > > that is or if we need to optimize for that. > > > > I guess I can try this for the next version, thanks. > > I came up with: > > ommit 147f17169ccc6c2c38ea802e5728528ed54f492d > Author: Eric Dumazet <edumazet@google.com> > Date: Sat Nov 20 16:49:35 2021 -0800 > > ipv6/gso: remove temporary HBH/jumbo header > > ipv6 tcp and gro stacks will soon be able to build big TCP packets, > with an added temporary Hop By Hop header. > > If GSO is involved for these large packets, we need to remove > the temporary HBH header before segmentation happens. > > v2: perform HBH removal from ipv6_gso_segment() instead of > skb_segment() (Alexander feedback) > > Signed-off-by: Eric Dumazet <edumazet@google.com> Well, this does not work at all. > static inline bool ipv6_accept_ra(struct inet6_dev *idev) > { > /* If forwarding is enabled, RA are not accepted unless the special > diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c > index d37a79a8554e92a1dcaa6fd023cafe2114841ece..7f65097c8f30fa19a8c9c265eb4f027e91848021 > 100644 > --- a/net/ipv6/ip6_offload.c > +++ b/net/ipv6/ip6_offload.c > @@ -87,6 +87,27 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, > bool gso_partial; > > skb_reset_network_header(skb); > + if (ipv6_has_hopopt_jumbo(skb)) { > + const int hophdr_len = sizeof(struct hop_jumbo_hdr); > + int err; > + > + err = skb_cow_head(skb, 0); > + if (err < 0) > + return ERR_PTR(err); > + > + /* remove the HBH header. > + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] > + */ > + memmove(skb->data + hophdr_len, > + skb->data, > + ETH_HLEN + sizeof(struct ipv6hdr)); > + skb->data += hophdr_len; > + skb->len -= hophdr_len; > + skb->network_header += hophdr_len; > + skb->mac_header += hophdr_len; > + ipv6h = (struct ipv6hdr *)skb->data; > + ipv6h->nexthdr = IPPROTO_TCP; > + } > nhoff = skb_network_header(skb) - skb_mac_header(skb); > if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) > goto out;
On Thu, Feb 3, 2022 at 5:48 PM Eric Dumazet <edumazet@google.com> wrote: > > Well, this does not work at all. > > > > > > static inline bool ipv6_accept_ra(struct inet6_dev *idev) > > { > > /* If forwarding is enabled, RA are not accepted unless the special > > diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c > > index d37a79a8554e92a1dcaa6fd023cafe2114841ece..7f65097c8f30fa19a8c9c265eb4f027e91848021 > > 100644 > > --- a/net/ipv6/ip6_offload.c > > +++ b/net/ipv6/ip6_offload.c > > @@ -87,6 +87,27 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, > > bool gso_partial; > > > > skb_reset_network_header(skb); > > + if (ipv6_has_hopopt_jumbo(skb)) { > > + const int hophdr_len = sizeof(struct hop_jumbo_hdr); > > + int err; > > + > > + err = skb_cow_head(skb, 0); > > + if (err < 0) > > + return ERR_PTR(err); > > + > > + /* remove the HBH header. > > + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] > > + */ > > + memmove(skb->data + hophdr_len, > > + skb->data, Oh, I must use skb_mac_header() instead of skb->data, sorry for the noise. > > + ETH_HLEN + sizeof(struct ipv6hdr)); > > + skb->data += hophdr_len; > > + skb->len -= hophdr_len; > > + skb->network_header += hophdr_len; > > + skb->mac_header += hophdr_len; > > + ipv6h = (struct ipv6hdr *)skb->data; > > + ipv6h->nexthdr = IPPROTO_TCP; > > + } > > nhoff = skb_network_header(skb) - skb_mac_header(skb); > > if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) > > goto out;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ea2a4351b654f8bc96503aae2b9adcd478e1f8b2..96e916fb933c3e7d4288e86790fcb2bb1353a261 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -464,6 +464,37 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, struct ipv6_txoptions *ipv6_update_options(struct sock *sk, struct ipv6_txoptions *opt); +/* This helper is specialized for BIG TCP needs. + * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. + * It assumes headers are already in skb->head, thus the sk argument is only read. + */ +static inline bool ipv6_has_hopopt_jumbo(const struct sk_buff *skb) +{ + struct hop_jumbo_hdr *jhdr; + struct ipv6hdr *nhdr; + + if (likely(skb->len <= GRO_MAX_SIZE)) + return false; + + if (skb->protocol != htons(ETH_P_IPV6)) + return false; + + if (skb_network_offset(skb) + + sizeof(struct ipv6hdr) + + sizeof(struct hop_jumbo_hdr) > skb_headlen(skb)) + return false; + + nhdr = ipv6_hdr(skb); + + if (nhdr->nexthdr != NEXTHDR_HOP) + return false; + + jhdr = (struct hop_jumbo_hdr *) (nhdr + 1); + if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0) + return false; + return true; +} + static inline bool ipv6_accept_ra(struct inet6_dev *idev) { /* If forwarding is enabled, RA are not accepted unless the special diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0118f0afaa4fce8da167ddf39de4c9f3880ca05b..53f17c7392311e7123628fcab4617efc169905a1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3959,8 +3959,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, skb_frag_t *frag = skb_shinfo(head_skb)->frags; unsigned int mss = skb_shinfo(head_skb)->gso_size; unsigned int doffset = head_skb->data - skb_mac_header(head_skb); + int hophdr_len = sizeof(struct hop_jumbo_hdr); struct sk_buff *frag_skb = head_skb; - unsigned int offset = doffset; + unsigned int offset; unsigned int tnl_hlen = skb_tnl_header_len(head_skb); unsigned int partial_segs = 0; unsigned int headroom; @@ -3968,6 +3969,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, __be16 proto; bool csum, sg; int nfrags = skb_shinfo(head_skb)->nr_frags; + struct ipv6hdr *h6; int err = -ENOMEM; int i = 0; int pos; @@ -3992,6 +3994,23 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, } __skb_push(head_skb, doffset); + + if (ipv6_has_hopopt_jumbo(head_skb)) { + /* remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][TCP header] + */ + memmove(head_skb->data + hophdr_len, + head_skb->data, + ETH_HLEN + sizeof(struct ipv6hdr)); + head_skb->data += hophdr_len; + head_skb->len -= hophdr_len; + head_skb->network_header += hophdr_len; + head_skb->mac_header += hophdr_len; + doffset -= hophdr_len; + h6 = (struct ipv6hdr *)(head_skb->data + ETH_HLEN); + h6->nexthdr = IPPROTO_TCP; + } + offset = doffset; proto = skb_network_protocol(head_skb, NULL); if (unlikely(!proto)) return ERR_PTR(-EINVAL);