diff mbox series

[net-next,05/15] ipv6/gso: remove temporary HBH/jumbo header

Message ID 20220203015140.3022854-6-eric.dumazet@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series tcp: BIG TCP implementation | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 2109 this patch: 2109
netdev/cc_maintainers warning 3 maintainers not CCed: dsahern@kernel.org yoshfuji@linux-ipv6.org alobakin@pm.me
netdev/build_clang success Errors and warnings before: 307 this patch: 307
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2236 this patch: 2236
netdev/checkpatch warning CHECK: No space is necessary after a cast WARNING: line length of 82 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet Feb. 3, 2022, 1:51 a.m. UTC
From: Eric Dumazet <edumazet@google.com>

ipv6 tcp and gro stacks will soon be able to build big TCP packets,
with an added temporary Hop By Hop header.

If GSO is involved for these large packets, we need to remove
the temporary HBH header before segmentation happens.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/ipv6.h | 31 +++++++++++++++++++++++++++++++
 net/core/skbuff.c  | 21 ++++++++++++++++++++-
 2 files changed, 51 insertions(+), 1 deletion(-)

Comments

Alexander Duyck Feb. 3, 2022, 6:53 p.m. UTC | #1
On Wed, 2022-02-02 at 17:51 -0800, Eric Dumazet wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> ipv6 tcp and gro stacks will soon be able to build big TCP packets,
> with an added temporary Hop By Hop header.
> 
> If GSO is involved for these large packets, we need to remove
> the temporary HBH header before segmentation happens.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  include/net/ipv6.h | 31 +++++++++++++++++++++++++++++++
>  net/core/skbuff.c  | 21 ++++++++++++++++++++-
>  2 files changed, 51 insertions(+), 1 deletion(-)
> 
> diff --git a/include/net/ipv6.h b/include/net/ipv6.h
> index ea2a4351b654f8bc96503aae2b9adcd478e1f8b2..96e916fb933c3e7d4288e86790fcb2bb1353a261 100644
> --- a/include/net/ipv6.h
> +++ b/include/net/ipv6.h
> @@ -464,6 +464,37 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
>  struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
>  					   struct ipv6_txoptions *opt);
>  
> +/* This helper is specialized for BIG TCP needs.
> + * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header.
> + * It assumes headers are already in skb->head, thus the sk argument is only read.
> + */
> +static inline bool ipv6_has_hopopt_jumbo(const struct sk_buff *skb)
> +{
> +	struct hop_jumbo_hdr *jhdr;
> +	struct ipv6hdr *nhdr;
> +
> +	if (likely(skb->len <= GRO_MAX_SIZE))
> +		return false;
> +
> +	if (skb->protocol != htons(ETH_P_IPV6))
> +		return false;
> +
> +	if (skb_network_offset(skb) +
> +	    sizeof(struct ipv6hdr) +
> +	    sizeof(struct hop_jumbo_hdr) > skb_headlen(skb))
> +		return false;
> +
> +	nhdr = ipv6_hdr(skb);
> +
> +	if (nhdr->nexthdr != NEXTHDR_HOP)
> +		return false;
> +
> +	jhdr = (struct hop_jumbo_hdr *) (nhdr + 1);
> +	if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0)
> +		return false;
> +	return true;

Rather than having to perform all of these checkes would it maybe make
sense to add SKB_GSO_JUMBOGRAM as a gso_type flag? Then it would make
it easier for drivers to indicate if they support the new offload or
not.

An added bonus is that it would probably make it easier to do something
like a GSO_PARTIAL for this since then it would just be a matter of
flagging it, stripping the extra hop-by-hop header, and chopping it
into gso_max_size chunks.

> +}
> +
>  static inline bool ipv6_accept_ra(struct inet6_dev *idev)
>  {
>  	/* If forwarding is enabled, RA are not accepted unless the special
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 0118f0afaa4fce8da167ddf39de4c9f3880ca05b..53f17c7392311e7123628fcab4617efc169905a1 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3959,8 +3959,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>  	skb_frag_t *frag = skb_shinfo(head_skb)->frags;
>  	unsigned int mss = skb_shinfo(head_skb)->gso_size;
>  	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
> +	int hophdr_len = sizeof(struct hop_jumbo_hdr);
>  	struct sk_buff *frag_skb = head_skb;
> -	unsigned int offset = doffset;
> +	unsigned int offset;
>  	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
>  	unsigned int partial_segs = 0;
>  	unsigned int headroom;
> @@ -3968,6 +3969,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>  	__be16 proto;
>  	bool csum, sg;
>  	int nfrags = skb_shinfo(head_skb)->nr_frags;
> +	struct ipv6hdr *h6;
>  	int err = -ENOMEM;
>  	int i = 0;
>  	int pos;
> @@ -3992,6 +3994,23 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
>  	}
>  
>  	__skb_push(head_skb, doffset);
> +
> +	if (ipv6_has_hopopt_jumbo(head_skb)) {
> +		/* remove the HBH header.
> +		 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
> +		 */
> +		memmove(head_skb->data + hophdr_len,
> +			head_skb->data,
> +			ETH_HLEN + sizeof(struct ipv6hdr));
> +		head_skb->data += hophdr_len;
> +		head_skb->len -= hophdr_len;
> +		head_skb->network_header += hophdr_len;
> +		head_skb->mac_header += hophdr_len;
> +		doffset -= hophdr_len;
> +		h6 = (struct ipv6hdr *)(head_skb->data + ETH_HLEN);
> +		h6->nexthdr = IPPROTO_TCP;
> +	}

Does it really make the most sense to be doing this here, or should
this be a part of the IPv6 processing? It seems like of asymmetric when
compared with the change in the next patch to add the header in GRO.

> +	offset = doffset;
>  	proto = skb_network_protocol(head_skb, NULL);
>  	if (unlikely(!proto))
>  		return ERR_PTR(-EINVAL);
Eric Dumazet Feb. 3, 2022, 7:17 p.m. UTC | #2
On Thu, Feb 3, 2022 at 10:53 AM Alexander H Duyck
<alexander.duyck@gmail.com> wrote:
>
>
> Rather than having to perform all of these checkes would it maybe make
> sense to add SKB_GSO_JUMBOGRAM as a gso_type flag? Then it would make
> it easier for drivers to indicate if they support the new offload or
> not.

Yes, this could be an option.

>
> An added bonus is that it would probably make it easier to do something
> like a GSO_PARTIAL for this since then it would just be a matter of
> flagging it, stripping the extra hop-by-hop header, and chopping it
> into gso_max_size chunks.
>
> > +}
> > +
> >  static inline bool ipv6_accept_ra(struct inet6_dev *idev)
> >  {
> >       /* If forwarding is enabled, RA are not accepted unless the special
> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > index 0118f0afaa4fce8da167ddf39de4c9f3880ca05b..53f17c7392311e7123628fcab4617efc169905a1 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -3959,8 +3959,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> >       skb_frag_t *frag = skb_shinfo(head_skb)->frags;
> >       unsigned int mss = skb_shinfo(head_skb)->gso_size;
> >       unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
> > +     int hophdr_len = sizeof(struct hop_jumbo_hdr);
> >       struct sk_buff *frag_skb = head_skb;
> > -     unsigned int offset = doffset;
> > +     unsigned int offset;
> >       unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
> >       unsigned int partial_segs = 0;
> >       unsigned int headroom;
> > @@ -3968,6 +3969,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> >       __be16 proto;
> >       bool csum, sg;
> >       int nfrags = skb_shinfo(head_skb)->nr_frags;
> > +     struct ipv6hdr *h6;
> >       int err = -ENOMEM;
> >       int i = 0;
> >       int pos;
> > @@ -3992,6 +3994,23 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> >       }
> >
> >       __skb_push(head_skb, doffset);
> > +
> > +     if (ipv6_has_hopopt_jumbo(head_skb)) {
> > +             /* remove the HBH header.
> > +              * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
> > +              */
> > +             memmove(head_skb->data + hophdr_len,
> > +                     head_skb->data,
> > +                     ETH_HLEN + sizeof(struct ipv6hdr));
> > +             head_skb->data += hophdr_len;
> > +             head_skb->len -= hophdr_len;
> > +             head_skb->network_header += hophdr_len;
> > +             head_skb->mac_header += hophdr_len;
> > +             doffset -= hophdr_len;
> > +             h6 = (struct ipv6hdr *)(head_skb->data + ETH_HLEN);
> > +             h6->nexthdr = IPPROTO_TCP;
> > +     }
>
> Does it really make the most sense to be doing this here, or should
> this be a part of the IPv6 processing? It seems like of asymmetric when
> compared with the change in the next patch to add the header in GRO.
>

Not sure what you mean. We do have to strip the header here, I do not
see where else to make this ?

> > +     offset = doffset;
> >       proto = skb_network_protocol(head_skb, NULL);
> >       if (unlikely(!proto))
> >               return ERR_PTR(-EINVAL);
>
>
Alexander Duyck Feb. 3, 2022, 7:45 p.m. UTC | #3
On Thu, Feb 3, 2022 at 11:17 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Feb 3, 2022 at 10:53 AM Alexander H Duyck
> <alexander.duyck@gmail.com> wrote:
> >
> >
> > Rather than having to perform all of these checkes would it maybe make
> > sense to add SKB_GSO_JUMBOGRAM as a gso_type flag? Then it would make
> > it easier for drivers to indicate if they support the new offload or
> > not.
>
> Yes, this could be an option.
>
> >
> > An added bonus is that it would probably make it easier to do something
> > like a GSO_PARTIAL for this since then it would just be a matter of
> > flagging it, stripping the extra hop-by-hop header, and chopping it
> > into gso_max_size chunks.
> >
> > > +}
> > > +
> > >  static inline bool ipv6_accept_ra(struct inet6_dev *idev)
> > >  {
> > >       /* If forwarding is enabled, RA are not accepted unless the special
> > > diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> > > index 0118f0afaa4fce8da167ddf39de4c9f3880ca05b..53f17c7392311e7123628fcab4617efc169905a1 100644
> > > --- a/net/core/skbuff.c
> > > +++ b/net/core/skbuff.c
> > > @@ -3959,8 +3959,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > >       skb_frag_t *frag = skb_shinfo(head_skb)->frags;
> > >       unsigned int mss = skb_shinfo(head_skb)->gso_size;
> > >       unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
> > > +     int hophdr_len = sizeof(struct hop_jumbo_hdr);
> > >       struct sk_buff *frag_skb = head_skb;
> > > -     unsigned int offset = doffset;
> > > +     unsigned int offset;
> > >       unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
> > >       unsigned int partial_segs = 0;
> > >       unsigned int headroom;
> > > @@ -3968,6 +3969,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > >       __be16 proto;
> > >       bool csum, sg;
> > >       int nfrags = skb_shinfo(head_skb)->nr_frags;
> > > +     struct ipv6hdr *h6;
> > >       int err = -ENOMEM;
> > >       int i = 0;
> > >       int pos;
> > > @@ -3992,6 +3994,23 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
> > >       }
> > >
> > >       __skb_push(head_skb, doffset);
> > > +
> > > +     if (ipv6_has_hopopt_jumbo(head_skb)) {
> > > +             /* remove the HBH header.
> > > +              * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
> > > +              */
> > > +             memmove(head_skb->data + hophdr_len,
> > > +                     head_skb->data,
> > > +                     ETH_HLEN + sizeof(struct ipv6hdr));
> > > +             head_skb->data += hophdr_len;
> > > +             head_skb->len -= hophdr_len;
> > > +             head_skb->network_header += hophdr_len;
> > > +             head_skb->mac_header += hophdr_len;
> > > +             doffset -= hophdr_len;
> > > +             h6 = (struct ipv6hdr *)(head_skb->data + ETH_HLEN);
> > > +             h6->nexthdr = IPPROTO_TCP;
> > > +     }
> >
> > Does it really make the most sense to be doing this here, or should
> > this be a part of the IPv6 processing? It seems like of asymmetric when
> > compared with the change in the next patch to add the header in GRO.
> >
>
> Not sure what you mean. We do have to strip the header here, I do not
> see where else to make this ?

It is the fact that you are adding IPv6 specific code to the
net/core/skbuff.c block here. Logically speaking if you are adding the
header in ipv6_gro_receive then it really seems li:ke the logic to
remove the header really belongs in ipv6_gso_segment. I suppose this
is an attempt to optimize it though, since normally updates to the
header are done after segmentation instead of before.
Eric Dumazet Feb. 3, 2022, 7:59 p.m. UTC | #4
On Thu, Feb 3, 2022 at 11:45 AM Alexander Duyck
<alexander.duyck@gmail.com> wrote:

> It is the fact that you are adding IPv6 specific code to the
> net/core/skbuff.c block here. Logically speaking if you are adding the
> header in ipv6_gro_receive then it really seems li:ke the logic to
> remove the header really belongs in ipv6_gso_segment. I suppose this
> is an attempt to optimize it though, since normally updates to the
> header are done after segmentation instead of before.

Right, doing this at the top level means we do the thing once only,
instead of 45 times
if the skb has 45 segments.
Alexander Duyck Feb. 3, 2022, 9:08 p.m. UTC | #5
On Thu, 2022-02-03 at 11:59 -0800, Eric Dumazet wrote:
> On Thu, Feb 3, 2022 at 11:45 AM Alexander Duyck
> <alexander.duyck@gmail.com> wrote:
> 
> > It is the fact that you are adding IPv6 specific code to the
> > net/core/skbuff.c block here. Logically speaking if you are adding the
> > header in ipv6_gro_receive then it really seems li:ke the logic to
> > remove the header really belongs in ipv6_gso_segment. I suppose this
> > is an attempt to optimize it though, since normally updates to the
> > header are done after segmentation instead of before.
> 
> Right, doing this at the top level means we do the thing once only,
> instead of 45 times if the skb has 45 segments.

I'm just wondering if there is a way for us to do it in
ipv6_gso_segment directly instead though. With this we essentially end
up having to free the skb if the segmentation fails anyway since it
won't be able to go out on the wire.

If we assume the stack will successfully segment the frame then it
might make sense to just take care of the hop-by-hop header before we
start processing the L4 protocol.
Eric Dumazet Feb. 3, 2022, 9:41 p.m. UTC | #6
On Thu, Feb 3, 2022 at 1:08 PM Alexander H Duyck
<alexander.duyck@gmail.com> wrote:
>
> On Thu, 2022-02-03 at 11:59 -0800, Eric Dumazet wrote:
> > On Thu, Feb 3, 2022 at 11:45 AM Alexander Duyck
> > <alexander.duyck@gmail.com> wrote:
> >
> > > It is the fact that you are adding IPv6 specific code to the
> > > net/core/skbuff.c block here. Logically speaking if you are adding the
> > > header in ipv6_gro_receive then it really seems li:ke the logic to
> > > remove the header really belongs in ipv6_gso_segment. I suppose this
> > > is an attempt to optimize it though, since normally updates to the
> > > header are done after segmentation instead of before.
> >
> > Right, doing this at the top level means we do the thing once only,
> > instead of 45 times if the skb has 45 segments.
>
> I'm just wondering if there is a way for us to do it in
> ipv6_gso_segment directly instead though. With this we essentially end
> up having to free the skb if the segmentation fails anyway since it
> won't be able to go out on the wire.
>

Having a HBH jumbo header in place while the current frame is MTU size
(typically MTU < 9000) would
violate the specs. A HBH jumbo header presence implies packet length > 64K.



> If we assume the stack will successfully segment the frame then it
> might make sense to just take care of the hop-by-hop header before we
> start processing the L4 protocol.
Alexander Duyck Feb. 4, 2022, 12:05 a.m. UTC | #7
On Thu, Feb 3, 2022 at 1:42 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Feb 3, 2022 at 1:08 PM Alexander H Duyck
> <alexander.duyck@gmail.com> wrote:
> >
> > On Thu, 2022-02-03 at 11:59 -0800, Eric Dumazet wrote:
> > > On Thu, Feb 3, 2022 at 11:45 AM Alexander Duyck
> > > <alexander.duyck@gmail.com> wrote:
> > >
> > > > It is the fact that you are adding IPv6 specific code to the
> > > > net/core/skbuff.c block here. Logically speaking if you are adding the
> > > > header in ipv6_gro_receive then it really seems li:ke the logic to
> > > > remove the header really belongs in ipv6_gso_segment. I suppose this
> > > > is an attempt to optimize it though, since normally updates to the
> > > > header are done after segmentation instead of before.
> > >
> > > Right, doing this at the top level means we do the thing once only,
> > > instead of 45 times if the skb has 45 segments.
> >
> > I'm just wondering if there is a way for us to do it in
> > ipv6_gso_segment directly instead though. With this we essentially end
> > up having to free the skb if the segmentation fails anyway since it
> > won't be able to go out on the wire.
> >
>
> Having a HBH jumbo header in place while the current frame is MTU size
> (typically MTU < 9000) would
> violate the specs. A HBH jumbo header presence implies packet length > 64K.

I get that. What I was getting at was that we might be able to process
it in ipv6_gso_segment before we hand it off to either TCP or UDP gso
handlers to segment.

The general idea being we keep the IPv6 specific bits in the IPv6
specific code instead of having the skb_segment function now have to
understand IPv6 packets. So what we would end up doing is having to do
an skb_cow to replace the skb->head if any clones might be holding on
it, and then just chop off the HBH jumbo header before we start the
segmenting.

The risk would be that we waste cycles removing the HBH header for a
frame that is going to fail, but I am not sure how likely a scenario
that is or if we need to optimize for that.
Eric Dumazet Feb. 4, 2022, 12:27 a.m. UTC | #8
On Thu, Feb 3, 2022 at 4:05 PM Alexander Duyck
<alexander.duyck@gmail.com> wrote:
>

> I get that. What I was getting at was that we might be able to process
> it in ipv6_gso_segment before we hand it off to either TCP or UDP gso
> handlers to segment.
>
> The general idea being we keep the IPv6 specific bits in the IPv6
> specific code instead of having the skb_segment function now have to
> understand IPv6 packets. So what we would end up doing is having to do
> an skb_cow to replace the skb->head if any clones might be holding on
> it, and then just chop off the HBH jumbo header before we start the
> segmenting.
>
> The risk would be that we waste cycles removing the HBH header for a
> frame that is going to fail, but I am not sure how likely a scenario
> that is or if we need to optimize for that.

I guess I can try this for the next version, thanks.
Eric Dumazet Feb. 4, 2022, 1:14 a.m. UTC | #9
On Thu, Feb 3, 2022 at 4:27 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Feb 3, 2022 at 4:05 PM Alexander Duyck
> <alexander.duyck@gmail.com> wrote:
> >
>
> > I get that. What I was getting at was that we might be able to process
> > it in ipv6_gso_segment before we hand it off to either TCP or UDP gso
> > handlers to segment.
> >
> > The general idea being we keep the IPv6 specific bits in the IPv6
> > specific code instead of having the skb_segment function now have to
> > understand IPv6 packets. So what we would end up doing is having to do
> > an skb_cow to replace the skb->head if any clones might be holding on
> > it, and then just chop off the HBH jumbo header before we start the
> > segmenting.
> >
> > The risk would be that we waste cycles removing the HBH header for a
> > frame that is going to fail, but I am not sure how likely a scenario
> > that is or if we need to optimize for that.
>
> I guess I can try this for the next version, thanks.

I came up with:

ommit 147f17169ccc6c2c38ea802e5728528ed54f492d
Author: Eric Dumazet <edumazet@google.com>
Date:   Sat Nov 20 16:49:35 2021 -0800

    ipv6/gso: remove temporary HBH/jumbo header

    ipv6 tcp and gro stacks will soon be able to build big TCP packets,
    with an added temporary Hop By Hop header.

    If GSO is involved for these large packets, we need to remove
    the temporary HBH header before segmentation happens.

    v2: perform HBH removal from ipv6_gso_segment() instead of
        skb_segment() (Alexander feedback)

    Signed-off-by: Eric Dumazet <edumazet@google.com>

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ea2a4351b654f8bc96503aae2b9adcd478e1f8b2..a850c18dae0dfedccb9d956bf1ec9fa6b0368c6b
100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -464,6 +464,38 @@ bool ipv6_opt_accepted(const struct sock *sk,
const struct sk_buff *skb,
 struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
                                           struct ipv6_txoptions *opt);

+/* This helper is specialized for BIG TCP needs.
+ * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header.
+ * It assumes headers are already in skb->head, thus the sk argument
is only read.
+ */
+static inline bool ipv6_has_hopopt_jumbo(const struct sk_buff *skb)
+{
+       const struct hop_jumbo_hdr *jhdr;
+       const struct ipv6hdr *nhdr;
+
+       if (likely(skb->len <= GRO_MAX_SIZE))
+               return false;
+
+       if (skb->protocol != htons(ETH_P_IPV6))
+               return false;
+
+       if (skb_network_offset(skb) +
+           sizeof(struct ipv6hdr) +
+           sizeof(struct hop_jumbo_hdr) > skb_headlen(skb))
+               return false;
+
+       nhdr = ipv6_hdr(skb);
+
+       if (nhdr->nexthdr != NEXTHDR_HOP)
+               return false;
+
+       jhdr = (const struct hop_jumbo_hdr *) (nhdr + 1);
+       if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0 ||
+           jhdr->nexthdr != IPPROTO_TCP)
+               return false;
+       return true;
+}
+
 static inline bool ipv6_accept_ra(struct inet6_dev *idev)
 {
        /* If forwarding is enabled, RA are not accepted unless the special
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d37a79a8554e92a1dcaa6fd023cafe2114841ece..7f65097c8f30fa19a8c9c265eb4f027e91848021
100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -87,6 +87,27 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
        bool gso_partial;

        skb_reset_network_header(skb);
+       if (ipv6_has_hopopt_jumbo(skb)) {
+               const int hophdr_len = sizeof(struct hop_jumbo_hdr);
+               int err;
+
+               err = skb_cow_head(skb, 0);
+               if (err < 0)
+                       return ERR_PTR(err);
+
+               /* remove the HBH header.
+                * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
+                */
+               memmove(skb->data + hophdr_len,
+                       skb->data,
+                       ETH_HLEN + sizeof(struct ipv6hdr));
+               skb->data += hophdr_len;
+               skb->len -= hophdr_len;
+               skb->network_header += hophdr_len;
+               skb->mac_header += hophdr_len;
+               ipv6h = (struct ipv6hdr *)skb->data;
+               ipv6h->nexthdr = IPPROTO_TCP;
+       }
        nhoff = skb_network_header(skb) - skb_mac_header(skb);
        if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
                goto out;
Eric Dumazet Feb. 4, 2022, 1:48 a.m. UTC | #10
On Thu, Feb 3, 2022 at 5:14 PM Eric Dumazet <edumazet@google.com> wrote:
>
> On Thu, Feb 3, 2022 at 4:27 PM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Thu, Feb 3, 2022 at 4:05 PM Alexander Duyck
> > <alexander.duyck@gmail.com> wrote:
> > >
> >
> > > I get that. What I was getting at was that we might be able to process
> > > it in ipv6_gso_segment before we hand it off to either TCP or UDP gso
> > > handlers to segment.
> > >
> > > The general idea being we keep the IPv6 specific bits in the IPv6
> > > specific code instead of having the skb_segment function now have to
> > > understand IPv6 packets. So what we would end up doing is having to do
> > > an skb_cow to replace the skb->head if any clones might be holding on
> > > it, and then just chop off the HBH jumbo header before we start the
> > > segmenting.
> > >
> > > The risk would be that we waste cycles removing the HBH header for a
> > > frame that is going to fail, but I am not sure how likely a scenario
> > > that is or if we need to optimize for that.
> >
> > I guess I can try this for the next version, thanks.
>
> I came up with:
>
> ommit 147f17169ccc6c2c38ea802e5728528ed54f492d
> Author: Eric Dumazet <edumazet@google.com>
> Date:   Sat Nov 20 16:49:35 2021 -0800
>
>     ipv6/gso: remove temporary HBH/jumbo header
>
>     ipv6 tcp and gro stacks will soon be able to build big TCP packets,
>     with an added temporary Hop By Hop header.
>
>     If GSO is involved for these large packets, we need to remove
>     the temporary HBH header before segmentation happens.
>
>     v2: perform HBH removal from ipv6_gso_segment() instead of
>         skb_segment() (Alexander feedback)
>
>     Signed-off-by: Eric Dumazet <edumazet@google.com>


Well, this does not work at all.




>  static inline bool ipv6_accept_ra(struct inet6_dev *idev)
>  {
>         /* If forwarding is enabled, RA are not accepted unless the special
> diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
> index d37a79a8554e92a1dcaa6fd023cafe2114841ece..7f65097c8f30fa19a8c9c265eb4f027e91848021
> 100644
> --- a/net/ipv6/ip6_offload.c
> +++ b/net/ipv6/ip6_offload.c
> @@ -87,6 +87,27 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
>         bool gso_partial;
>
>         skb_reset_network_header(skb);
> +       if (ipv6_has_hopopt_jumbo(skb)) {
> +               const int hophdr_len = sizeof(struct hop_jumbo_hdr);
> +               int err;
> +
> +               err = skb_cow_head(skb, 0);
> +               if (err < 0)
> +                       return ERR_PTR(err);
> +
> +               /* remove the HBH header.
> +                * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
> +                */
> +               memmove(skb->data + hophdr_len,
> +                       skb->data,
> +                       ETH_HLEN + sizeof(struct ipv6hdr));
> +               skb->data += hophdr_len;
> +               skb->len -= hophdr_len;
> +               skb->network_header += hophdr_len;
> +               skb->mac_header += hophdr_len;
> +               ipv6h = (struct ipv6hdr *)skb->data;
> +               ipv6h->nexthdr = IPPROTO_TCP;
> +       }
>         nhoff = skb_network_header(skb) - skb_mac_header(skb);
>         if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
>                 goto out;
Eric Dumazet Feb. 4, 2022, 2:15 a.m. UTC | #11
On Thu, Feb 3, 2022 at 5:48 PM Eric Dumazet <edumazet@google.com> wrote:

>
> Well, this does not work at all.
>
>
>
>
> >  static inline bool ipv6_accept_ra(struct inet6_dev *idev)
> >  {
> >         /* If forwarding is enabled, RA are not accepted unless the special
> > diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
> > index d37a79a8554e92a1dcaa6fd023cafe2114841ece..7f65097c8f30fa19a8c9c265eb4f027e91848021
> > 100644
> > --- a/net/ipv6/ip6_offload.c
> > +++ b/net/ipv6/ip6_offload.c
> > @@ -87,6 +87,27 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
> >         bool gso_partial;
> >
> >         skb_reset_network_header(skb);
> > +       if (ipv6_has_hopopt_jumbo(skb)) {
> > +               const int hophdr_len = sizeof(struct hop_jumbo_hdr);
> > +               int err;
> > +
> > +               err = skb_cow_head(skb, 0);
> > +               if (err < 0)
> > +                       return ERR_PTR(err);
> > +
> > +               /* remove the HBH header.
> > +                * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
> > +                */
> > +               memmove(skb->data + hophdr_len,
> > +                       skb->data,

Oh, I must use skb_mac_header() instead of skb->data, sorry for the noise.

> > +                       ETH_HLEN + sizeof(struct ipv6hdr));
> > +               skb->data += hophdr_len;
> > +               skb->len -= hophdr_len;
> > +               skb->network_header += hophdr_len;
> > +               skb->mac_header += hophdr_len;
> > +               ipv6h = (struct ipv6hdr *)skb->data;
> > +               ipv6h->nexthdr = IPPROTO_TCP;
> > +       }
> >         nhoff = skb_network_header(skb) - skb_mac_header(skb);
> >         if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
> >                 goto out;
diff mbox series

Patch

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index ea2a4351b654f8bc96503aae2b9adcd478e1f8b2..96e916fb933c3e7d4288e86790fcb2bb1353a261 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -464,6 +464,37 @@  bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
 struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
 					   struct ipv6_txoptions *opt);
 
+/* This helper is specialized for BIG TCP needs.
+ * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header.
+ * It assumes headers are already in skb->head, thus the sk argument is only read.
+ */
+static inline bool ipv6_has_hopopt_jumbo(const struct sk_buff *skb)
+{
+	struct hop_jumbo_hdr *jhdr;
+	struct ipv6hdr *nhdr;
+
+	if (likely(skb->len <= GRO_MAX_SIZE))
+		return false;
+
+	if (skb->protocol != htons(ETH_P_IPV6))
+		return false;
+
+	if (skb_network_offset(skb) +
+	    sizeof(struct ipv6hdr) +
+	    sizeof(struct hop_jumbo_hdr) > skb_headlen(skb))
+		return false;
+
+	nhdr = ipv6_hdr(skb);
+
+	if (nhdr->nexthdr != NEXTHDR_HOP)
+		return false;
+
+	jhdr = (struct hop_jumbo_hdr *) (nhdr + 1);
+	if (jhdr->tlv_type != IPV6_TLV_JUMBO || jhdr->hdrlen != 0)
+		return false;
+	return true;
+}
+
 static inline bool ipv6_accept_ra(struct inet6_dev *idev)
 {
 	/* If forwarding is enabled, RA are not accepted unless the special
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 0118f0afaa4fce8da167ddf39de4c9f3880ca05b..53f17c7392311e7123628fcab4617efc169905a1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3959,8 +3959,9 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	skb_frag_t *frag = skb_shinfo(head_skb)->frags;
 	unsigned int mss = skb_shinfo(head_skb)->gso_size;
 	unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
+	int hophdr_len = sizeof(struct hop_jumbo_hdr);
 	struct sk_buff *frag_skb = head_skb;
-	unsigned int offset = doffset;
+	unsigned int offset;
 	unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
 	unsigned int partial_segs = 0;
 	unsigned int headroom;
@@ -3968,6 +3969,7 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	__be16 proto;
 	bool csum, sg;
 	int nfrags = skb_shinfo(head_skb)->nr_frags;
+	struct ipv6hdr *h6;
 	int err = -ENOMEM;
 	int i = 0;
 	int pos;
@@ -3992,6 +3994,23 @@  struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	}
 
 	__skb_push(head_skb, doffset);
+
+	if (ipv6_has_hopopt_jumbo(head_skb)) {
+		/* remove the HBH header.
+		 * Layout: [Ethernet header][IPv6 header][HBH][TCP header]
+		 */
+		memmove(head_skb->data + hophdr_len,
+			head_skb->data,
+			ETH_HLEN + sizeof(struct ipv6hdr));
+		head_skb->data += hophdr_len;
+		head_skb->len -= hophdr_len;
+		head_skb->network_header += hophdr_len;
+		head_skb->mac_header += hophdr_len;
+		doffset -= hophdr_len;
+		h6 = (struct ipv6hdr *)(head_skb->data + ETH_HLEN);
+		h6->nexthdr = IPPROTO_TCP;
+	}
+	offset = doffset;
 	proto = skb_network_protocol(head_skb, NULL);
 	if (unlikely(!proto))
 		return ERR_PTR(-EINVAL);