diff mbox series

[net-next,v2,2/3] net: gro: parse ipv6 ext headers without frag0 invalidation

Message ID 90117449-1f4a-47d7-baf4-2ed6540bc436@gmail.com (mailing list archive)
State New
Headers show
Series net: gro: reduce extension header parsing overhead | expand

Commit Message

Richard Gobert Jan. 2, 2024, 1:24 p.m. UTC
The existing code always pulls the IPv6 header and sets the transport
offset initially. It then optionally pulls any extension headers in
ipv6_gso_pull_exthdrs and sets the transport offset again on return from
that call. Before calling ipv6_gso_pull_exthdrs, the existing code moves
skb->data to the start of the first extension header and must disable the
frag0 optimization, because that function uses pskb_may_pull/pskb_pull
instead of the skb_gro_ helpers. Afterwards it sets the GRO offset to the
TCP header with skb_gro_pull, sets the transport header, and finally
returns skb->data to its position before this block.

This commit introduces a new helper function - ipv6_gro_pull_exthdrs -
which is used in ipv6_gro_receive to pull ipv6 ext headers instead of
ipv6_gso_pull_exthdrs. Thus, there is no modification of skb->data, all
operations use skb_gro_* helpers, and the frag0 fast path can be taken for
IPv6 packets with ext headers.

Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
---
 include/net/ipv6.h     |  1 +
 net/ipv6/ip6_offload.c | 51 +++++++++++++++++++++++++++++++++---------
 2 files changed, 42 insertions(+), 10 deletions(-)

Comments

David Ahern Jan. 2, 2024, 4:33 p.m. UTC | #1
On 1/2/24 6:24 AM, Richard Gobert wrote:
> The existing code always pulls the IPv6 header and sets the transport
> offset initially. Then optionally again pulls any extension headers in
> ipv6_gso_pull_exthdrs and sets the transport offset again on return from
> that call. skb->data is set at the start of the first extension header
> before calling ipv6_gso_pull_exthdrs, and must disable the frag0
> optimization because that function uses pskb_may_pull/pskb_pull instead of
> skb_gro_ helpers. It sets the GRO offset to the TCP header with
> skb_gro_pull and sets the transport header. Then returns skb->data to its
> position before this block.
> 
> This commit introduces a new helper function - ipv6_gro_pull_exthdrs -
> which is used in ipv6_gro_receive to pull ipv6 ext headers instead of
> ipv6_gso_pull_exthdrs. Thus, there is no modification of skb->data, all
> operations use skb_gro_* helpers, and the frag0 fast path can be taken for
> IPv6 packets with ext headers.
> 
> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
> Reviewed-by: Willem de Bruijn <willemb@google.com>
> ---
>  include/net/ipv6.h     |  1 +
>  net/ipv6/ip6_offload.c | 51 +++++++++++++++++++++++++++++++++---------
>  2 files changed, 42 insertions(+), 10 deletions(-)
> 

Reviewed-by: David Ahern <dsahern@kernel.org>
Eric Dumazet Jan. 2, 2024, 5:33 p.m. UTC | #2
On Tue, Jan 2, 2024 at 2:25 PM Richard Gobert <richardbgobert@gmail.com> wrote:
>
> The existing code always pulls the IPv6 header and sets the transport
> offset initially. Then optionally again pulls any extension headers in
> ipv6_gso_pull_exthdrs and sets the transport offset again on return from
> that call. skb->data is set at the start of the first extension header
> before calling ipv6_gso_pull_exthdrs, and must disable the frag0
> optimization because that function uses pskb_may_pull/pskb_pull instead of
> skb_gro_ helpers. It sets the GRO offset to the TCP header with
> skb_gro_pull and sets the transport header. Then returns skb->data to its
> position before this block.
>
> This commit introduces a new helper function - ipv6_gro_pull_exthdrs -
> which is used in ipv6_gro_receive to pull ipv6 ext headers instead of
> ipv6_gso_pull_exthdrs. Thus, there is no modification of skb->data, all
> operations use skb_gro_* helpers, and the frag0 fast path can be taken for
> IPv6 packets with ext headers.
>
> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
> Reviewed-by: Willem de Bruijn <willemb@google.com>
> ---
>  include/net/ipv6.h     |  1 +
>  net/ipv6/ip6_offload.c | 51 +++++++++++++++++++++++++++++++++---------
>  2 files changed, 42 insertions(+), 10 deletions(-)
>
> diff --git a/include/net/ipv6.h b/include/net/ipv6.h
> index 78d38dd88aba..217240efa182 100644
> --- a/include/net/ipv6.h
> +++ b/include/net/ipv6.h
> @@ -26,6 +26,7 @@ struct ip_tunnel_info;
>  #define SIN6_LEN_RFC2133       24
>
>  #define IPV6_MAXPLEN           65535
> +#define IPV6_MIN_EXTHDR_LEN    8

// Hmm, see my following comment.

>
>  /*
>   *     NextHeader field of IPv6 header
> diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
> index 0e0b5fed0995..c07111d8f56a 100644
> --- a/net/ipv6/ip6_offload.c
> +++ b/net/ipv6/ip6_offload.c
> @@ -37,6 +37,40 @@
>                 INDIRECT_CALL_L4(cb, f2, f1, head, skb);        \
>  })
>
> +static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
> +{
> +       const struct net_offload *ops = NULL;
> +       struct ipv6_opt_hdr *opth;
> +
> +       for (;;) {
> +               int len;
> +
> +               ops = rcu_dereference(inet6_offloads[proto]);
> +
> +               if (unlikely(!ops))
> +                       break;
> +
> +               if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
> +                       break;
> +
> +               opth = skb_gro_header(skb, off + IPV6_MIN_EXTHDR_LEN, off);

I do not see a compelling reason for adding yet another constant here.

I would stick to

   opth = skb_gro_header(skb, off + sizeof(*opth), off);

Consistency with similar helpers is desirable.

> +               if (unlikely(!opth))
> +                       break;
> +
> +               len = ipv6_optlen(opth);
> +
> +               opth = skb_gro_header(skb, off + len, off);

Note that this call will take care of the precise pull.

> +               if (unlikely(!opth))
> +                       break;
> +               proto = opth->nexthdr;
> +
> +               off += len;
> +       }
> +
> +       skb_gro_pull(skb, off - skb_network_offset(skb));
> +       return proto;
> +}
> +
>  static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
>  {
>         const struct net_offload *ops = NULL;
> @@ -203,28 +237,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
>                 goto out;
>
>         skb_set_network_header(skb, off);
> -       skb_gro_pull(skb, sizeof(*iph));
> -       skb_set_transport_header(skb, skb_gro_offset(skb));
>
> -       flush += ntohs(iph->payload_len) != skb_gro_len(skb);
> +       flush += ntohs(iph->payload_len) != skb->len - hlen;
>
>         proto = iph->nexthdr;
>         ops = rcu_dereference(inet6_offloads[proto]);
>         if (!ops || !ops->callbacks.gro_receive) {
> -               pskb_pull(skb, skb_gro_offset(skb));
> -               skb_gro_frag0_invalidate(skb);
> -               proto = ipv6_gso_pull_exthdrs(skb, proto);
> -               skb_gro_pull(skb, -skb_transport_offset(skb));
> -               skb_reset_transport_header(skb);
> -               __skb_push(skb, skb_gro_offset(skb));
> +               proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
>
>                 ops = rcu_dereference(inet6_offloads[proto]);
>                 if (!ops || !ops->callbacks.gro_receive)
>                         goto out;
>
> -               iph = ipv6_hdr(skb);
> +               iph = skb_gro_network_header(skb);
> +       } else {
> +               skb_gro_pull(skb, sizeof(*iph));
>         }
>
> +       skb_set_transport_header(skb, skb_gro_offset(skb));
> +
>         NAPI_GRO_CB(skb)->proto = proto;
>
>         flush--;
> --
> 2.36.1
>
Richard Gobert Jan. 3, 2024, 1:08 p.m. UTC | #3
Eric Dumazet wrote:
> On Tue, Jan 2, 2024 at 2:25 PM Richard Gobert <richardbgobert@gmail.com> wrote:
>>
>> The existing code always pulls the IPv6 header and sets the transport
>> offset initially. Then optionally again pulls any extension headers in
>> ipv6_gso_pull_exthdrs and sets the transport offset again on return from
>> that call. skb->data is set at the start of the first extension header
>> before calling ipv6_gso_pull_exthdrs, and must disable the frag0
>> optimization because that function uses pskb_may_pull/pskb_pull instead of
>> skb_gro_ helpers. It sets the GRO offset to the TCP header with
>> skb_gro_pull and sets the transport header. Then returns skb->data to its
>> position before this block.
>>
>> This commit introduces a new helper function - ipv6_gro_pull_exthdrs -
>> which is used in ipv6_gro_receive to pull ipv6 ext headers instead of
>> ipv6_gso_pull_exthdrs. Thus, there is no modification of skb->data, all
>> operations use skb_gro_* helpers, and the frag0 fast path can be taken for
>> IPv6 packets with ext headers.
>>
>> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
>> Reviewed-by: Willem de Bruijn <willemb@google.com>
>> ---
>>  include/net/ipv6.h     |  1 +
>>  net/ipv6/ip6_offload.c | 51 +++++++++++++++++++++++++++++++++---------
>>  2 files changed, 42 insertions(+), 10 deletions(-)
>>
>> diff --git a/include/net/ipv6.h b/include/net/ipv6.h
>> index 78d38dd88aba..217240efa182 100644
>> --- a/include/net/ipv6.h
>> +++ b/include/net/ipv6.h
>> @@ -26,6 +26,7 @@ struct ip_tunnel_info;
>>  #define SIN6_LEN_RFC2133       24
>>
>>  #define IPV6_MAXPLEN           65535
>> +#define IPV6_MIN_EXTHDR_LEN    8
> 
> // Hmm see my following comment.
> 
>>
>>  /*
>>   *     NextHeader field of IPv6 header
>> diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
>> index 0e0b5fed0995..c07111d8f56a 100644
>> --- a/net/ipv6/ip6_offload.c
>> +++ b/net/ipv6/ip6_offload.c
>> @@ -37,6 +37,40 @@
>>                 INDIRECT_CALL_L4(cb, f2, f1, head, skb);        \
>>  })
>>
>> +static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
>> +{
>> +       const struct net_offload *ops = NULL;
>> +       struct ipv6_opt_hdr *opth;
>> +
>> +       for (;;) {
>> +               int len;
>> +
>> +               ops = rcu_dereference(inet6_offloads[proto]);
>> +
>> +               if (unlikely(!ops))
>> +                       break;
>> +
>> +               if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
>> +                       break;
>> +
>> +               opth = skb_gro_header(skb, off + IPV6_MIN_EXTHDR_LEN, off);
> 
> I do not see a compelling reason for adding yet another constant here.
> 
> I would stick to
> 
>    opth = skb_gro_header(skb, off + sizeof(*opth), off);
> 
> Consistency with similar helpers is desirable.
> 

In terms of consistency - similar helper functions (ipv6_gso_pull_exthdrs,
ipv6_parse_hopopts) also pull 8 bytes at the beginning of every IPv6
extension header, because the minimum extension header length is 8 bytes.

sizeof(*opth) = 2, so for an IPv6 packet with one extension header with a
common length of 8 bytes, pskb_may_pull will be called twice: first with
length = 2 and again with length = 8, which might not be ideal when parsing
non-linear packets.

Willem suggested adding a constant to make the code more self-documenting.

>> +               if (unlikely(!opth))
>> +                       break;
>> +
>> +               len = ipv6_optlen(opth);
>> +
>> +               opth = skb_gro_header(skb, off + len, off);
> 
> Note this call will take care of precise pull.
> 
>> +               if (unlikely(!opth))
>> +                       break;
>> +               proto = opth->nexthdr;
>> +
>> +               off += len;
>> +       }
>> +
>> +       skb_gro_pull(skb, off - skb_network_offset(skb));
>> +       return proto;
>> +}
>> +
>>  static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
>>  {
>>         const struct net_offload *ops = NULL;
>> @@ -203,28 +237,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
>>                 goto out;
>>
>>         skb_set_network_header(skb, off);
>> -       skb_gro_pull(skb, sizeof(*iph));
>> -       skb_set_transport_header(skb, skb_gro_offset(skb));
>>
>> -       flush += ntohs(iph->payload_len) != skb_gro_len(skb);
>> +       flush += ntohs(iph->payload_len) != skb->len - hlen;
>>
>>         proto = iph->nexthdr;
>>         ops = rcu_dereference(inet6_offloads[proto]);
>>         if (!ops || !ops->callbacks.gro_receive) {
>> -               pskb_pull(skb, skb_gro_offset(skb));
>> -               skb_gro_frag0_invalidate(skb);
>> -               proto = ipv6_gso_pull_exthdrs(skb, proto);
>> -               skb_gro_pull(skb, -skb_transport_offset(skb));
>> -               skb_reset_transport_header(skb);
>> -               __skb_push(skb, skb_gro_offset(skb));
>> +               proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
>>
>>                 ops = rcu_dereference(inet6_offloads[proto]);
>>                 if (!ops || !ops->callbacks.gro_receive)
>>                         goto out;
>>
>> -               iph = ipv6_hdr(skb);
>> +               iph = skb_gro_network_header(skb);
>> +       } else {
>> +               skb_gro_pull(skb, sizeof(*iph));
>>         }
>>
>> +       skb_set_transport_header(skb, skb_gro_offset(skb));
>> +
>>         NAPI_GRO_CB(skb)->proto = proto;
>>
>>         flush--;
>> --
>> 2.36.1
>>
Eric Dumazet Jan. 3, 2024, 1:30 p.m. UTC | #4
On Wed, Jan 3, 2024 at 2:08 PM Richard Gobert <richardbgobert@gmail.com> wrote:
>
>
>
> Eric Dumazet wrote:
> > On Tue, Jan 2, 2024 at 2:25 PM Richard Gobert <richardbgobert@gmail.com> wrote:
> >>
> >> The existing code always pulls the IPv6 header and sets the transport
> >> offset initially. Then optionally again pulls any extension headers in
> >> ipv6_gso_pull_exthdrs and sets the transport offset again on return from
> >> that call. skb->data is set at the start of the first extension header
> >> before calling ipv6_gso_pull_exthdrs, and must disable the frag0
> >> optimization because that function uses pskb_may_pull/pskb_pull instead of
> >> skb_gro_ helpers. It sets the GRO offset to the TCP header with
> >> skb_gro_pull and sets the transport header. Then returns skb->data to its
> >> position before this block.
> >>
> >> This commit introduces a new helper function - ipv6_gro_pull_exthdrs -
> >> which is used in ipv6_gro_receive to pull ipv6 ext headers instead of
> >> ipv6_gso_pull_exthdrs. Thus, there is no modification of skb->data, all
> >> operations use skb_gro_* helpers, and the frag0 fast path can be taken for
> >> IPv6 packets with ext headers.
> >>
> >> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
> >> Reviewed-by: Willem de Bruijn <willemb@google.com>
> >> ---
> >>  include/net/ipv6.h     |  1 +
> >>  net/ipv6/ip6_offload.c | 51 +++++++++++++++++++++++++++++++++---------
> >>  2 files changed, 42 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/include/net/ipv6.h b/include/net/ipv6.h
> >> index 78d38dd88aba..217240efa182 100644
> >> --- a/include/net/ipv6.h
> >> +++ b/include/net/ipv6.h
> >> @@ -26,6 +26,7 @@ struct ip_tunnel_info;
> >>  #define SIN6_LEN_RFC2133       24
> >>
> >>  #define IPV6_MAXPLEN           65535
> >> +#define IPV6_MIN_EXTHDR_LEN    8
> >
> > // Hmm see my following comment.
> >
> >>
> >>  /*
> >>   *     NextHeader field of IPv6 header
> >> diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
> >> index 0e0b5fed0995..c07111d8f56a 100644
> >> --- a/net/ipv6/ip6_offload.c
> >> +++ b/net/ipv6/ip6_offload.c
> >> @@ -37,6 +37,40 @@
> >>                 INDIRECT_CALL_L4(cb, f2, f1, head, skb);        \
> >>  })
> >>
> >> +static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
> >> +{
> >> +       const struct net_offload *ops = NULL;
> >> +       struct ipv6_opt_hdr *opth;
> >> +
> >> +       for (;;) {
> >> +               int len;
> >> +
> >> +               ops = rcu_dereference(inet6_offloads[proto]);
> >> +
> >> +               if (unlikely(!ops))
> >> +                       break;
> >> +
> >> +               if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
> >> +                       break;
> >> +
> >> +               opth = skb_gro_header(skb, off + IPV6_MIN_EXTHDR_LEN, off);
> >
> > I do not see a compelling reason for adding yet another constant here.
> >
> > I would stick to
> >
> >    opth = skb_gro_header(skb, off + sizeof(*opth), off);
> >
> > Consistency with similar helpers is desirable.
> >
>
> In terms of consistency - similar helper functions (ipv6_gso_pull_exthdrs,
> ipv6_parse_hopopts) also pull 8 bytes at the beginning of every IPv6
> extension header, because the minimum extension header length is 8 bytes.
>
> sizeof(*opth) = 2, so for an IPv6 packet with one extension header with a
> common length of 8 bytes, pskb_may_pull will be called twice: first with
> length = 2 and again with length = 8, which might not be ideal when parsing
> non-linear packets.
>
> Willem suggested adding a constant to make the code more self-documenting.


Hmm... I was looking at

skb_checksum_setup_ipv6(): it uses
    skb_maybe_pull_tail(..., sizeof(struct ipv6_opt_hdr))
ipv6_skip_exthdr(): also uses sizeof(struct ipv6_opt_hdr)
ip6_tnl_parse_tlv_enc_lim(): also uses the same
hbh_mt6(), ipv6header_mt6(), ...: same
ip6_find_1stfragopt(), get_ipv6_ext_hdrs(), tcf_csum_ipv6(),
mip6_rthdr_offset(): same

So it seems you found two helpers that went the other way.

If you think pulling 8 bytes first is a win, I would suggest a standalone
patch that adds the magic constant and uses it in all places, so that a
casual reader can make sense of the magical 8 value.
Richard Gobert Jan. 3, 2024, 2:01 p.m. UTC | #5
Eric Dumazet wrote:

> 
> 
> Hmm... I was looking at
> 
> skb_checksum_setup_ipv6() , it uses skb_maybe_pull_tail( ...
> sizeof(struct ipv6_opt_hdr))
> ipv6_skip_exthdr()  also uses sizeof(struct ipv6_opt_hdr)
> ip6_tnl_parse_tlv_enc_lim also uses the same.
> hbh_mt6(), ipv6header_mt6(),  .. same...
> ip6_find_1stfragopt(), get_ipv6_ext_hdrs(), tcf_csum_ipv6(),
> mip6_rthdr_offset() same
> 
> So it seems you found two helpers that went the other way.
> 
> If you think pulling 8 bytes first is a win, I would suggest a stand
> alone patch, adding the magic constant
> using it in all places, so that a casual reader can make sense of the
> magical 8 value.

I guess pulling 8 bytes first is not such a big advantage.
I will submit a v3 with sizeof(*opth) as you suggested.
diff mbox series

Patch

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 78d38dd88aba..217240efa182 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -26,6 +26,7 @@  struct ip_tunnel_info;
 #define SIN6_LEN_RFC2133	24
 
 #define IPV6_MAXPLEN		65535
+#define IPV6_MIN_EXTHDR_LEN	8
 
 /*
  *	NextHeader field of IPv6 header
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 0e0b5fed0995..c07111d8f56a 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -37,6 +37,40 @@ 
 		INDIRECT_CALL_L4(cb, f2, f1, head, skb);	\
 })
 
+static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
+{
+	const struct net_offload *ops = NULL;
+	struct ipv6_opt_hdr *opth;
+
+	for (;;) {
+		int len;
+
+		ops = rcu_dereference(inet6_offloads[proto]);
+
+		if (unlikely(!ops))
+			break;
+
+		if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+			break;
+
+		opth = skb_gro_header(skb, off + IPV6_MIN_EXTHDR_LEN, off);
+		if (unlikely(!opth))
+			break;
+
+		len = ipv6_optlen(opth);
+
+		opth = skb_gro_header(skb, off + len, off);
+		if (unlikely(!opth))
+			break;
+		proto = opth->nexthdr;
+
+		off += len;
+	}
+
+	skb_gro_pull(skb, off - skb_network_offset(skb));
+	return proto;
+}
+
 static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
 {
 	const struct net_offload *ops = NULL;
@@ -203,28 +237,25 @@  INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 		goto out;
 
 	skb_set_network_header(skb, off);
-	skb_gro_pull(skb, sizeof(*iph));
-	skb_set_transport_header(skb, skb_gro_offset(skb));
 
-	flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+	flush += ntohs(iph->payload_len) != skb->len - hlen;
 
 	proto = iph->nexthdr;
 	ops = rcu_dereference(inet6_offloads[proto]);
 	if (!ops || !ops->callbacks.gro_receive) {
-		pskb_pull(skb, skb_gro_offset(skb));
-		skb_gro_frag0_invalidate(skb);
-		proto = ipv6_gso_pull_exthdrs(skb, proto);
-		skb_gro_pull(skb, -skb_transport_offset(skb));
-		skb_reset_transport_header(skb);
-		__skb_push(skb, skb_gro_offset(skb));
+		proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
 
 		ops = rcu_dereference(inet6_offloads[proto]);
 		if (!ops || !ops->callbacks.gro_receive)
 			goto out;
 
-		iph = ipv6_hdr(skb);
+		iph = skb_gro_network_header(skb);
+	} else {
+		skb_gro_pull(skb, sizeof(*iph));
 	}
 
+	skb_set_transport_header(skb, skb_gro_offset(skb));
+
 	NAPI_GRO_CB(skb)->proto = proto;
 
 	flush--;