diff mbox series

[net-next,v7,1/3] net: gro: add {inner_}network_offset to napi_gro_cb

Message ID 20240412155533.115507-2-richardbgobert@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series net: gro: move p->{flush/flush_id} calculations to L4 | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/apply fail Patch does not apply to net-next-1

Commit Message

Richard Gobert April 12, 2024, 3:55 p.m. UTC
This patch adds network_offset and inner_network_offset to napi_gro_cb, and
makes sure both are set correctly. In the common path there's only one
write (skb_gro_reset_offset).

Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
---
 drivers/net/geneve.c           |  1 +
 drivers/net/vxlan/vxlan_core.c |  1 +
 include/net/gro.h              | 18 ++++++++++++++++--
 net/8021q/vlan_core.c          |  2 ++
 net/core/gro.c                 |  1 +
 net/ethernet/eth.c             |  1 +
 net/ipv4/af_inet.c             |  5 +----
 net/ipv4/gre_offload.c         |  1 +
 net/ipv6/ip6_offload.c         |  8 ++++----
 9 files changed, 28 insertions(+), 10 deletions(-)

Comments

Willem de Bruijn April 14, 2024, 1:15 a.m. UTC | #1
Richard Gobert wrote:
> This patch adds network_offset and inner_network_offset to napi_gro_cb, and
> makes sure both are set correctly. In the common path there's only one
> write (skb_gro_reset_offset).
> 
> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
> ---
>  drivers/net/geneve.c           |  1 +
>  drivers/net/vxlan/vxlan_core.c |  1 +
>  include/net/gro.h              | 18 ++++++++++++++++--
>  net/8021q/vlan_core.c          |  2 ++
>  net/core/gro.c                 |  1 +
>  net/ethernet/eth.c             |  1 +
>  net/ipv4/af_inet.c             |  5 +----
>  net/ipv4/gre_offload.c         |  1 +
>  net/ipv6/ip6_offload.c         |  8 ++++----
>  9 files changed, 28 insertions(+), 10 deletions(-)
> 
> diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
> index d4520c3f7c09..ae596285d78c 100644
> --- a/net/ipv4/gre_offload.c
> +++ b/net/ipv4/gre_offload.c
> @@ -224,6 +224,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
>  	/* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
>  	skb_gro_postpull_rcsum(skb, greh, grehlen);
>  
> +	NAPI_GRO_CB(skb)->inner_network_offset = hlen;
>  	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
>  	flush = 0;

Nice that this even works for ETH_P_TEB, as eth_gro_receive will
overwrite the offset written here.
  
  
>  	list_for_each_entry(p, head, list) {
>  		const struct ipv6hdr *iph2;
> @@ -327,6 +325,7 @@ static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head,
>  	}
>  
>  	NAPI_GRO_CB(skb)->encap_mark = 1;
> +	NAPI_GRO_CB(skb)->inner_network_offset = skb_gro_offset(skb);
>  
>  	return ipv6_gro_receive(head, skb);
>  }
> @@ -342,6 +341,7 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
>  	}
>  
>  	NAPI_GRO_CB(skb)->encap_mark = 1;
> +	NAPI_GRO_CB(skb)->inner_network_offset = skb_gro_offset(skb);

Do we still need encap_mark, or is it always set at the same time that
inner_network_offset becomes non-zero?
Paolo Abeni April 16, 2024, 9:36 a.m. UTC | #2
On Fri, 2024-04-12 at 17:55 +0200, Richard Gobert wrote:
> This patch adds network_offset and inner_network_offset to napi_gro_cb, and
> makes sure both are set correctly. In the common path there's only one
> write (skb_gro_reset_offset).
> 
> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>

Does not apply cleanly to net-next. You have to wait until the net
dependency is merged into net-next before posting.

> ---
>  drivers/net/geneve.c           |  1 +
>  drivers/net/vxlan/vxlan_core.c |  1 +
>  include/net/gro.h              | 18 ++++++++++++++++--
>  net/8021q/vlan_core.c          |  2 ++
>  net/core/gro.c                 |  1 +
>  net/ethernet/eth.c             |  1 +
>  net/ipv4/af_inet.c             |  5 +----
>  net/ipv4/gre_offload.c         |  1 +
>  net/ipv6/ip6_offload.c         |  8 ++++----
>  9 files changed, 28 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
> index 9c18a39b0d0c..a6256ea1f5bc 100644
> --- a/drivers/net/geneve.c
> +++ b/drivers/net/geneve.c
> @@ -545,6 +545,7 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk,
>  	if (!ptype)
>  		goto out;
>  
> +	NAPI_GRO_CB(skb)->inner_network_offset = hlen;
>  	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
>  	flush = 0;
>  
> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
> index 6fb182d9d6e7..9fb93c3953c1 100644
> --- a/drivers/net/vxlan/vxlan_core.c
> +++ b/drivers/net/vxlan/vxlan_core.c
> @@ -754,6 +754,7 @@ static struct sk_buff *vxlan_gpe_gro_receive(struct sock *sk,
>  
>  	vh = vxlan_gro_prepare_receive(sk, head, skb, &grc);
>  	if (vh) {
> +		NAPI_GRO_CB(skb)->inner_network_offset = skb_gro_offset(skb);
>  		if (!vxlan_parse_gpe_proto(vh, &protocol))
>  			goto out;
>  		ptype = gro_find_receive_by_type(protocol);

What about vxlan_gro_receive? and fou/gue?

Side note: the latter apparently exist mainly to make UDP-related
changes more difficult, can we deprecate them once and for all?

Thanks,

Paolo
Richard Gobert April 17, 2024, 1:57 p.m. UTC | #3
Willem de Bruijn wrote:
> Richard Gobert wrote:
>> This patch adds network_offset and inner_network_offset to napi_gro_cb, and
>> makes sure both are set correctly. In the common path there's only one
>> write (skb_gro_reset_offset).
>>
>> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
>> ---
>>  drivers/net/geneve.c           |  1 +
>>  drivers/net/vxlan/vxlan_core.c |  1 +
>>  include/net/gro.h              | 18 ++++++++++++++++--
>>  net/8021q/vlan_core.c          |  2 ++
>>  net/core/gro.c                 |  1 +
>>  net/ethernet/eth.c             |  1 +
>>  net/ipv4/af_inet.c             |  5 +----
>>  net/ipv4/gre_offload.c         |  1 +
>>  net/ipv6/ip6_offload.c         |  8 ++++----
>>  9 files changed, 28 insertions(+), 10 deletions(-)
>>
>> diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
>> index d4520c3f7c09..ae596285d78c 100644
>> --- a/net/ipv4/gre_offload.c
>> +++ b/net/ipv4/gre_offload.c
>> @@ -224,6 +224,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
>>  	/* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
>>  	skb_gro_postpull_rcsum(skb, greh, grehlen);
>>  
>> +	NAPI_GRO_CB(skb)->inner_network_offset = hlen;
>>  	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
>>  	flush = 0;
> 
> Nice that this even works for ETH_P_TEB, as eth_gro_receive will
> overwrite the offset written here.
>   
>   
>>  	list_for_each_entry(p, head, list) {
>>  		const struct ipv6hdr *iph2;
>> @@ -327,6 +325,7 @@ static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head,
>>  	}
>>  
>>  	NAPI_GRO_CB(skb)->encap_mark = 1;
>> +	NAPI_GRO_CB(skb)->inner_network_offset = skb_gro_offset(skb);
>>  
>>  	return ipv6_gro_receive(head, skb);
>>  }
>> @@ -342,6 +341,7 @@ static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
>>  	}
>>  
>>  	NAPI_GRO_CB(skb)->encap_mark = 1;
>> +	NAPI_GRO_CB(skb)->inner_network_offset = skb_gro_offset(skb);
> 
> Do we still need encap_mark, or is it always set at the same time that
> inner_network_offset becomes non-zero?
> 

This would require setting inner_network_offset to 0 before dev_gro_receive,
which would not be favorable to the common case (as opposed to encap_mark,
which is already set to 0 as part of NAPI_GRO_CB->zeroed). In my
opinion, it might also be less readable.
Richard Gobert April 18, 2024, 3:09 p.m. UTC | #4
Paolo Abeni wrote:
> On Fri, 2024-04-12 at 17:55 +0200, Richard Gobert wrote:
>> This patch adds network_offset and inner_network_offset to napi_gro_cb, and
>> makes sure both are set correctly. In the common path there's only one
>> write (skb_gro_reset_offset).
>>
>> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
> 
> Does not apply cleanly to net-next. You have to wait until the net
> dependency is merged into net-next before posting.
> 
>> ---
>>  drivers/net/geneve.c           |  1 +
>>  drivers/net/vxlan/vxlan_core.c |  1 +
>>  include/net/gro.h              | 18 ++++++++++++++++--
>>  net/8021q/vlan_core.c          |  2 ++
>>  net/core/gro.c                 |  1 +
>>  net/ethernet/eth.c             |  1 +
>>  net/ipv4/af_inet.c             |  5 +----
>>  net/ipv4/gre_offload.c         |  1 +
>>  net/ipv6/ip6_offload.c         |  8 ++++----
>>  9 files changed, 28 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
>> index 9c18a39b0d0c..a6256ea1f5bc 100644
>> --- a/drivers/net/geneve.c
>> +++ b/drivers/net/geneve.c
>> @@ -545,6 +545,7 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk,
>>  	if (!ptype)
>>  		goto out;
>>  
>> +	NAPI_GRO_CB(skb)->inner_network_offset = hlen;
>>  	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
>>  	flush = 0;
>>  
>> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
>> index 6fb182d9d6e7..9fb93c3953c1 100644
>> --- a/drivers/net/vxlan/vxlan_core.c
>> +++ b/drivers/net/vxlan/vxlan_core.c
>> @@ -754,6 +754,7 @@ static struct sk_buff *vxlan_gpe_gro_receive(struct sock *sk,
>>  
>>  	vh = vxlan_gro_prepare_receive(sk, head, skb, &grc);
>>  	if (vh) {
>> +		NAPI_GRO_CB(skb)->inner_network_offset = skb_gro_offset(skb);
>>  		if (!vxlan_parse_gpe_proto(vh, &protocol))
>>  			goto out;
>>  		ptype = gro_find_receive_by_type(protocol);
> 
> What about vxlan_gro_receive? and fou/gue?
> 

No need to write in fou/gue functions, as both functions call
{inet,inet6}_offloads, which means if there's an IP/IPv6 header after
fou/gue - ipip_gro_receive will be called (or ip6ip6_gro_receive, or
sit_ip6ip6_gro_receive, etc), in which inner_network_offset is written.

vxlan_gro_receive calls eth_gro_receive, in which inner_network_offset
is written as well.

> Side note: the latter apparently exist mainly to make UDP-related
> changes more difficult, can we deprecate them once and for all?
> 
> Thanks,
> 
> Paolo
>
diff mbox series

Patch

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 9c18a39b0d0c..a6256ea1f5bc 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -545,6 +545,7 @@  static struct sk_buff *geneve_gro_receive(struct sock *sk,
 	if (!ptype)
 		goto out;
 
+	NAPI_GRO_CB(skb)->inner_network_offset = hlen;
 	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
 	flush = 0;
 
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 6fb182d9d6e7..9fb93c3953c1 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -754,6 +754,7 @@  static struct sk_buff *vxlan_gpe_gro_receive(struct sock *sk,
 
 	vh = vxlan_gro_prepare_receive(sk, head, skb, &grc);
 	if (vh) {
+		NAPI_GRO_CB(skb)->inner_network_offset = skb_gro_offset(skb);
 		if (!vxlan_parse_gpe_proto(vh, &protocol))
 			goto out;
 		ptype = gro_find_receive_by_type(protocol);
diff --git a/include/net/gro.h b/include/net/gro.h
index ebead1d642b4..a1cc8e8c2ebd 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -87,6 +87,15 @@  struct napi_gro_cb {
 
 	/* used to support CHECKSUM_COMPLETE for tunneling protocols */
 	__wsum	csum;
+
+	/* L3 offsets */
+	union {
+		struct {
+			u16 network_offset;
+			u16 inner_network_offset;
+		};
+		u16 network_offsets[2];
+	};
 };
 
 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
@@ -172,12 +181,17 @@  static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen,
 	return ptr;
 }
 
+static inline int skb_gro_network_offset(const struct sk_buff *skb)
+{
+	return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark];
+}
+
 static inline void *skb_gro_network_header(const struct sk_buff *skb)
 {
 	if (skb_gro_may_pull(skb, skb_gro_offset(skb)))
-		return skb_gro_header_fast(skb, skb_network_offset(skb));
+		return skb_gro_header_fast(skb, skb_gro_network_offset(skb));
 
-	return skb_network_header(skb);
+	return skb->data + skb_gro_network_offset(skb);
 }
 
 static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb,
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 247704cf70af..355cafe23329 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -478,6 +478,8 @@  static struct sk_buff *vlan_gro_receive(struct list_head *head,
 	if (unlikely(!vhdr))
 		goto out;
 
+	NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen;
+
 	type = vhdr->h_vlan_encapsulated_proto;
 
 	ptype = gro_find_receive_by_type(type);
diff --git a/net/core/gro.c b/net/core/gro.c
index b129cd201937..b2156e6cc4ad 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -371,6 +371,7 @@  static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
 	const skb_frag_t *frag0;
 	unsigned int headlen;
 
+	NAPI_GRO_CB(skb)->network_offset = 0;
 	NAPI_GRO_CB(skb)->data_offset = 0;
 	headlen = skb_headlen(skb);
 	NAPI_GRO_CB(skb)->frag0 = skb->data;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 7515e6bcbb7d..e3eca605bcc7 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -441,6 +441,7 @@  struct sk_buff *eth_gro_receive(struct list_head *head, struct sk_buff *skb)
 
 	skb_gro_pull(skb, sizeof(*eh));
 	skb_gro_postpull_rcsum(skb, eh, sizeof(*eh));
+	NAPI_GRO_CB(skb)->inner_network_offset = hlen;
 
 	pp = indirect_call_gro_receive_inet(ptype->callbacks.gro_receive,
 					    ipv6_gro_receive, inet_gro_receive,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5a747d91cd0a..6546bf376b24 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1568,10 +1568,6 @@  struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
 
 	NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF));
 	NAPI_GRO_CB(skb)->flush |= flush;
-	skb_set_network_header(skb, off);
-	/* The above will be needed by the transport layer if there is one
-	 * immediately following this IP hdr.
-	 */
 
 	/* Note : No need to call skb_gro_postpull_rcsum() here,
 	 * as we already checked checksum over ipv4 header was 0
@@ -1597,6 +1593,7 @@  static struct sk_buff *ipip_gro_receive(struct list_head *head,
 	}
 
 	NAPI_GRO_CB(skb)->encap_mark = 1;
+	NAPI_GRO_CB(skb)->inner_network_offset = skb_gro_offset(skb);
 
 	return inet_gro_receive(head, skb);
 }
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index d4520c3f7c09..ae596285d78c 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -224,6 +224,7 @@  static struct sk_buff *gre_gro_receive(struct list_head *head,
 	/* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
 	skb_gro_postpull_rcsum(skb, greh, grehlen);
 
+	NAPI_GRO_CB(skb)->inner_network_offset = hlen;
 	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
 	flush = 0;
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 10ddbbc0e46d..ba41939537f2 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -67,7 +67,7 @@  static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
 		off += len;
 	}
 
-	skb_gro_pull(skb, off - skb_network_offset(skb));
+	skb_gro_pull(skb, off - skb_gro_network_offset(skb));
 	return proto;
 }
 
@@ -236,8 +236,6 @@  INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 	if (unlikely(!iph))
 		goto out;
 
-	skb_set_network_header(skb, off);
-
 	flush += ntohs(iph->payload_len) != skb->len - hlen;
 
 	proto = iph->nexthdr;
@@ -259,7 +257,7 @@  INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
 	NAPI_GRO_CB(skb)->proto = proto;
 
 	flush--;
-	nlen = skb_network_header_len(skb);
+	nlen = skb_gro_offset(skb) - off;
 
 	list_for_each_entry(p, head, list) {
 		const struct ipv6hdr *iph2;
@@ -327,6 +325,7 @@  static struct sk_buff *sit_ip6ip6_gro_receive(struct list_head *head,
 	}
 
 	NAPI_GRO_CB(skb)->encap_mark = 1;
+	NAPI_GRO_CB(skb)->inner_network_offset = skb_gro_offset(skb);
 
 	return ipv6_gro_receive(head, skb);
 }
@@ -342,6 +341,7 @@  static struct sk_buff *ip4ip6_gro_receive(struct list_head *head,
 	}
 
 	NAPI_GRO_CB(skb)->encap_mark = 1;
+	NAPI_GRO_CB(skb)->inner_network_offset = skb_gro_offset(skb);
 
 	return inet_gro_receive(head, skb);
 }