diff mbox series

[net-next,v4,2/4] net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue

Message ID 20241118131502.10077-3-justin.iurman@uliege.be (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Mitigate the two-reallocations issue for iptunnels | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4 this patch: 4
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 157 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-11-18--21-00 (tests: 785)

Commit Message

Justin Iurman Nov. 18, 2024, 1:15 p.m. UTC
This patch mitigates the two-reallocations issue with ioam6_iptunnel by
providing the dst_entry (in the cache) to the first call to
skb_cow_head(). As a result, the very first iteration would still
trigger two reallocations (i.e., empty cache), while next iterations
would only trigger a single reallocation.

Performance tests before/after applying this patch, which clearly shows
the improvement:
- inline mode:
  - before: https://ibb.co/LhQ8V63
  - after: https://ibb.co/x5YT2bS
- encap mode:
  - before: https://ibb.co/3Cjm5m0
  - after: https://ibb.co/TwpsxTC
- encap mode with tunsrc:
  - before: https://ibb.co/Gpy9QPg
  - after: https://ibb.co/PW1bZFT

This patch also fixes an incorrect behavior: after the insertion, the
second call to skb_cow_head() makes sure that the dev has enough
headroom in the skb for layer 2 and stuff. In that case, the "old"
dst_entry was used, which is now fixed. After discussing with Paolo, it
appears that both patches can be merged into a single one -this one-
(for the sake of readability) and target net-next.

Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
---
 net/ipv6/ioam6_iptunnel.c | 82 +++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 41 deletions(-)

Comments

Paolo Abeni Nov. 19, 2024, 10:42 a.m. UTC | #1
On 11/18/24 14:15, Justin Iurman wrote:
> @@ -387,45 +392,40 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>  		goto drop;
>  	}
>  
> -	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
> -	if (unlikely(err))
> -		goto drop;
> +	if (unlikely(!dst)) {
> +		struct ipv6hdr *hdr = ipv6_hdr(skb);
> +		struct flowi6 fl6;
> +
> +		memset(&fl6, 0, sizeof(fl6));
> +		fl6.daddr = hdr->daddr;
> +		fl6.saddr = hdr->saddr;
> +		fl6.flowlabel = ip6_flowinfo(hdr);
> +		fl6.flowi6_mark = skb->mark;
> +		fl6.flowi6_proto = hdr->nexthdr;
> +
> +		dst = ip6_route_output(net, NULL, &fl6);
> +		if (dst->error) {
> +			err = dst->error;
> +			dst_release(dst);
> +			goto drop;
> +		}
>  
> -	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
>  		local_bh_disable();
> -		dst = dst_cache_get(&ilwt->cache);
> +		dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
>  		local_bh_enable();
>  
> -		if (unlikely(!dst)) {
> -			struct ipv6hdr *hdr = ipv6_hdr(skb);
> -			struct flowi6 fl6;
> -
> -			memset(&fl6, 0, sizeof(fl6));
> -			fl6.daddr = hdr->daddr;
> -			fl6.saddr = hdr->saddr;
> -			fl6.flowlabel = ip6_flowinfo(hdr);
> -			fl6.flowi6_mark = skb->mark;
> -			fl6.flowi6_proto = hdr->nexthdr;
> -
> -			dst = ip6_route_output(net, NULL, &fl6);
> -			if (dst->error) {
> -				err = dst->error;
> -				dst_release(dst);
> -				goto drop;
> -			}
> -
> -			local_bh_disable();
> -			dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
> -			local_bh_enable();
> -		}
> +		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
> +		if (unlikely(err))
> +			goto drop;
> +	}
>  
> -		skb_dst_drop(skb);
> -		skb_dst_set(skb, dst);
> +	skb_dst_drop(skb);
> +	skb_dst_set(skb, dst);

Why the above 2 statements are not done only in case of ip address
match, as in the existing code?

>  
> +	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr))
>  		return dst_output(net, sk, skb);
> -	}


Thanks,

Paolo
Justin Iurman Nov. 19, 2024, 12:59 p.m. UTC | #2
On 11/19/24 11:42, Paolo Abeni wrote>> -		skb_dst_drop(skb);
>> -		skb_dst_set(skb, dst);
>> +	skb_dst_drop(skb);
>> +	skb_dst_set(skb, dst);
> 
> Why the above 2 statements are not done only in case of ip address
> match, as in the existing code?

I guess you meant "when they do *not* match", right?

>>   
>> +	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr))
>>   		return dst_output(net, sk, skb);
>> -	}

Good catch. Initially, the only reason was to be on the safe side. Will 
change it to:

	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
		skb_dst_drop(skb);
		skb_dst_set(skb, dst);
		return dst_output(net, sk, skb);
	}

Thanks Paolo!

Cheers,
Justin
diff mbox series

Patch

diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 9d8422e350f8..2b9f3cb29db7 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -253,14 +253,15 @@  static int ioam6_do_fill(struct net *net, struct sk_buff *skb)
 }
 
 static int ioam6_do_inline(struct net *net, struct sk_buff *skb,
-			   struct ioam6_lwt_encap *tuninfo)
+			   struct ioam6_lwt_encap *tuninfo,
+			   struct dst_entry *dst)
 {
 	struct ipv6hdr *oldhdr, *hdr;
 	int hdrlen, err;
 
 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
 
-	err = skb_cow_head(skb, hdrlen + skb->mac_len);
+	err = skb_cow_head(skb, hdrlen + dst_dev_overhead(dst, skb));
 	if (unlikely(err))
 		return err;
 
@@ -291,16 +292,16 @@  static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 			  struct ioam6_lwt_encap *tuninfo,
 			  bool has_tunsrc,
 			  struct in6_addr *tunsrc,
-			  struct in6_addr *tundst)
+			  struct in6_addr *tundst,
+			  struct dst_entry *dst)
 {
-	struct dst_entry *dst = skb_dst(skb);
 	struct ipv6hdr *hdr, *inner_hdr;
 	int hdrlen, len, err;
 
 	hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
 	len = sizeof(*hdr) + hdrlen;
 
-	err = skb_cow_head(skb, len + skb->mac_len);
+	err = skb_cow_head(skb, len + dst_dev_overhead(dst, skb));
 	if (unlikely(err))
 		return err;
 
@@ -324,7 +325,7 @@  static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 	if (has_tunsrc)
 		memcpy(&hdr->saddr, tunsrc, sizeof(*tunsrc));
 	else
-		ipv6_dev_get_saddr(net, dst->dev, &hdr->daddr,
+		ipv6_dev_get_saddr(net, skb_dst(skb)->dev, &hdr->daddr,
 				   IPV6_PREFER_SRC_PUBLIC, &hdr->saddr);
 
 	skb_postpush_rcsum(skb, hdr, len);
@@ -334,7 +335,7 @@  static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
 
 static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct dst_entry *dst = skb_dst(skb);
+	struct dst_entry *dst, *orig_dst = skb_dst(skb);
 	struct in6_addr orig_daddr;
 	struct ioam6_lwt *ilwt;
 	int err = -EINVAL;
@@ -343,7 +344,7 @@  static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	if (skb->protocol != htons(ETH_P_IPV6))
 		goto drop;
 
-	ilwt = ioam6_lwt_state(dst->lwtstate);
+	ilwt = ioam6_lwt_state(orig_dst->lwtstate);
 
 	/* Check for insertion frequency (i.e., "k over n" insertions) */
 	pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
@@ -352,6 +353,10 @@  static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	orig_daddr = ipv6_hdr(skb)->daddr;
 
+	local_bh_disable();
+	dst = dst_cache_get(&ilwt->cache);
+	local_bh_enable();
+
 	switch (ilwt->mode) {
 	case IOAM6_IPTUNNEL_MODE_INLINE:
 do_inline:
@@ -359,7 +364,7 @@  static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
 			goto out;
 
-		err = ioam6_do_inline(net, skb, &ilwt->tuninfo);
+		err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
 		if (unlikely(err))
 			goto drop;
 
@@ -369,7 +374,7 @@  static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		/* Encapsulation (ip6ip6) */
 		err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
 				     ilwt->has_tunsrc, &ilwt->tunsrc,
-				     &ilwt->tundst);
+				     &ilwt->tundst, dst);
 		if (unlikely(err))
 			goto drop;
 
@@ -387,45 +392,40 @@  static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 		goto drop;
 	}
 
-	err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
-	if (unlikely(err))
-		goto drop;
+	if (unlikely(!dst)) {
+		struct ipv6hdr *hdr = ipv6_hdr(skb);
+		struct flowi6 fl6;
+
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.daddr = hdr->daddr;
+		fl6.saddr = hdr->saddr;
+		fl6.flowlabel = ip6_flowinfo(hdr);
+		fl6.flowi6_mark = skb->mark;
+		fl6.flowi6_proto = hdr->nexthdr;
+
+		dst = ip6_route_output(net, NULL, &fl6);
+		if (dst->error) {
+			err = dst->error;
+			dst_release(dst);
+			goto drop;
+		}
 
-	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) {
 		local_bh_disable();
-		dst = dst_cache_get(&ilwt->cache);
+		dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
 		local_bh_enable();
 
-		if (unlikely(!dst)) {
-			struct ipv6hdr *hdr = ipv6_hdr(skb);
-			struct flowi6 fl6;
-
-			memset(&fl6, 0, sizeof(fl6));
-			fl6.daddr = hdr->daddr;
-			fl6.saddr = hdr->saddr;
-			fl6.flowlabel = ip6_flowinfo(hdr);
-			fl6.flowi6_mark = skb->mark;
-			fl6.flowi6_proto = hdr->nexthdr;
-
-			dst = ip6_route_output(net, NULL, &fl6);
-			if (dst->error) {
-				err = dst->error;
-				dst_release(dst);
-				goto drop;
-			}
-
-			local_bh_disable();
-			dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
-			local_bh_enable();
-		}
+		err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
+		if (unlikely(err))
+			goto drop;
+	}
 
-		skb_dst_drop(skb);
-		skb_dst_set(skb, dst);
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
 
+	if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr))
 		return dst_output(net, sk, skb);
-	}
 out:
-	return dst->lwtstate->orig_output(net, sk, skb);
+	return orig_dst->lwtstate->orig_output(net, sk, skb);
 drop:
 	kfree_skb(skb);
 	return err;