
[net,v4,1/1] openvswitch: Fix setting ipv6 fields causing hw csum failure

Message ID 20220220132114.18989-1-paulb@nvidia.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series [net,v4,1/1] openvswitch: Fix setting ipv6 fields causing hw csum failure

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/apply fail Patch does not apply to net

Commit Message

Paul Blakey Feb. 20, 2022, 1:21 p.m. UTC
IPv6 ttl, label and tos fields are modified without first
pulling/pushing the IPv6 header, which would have updated
the hw csum (if available). This might cause a csum validation
failure when the packet is sent to the stack, as can be seen in
the trace below.

Fix this by updating skb->csum if available.
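
A minimal sketch of the update pattern this patch applies (the helper
csum_block_replace() is added by the diff below; old_val, new_val and
offset stand in for the per-field values used in the real code):

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                skb->csum = csum_block_replace(skb->csum,
                                               (__force __wsum)old_val,
                                               (__force __wsum)new_val,
                                               offset);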

Trace resulting from an IPv6 ttl decrement followed by sending the packet
to conntrack [actions: set(ipv6(hlimit=63)),ct(zone=99)]:
[295241.900063] s_pf0vf2: hw csum failure
[295241.923191] Call Trace:
[295241.925728]  <IRQ>
[295241.927836]  dump_stack+0x5c/0x80
[295241.931240]  __skb_checksum_complete+0xac/0xc0
[295241.935778]  nf_conntrack_tcp_packet+0x398/0xba0 [nf_conntrack]
[295241.953030]  nf_conntrack_in+0x498/0x5e0 [nf_conntrack]
[295241.958344]  __ovs_ct_lookup+0xac/0x860 [openvswitch]
[295241.968532]  ovs_ct_execute+0x4a7/0x7c0 [openvswitch]
[295241.979167]  do_execute_actions+0x54a/0xaa0 [openvswitch]
[295242.001482]  ovs_execute_actions+0x48/0x100 [openvswitch]
[295242.006966]  ovs_dp_process_packet+0x96/0x1d0 [openvswitch]
[295242.012626]  ovs_vport_receive+0x6c/0xc0 [openvswitch]
[295242.028763]  netdev_frame_hook+0xc0/0x180 [openvswitch]
[295242.034074]  __netif_receive_skb_core+0x2ca/0xcb0
[295242.047498]  netif_receive_skb_internal+0x3e/0xc0
[295242.052291]  napi_gro_receive+0xba/0xe0
[295242.056231]  mlx5e_handle_rx_cqe_mpwrq_rep+0x12b/0x250 [mlx5_core]
[295242.062513]  mlx5e_poll_rx_cq+0xa0f/0xa30 [mlx5_core]
[295242.067669]  mlx5e_napi_poll+0xe1/0x6b0 [mlx5_core]
[295242.077958]  net_rx_action+0x149/0x3b0
[295242.086762]  __do_softirq+0xd7/0x2d6
[295242.090427]  irq_exit+0xf7/0x100
[295242.093748]  do_IRQ+0x7f/0xd0
[295242.096806]  common_interrupt+0xf/0xf
[295242.100559]  </IRQ>
[295242.102750] RIP: 0033:0x7f9022e88cbd
[295242.125246] RSP: 002b:00007f9022282b20 EFLAGS: 00000246 ORIG_RAX: ffffffffffffffda
[295242.132900] RAX: 0000000000000005 RBX: 0000000000000010 RCX: 0000000000000000
[295242.140120] RDX: 00007f9022282ba8 RSI: 00007f9022282a30 RDI: 00007f9014005c30
[295242.147337] RBP: 00007f9014014d60 R08: 0000000000000020 R09: 00007f90254a8340
[295242.154557] R10: 00007f9022282a28 R11: 0000000000000246 R12: 0000000000000000
[295242.161775] R13: 00007f902308c000 R14: 000000000000002b R15: 00007f9022b71f40

Fixes: 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
---
 Changelog:
    v3->v4:
      Use new helper csum_block_replace
    v2->v3:
      Use u8 instead of __u8
      Fix sparse warnings on conversions
    v1->v2:
      Replaced push/pull rcsum checksum calc with actual checksum calc

 include/net/checksum.h    |  7 ++++++
 net/openvswitch/actions.c | 47 ++++++++++++++++++++++++++++++++-------
 2 files changed, 46 insertions(+), 8 deletions(-)

Comments

David Laight Feb. 20, 2022, 6:38 p.m. UTC | #1
From: Paul Blakey
> Sent: 20 February 2022 13:21
> 
> IPv6 ttl, label and tos fields are modified without first
> pulling/pushing the IPv6 header, which would have updated
> the hw csum (if available). This might cause a csum validation
> failure when the packet is sent to the stack, as can be seen in
> the trace below.
> 
> Fix this by updating skb->csum if available.
...
> +static inline __wsum
> +csum_block_replace(__wsum csum, __wsum old, __wsum new, int offset)
> +{
> +	return csum_block_add(csum_block_sub(csum, old, offset), new, offset);
> +}

That looks computationally OTT for sub-32-bit adjustments.

It ought to be enough to do:
	return csum_add(old_csum, 0xf0000fff + new - old);

Although it will need 'tweaking' for odd-aligned 24-bit values.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Paul Blakey Feb. 21, 2022, 9:50 a.m. UTC | #2
On Sun, 20 Feb 2022, David Laight wrote:

> From: Paul Blakey
> > Sent: 20 February 2022 13:21
> > 
> > IPv6 ttl, label and tos fields are modified without first
> > pulling/pushing the IPv6 header, which would have updated
> > the hw csum (if available). This might cause a csum validation
> > failure when the packet is sent to the stack, as can be seen in
> > the trace below.
> > 
> > Fix this by updating skb->csum if available.
> ...
> > +static inline __wsum
> > +csum_block_replace(__wsum csum, __wsum old, __wsum new, int offset)
> > +{
> > +	return csum_block_add(csum_block_sub(csum, old, offset), new, offset);
> > +}
> 
> That looks computationally OTT for sub-32-bit adjustments.
> 
> It ought to be enough to do:
> 	return csum_add(old_csum, 0xf0000fff + new - old);
> 
> Although it will need 'tweaking' for odd-aligned 24-bit values.
> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)
> 
> 

I'm not comfortable with manually implementing some low-level
bit-fiddling checksum calculation while covering all cases.
And as you said, it also doesn't have the offset param (odd aligned).
The same could be said about csum_block_add()/sub(); replace()
just uses them. I think the csum operations are more readable and
should be inlined/optimized, so can we do it the readable way,
and then, if needed, optimize it?
Jakub Kicinski Feb. 22, 2022, 7:55 p.m. UTC | #3
You'll need to rebase; the patch which made everything force-inlined
got merged.

On Sun, 20 Feb 2022 15:21:14 +0200 Paul Blakey wrote:
> +static inline __wsum
> +csum_block_replace(__wsum csum, __wsum old, __wsum new, int offset)
> +{
> +	return csum_block_add(csum_block_sub(csum, old, offset), new, offset);

Why still _block? Since the arguments are pre-shifted we should be able
to subtract and add the entire thing, and the math will work out.
Obviously you'd need to shift by the offset in the word, not in a byte
in the caller.

> +}
> +
>  static inline __wsum csum_unfold(__sum16 n)
>  {
>  	return (__force __wsum)n;
> @@ -184,4 +190,5 @@ static inline __wsum wsum_negate(__wsum val)
>  {
>  	return (__force __wsum)-((__force u32)val);
>  }
> +

Spurious?

>  #endif
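
For reference, a rough sketch of the non-_block variant Jakub seems to be
suggesting, assuming csum_add()/csum_sub() from include/net/checksum.h and
a caller that pre-shifts old/new into their position within the 32-bit word
(the helper name here is made up for illustration, not an existing API):

        static inline __wsum
        csum_word_replace(__wsum csum, __wsum old, __wsum new)
        {
                /* old and new must already be shifted to their byte
                 * position within the 32-bit word by the caller.
                 */
                return csum_add(csum_sub(csum, old), new);
        }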

Patch

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 5218041e5c8f..ce39e47b2881 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -106,6 +106,12 @@  csum_block_sub(__wsum csum, __wsum csum2, int offset)
 	return csum_block_add(csum, ~csum2, offset);
 }
 
+static inline __wsum
+csum_block_replace(__wsum csum, __wsum old, __wsum new, int offset)
+{
+	return csum_block_add(csum_block_sub(csum, old, offset), new, offset);
+}
+
 static inline __wsum csum_unfold(__sum16 n)
 {
 	return (__force __wsum)n;
@@ -184,4 +190,5 @@  static inline __wsum wsum_negate(__wsum val)
 {
 	return (__force __wsum)-((__force u32)val);
 }
+
 #endif
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 076774034bb9..1bc9037e4b9e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -423,12 +423,44 @@  static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
 	memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
 {
+	u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+	ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_block_replace(skb->csum, (__force __wsum)(old_ipv6_tclass << 4),
+					       (__force __wsum)(ipv6_tclass << 4), 1);
+
+	ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
+{
+	u32 old_fl;
+
+	old_fl = nh->flow_lbl[0] << 16 |  nh->flow_lbl[1] << 8 |  nh->flow_lbl[2];
+	fl = OVS_MASKED(old_fl, fl, mask);
+
 	/* Bits 21-24 are always unmasked, so this retains their values. */
-	OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
-	OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
-	OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+	nh->flow_lbl[0] = (u8)(fl >> 16);
+	nh->flow_lbl[1] = (u8)(fl >> 8);
+	nh->flow_lbl[2] = (u8)fl;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_block_replace(skb->csum, (__force __wsum)htonl(old_fl),
+					       (__force __wsum)htonl(fl), 0);
+}
+
+static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
+{
+	new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_block_replace(skb->csum, (__force __wsum)nh->hop_limit,
+					       (__force __wsum)new_ttl, 1);
+	nh->hop_limit = new_ttl;
 }
 
 static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
@@ -546,18 +578,17 @@  static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
 		}
 	}
 	if (mask->ipv6_tclass) {
-		ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+		set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
 		flow_key->ip.tos = ipv6_get_dsfield(nh);
 	}
 	if (mask->ipv6_label) {
-		set_ipv6_fl(nh, ntohl(key->ipv6_label),
+		set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
 			    ntohl(mask->ipv6_label));
 		flow_key->ipv6.label =
 		    *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
 	}
 	if (mask->ipv6_hlimit) {
-		OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
-			       mask->ipv6_hlimit);
+		set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
 		flow_key->ip.ttl = nh->hop_limit;
 	}
 	return 0;