Message ID | b606e0355949a3ca8081ee29d9d22f2f30e898bd.1650575919.git.peilin.ye@bytedance.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 31c417c948d7f6909cb63f0ac3298f3c38f8ce20 |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | ip_gre, ip6_gre: o_seqno fixes | expand |
On Thu, Apr 21, 2022 at 3:09 PM Peilin Ye <yepeilin.cs@gmail.com> wrote:
>
> From: Peilin Ye <peilin.ye@bytedance.com>
>
> As pointed out by Jakub Kicinski, currently using TUNNEL_SEQ in
> collect_md mode is racy for [IP6]GRE[TAP] devices. Consider the
> following sequence of events:
>
> 1. An [IP6]GRE[TAP] device is created in collect_md mode using "ip link
>    add ... external". "ip" ignores "[o]seq" if "external" is specified,
>    so TUNNEL_SEQ is off, and the device is marked as NETIF_F_LLTX (i.e.
>    it uses lockless TX);
> 2. Someone sets TUNNEL_SEQ on outgoing skb's, using e.g.
>    bpf_skb_set_tunnel_key() in an eBPF program attached to this device;
> 3. gre_fb_xmit() or __gre6_xmit() processes these skb's:
>
>         gre_build_header(skb, tun_hlen,
>                          flags, protocol,
>                          tunnel_id_to_key32(tun_info->key.tun_id),
>                          (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
>                                               : 0);   ^^^^^^^^^^^^^^^^^
>
> Since we are not using the TX lock (&txq->_xmit_lock), multiple CPUs may
> try to do this tunnel->o_seqno++ in parallel, which is racy. Fix it by
> making o_seqno atomic_t.
>
> As mentioned by Eric Dumazet in commit b790e01aee74 ("ip_gre: lockless
> xmit"), making o_seqno atomic_t increases "chance for packets being out
> of order at receiver" when NETIF_F_LLTX is on.
>
> Maybe a better fix would be:
>
> 1. Do not ignore "oseq" in external mode. Users MUST specify "oseq" if
>    they want the kernel to allow sequencing of outgoing packets;
> 2. Reject all outgoing TUNNEL_SEQ packets if the device was not created
>    with "oseq".
>
> Unfortunately, that would break userspace.
>
> We could now make [IP6]GRE[TAP] devices always NETIF_F_LLTX, but let us
> do it in separate patches to keep this fix minimal.
>
> Suggested-by: Jakub Kicinski <kuba@kernel.org>
> Fixes: 77a5196a804e ("gre: add sequence number for collect md mode.")
> Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
> ---

LGTM
Acked-by: William Tu <u9012063@gmail.com>
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index a38c4f1e4e5c..74b369bddf49 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -58,7 +58,7 @@ struct ip6_tnl {
 
 	/* These fields used only by GRE */
 	__u32 i_seqno; /* The last seen seqno */
-	__u32 o_seqno; /* The last output seqno */
+	atomic_t o_seqno; /* The last output seqno */
 	int hlen; /* tun_hlen + encap_hlen */
 	int tun_hlen; /* Precalculated header length */
 	int encap_hlen; /* Encap header length (FOU,GUE) */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 0219fe907b26..3ec6146f8734 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -116,7 +116,7 @@ struct ip_tunnel {
 
 	/* These four fields used only by GRE */
 	u32 i_seqno; /* The last seen seqno */
-	u32 o_seqno; /* The last output seqno */
+	atomic_t o_seqno; /* The last output seqno */
 	int tun_hlen; /* Precalculated header length */
 
 	/* These four fields used only by ERSPAN */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ca70b92e80d9..8cf86e42c1d1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -464,7 +464,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 	/* Push GRE header. */
 	gre_build_header(skb, tunnel->tun_hlen,
 			 flags, proto, tunnel->parms.o_key,
-			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
 
 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 }
@@ -502,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 		(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
 	gre_build_header(skb, tunnel_hlen, flags, proto,
 			 tunnel_id_to_key32(tun_info->key.tun_id),
-			 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+			 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
 
 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
 
@@ -579,7 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	gre_build_header(skb, 8, TUNNEL_SEQ,
-			 proto, 0, htonl(tunnel->o_seqno++));
+			 proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
 
 	ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
 
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index d9e4ac94eab4..5136959b3dc5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -766,7 +766,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 		gre_build_header(skb, tun_hlen,
 				 flags, protocol,
 				 tunnel_id_to_key32(tun_info->key.tun_id),
-				 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+				 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
 						      : 0);
 
 	} else {
@@ -777,7 +777,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
 
 		gre_build_header(skb, tunnel->tun_hlen, flags,
 				 protocol, tunnel->parms.o_key,
-				 (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+				 (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
+						      : 0);
 	}
 
 	return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -1055,7 +1056,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
 	/* Push GRE header. */
 	proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
 					   : htons(ETH_P_ERSPAN2);
-	gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
+	gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
 
 	/* TooBig packet may have updated dst->dev's mtu */
 	if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)