diff mbox series

[net-next,1/8] udp: fixup csum for GSO receive slow path

Message ID 4bff28fbaa8c53ca836eb2b9bdabcc3057118916.1616345643.git.pabeni@redhat.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series udp: GRO L4 improvements | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 2 maintainers not CCed: yoshfuji@linux-ipv6.org dsahern@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 306 this patch: 306
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 93 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 414 this patch: 414
netdev/header_inline success Link

Commit Message

Paolo Abeni March 21, 2021, 5:01 p.m. UTC
When looping back UDP GSO over UDP tunnel packets to an UDP socket,
the individual packet csum is currently set to CSUM_NONE. That causes
unexpected/wrong csum validation errors later in the UDP receive path.

We could possibly addressing the issue with some additional check and
csum mangling in the UDP tunnel code. Since the issue affects only
this UDP receive slow path, let's set a suitable csum status there.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/net/udp.h | 18 ++++++++++++++++++
 net/ipv4/udp.c    | 10 ++++++++++
 net/ipv6/udp.c    |  5 +++++
 3 files changed, 33 insertions(+)

Comments

Willem de Bruijn March 22, 2021, 1:18 p.m. UTC | #1
On Sun, Mar 21, 2021 at 1:01 PM Paolo Abeni <pabeni@redhat.com> wrote:
>
> When looping back UDP GSO over UDP tunnel packets to an UDP socket,
> the individual packet csum is currently set to CSUM_NONE. That causes
> unexpected/wrong csum validation errors later in the UDP receive path.
>
> We could possibly addressing the issue with some additional check and
> csum mangling in the UDP tunnel code. Since the issue affects only
> this UDP receive slow path, let's set a suitable csum status there.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
>  include/net/udp.h | 18 ++++++++++++++++++
>  net/ipv4/udp.c    | 10 ++++++++++
>  net/ipv6/udp.c    |  5 +++++
>  3 files changed, 33 insertions(+)
>
> diff --git a/include/net/udp.h b/include/net/udp.h
> index d4d064c592328..007683eb3e113 100644
> --- a/include/net/udp.h
> +++ b/include/net/udp.h
> @@ -515,6 +515,24 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
>         return segs;
>  }
>
> +static inline void udp_post_segment_fix_csum(struct sk_buff *skb, int level)
> +{
> +       /* UDP-lite can't land here - no GRO */
> +       WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
> +
> +       /* GRO already validated the csum up to 'level', and we just
> +        * consumed one header, update the skb accordingly
> +        */
> +       UDP_SKB_CB(skb)->cscov = skb->len;
> +       if (level) {
> +               skb->ip_summed = CHECKSUM_UNNECESSARY;
> +               skb->csum_level = 0;
> +       } else {
> +               skb->ip_summed = CHECKSUM_NONE;
> +               skb->csum_valid = 1;
> +       }

why does this function also update these fields for non-tunneled
packets? the commit only describes an issue with tunneled packets.

> +}
> +
>  #ifdef CONFIG_BPF_SYSCALL
>  struct sk_psock;
>  struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 4a0478b17243a..ff54135c51ffa 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2168,6 +2168,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
>  static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
>  {
>         struct sk_buff *next, *segs;
> +       int csum_level;
>         int ret;
>
>         if (likely(!udp_unexpected_gso(sk, skb)))
> @@ -2175,9 +2176,18 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
>
>         BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET);
>         __skb_push(skb, -skb_mac_offset(skb));
> +       csum_level = !!(skb_shinfo(skb)->gso_type &
> +                       (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM));
>         segs = udp_rcv_segment(sk, skb, true);
>         skb_list_walk_safe(segs, skb, next) {
>                 __skb_pull(skb, skb_transport_offset(skb));
> +
> +               /* UDP GSO packets looped back after adding UDP encap land here with CHECKSUM none,
> +                * instead of adding another check in the tunnel fastpath, we can force valid
> +                * csums here (packets are locally generated).
> +                * Additionally fixup the UDP CB
> +                */
> +               udp_post_segment_fix_csum(skb, csum_level);

How does this code differentiates locally generated packets with udp
tunnel headers from packets arriving from the wire, for which the
inner checksum may be incorrect?

>                 ret = udp_queue_rcv_one_skb(sk, skb);
>                 if (ret > 0)
>                         ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret);
> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
> index d25e5a9252fdb..e7d4bf3a65c72 100644
> --- a/net/ipv6/udp.c
> +++ b/net/ipv6/udp.c
> @@ -739,16 +739,21 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
>  static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
>  {
>         struct sk_buff *next, *segs;
> +       int csum_level;
>         int ret;
>
>         if (likely(!udp_unexpected_gso(sk, skb)))
>                 return udpv6_queue_rcv_one_skb(sk, skb);
>
>         __skb_push(skb, -skb_mac_offset(skb));
> +       csum_level = !!(skb_shinfo(skb)->gso_type &
> +                       (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM));
>         segs = udp_rcv_segment(sk, skb, false);
>         skb_list_walk_safe(segs, skb, next) {
>                 __skb_pull(skb, skb_transport_offset(skb));
>
> +               /* see comments in udp_queue_rcv_skb() */
> +               udp_post_segment_fix_csum(skb, csum_level);
>                 ret = udpv6_queue_rcv_one_skb(sk, skb);
>                 if (ret > 0)
>                         ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret,
> --
> 2.26.2
>
Paolo Abeni March 22, 2021, 4:34 p.m. UTC | #2
On Mon, 2021-03-22 at 09:18 -0400, Willem de Bruijn wrote:
> On Sun, Mar 21, 2021 at 1:01 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > When looping back UDP GSO over UDP tunnel packets to an UDP socket,
> > the individual packet csum is currently set to CSUM_NONE. That causes
> > unexpected/wrong csum validation errors later in the UDP receive path.
> > 
> > We could possibly addressing the issue with some additional check and
> > csum mangling in the UDP tunnel code. Since the issue affects only
> > this UDP receive slow path, let's set a suitable csum status there.
> > 
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > ---
> >  include/net/udp.h | 18 ++++++++++++++++++
> >  net/ipv4/udp.c    | 10 ++++++++++
> >  net/ipv6/udp.c    |  5 +++++
> >  3 files changed, 33 insertions(+)
> > 
> > diff --git a/include/net/udp.h b/include/net/udp.h
> > index d4d064c592328..007683eb3e113 100644
> > --- a/include/net/udp.h
> > +++ b/include/net/udp.h
> > @@ -515,6 +515,24 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
> >         return segs;
> >  }
> > 
> > +static inline void udp_post_segment_fix_csum(struct sk_buff *skb, int level)
> > +{
> > +       /* UDP-lite can't land here - no GRO */
> > +       WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
> > +
> > +       /* GRO already validated the csum up to 'level', and we just
> > +        * consumed one header, update the skb accordingly
> > +        */
> > +       UDP_SKB_CB(skb)->cscov = skb->len;
> > +       if (level) {
> > +               skb->ip_summed = CHECKSUM_UNNECESSARY;
> > +               skb->csum_level = 0;
> > +       } else {
> > +               skb->ip_summed = CHECKSUM_NONE;
> > +               skb->csum_valid = 1;
> > +       }
> 
> why does this function also update these fields for non-tunneled
> packets? the commit only describes an issue with tunneled packets.
> 
> > +}
> > +
> >  #ifdef CONFIG_BPF_SYSCALL
> >  struct sk_psock;
> >  struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
> > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> > index 4a0478b17243a..ff54135c51ffa 100644
> > --- a/net/ipv4/udp.c
> > +++ b/net/ipv4/udp.c
> > @@ -2168,6 +2168,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
> >  static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
> >  {
> >         struct sk_buff *next, *segs;
> > +       int csum_level;
> >         int ret;
> > 
> >         if (likely(!udp_unexpected_gso(sk, skb)))
> > @@ -2175,9 +2176,18 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
> > 
> >         BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET);
> >         __skb_push(skb, -skb_mac_offset(skb));
> > +       csum_level = !!(skb_shinfo(skb)->gso_type &
> > +                       (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM));
> >         segs = udp_rcv_segment(sk, skb, true);
> >         skb_list_walk_safe(segs, skb, next) {
> >                 __skb_pull(skb, skb_transport_offset(skb));
> > +
> > +               /* UDP GSO packets looped back after adding UDP encap land here with CHECKSUM none,
> > +                * instead of adding another check in the tunnel fastpath, we can force valid
> > +                * csums here (packets are locally generated).
> > +                * Additionally fixup the UDP CB
> > +                */
> > +               udp_post_segment_fix_csum(skb, csum_level);
> 
> How does this code differentiates locally generated packets with udp
> tunnel headers from packets arriving from the wire, for which the
> inner checksum may be incorrect?

First thing first, thank you for the detailed review. Digesting all the
comments will take time, so please excuse for some latency.

I'll try to reply to both your question here because the replies are
related.

My understanding is that UDP GRO, when processing UDP over UDP traffic
with the appropriate features bit set, will validate the checksum for
both the inner and the outer header - udp{4,6}_gro_receive will be
traversed twice, the fist one for the outer header, the 2nd for the
inner.

So when we reach here, the inner header csum could not be incorrect,
and I don't do anything to differentiate locally generated GSO packets
and GROed one to keep the code simpler.

The udp_post_segment_fix_csum() always set the csum info - even for non
tunneled packets to avoid additional branches/make the code more
complex. The csum should be valid in any scenario.

I guess I can mention the above either in a code comment and/or in the
commit message.

Cheers,

Paolo
Willem de Bruijn March 24, 2021, 1:45 a.m. UTC | #3
On Mon, Mar 22, 2021 at 12:36 PM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On Mon, 2021-03-22 at 09:18 -0400, Willem de Bruijn wrote:
> > On Sun, Mar 21, 2021 at 1:01 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > When looping back UDP GSO over UDP tunnel packets to an UDP socket,
> > > the individual packet csum is currently set to CSUM_NONE. That causes
> > > unexpected/wrong csum validation errors later in the UDP receive path.
> > >
> > > We could possibly addressing the issue with some additional check and
> > > csum mangling in the UDP tunnel code. Since the issue affects only
> > > this UDP receive slow path, let's set a suitable csum status there.
> > >
> > > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > > ---
> > >  include/net/udp.h | 18 ++++++++++++++++++
> > >  net/ipv4/udp.c    | 10 ++++++++++
> > >  net/ipv6/udp.c    |  5 +++++
> > >  3 files changed, 33 insertions(+)
> > >
> > > diff --git a/include/net/udp.h b/include/net/udp.h
> > > index d4d064c592328..007683eb3e113 100644
> > > --- a/include/net/udp.h
> > > +++ b/include/net/udp.h
> > > @@ -515,6 +515,24 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
> > >         return segs;
> > >  }
> > >
> > > +static inline void udp_post_segment_fix_csum(struct sk_buff *skb, int level)
> > > +{
> > > +       /* UDP-lite can't land here - no GRO */
> > > +       WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
> > > +
> > > +       /* GRO already validated the csum up to 'level', and we just
> > > +        * consumed one header, update the skb accordingly
> > > +        */
> > > +       UDP_SKB_CB(skb)->cscov = skb->len;
> > > +       if (level) {
> > > +               skb->ip_summed = CHECKSUM_UNNECESSARY;
> > > +               skb->csum_level = 0;
> > > +       } else {
> > > +               skb->ip_summed = CHECKSUM_NONE;
> > > +               skb->csum_valid = 1;
> > > +       }
> >
> > why does this function also update these fields for non-tunneled
> > packets? the commit only describes an issue with tunneled packets.
> >
> > > +}
> > > +
> > >  #ifdef CONFIG_BPF_SYSCALL
> > >  struct sk_psock;
> > >  struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
> > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> > > index 4a0478b17243a..ff54135c51ffa 100644
> > > --- a/net/ipv4/udp.c
> > > +++ b/net/ipv4/udp.c
> > > @@ -2168,6 +2168,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
> > >  static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
> > >  {
> > >         struct sk_buff *next, *segs;
> > > +       int csum_level;
> > >         int ret;
> > >
> > >         if (likely(!udp_unexpected_gso(sk, skb)))
> > > @@ -2175,9 +2176,18 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
> > >
> > >         BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET);
> > >         __skb_push(skb, -skb_mac_offset(skb));
> > > +       csum_level = !!(skb_shinfo(skb)->gso_type &
> > > +                       (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM));
> > >         segs = udp_rcv_segment(sk, skb, true);
> > >         skb_list_walk_safe(segs, skb, next) {
> > >                 __skb_pull(skb, skb_transport_offset(skb));
> > > +
> > > +               /* UDP GSO packets looped back after adding UDP encap land here with CHECKSUM none,
> > > +                * instead of adding another check in the tunnel fastpath, we can force valid
> > > +                * csums here (packets are locally generated).
> > > +                * Additionally fixup the UDP CB
> > > +                */
> > > +               udp_post_segment_fix_csum(skb, csum_level);
> >
> > How does this code differentiates locally generated packets with udp
> > tunnel headers from packets arriving from the wire, for which the
> > inner checksum may be incorrect?
>
> First thing first, thank you for the detailed review. Digesting all the
> comments will take time, so please excuse for some latency.

Apologies for my own delayed response. I also need to take time to
understand the existing code and diffs :) And have a few questions.

> I'll try to reply to both your question here because the replies are
> related.
>
> My understanding is that UDP GRO, when processing UDP over UDP traffic

This is a UDP GSO packet egress packet that was further encapsulated
with a GSO_UDP_TUNNEL on egress, then looped to the ingress path?

Then in the ingress path it has traversed the GRO layer.

Is this veth with XDP? That seems unlikely for GSO packets. But there
aren't many paths that will loop a packet through napi_gro_receive or
napi_gro_frags.

> with the appropriate features bit set, will validate the checksum for
> both the inner and the outer header - udp{4,6}_gro_receive will be
> traversed twice, the fist one for the outer header, the 2nd for the
> inner.

GRO will validate multiple levels of checksums with CHECKSUM_COMPLETE.
It can only validate the outer checksum with CHECKSUM_UNNECESSARY, I
believe?

As for looped packets with CHECKSUM_PARTIAL: we definitely have found
bugs in that path before. I think it's fine to set csum_valid on any
packets that can unambiguously be identified as such. Hence the
detailed questions above on which exact packets this code is
targeting, so that there are not accidental false positives that look
the same but have a different ip_summed.

> So when we reach here, the inner header csum could not be incorrect,
> and I don't do anything to differentiate locally generated GSO packets
> and GROed one to keep the code simpler.

But the correctness of the patch depends on them being locally
generated, if I'm not mistaken. Then I would make it explicit.

> The udp_post_segment_fix_csum() always set the csum info - even for non
> tunneled packets to avoid additional branches/make the code more
> complex. The csum should be valid in any scenario.
>
> I guess I can mention the above either in a code comment and/or in the
> commit message.

I don't follow this. The patch adds an else clause for non-tunneled
packets. Why does it update these? It does not seem like avoiding
additional branches, but adding them. So I must be missing something
more involved.

> Cheers,
>
> Paolo
>
Willem de Bruijn March 24, 2021, 1:49 a.m. UTC | #4
> > > > @@ -2168,6 +2168,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
> > > >  static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
> > > >  {
> > > >         struct sk_buff *next, *segs;
> > > > +       int csum_level;
> > > >         int ret;
> > > >
> > > >         if (likely(!udp_unexpected_gso(sk, skb)))
> > > > @@ -2175,9 +2176,18 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
> > > >
> > > >         BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET);
> > > >         __skb_push(skb, -skb_mac_offset(skb));
> > > > +       csum_level = !!(skb_shinfo(skb)->gso_type &
> > > > +                       (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM));
> > > >         segs = udp_rcv_segment(sk, skb, true);
> > > >         skb_list_walk_safe(segs, skb, next) {
> > > >                 __skb_pull(skb, skb_transport_offset(skb));
> > > > +
> > > > +               /* UDP GSO packets looped back after adding UDP encap land here with CHECKSUM none,
> > > > +                * instead of adding another check in the tunnel fastpath, we can force valid
> > > > +                * csums here (packets are locally generated).
> > > > +                * Additionally fixup the UDP CB
> > > > +                */

Btw, instead of this comment plus a comment in the ipv6 code that
points back to this ipv4 comment, I would move the explanation to
the udp_post_segment_fix_csum function itself.
Paolo Abeni March 24, 2021, 2:37 p.m. UTC | #5
On Tue, 2021-03-23 at 21:45 -0400, Willem de Bruijn wrote:
> On Mon, Mar 22, 2021 at 12:36 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > On Mon, 2021-03-22 at 09:18 -0400, Willem de Bruijn wrote:
> > > On Sun, Mar 21, 2021 at 1:01 PM Paolo Abeni <pabeni@redhat.com> wrote:
> > > > When looping back UDP GSO over UDP tunnel packets to an UDP socket,
> > > > the individual packet csum is currently set to CSUM_NONE. That causes
> > > > unexpected/wrong csum validation errors later in the UDP receive path.
> > > > 
> > > > We could possibly addressing the issue with some additional check and
> > > > csum mangling in the UDP tunnel code. Since the issue affects only
> > > > this UDP receive slow path, let's set a suitable csum status there.
> > > > 
> > > > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > > > ---
> > > >  include/net/udp.h | 18 ++++++++++++++++++
> > > >  net/ipv4/udp.c    | 10 ++++++++++
> > > >  net/ipv6/udp.c    |  5 +++++
> > > >  3 files changed, 33 insertions(+)
> > > > 
> > > > diff --git a/include/net/udp.h b/include/net/udp.h
> > > > index d4d064c592328..007683eb3e113 100644
> > > > --- a/include/net/udp.h
> > > > +++ b/include/net/udp.h
> > > > @@ -515,6 +515,24 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
> > > >         return segs;
> > > >  }
> > > > 
> > > > +static inline void udp_post_segment_fix_csum(struct sk_buff *skb, int level)
> > > > +{
> > > > +       /* UDP-lite can't land here - no GRO */
> > > > +       WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
> > > > +
> > > > +       /* GRO already validated the csum up to 'level', and we just
> > > > +        * consumed one header, update the skb accordingly
> > > > +        */
> > > > +       UDP_SKB_CB(skb)->cscov = skb->len;
> > > > +       if (level) {
> > > > +               skb->ip_summed = CHECKSUM_UNNECESSARY;
> > > > +               skb->csum_level = 0;
> > > > +       } else {
> > > > +               skb->ip_summed = CHECKSUM_NONE;
> > > > +               skb->csum_valid = 1;
> > > > +       }
> > > 
> > > why does this function also update these fields for non-tunneled
> > > packets? the commit only describes an issue with tunneled packets.
> > > 
> > > > +}
> > > > +
> > > >  #ifdef CONFIG_BPF_SYSCALL
> > > >  struct sk_psock;
> > > >  struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
> > > > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> > > > index 4a0478b17243a..ff54135c51ffa 100644
> > > > --- a/net/ipv4/udp.c
> > > > +++ b/net/ipv4/udp.c
> > > > @@ -2168,6 +2168,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
> > > >  static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
> > > >  {
> > > >         struct sk_buff *next, *segs;
> > > > +       int csum_level;
> > > >         int ret;
> > > > 
> > > >         if (likely(!udp_unexpected_gso(sk, skb)))
> > > > @@ -2175,9 +2176,18 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
> > > > 
> > > >         BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET);
> > > >         __skb_push(skb, -skb_mac_offset(skb));
> > > > +       csum_level = !!(skb_shinfo(skb)->gso_type &
> > > > +                       (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM));
> > > >         segs = udp_rcv_segment(sk, skb, true);
> > > >         skb_list_walk_safe(segs, skb, next) {
> > > >                 __skb_pull(skb, skb_transport_offset(skb));
> > > > +
> > > > +               /* UDP GSO packets looped back after adding UDP encap land here with CHECKSUM none,
> > > > +                * instead of adding another check in the tunnel fastpath, we can force valid
> > > > +                * csums here (packets are locally generated).
> > > > +                * Additionally fixup the UDP CB
> > > > +                */
> > > > +               udp_post_segment_fix_csum(skb, csum_level);
> > > 
> > > How does this code differentiates locally generated packets with udp
> > > tunnel headers from packets arriving from the wire, for which the
> > > inner checksum may be incorrect?
> > 
> > First thing first, thank you for the detailed review. Digesting all the
> > comments will take time, so please excuse for some latency.
> 
> Apologies for my own delayed response. I also need to take time to
> understand the existing code and diffs :) And have a few questions.
> 
> > I'll try to reply to both your question here because the replies are
> > related.
> > 
> > My understanding is that UDP GRO, when processing UDP over UDP traffic
> 
> This is a UDP GSO packet egress packet that was further encapsulated
> with a GSO_UDP_TUNNEL on egress, then looped to the ingress path?
> 
> Then in the ingress path it has traversed the GRO layer.
> 
> Is this veth with XDP? That seems unlikely for GSO packets. But there
> aren't many paths that will loop a packet through napi_gro_receive or
> napi_gro_frags.

This patch addresses the following scenario:

sk ->vxlan -> veth -> (xdp in use, TSO disabled, GRO on) -> veth -> vxlan -> sk

What I meant here is that the issue is not visible with:

(baremetal, NETIF_F_GRO_UDP_FWD | NETIF_F_GRO_FRAGLIST enabled -> vxlan -> sk

> > with the appropriate features bit set, will validate the checksum for
> > both the inner and the outer header - udp{4,6}_gro_receive will be
> > traversed twice, the fist one for the outer header, the 2nd for the
> > inner.
> 
> GRO will validate multiple levels of checksums with CHECKSUM_COMPLETE.
> It can only validate the outer checksum with CHECKSUM_UNNECESSARY, I
> believe?

I possibly miss some bits here ?!?

AFAICS:

udp4_gro_receive() -> skb_gro_checksum_validate_zero_check() ->
__skb_gro_checksum_validate -> (if  not already valid)
__skb_gro_checksum_validate_complete() -> (if not CHECKSUM_COMPLETE)
__skb_gro_checksum_complete()

the latter will validate the UDP checksum at the current nesting level
(and set the csum-related bits in the GRO skb cb to the same status
as CHECKSUM_COMPLETE)

When processing UDP over UDP tunnel packet, the gro call chain will be:

[l2/l3 GRO] -> udp4_gro_receive (validate outher header csum) -> 
udp_sk(sk)->gro_receive -> [other gro layers] -> 
udp4_gro_receive (validate inner header csum) -> ...

> As for looped packets with CHECKSUM_PARTIAL: we definitely have found
> bugs in that path before. I think it's fine to set csum_valid on any
> packets that can unambiguously be identified as such. Hence the
> detailed questions above on which exact packets this code is
> targeting, so that there are not accidental false positives that look
> the same but have a different ip_summed.

I see this change is controversial. Since the addressed scenario is
really a corner case, a simpler alternative would be
replacing udp_post_segment_fix_csum with:

static inline void udp_post_segment_fix_cb(struct sk_buff *skb, int level)
{
	/* UDP-lite can't land here - no GRO */
	WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);

	/* UDP CB mirrors the GSO packet, we must re-init it */
	UDP_SKB_CB(skb)->cscov = skb->len;
}

the downside will be an additional, later, unneeded csum validation for the 

sk ->vxlan -> veth -> (xdp in use, TSO disabled, GRO on) -> veth -> vxlan -> sk

scenario. WDYT?

Thanks!

Paolo
Willem de Bruijn March 24, 2021, 10:36 p.m. UTC | #6
> > This is a UDP GSO packet egress packet that was further encapsulated
> > with a GSO_UDP_TUNNEL on egress, then looped to the ingress path?
> >
> > Then in the ingress path it has traversed the GRO layer.
> >
> > Is this veth with XDP? That seems unlikely for GSO packets. But there
> > aren't many paths that will loop a packet through napi_gro_receive or
> > napi_gro_frags.
>
> This patch addresses the following scenario:
>
> sk ->vxlan -> veth -> (xdp in use, TSO disabled, GRO on) -> veth -> vxlan -> sk
>
> What I meant here is that the issue is not visible with:
>
> (baremetal, NETIF_F_GRO_UDP_FWD | NETIF_F_GRO_FRAGLIST enabled -> vxlan -> sk
>
> > > with the appropriate features bit set, will validate the checksum for
> > > both the inner and the outer header - udp{4,6}_gro_receive will be
> > > traversed twice, the fist one for the outer header, the 2nd for the
> > > inner.
> >
> > GRO will validate multiple levels of checksums with CHECKSUM_COMPLETE.
> > It can only validate the outer checksum with CHECKSUM_UNNECESSARY, I
> > believe?
>
> I possibly miss some bits here ?!?
>
> AFAICS:
>
> udp4_gro_receive() -> skb_gro_checksum_validate_zero_check() ->
> __skb_gro_checksum_validate -> (if  not already valid)
> __skb_gro_checksum_validate_complete() -> (if not CHECKSUM_COMPLETE)
> __skb_gro_checksum_complete()
>
> the latter will validate the UDP checksum at the current nesting level
> (and set the csum-related bits in the GRO skb cb to the same status
> as CHECKSUM_COMPLETE)
>
> When processing UDP over UDP tunnel packet, the gro call chain will be:
>
> [l2/l3 GRO] -> udp4_gro_receive (validate outher header csum) ->
> udp_sk(sk)->gro_receive -> [other gro layers] ->
> udp4_gro_receive (validate inner header csum) -> ...

Agreed. But __skb_gro_checksum_validate on the first invocation will
call skb_gro_incr_csum_unnecessary, so that on the second invocation
csum_cnt == 0 and triggers a real checksum validation?

At least, that is my understanding. Intuitively, CHECKSUM_UNNECESSARY
only validates the first checksum, so says nothing about the validity
of any subsequent ones.

But it seems I'm mistaken?

__skb_gro_checksum_validate has an obvious exception for locally
generated packets by excluding CHECKSUM_PARTIAL. Looped packets
usually have CHECKSUM_PARTIAL set. Unfortunately, this is similar to
the issue that I looked at earlier this year with looped UDP packets
with MSG_MORE: those are also changed to CHECKSUM_NONE (and exposed a
checksum bug: 52cbd23a119c).

Is there perhaps some other way that we can identify that these are
local packets? They should trivially avoid all checksum checks.

> > As for looped packets with CHECKSUM_PARTIAL: we definitely have found
> > bugs in that path before. I think it's fine to set csum_valid on any
> > packets that can unambiguously be identified as such. Hence the
> > detailed questions above on which exact packets this code is
> > targeting, so that there are not accidental false positives that look
> > the same but have a different ip_summed.
>
> I see this change is controversial.

I have no concerns with the fix. It is just a very narrow path (veth +
xdp - tso + gro ..), and to minimize risk I would try to avoid
updating state of unrelated packets. That was my initial comment: I
don't understand the need for the else clause.

> Since the addressed scenario is
> really a corner case, a simpler alternative would be
> replacing udp_post_segment_fix_csum with:
>
> static inline void udp_post_segment_fix_cb(struct sk_buff *skb, int level)
> {
>         /* UDP-lite can't land here - no GRO */
>         WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
>
>         /* UDP CB mirrors the GSO packet, we must re-init it */
>         UDP_SKB_CB(skb)->cscov = skb->len;
> }
>
> the downside will be an additional, later, unneeded csum validation for the
>
> sk ->vxlan -> veth -> (xdp in use, TSO disabled, GRO on) -> veth -> vxlan -> sk
>
> scenario. WDYT?

No, let's definitely avoid an unneeded checksum verification.
Paolo Abeni March 25, 2021, 10:56 a.m. UTC | #7
Hello,

On Wed, 2021-03-24 at 18:36 -0400, Willem de Bruijn wrote:
> > > This is a UDP GSO packet egress packet that was further encapsulated
> > > with a GSO_UDP_TUNNEL on egress, then looped to the ingress path?
> > > 
> > > Then in the ingress path it has traversed the GRO layer.
> > > 
> > > Is this veth with XDP? That seems unlikely for GSO packets. But there
> > > aren't many paths that will loop a packet through napi_gro_receive or
> > > napi_gro_frags.
> > 
> > This patch addresses the following scenario:
> > 
> > sk ->vxlan -> veth -> (xdp in use, TSO disabled, GRO on) -> veth -> vxlan -> sk
> > 
> > What I meant here is that the issue is not visible with:
> > 
> > (baremetal, NETIF_F_GRO_UDP_FWD | NETIF_F_GRO_FRAGLIST enabled -> vxlan -> sk
> > 
> > > > with the appropriate features bit set, will validate the checksum for
> > > > both the inner and the outer header - udp{4,6}_gro_receive will be
> > > > traversed twice, the fist one for the outer header, the 2nd for the
> > > > inner.
> > > 
> > > GRO will validate multiple levels of checksums with CHECKSUM_COMPLETE.
> > > It can only validate the outer checksum with CHECKSUM_UNNECESSARY, I
> > > believe?
> > 
> > I possibly miss some bits here ?!?
> > 
> > AFAICS:
> > 
> > udp4_gro_receive() -> skb_gro_checksum_validate_zero_check() ->
> > __skb_gro_checksum_validate -> (if  not already valid)
> > __skb_gro_checksum_validate_complete() -> (if not CHECKSUM_COMPLETE)
> > __skb_gro_checksum_complete()
> > 
> > the latter will validate the UDP checksum at the current nesting level
> > (and set the csum-related bits in the GRO skb cb to the same status
> > as CHECKSUM_COMPLETE)
> > 
> > When processing UDP over UDP tunnel packet, the gro call chain will be:
> > 
> > [l2/l3 GRO] -> udp4_gro_receive (validate outher header csum) ->
> > udp_sk(sk)->gro_receive -> [other gro layers] ->
> > udp4_gro_receive (validate inner header csum) -> ...
> 
> Agreed. But __skb_gro_checksum_validate on the first invocation will
> call skb_gro_incr_csum_unnecessary, so that on the second invocation
> csum_cnt == 0 and triggers a real checksum validation?
> 
> At least, that is my understanding. Intuitively, CHECKSUM_UNNECESSARY
> only validates the first checksum, so says nothing about the validity
> of any subsequent ones.
> 
> But it seems I'm mistaken?

AFAICS, it depends ;) From skbuff.h:

 *   skb->csum_level indicates the number of consecutive checksums found in
 *   the packet minus one that have been verified as CHECKSUM_UNNECESSARY.

if skb->csum_level > 0, the NIC validate additional headers. The intel
ixgbe driver use that for vxlan RX csum offload. Such field translates
into:

	NAPI_GRO_CB(skb)->csum_cnt

inside the GRO engine, and skb_gro_incr_csum_unnecessary takes care of
the updating it after validation.

> __skb_gro_checksum_validate has an obvious exception for locally
> generated packets by excluding CHECKSUM_PARTIAL. Looped packets
> usually have CHECKSUM_PARTIAL set. Unfortunately, this is similar to
> the issue that I looked at earlier this year with looped UDP packets
> with MSG_MORE: those are also changed to CHECKSUM_NONE (and exposed a
> checksum bug: 52cbd23a119c).
> 
> Is there perhaps some other way that we can identify that these are
> local packets? They should trivially avoid all checksum checks.
> 
> > > As for looped packets with CHECKSUM_PARTIAL: we definitely have found
> > > bugs in that path before. I think it's fine to set csum_valid on any
> > > packets that can unambiguously be identified as such. Hence the
> > > detailed questions above on which exact packets this code is
> > > targeting, so that there are not accidental false positives that look
> > > the same but have a different ip_summed.
> > 
> > I see this change is controversial.
> 
> I have no concerns with the fix. It is just a very narrow path (veth +
> xdp - tso + gro ..), and to minimize risk I would try to avoid
> updating state of unrelated packets. That was my initial comment: I
> don't understand the need for the else clause.

The branch is there because I wrote this patch before the patches 5,6,7
later in this series. GSO UDP L4 over UDP tunnel packets were segmented
at the UDP tunnel level, and that 'else' branch preserves the
appropriate 'csum_level' value to avoid later (if/when the packet lands
in a plain UDP socket) csum validation.

> > Since the addressed scenario is
> > really a corner case, a simpler alternative would be
> > replacing udp_post_segment_fix_csum with:
> > 
> > static inline void udp_post_segment_fix_cb(struct sk_buff *skb, int level)
> > {
> >         /* UDP-lite can't land here - no GRO */
> >         WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
> > 
> >         /* UDP CB mirrors the GSO packet, we must re-init it */
> >         UDP_SKB_CB(skb)->cscov = skb->len;
> > }
> > 
> > the downside will be an additional, later, unneeded csum validation for the
> > 
> > sk ->vxlan -> veth -> (xdp in use, TSO disabled, GRO on) -> veth -> vxlan -> sk
> > 
> > scenario. WDYT?
> 
> No, let's definitely avoid an unneeded checksum verification.

Ok.

My understanding is that the following should be better:

static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
{
	/* UDP-lite can't land here - no GRO */
	WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);

	/* UDP packets generated with UDP_SEGMENT and traversing:
	 * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx)
	 * land here with CHECKSUM_NONE. Instead of adding another check
	 * in the tunnel fastpath, we can force valid csums here:
	 * packets are locally generated and the GRO engine already validated
	 * the csum.
	 * Additionally fixup the UDP CB
	 */
	UDP_SKB_CB(skb)->cscov = skb->len;
	if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid)
		skb->csum_valid = 1;
}

I'll use the above in v2.

Thanks!

Paolo
Willem de Bruijn March 25, 2021, 1:53 p.m. UTC | #8
On Thu, Mar 25, 2021 at 6:57 AM Paolo Abeni <pabeni@redhat.com> wrote:
>
> Hello,
>
> On Wed, 2021-03-24 at 18:36 -0400, Willem de Bruijn wrote:
> > > > This is a UDP GSO packet egress packet that was further encapsulated
> > > > with a GSO_UDP_TUNNEL on egress, then looped to the ingress path?
> > > >
> > > > Then in the ingress path it has traversed the GRO layer.
> > > >
> > > > Is this veth with XDP? That seems unlikely for GSO packets. But there
> > > > aren't many paths that will loop a packet through napi_gro_receive or
> > > > napi_gro_frags.
> > >
> > > This patch addresses the following scenario:
> > >
> > > sk ->vxlan -> veth -> (xdp in use, TSO disabled, GRO on) -> veth -> vxlan -> sk
> > >
> > > What I meant here is that the issue is not visible with:
> > >
> > > (baremetal, NETIF_F_GRO_UDP_FWD | NETIF_F_GRO_FRAGLIST enabled -> vxlan -> sk
> > >
> > > > > with the appropriate features bit set, will validate the checksum for
> > > > > both the inner and the outer header - udp{4,6}_gro_receive will be
> > > > > traversed twice, the fist one for the outer header, the 2nd for the
> > > > > inner.
> > > >
> > > > GRO will validate multiple levels of checksums with CHECKSUM_COMPLETE.
> > > > It can only validate the outer checksum with CHECKSUM_UNNECESSARY, I
> > > > believe?
> > >
> > > I possibly miss some bits here ?!?
> > >
> > > AFAICS:
> > >
> > > udp4_gro_receive() -> skb_gro_checksum_validate_zero_check() ->
> > > __skb_gro_checksum_validate -> (if  not already valid)
> > > __skb_gro_checksum_validate_complete() -> (if not CHECKSUM_COMPLETE)
> > > __skb_gro_checksum_complete()
> > >
> > > the latter will validate the UDP checksum at the current nesting level
> > > (and set the csum-related bits in the GRO skb cb to the same status
> > > as CHECKSUM_COMPLETE)
> > >
> > > When processing UDP over UDP tunnel packet, the gro call chain will be:
> > >
> > > [l2/l3 GRO] -> udp4_gro_receive (validate outher header csum) ->
> > > udp_sk(sk)->gro_receive -> [other gro layers] ->
> > > udp4_gro_receive (validate inner header csum) -> ...
> >
> > Agreed. But __skb_gro_checksum_validate on the first invocation will
> > call skb_gro_incr_csum_unnecessary, so that on the second invocation
> > csum_cnt == 0 and triggers a real checksum validation?
> >
> > At least, that is my understanding. Intuitively, CHECKSUM_UNNECESSARY
> > only validates the first checksum, so says nothing about the validity
> > of any subsequent ones.
> >
> > But it seems I'm mistaken?
>
> AFAICS, it depends ;) From skbuff.h:
>
>  *   skb->csum_level indicates the number of consecutive checksums found in
>  *   the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
>
> if skb->csum_level > 0, the NIC validate additional headers. The intel
> ixgbe driver use that for vxlan RX csum offload. Such field translates
> into:
>
>         NAPI_GRO_CB(skb)->csum_cnt
>
> inside the GRO engine, and skb_gro_incr_csum_unnecessary takes care of
> the updating it after validation.

True. I glanced over those cases.

More importantly, where exactly do these looped packets get converted
from CHECKSUM_PARTIAL to CHECKSUM_NONE before this patch?

> > __skb_gro_checksum_validate has an obvious exception for locally
> > generated packets by excluding CHECKSUM_PARTIAL. Looped packets
> > usually have CHECKSUM_PARTIAL set. Unfortunately, this is similar to
> > the issue that I looked at earlier this year with looped UDP packets
> > with MSG_MORE: those are also changed to CHECKSUM_NONE (and exposed a
> > checksum bug: 52cbd23a119c).
> >
> > Is there perhaps some other way that we can identify that these are
> > local packets? They should trivially avoid all checksum checks.
> >
> > > > As for looped packets with CHECKSUM_PARTIAL: we definitely have found
> > > > bugs in that path before. I think it's fine to set csum_valid on any
> > > > packets that can unambiguously be identified as such. Hence the
> > > > detailed questions above on which exact packets this code is
> > > > targeting, so that there are not accidental false positives that look
> > > > the same but have a different ip_summed.
> > >
> > > I see this change is controversial.
> >
> > I have no concerns with the fix. It is just a very narrow path (veth +
> > xdp - tso + gro ..), and to minimize risk I would try to avoid
> > updating state of unrelated packets. That was my initial comment: I
> > don't understand the need for the else clause.
>
> The branch is there because I wrote this patch before the patches 5,6,7
> later in this series. GSO UDP L4 over UDP tunnel packets were segmented
> at the UDP tunnel level, and that 'else' branch preserves the
> appropriate 'csum_level' value to avoid later (if/when the packet lands
> in a plain UDP socket) csum validation.
>
> > > Since the addressed scenario is
> > > really a corner case, a simpler alternative would be
> > > replacing udp_post_segment_fix_csum with:
> > >
> > > static inline void udp_post_segment_fix_cb(struct sk_buff *skb, int level)
> > > {
> > >         /* UDP-lite can't land here - no GRO */
> > >         WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
> > >
> > >         /* UDP CB mirrors the GSO packet, we must re-init it */
> > >         UDP_SKB_CB(skb)->cscov = skb->len;
> > > }
> > >
> > > the downside will be an additional, later, unneeded csum validation for the
> > >
> > > sk ->vxlan -> veth -> (xdp in use, TSO disabled, GRO on) -> veth -> vxlan -> sk
> > >
> > > scenario. WDYT?
> >
> > No, let's definitely avoid an unneeded checksum verification.
>
> Ok.
>
> My understanding is that the following should be better:
>
> static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
> {
>         /* UDP-lite can't land here - no GRO */
>         WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
>
>         /* UDP packets generated with UDP_SEGMENT and traversing:
>          * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx)
>          * land here with CHECKSUM_NONE. Instead of adding another check
>          * in the tunnel fastpath, we can force valid csums here:
>          * packets are locally generated and the GRO engine already validated
>          * the csum.
>          * Additionally fixup the UDP CB
>          */
>         UDP_SKB_CB(skb)->cscov = skb->len;
>         if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid)
>                 skb->csum_valid = 1;
> }
>
> I'll use the above in v2.

Do I understand correctly that this avoids matching tunneled packets
that arrive from the network with rx checksumming disabled, because
__skb_gro_checksum_complete will have been called on the outer packet
and have set skb->csum_valid?

Yes, this just (1) identifying the packet as being of local source and
then (2) setting csum_valid sounds great to me, thanks.
Paolo Abeni March 25, 2021, 4:47 p.m. UTC | #9
On Thu, 2021-03-25 at 09:53 -0400, Willem de Bruijn wrote:
> On Thu, Mar 25, 2021 at 6:57 AM Paolo Abeni <pabeni@redhat.com> wrote:
> > AFAICS, it depends ;) From skbuff.h:
> > 
> >  *   skb->csum_level indicates the number of consecutive checksums found in
> >  *   the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
> > 
> > if skb->csum_level > 0, the NIC validate additional headers. The intel
> > ixgbe driver use that for vxlan RX csum offload. Such field translates
> > into:
> > 
> >         NAPI_GRO_CB(skb)->csum_cnt
> > 
> > inside the GRO engine, and skb_gro_incr_csum_unnecessary takes care of
> > the updating it after validation.
> 
> True. I glanced over those cases.
> 
> More importantly, where exactly do these looped packets get converted
> from CHECKSUM_PARTIAL to CHECKSUM_NONE before this patch?

Very good question! It took a bit finding the exact place.

int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
			   __be16 inner_proto, bool raw_proto, bool xnet)
{
	if (unlikely(!pskb_may_pull(skb, hdr_len)))
		return -ENOMEM;

	skb_pull_rcsum(skb, hdr_len);
        // here ^^^ via skb_pull_rcsum -> skb_postpull_rcsum() -> __skb_postpull_rcsum()

well, this is actually with _this_ patch applied: it does not change
the place where the ip_summed is set.

> > My understanding is that the following should be better:
> > 
> > static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
> > {
> >         /* UDP-lite can't land here - no GRO */
> >         WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
> > 
> >         /* UDP packets generated with UDP_SEGMENT and traversing:
> >          * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx)
> >          * land here with CHECKSUM_NONE. Instead of adding another check
> >          * in the tunnel fastpath, we can force valid csums here:
> >          * packets are locally generated and the GRO engine already validated
> >          * the csum.
> >          * Additionally fixup the UDP CB
> >          */
> >         UDP_SKB_CB(skb)->cscov = skb->len;
> >         if (skb->ip_summed == CHECKSUM_NONE && !skb->csum_valid)
> >                 skb->csum_valid = 1;
> > }
> > 
> > I'll use the above in v2.
> 
> Do I understand correctly that this avoids matching tunneled packets
> that arrive from the network with rx checksumming disabled, because
> __skb_gro_checksum_complete will have been called on the outer packet
> and have set skb->csum_valid?

Exactly. I did the test, and perf probes showed that.

> Yes, this just (1) identifying the packet as being of local source and
> then (2) setting csum_valid sounds great to me, thanks.

Will try to submit v2 soon, after some more testing.

Thanks for all the feedback!

Paolo
diff mbox series

Patch

diff --git a/include/net/udp.h b/include/net/udp.h
index d4d064c592328..007683eb3e113 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -515,6 +515,24 @@  static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
 	return segs;
 }
 
+static inline void udp_post_segment_fix_csum(struct sk_buff *skb, int level)
+{
+	/* UDP-lite can't land here - no GRO */
+	WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);
+
+	/* GRO already validated the csum up to 'level', and we just
+	 * consumed one header, update the skb accordingly
+	 */
+	UDP_SKB_CB(skb)->cscov = skb->len;
+	if (level) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		skb->csum_level = 0;
+	} else {
+		skb->ip_summed = CHECKSUM_NONE;
+		skb->csum_valid = 1;
+	}
+}
+
 #ifdef CONFIG_BPF_SYSCALL
 struct sk_psock;
 struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4a0478b17243a..ff54135c51ffa 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2168,6 +2168,7 @@  static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct sk_buff *next, *segs;
+	int csum_level;
 	int ret;
 
 	if (likely(!udp_unexpected_gso(sk, skb)))
@@ -2175,9 +2176,18 @@  static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET);
 	__skb_push(skb, -skb_mac_offset(skb));
+	csum_level = !!(skb_shinfo(skb)->gso_type &
+			(SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM));
 	segs = udp_rcv_segment(sk, skb, true);
 	skb_list_walk_safe(segs, skb, next) {
 		__skb_pull(skb, skb_transport_offset(skb));
+
+		/* UDP GSO packets looped back after adding UDP encap land here with CHECKSUM none,
+		 * instead of adding another check in the tunnel fastpath, we can force valid
+		 * csums here (packets are locally generated).
+		 * Additionally fixup the UDP CB
+		 */
+		udp_post_segment_fix_csum(skb, csum_level);
 		ret = udp_queue_rcv_one_skb(sk, skb);
 		if (ret > 0)
 			ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index d25e5a9252fdb..e7d4bf3a65c72 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -739,16 +739,21 @@  static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct sk_buff *next, *segs;
+	int csum_level;
 	int ret;
 
 	if (likely(!udp_unexpected_gso(sk, skb)))
 		return udpv6_queue_rcv_one_skb(sk, skb);
 
 	__skb_push(skb, -skb_mac_offset(skb));
+	csum_level = !!(skb_shinfo(skb)->gso_type &
+			(SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM));
 	segs = udp_rcv_segment(sk, skb, false);
 	skb_list_walk_safe(segs, skb, next) {
 		__skb_pull(skb, skb_transport_offset(skb));
 
+		/* see comments in udp_queue_rcv_skb() */
+		udp_post_segment_fix_csum(skb, csum_level);
 		ret = udpv6_queue_rcv_one_skb(sk, skb);
 		if (ret > 0)
 			ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret,