diff mbox series

[net,v2,6/8] veth: take into account device reconfiguration for xdp_features flag

Message ID f20cfdb08d7357b0853d25be3b34ace4408693be.1678364613.git.lorenzo@kernel.org (mailing list archive)
State Accepted
Commit fccca038f3003daa8f28a5e5d97efe50f04b8d9d
Delegated to: Netdev Maintainers
Headers show
Series update xdp_features flag according to NIC re-configuration | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 20 this patch: 20
netdev/cc_maintainers fail 3 blamed authors not CCed: simon.horman@corigine.com alardam@gmail.com memxor@gmail.com; 3 maintainers not CCed: simon.horman@corigine.com alardam@gmail.com memxor@gmail.com
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 20 this patch: 20
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 83 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Lorenzo Bianconi March 9, 2023, 12:25 p.m. UTC
Take tx/rx queue reconfiguration into account when setting the device
xdp_features flag. Moreover, consider the NETIF_F_GRO flag in order to
enable the ndo_xdp_xmit callback.

Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/veth.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

Comments

Matthieu Baerts March 13, 2023, 2:15 p.m. UTC | #1
Hi Lorenzo,

On 09/03/2023 13:25, Lorenzo Bianconi wrote:
> Take into account tx/rx queues reconfiguration setting device
> xdp_features flag. Moreover consider NETIF_F_GRO flag in order to enable
> ndo_xdp_xmit callback.
> 
> Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>

Thank you for the modification.

Unfortunately, 'git bisect' just told me this modification is the origin
of a new WARN when using veth in a netns:


###################### 8< ######################

=============================
WARNING: suspicious RCU usage
6.3.0-rc1-00144-g064d70527aaa #149 Not tainted
-----------------------------
drivers/net/veth.c:1265 suspicious rcu_dereference_check() usage!

other info that might help us debug this:


rcu_scheduler_active = 2, debug_locks = 1
1 lock held by ip/135:
#0: ffffffff8dc4b108 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg (net/core/rtnetlink.c:6172)

stack backtrace:
CPU: 1 PID: 135 Comm: ip Not tainted 6.3.0-rc1-00144-g064d70527aaa #149
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
 <TASK>
dump_stack_lvl (lib/dump_stack.c:107)
lockdep_rcu_suspicious (include/linux/context_tracking.h:152)
veth_set_xdp_features (drivers/net/veth.c:1265 (discriminator 9))
veth_newlink (drivers/net/veth.c:1892)
? veth_set_features (drivers/net/veth.c:1774)
? kasan_save_stack (mm/kasan/common.c:47)
? kasan_save_stack (mm/kasan/common.c:46)
? kasan_set_track (mm/kasan/common.c:52)
? alloc_netdev_mqs (include/linux/slab.h:737)
? rcu_read_lock_sched_held (kernel/rcu/update.c:125)
? trace_kmalloc (include/trace/events/kmem.h:54)
? __xdp_rxq_info_reg (net/core/xdp.c:188)
? alloc_netdev_mqs (net/core/dev.c:10657)
? rtnl_create_link (net/core/rtnetlink.c:3312)
rtnl_newlink_create (net/core/rtnetlink.c:3440)
? rtnl_link_get_net_capable.constprop.0 (net/core/rtnetlink.c:3391)
__rtnl_newlink (net/core/rtnetlink.c:3657)
? lock_downgrade (kernel/locking/lockdep.c:5321)
? rtnl_link_unregister (net/core/rtnetlink.c:3487)
rtnl_newlink (net/core/rtnetlink.c:3671)
rtnetlink_rcv_msg (net/core/rtnetlink.c:6174)
? rtnl_link_fill (net/core/rtnetlink.c:6070)
? mark_usage (kernel/locking/lockdep.c:4914)
? mark_usage (kernel/locking/lockdep.c:4914)
netlink_rcv_skb (net/netlink/af_netlink.c:2574)
? rtnl_link_fill (net/core/rtnetlink.c:6070)
? netlink_ack (net/netlink/af_netlink.c:2551)
? lock_acquire (kernel/locking/lockdep.c:467)
? net_generic (include/linux/rcupdate.h:805)
? netlink_deliver_tap (include/linux/rcupdate.h:805)
netlink_unicast (net/netlink/af_netlink.c:1340)
? netlink_attachskb (net/netlink/af_netlink.c:1350)
netlink_sendmsg (net/netlink/af_netlink.c:1942)
? netlink_unicast (net/netlink/af_netlink.c:1861)
? netlink_unicast (net/netlink/af_netlink.c:1861)
sock_sendmsg (net/socket.c:727)
____sys_sendmsg (net/socket.c:2501)
? kernel_sendmsg (net/socket.c:2448)
? __copy_msghdr (net/socket.c:2428)
___sys_sendmsg (net/socket.c:2557)
? mark_usage (kernel/locking/lockdep.c:4914)
? do_recvmmsg (net/socket.c:2544)
? lock_acquire (kernel/locking/lockdep.c:467)
? find_held_lock (kernel/locking/lockdep.c:5159)
? __lock_release (kernel/locking/lockdep.c:5345)
? __might_fault (mm/memory.c:5625)
? lock_downgrade (kernel/locking/lockdep.c:5321)
? __fget_light (include/linux/atomic/atomic-arch-fallback.h:227)
__sys_sendmsg (include/linux/file.h:31)
? __sys_sendmsg_sock (net/socket.c:2572)
? rseq_get_rseq_cs (kernel/rseq.c:275)
? lockdep_hardirqs_on_prepare.part.0 (kernel/locking/lockdep.c:4263)
do_syscall_64 (arch/x86/entry/common.c:50)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
RIP: 0033:0x7f0d1aadeb17
Code: 0f 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e
fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00
f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10
All code
========
   0:   0f 00                   (bad)
   2:   f7 d8                   neg    %eax
   4:   64 89 02                mov    %eax,%fs:(%rdx)
   7:   48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
   e:   eb b9                   jmp    0xffffffffffffffc9
  10:   0f 1f 00                nopl   (%rax)
  13:   f3 0f 1e fa             endbr64
  17:   64 8b 04 25 18 00 00    mov    %fs:0x18,%eax
  1e:   00
  1f:   85 c0                   test   %eax,%eax
  21:   75 10                   jne    0x33
  23:   b8 2e 00 00 00          mov    $0x2e,%eax
  28:   0f 05                   syscall
  2a:*  48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
<-- trapping instruction
  30:   77 51                   ja     0x83
  32:   c3                      ret
  33:   48 83 ec 28             sub    $0x28,%rsp
  37:   89 54 24 1c             mov    %edx,0x1c(%rsp)
  3b:   48 89 74 24 10          mov    %rsi,0x10(%rsp)

Code starting with the faulting instruction
===========================================
   0:   48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
   6:   77 51                   ja     0x59
   8:   c3                      ret
   9:   48 83 ec 28             sub    $0x28,%rsp
   d:   89 54 24 1c             mov    %edx,0x1c(%rsp)
  11:   48 89 74 24 10          mov    %rsi,0x10(%rsp)
RSP: 002b:00007ffca3305d48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00000000640f2bb2 RCX: 00007f0d1aadeb17
RDX: 0000000000000000 RSI: 00007ffca3305db0 RDI: 0000000000000003
RBP: 0000000000000000 R08: 0000000000000001 R09: 00007ffca3304ae0
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
R13: 00007ffca3305eb4 R14: 00007ffca3305e80 R15: 0000561e28bf5020
 </TASK>
ip (135) used greatest stack depth: 24544 bytes left

###################### 8< ######################


I can reproduce the issue on the "net" tree with just these 3 commands:

# ip netns add ns1
# ip netns add ns2
# ip link add ns1eth1 netns ns1 type veth peer name ns2eth1 netns ns2

Without this commit fccca038f300 ("veth: take into account device
reconfiguration for xdp_features flag"), I don't have the issue.

For more details about the issue detected by CIs validating our MPTCP
tree, including kconfig and vmlinux if needed:

  https://github.com/multipath-tcp/mptcp_net-next/issues/372


Do you mind looking at this regression please? :)


On our side, we will revert this patch in our tree for the moment to
unblock our CI jobs.

Cheers,
Matt
Eric Dumazet March 13, 2023, 3:50 p.m. UTC | #2
On Mon, Mar 13, 2023 at 7:15 AM Matthieu Baerts
<matthieu.baerts@tessares.net> wrote:
>
> Hi Lorenzo,
>
> On 09/03/2023 13:25, Lorenzo Bianconi wrote:
> > Take into account tx/rx queues reconfiguration setting device
> > xdp_features flag. Moreover consider NETIF_F_GRO flag in order to enable
> > ndo_xdp_xmit callback.
> >
> > Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
>
> Thank you for the modification.
>
> Unfortunately, 'git bisect' just told me this modification is the origin
> of a new WARN when using veth in a netns:
>
>
> ###################### 8< ######################
>
> =============================
> WARNING: suspicious RCU usage
> 6.3.0-rc1-00144-g064d70527aaa #149 Not tainted
> -----------------------------
> drivers/net/veth.c:1265 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>

Same observation here, I am releasing a syzbot report with a repro.



>
> rcu_scheduler_active = 2, debug_locks = 1
> 1 lock held by ip/135:
> #0: ffffffff8dc4b108 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg
> (net/core/rtnetlink.c:6172)
>
> stack backtrace:
> CPU: 1 PID: 135 Comm: ip Not tainted 6.3.0-rc1-00144-g064d70527aaa #149
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1
> 04/01/2014
> Call Trace:
>  <TASK>
> dump_stack_lvl (lib/dump_stack.c:107)
> lockdep_rcu_suspicious (include/linux/context_tracking.h:152)
> veth_set_xdp_features (drivers/net/veth.c:1265 (discriminator 9))
> veth_newlink (drivers/net/veth.c:1892)
> ? veth_set_features (drivers/net/veth.c:1774)
> ? kasan_save_stack (mm/kasan/common.c:47)
> ? kasan_save_stack (mm/kasan/common.c:46)
> ? kasan_set_track (mm/kasan/common.c:52)
> ? alloc_netdev_mqs (include/linux/slab.h:737)
> ? rcu_read_lock_sched_held (kernel/rcu/update.c:125)
> ? trace_kmalloc (include/trace/events/kmem.h:54)
> ? __xdp_rxq_info_reg (net/core/xdp.c:188)
> ? alloc_netdev_mqs (net/core/dev.c:10657)
> ? rtnl_create_link (net/core/rtnetlink.c:3312)
> rtnl_newlink_create (net/core/rtnetlink.c:3440)
> ? rtnl_link_get_net_capable.constprop.0 (net/core/rtnetlink.c:3391)
> __rtnl_newlink (net/core/rtnetlink.c:3657)
> ? lock_downgrade (kernel/locking/lockdep.c:5321)
> ? rtnl_link_unregister (net/core/rtnetlink.c:3487)
> rtnl_newlink (net/core/rtnetlink.c:3671)
> rtnetlink_rcv_msg (net/core/rtnetlink.c:6174)
> ? rtnl_link_fill (net/core/rtnetlink.c:6070)
> ? mark_usage (kernel/locking/lockdep.c:4914)
> ? mark_usage (kernel/locking/lockdep.c:4914)
> netlink_rcv_skb (net/netlink/af_netlink.c:2574)
> ? rtnl_link_fill (net/core/rtnetlink.c:6070)
> ? netlink_ack (net/netlink/af_netlink.c:2551)
> ? lock_acquire (kernel/locking/lockdep.c:467)
> ? net_generic (include/linux/rcupdate.h:805)
> ? netlink_deliver_tap (include/linux/rcupdate.h:805)
> netlink_unicast (net/netlink/af_netlink.c:1340)
> ? netlink_attachskb (net/netlink/af_netlink.c:1350)
> netlink_sendmsg (net/netlink/af_netlink.c:1942)
> ? netlink_unicast (net/netlink/af_netlink.c:1861)
> ? netlink_unicast (net/netlink/af_netlink.c:1861)
> sock_sendmsg (net/socket.c:727)
> ____sys_sendmsg (net/socket.c:2501)
> ? kernel_sendmsg (net/socket.c:2448)
> ? __copy_msghdr (net/socket.c:2428)
> ___sys_sendmsg (net/socket.c:2557)
> ? mark_usage (kernel/locking/lockdep.c:4914)
> ? do_recvmmsg (net/socket.c:2544)
> ? lock_acquire (kernel/locking/lockdep.c:467)
> ? find_held_lock (kernel/locking/lockdep.c:5159)
> ? __lock_release (kernel/locking/lockdep.c:5345)
> ? __might_fault (mm/memory.c:5625)
> ? lock_downgrade (kernel/locking/lockdep.c:5321)
> ? __fget_light (include/linux/atomic/atomic-arch-fallback.h:227)
> __sys_sendmsg (include/linux/file.h:31)
> ? __sys_sendmsg_sock (net/socket.c:2572)
> ? rseq_get_rseq_cs (kernel/rseq.c:275)
> ? lockdep_hardirqs_on_prepare.part.0 (kernel/locking/lockdep.c:4263)
> do_syscall_64 (arch/x86/entry/common.c:50)
> entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
> RIP: 0033:0x7f0d1aadeb17
> Code: 0f 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e
> fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00
> f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10
> All code
> ========
>    0:   0f 00                   (bad)
>    2:   f7 d8                   neg    %eax
>    4:   64 89 02                mov    %eax,%fs:(%rdx)
>    7:   48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
>    e:   eb b9                   jmp    0xffffffffffffffc9
>   10:   0f 1f 00                nopl   (%rax)
>   13:   f3 0f 1e fa             endbr64
>   17:   64 8b 04 25 18 00 00    mov    %fs:0x18,%eax
>   1e:   00
>   1f:   85 c0                   test   %eax,%eax
>   21:   75 10                   jne    0x33
>   23:   b8 2e 00 00 00          mov    $0x2e,%eax
>   28:   0f 05                   syscall
>   2a:*  48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
> <-- trapping instruction
>   30:   77 51                   ja     0x83
>   32:   c3                      ret
>   33:   48 83 ec 28             sub    $0x28,%rsp
>   37:   89 54 24 1c             mov    %edx,0x1c(%rsp)
>   3b:   48 89 74 24 10          mov    %rsi,0x10(%rsp)
>
> Code starting with the faulting instruction
> ===========================================
>    0:   48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
>    6:   77 51                   ja     0x59
>    8:   c3                      ret
>    9:   48 83 ec 28             sub    $0x28,%rsp
>    d:   89 54 24 1c             mov    %edx,0x1c(%rsp)
>   11:   48 89 74 24 10          mov    %rsi,0x10(%rsp)
> RSP: 002b:00007ffca3305d48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
> RAX: ffffffffffffffda RBX: 00000000640f2bb2 RCX: 00007f0d1aadeb17
> RDX: 0000000000000000 RSI: 00007ffca3305db0 RDI: 0000000000000003
> RBP: 0000000000000000 R08: 0000000000000001 R09: 00007ffca3304ae0
> R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
> R13: 00007ffca3305eb4 R14: 00007ffca3305e80 R15: 0000561e28bf5020
>  </TASK>
> ip (135) used greatest stack depth: 24544 bytes left
>
> ###################### 8< ######################
>
>
> I can reproduce the issue on the "net" tree with just these 3 commands:
>
> # ip netns add ns1
> # ip netns add ns2
> # ip link add ns1eth1 netns ns1 type veth peer name ns2eth1 netns ns2
>
> Without this commit fccca038f300 ("veth: take into account device
> reconfiguration for xdp_features flag"), I don't have the issue.
>
> For more details about the issue detected by CIs validating our MPTCP
> tree, including kconfig and vmlinux if needed:
>
>   https://github.com/multipath-tcp/mptcp_net-next/issues/372
>
>
> Do you mind looking at this regression please? :)
>
>
> On our side, we will revert this patch in our tree for the moment to
> unblock our CI jobs.
>
> Cheers,
> Matt
> --
> Tessares | Belgium | Hybrid Access Solutions
> www.tessares.net
Eric Dumazet March 13, 2023, 3:53 p.m. UTC | #3
On Mon, Mar 13, 2023 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote:
>
> On Mon, Mar 13, 2023 at 7:15 AM Matthieu Baerts
> <matthieu.baerts@tessares.net> wrote:
> >
> > Hi Lorenzo,
> >
> > On 09/03/2023 13:25, Lorenzo Bianconi wrote:
> > > Take into account tx/rx queues reconfiguration setting device
> > > xdp_features flag. Moreover consider NETIF_F_GRO flag in order to enable
> > > ndo_xdp_xmit callback.
> > >
> > > Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
> > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> >
> > Thank you for the modification.
> >
> > Unfortunately, 'git bisect' just told me this modification is the origin
> > of a new WARN when using veth in a netns:
> >
> >
> > ###################### 8< ######################
> >
> > =============================
> > WARNING: suspicious RCU usage
> > 6.3.0-rc1-00144-g064d70527aaa #149 Not tainted
> > -----------------------------
> > drivers/net/veth.c:1265 suspicious rcu_dereference_check() usage!
> >
> > other info that might help us debug this:
> >
>
> Same observation here, I am releasing a syzbot report with a repro.
>
>

I guess a fix would be:

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 293dc3b2c84a6c1931e8df42cdcd5f2798004f3c..4da74ac27f9a2425d8d3f4ffcc93f453bd58e3a5 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1262,7 +1262,7 @@ static void veth_set_xdp_features(struct net_device *dev)
        struct veth_priv *priv = netdev_priv(dev);
        struct net_device *peer;

-       peer = rcu_dereference(priv->peer);
+       peer = rtnl_dereference(priv->peer);
        if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
                xdp_features_t val = NETDEV_XDP_ACT_BASIC |
                                     NETDEV_XDP_ACT_REDIRECT |


>
> >
> > rcu_scheduler_active = 2, debug_locks = 1
> > 1 lock held by ip/135:
> > #0: ffffffff8dc4b108 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg
> > (net/core/rtnetlink.c:6172)
> >
> > stack backtrace:
> > CPU: 1 PID: 135 Comm: ip Not tainted 6.3.0-rc1-00144-g064d70527aaa #149
> > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1
> > 04/01/2014
> > Call Trace:
> >  <TASK>
> > dump_stack_lvl (lib/dump_stack.c:107)
> > lockdep_rcu_suspicious (include/linux/context_tracking.h:152)
> > veth_set_xdp_features (drivers/net/veth.c:1265 (discriminator 9))
> > veth_newlink (drivers/net/veth.c:1892)
> > ? veth_set_features (drivers/net/veth.c:1774)
> > ? kasan_save_stack (mm/kasan/common.c:47)
> > ? kasan_save_stack (mm/kasan/common.c:46)
> > ? kasan_set_track (mm/kasan/common.c:52)
> > ? alloc_netdev_mqs (include/linux/slab.h:737)
> > ? rcu_read_lock_sched_held (kernel/rcu/update.c:125)
> > ? trace_kmalloc (include/trace/events/kmem.h:54)
> > ? __xdp_rxq_info_reg (net/core/xdp.c:188)
> > ? alloc_netdev_mqs (net/core/dev.c:10657)
> > ? rtnl_create_link (net/core/rtnetlink.c:3312)
> > rtnl_newlink_create (net/core/rtnetlink.c:3440)
> > ? rtnl_link_get_net_capable.constprop.0 (net/core/rtnetlink.c:3391)
> > __rtnl_newlink (net/core/rtnetlink.c:3657)
> > ? lock_downgrade (kernel/locking/lockdep.c:5321)
> > ? rtnl_link_unregister (net/core/rtnetlink.c:3487)
> > rtnl_newlink (net/core/rtnetlink.c:3671)
> > rtnetlink_rcv_msg (net/core/rtnetlink.c:6174)
> > ? rtnl_link_fill (net/core/rtnetlink.c:6070)
> > ? mark_usage (kernel/locking/lockdep.c:4914)
> > ? mark_usage (kernel/locking/lockdep.c:4914)
> > netlink_rcv_skb (net/netlink/af_netlink.c:2574)
> > ? rtnl_link_fill (net/core/rtnetlink.c:6070)
> > ? netlink_ack (net/netlink/af_netlink.c:2551)
> > ? lock_acquire (kernel/locking/lockdep.c:467)
> > ? net_generic (include/linux/rcupdate.h:805)
> > ? netlink_deliver_tap (include/linux/rcupdate.h:805)
> > netlink_unicast (net/netlink/af_netlink.c:1340)
> > ? netlink_attachskb (net/netlink/af_netlink.c:1350)
> > netlink_sendmsg (net/netlink/af_netlink.c:1942)
> > ? netlink_unicast (net/netlink/af_netlink.c:1861)
> > ? netlink_unicast (net/netlink/af_netlink.c:1861)
> > sock_sendmsg (net/socket.c:727)
> > ____sys_sendmsg (net/socket.c:2501)
> > ? kernel_sendmsg (net/socket.c:2448)
> > ? __copy_msghdr (net/socket.c:2428)
> > ___sys_sendmsg (net/socket.c:2557)
> > ? mark_usage (kernel/locking/lockdep.c:4914)
> > ? do_recvmmsg (net/socket.c:2544)
> > ? lock_acquire (kernel/locking/lockdep.c:467)
> > ? find_held_lock (kernel/locking/lockdep.c:5159)
> > ? __lock_release (kernel/locking/lockdep.c:5345)
> > ? __might_fault (mm/memory.c:5625)
> > ? lock_downgrade (kernel/locking/lockdep.c:5321)
> > ? __fget_light (include/linux/atomic/atomic-arch-fallback.h:227)
> > __sys_sendmsg (include/linux/file.h:31)
> > ? __sys_sendmsg_sock (net/socket.c:2572)
> > ? rseq_get_rseq_cs (kernel/rseq.c:275)
> > ? lockdep_hardirqs_on_prepare.part.0 (kernel/locking/lockdep.c:4263)
> > do_syscall_64 (arch/x86/entry/common.c:50)
> > entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
> > RIP: 0033:0x7f0d1aadeb17
> > Code: 0f 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e
> > fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00
> > f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10
> > All code
> > ========
> >    0:   0f 00                   (bad)
> >    2:   f7 d8                   neg    %eax
> >    4:   64 89 02                mov    %eax,%fs:(%rdx)
> >    7:   48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
> >    e:   eb b9                   jmp    0xffffffffffffffc9
> >   10:   0f 1f 00                nopl   (%rax)
> >   13:   f3 0f 1e fa             endbr64
> >   17:   64 8b 04 25 18 00 00    mov    %fs:0x18,%eax
> >   1e:   00
> >   1f:   85 c0                   test   %eax,%eax
> >   21:   75 10                   jne    0x33
> >   23:   b8 2e 00 00 00          mov    $0x2e,%eax
> >   28:   0f 05                   syscall
> >   2a:*  48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
> > <-- trapping instruction
> >   30:   77 51                   ja     0x83
> >   32:   c3                      ret
> >   33:   48 83 ec 28             sub    $0x28,%rsp
> >   37:   89 54 24 1c             mov    %edx,0x1c(%rsp)
> >   3b:   48 89 74 24 10          mov    %rsi,0x10(%rsp)
> >
> > Code starting with the faulting instruction
> > ===========================================
> >    0:   48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
> >    6:   77 51                   ja     0x59
> >    8:   c3                      ret
> >    9:   48 83 ec 28             sub    $0x28,%rsp
> >    d:   89 54 24 1c             mov    %edx,0x1c(%rsp)
> >   11:   48 89 74 24 10          mov    %rsi,0x10(%rsp)
> > RSP: 002b:00007ffca3305d48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
> > RAX: ffffffffffffffda RBX: 00000000640f2bb2 RCX: 00007f0d1aadeb17
> > RDX: 0000000000000000 RSI: 00007ffca3305db0 RDI: 0000000000000003
> > RBP: 0000000000000000 R08: 0000000000000001 R09: 00007ffca3304ae0
> > R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
> > R13: 00007ffca3305eb4 R14: 00007ffca3305e80 R15: 0000561e28bf5020
> >  </TASK>
> > ip (135) used greatest stack depth: 24544 bytes left
> >
> > ###################### 8< ######################
> >
> >
> > I can reproduce the issue on the "net" tree with just these 3 commands:
> >
> > # ip netns add ns1
> > # ip netns add ns2
> > # ip link add ns1eth1 netns ns1 type veth peer name ns2eth1 netns ns2
> >
> > Without this commit fccca038f300 ("veth: take into account device
> > reconfiguration for xdp_features flag"), I don't have the issue.
> >
> > For more details about the issue detected by CIs validating our MPTCP
> > tree, including kconfig and vmlinux if needed:
> >
> >   https://github.com/multipath-tcp/mptcp_net-next/issues/372
> >
> >
> > Do you mind looking at this regression please? :)
> >
> >
> > On our side, we will revert this patch in our tree for the moment to
> > unblock our CI jobs.
> >
> > Cheers,
> > Matt
> > --
> > Tessares | Belgium | Hybrid Access Solutions
> > www.tessares.net
Matthieu Baerts March 13, 2023, 4:31 p.m. UTC | #4
Hi Eric,

On 13/03/2023 16:53, Eric Dumazet wrote:
> On Mon, Mar 13, 2023 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote:
>>
>> On Mon, Mar 13, 2023 at 7:15 AM Matthieu Baerts
>> <matthieu.baerts@tessares.net> wrote:
>>>
>>> Hi Lorenzo,
>>>
>>> On 09/03/2023 13:25, Lorenzo Bianconi wrote:
>>>> Take into account tx/rx queues reconfiguration setting device
>>>> xdp_features flag. Moreover consider NETIF_F_GRO flag in order to enable
>>>> ndo_xdp_xmit callback.
>>>>
>>>> Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
>>>> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
>>>
>>> Thank you for the modification.
>>>
>>> Unfortunately, 'git bisect' just told me this modification is the origin
>>> of a new WARN when using veth in a netns:
>>>
>>>
>>> ###################### 8< ######################
>>>
>>> =============================
>>> WARNING: suspicious RCU usage
>>> 6.3.0-rc1-00144-g064d70527aaa #149 Not tainted
>>> -----------------------------
>>> drivers/net/veth.c:1265 suspicious rcu_dereference_check() usage!
>>>
>>> other info that might help us debug this:
>>>
>>
>> Same observation here, I am releasing a syzbot report with a repro.
>>
>>
> 
> I guess a fix would be:
> 
> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
> index 293dc3b2c84a6c1931e8df42cdcd5f2798004f3c..4da74ac27f9a2425d8d3f4ffcc93f453bd58e3a5
> 100644
> --- a/drivers/net/veth.c
> +++ b/drivers/net/veth.c
> @@ -1262,7 +1262,7 @@ static void veth_set_xdp_features(struct net_device *dev)
>         struct veth_priv *priv = netdev_priv(dev);
>         struct net_device *peer;
> 
> -       peer = rcu_dereference(priv->peer);
> +       peer = rtnl_dereference(priv->peer);
>         if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
>                 xdp_features_t val = NETDEV_XDP_ACT_BASIC |
>                                      NETDEV_XDP_ACT_REDIRECT |
> 

Thank you for having looked!

This patch avoids the warning on our side.

Cheers,
Matt
Lorenzo Bianconi March 13, 2023, 4:35 p.m. UTC | #5
> On Mon, Mar 13, 2023 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote:
> >
> > On Mon, Mar 13, 2023 at 7:15 AM Matthieu Baerts
> > <matthieu.baerts@tessares.net> wrote:
> > >
> > > Hi Lorenzo,
> > >
> > > On 09/03/2023 13:25, Lorenzo Bianconi wrote:
> > > > Take into account tx/rx queues reconfiguration setting device
> > > > xdp_features flag. Moreover consider NETIF_F_GRO flag in order to enable
> > > > ndo_xdp_xmit callback.
> > > >
> > > > Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
> > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > >
> > > Thank you for the modification.
> > >
> > > Unfortunately, 'git bisect' just told me this modification is the origin
> > > of a new WARN when using veth in a netns:
> > >
> > >
> > > ###################### 8< ######################
> > >
> > > =============================
> > > WARNING: suspicious RCU usage
> > > 6.3.0-rc1-00144-g064d70527aaa #149 Not tainted
> > > -----------------------------
> > > drivers/net/veth.c:1265 suspicious rcu_dereference_check() usage!
> > >
> > > other info that might help us debug this:
> > >
> >
> > Same observation here, I am releasing a syzbot report with a repro.
> >
> >
> 
> I guess a fix would be:
> 

Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>

> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
> index 293dc3b2c84a6c1931e8df42cdcd5f2798004f3c..4da74ac27f9a2425d8d3f4ffcc93f453bd58e3a5
> 100644
> --- a/drivers/net/veth.c
> +++ b/drivers/net/veth.c
> @@ -1262,7 +1262,7 @@ static void veth_set_xdp_features(struct net_device *dev)
>         struct veth_priv *priv = netdev_priv(dev);
>         struct net_device *peer;
> 
> -       peer = rcu_dereference(priv->peer);
> +       peer = rtnl_dereference(priv->peer);
>         if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
>                 xdp_features_t val = NETDEV_XDP_ACT_BASIC |
>                                      NETDEV_XDP_ACT_REDIRECT |
> 
> 
> >
> > >
> > > rcu_scheduler_active = 2, debug_locks = 1
> > > 1 lock held by ip/135:
> > > #0: ffffffff8dc4b108 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg
> > > (net/core/rtnetlink.c:6172)
> > >
> > > stack backtrace:
> > > CPU: 1 PID: 135 Comm: ip Not tainted 6.3.0-rc1-00144-g064d70527aaa #149
> > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1
> > > 04/01/2014
> > > Call Trace:
> > >  <TASK>
> > > dump_stack_lvl (lib/dump_stack.c:107)
> > > lockdep_rcu_suspicious (include/linux/context_tracking.h:152)
> > > veth_set_xdp_features (drivers/net/veth.c:1265 (discriminator 9))
> > > veth_newlink (drivers/net/veth.c:1892)
> > > ? veth_set_features (drivers/net/veth.c:1774)
> > > ? kasan_save_stack (mm/kasan/common.c:47)
> > > ? kasan_save_stack (mm/kasan/common.c:46)
> > > ? kasan_set_track (mm/kasan/common.c:52)
> > > ? alloc_netdev_mqs (include/linux/slab.h:737)
> > > ? rcu_read_lock_sched_held (kernel/rcu/update.c:125)
> > > ? trace_kmalloc (include/trace/events/kmem.h:54)
> > > ? __xdp_rxq_info_reg (net/core/xdp.c:188)
> > > ? alloc_netdev_mqs (net/core/dev.c:10657)
> > > ? rtnl_create_link (net/core/rtnetlink.c:3312)
> > > rtnl_newlink_create (net/core/rtnetlink.c:3440)
> > > ? rtnl_link_get_net_capable.constprop.0 (net/core/rtnetlink.c:3391)
> > > __rtnl_newlink (net/core/rtnetlink.c:3657)
> > > ? lock_downgrade (kernel/locking/lockdep.c:5321)
> > > ? rtnl_link_unregister (net/core/rtnetlink.c:3487)
> > > rtnl_newlink (net/core/rtnetlink.c:3671)
> > > rtnetlink_rcv_msg (net/core/rtnetlink.c:6174)
> > > ? rtnl_link_fill (net/core/rtnetlink.c:6070)
> > > ? mark_usage (kernel/locking/lockdep.c:4914)
> > > ? mark_usage (kernel/locking/lockdep.c:4914)
> > > netlink_rcv_skb (net/netlink/af_netlink.c:2574)
> > > ? rtnl_link_fill (net/core/rtnetlink.c:6070)
> > > ? netlink_ack (net/netlink/af_netlink.c:2551)
> > > ? lock_acquire (kernel/locking/lockdep.c:467)
> > > ? net_generic (include/linux/rcupdate.h:805)
> > > ? netlink_deliver_tap (include/linux/rcupdate.h:805)
> > > netlink_unicast (net/netlink/af_netlink.c:1340)
> > > ? netlink_attachskb (net/netlink/af_netlink.c:1350)
> > > netlink_sendmsg (net/netlink/af_netlink.c:1942)
> > > ? netlink_unicast (net/netlink/af_netlink.c:1861)
> > > ? netlink_unicast (net/netlink/af_netlink.c:1861)
> > > sock_sendmsg (net/socket.c:727)
> > > ____sys_sendmsg (net/socket.c:2501)
> > > ? kernel_sendmsg (net/socket.c:2448)
> > > ? __copy_msghdr (net/socket.c:2428)
> > > ___sys_sendmsg (net/socket.c:2557)
> > > ? mark_usage (kernel/locking/lockdep.c:4914)
> > > ? do_recvmmsg (net/socket.c:2544)
> > > ? lock_acquire (kernel/locking/lockdep.c:467)
> > > ? find_held_lock (kernel/locking/lockdep.c:5159)
> > > ? __lock_release (kernel/locking/lockdep.c:5345)
> > > ? __might_fault (mm/memory.c:5625)
> > > ? lock_downgrade (kernel/locking/lockdep.c:5321)
> > > ? __fget_light (include/linux/atomic/atomic-arch-fallback.h:227)
> > > __sys_sendmsg (include/linux/file.h:31)
> > > ? __sys_sendmsg_sock (net/socket.c:2572)
> > > ? rseq_get_rseq_cs (kernel/rseq.c:275)
> > > ? lockdep_hardirqs_on_prepare.part.0 (kernel/locking/lockdep.c:4263)
> > > do_syscall_64 (arch/x86/entry/common.c:50)
> > > entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
> > > RIP: 0033:0x7f0d1aadeb17
> > > Code: 0f 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e
> > > fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00
> > > f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10
> > > All code
> > > ========
> > >    0:   0f 00                   (bad)
> > >    2:   f7 d8                   neg    %eax
> > >    4:   64 89 02                mov    %eax,%fs:(%rdx)
> > >    7:   48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
> > >    e:   eb b9                   jmp    0xffffffffffffffc9
> > >   10:   0f 1f 00                nopl   (%rax)
> > >   13:   f3 0f 1e fa             endbr64
> > >   17:   64 8b 04 25 18 00 00    mov    %fs:0x18,%eax
> > >   1e:   00
> > >   1f:   85 c0                   test   %eax,%eax
> > >   21:   75 10                   jne    0x33
> > >   23:   b8 2e 00 00 00          mov    $0x2e,%eax
> > >   28:   0f 05                   syscall
> > >   2a:*  48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
> > > <-- trapping instruction
> > >   30:   77 51                   ja     0x83
> > >   32:   c3                      ret
> > >   33:   48 83 ec 28             sub    $0x28,%rsp
> > >   37:   89 54 24 1c             mov    %edx,0x1c(%rsp)
> > >   3b:   48 89 74 24 10          mov    %rsi,0x10(%rsp)
> > >
> > > Code starting with the faulting instruction
> > > ===========================================
> > >    0:   48 3d 00 f0 ff ff       cmp    $0xfffffffffffff000,%rax
> > >    6:   77 51                   ja     0x59
> > >    8:   c3                      ret
> > >    9:   48 83 ec 28             sub    $0x28,%rsp
> > >    d:   89 54 24 1c             mov    %edx,0x1c(%rsp)
> > >   11:   48 89 74 24 10          mov    %rsi,0x10(%rsp)
> > > RSP: 002b:00007ffca3305d48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
> > > RAX: ffffffffffffffda RBX: 00000000640f2bb2 RCX: 00007f0d1aadeb17
> > > RDX: 0000000000000000 RSI: 00007ffca3305db0 RDI: 0000000000000003
> > > RBP: 0000000000000000 R08: 0000000000000001 R09: 00007ffca3304ae0
> > > R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
> > > R13: 00007ffca3305eb4 R14: 00007ffca3305e80 R15: 0000561e28bf5020
> > >  </TASK>
> > > ip (135) used greatest stack depth: 24544 bytes left
> > >
> > > ###################### 8< ######################
> > >
> > >
> > > I can reproduce the issue on the "net" tree with just these 3 commands:
> > >
> > > # ip netns add ns1
> > > # ip netns add ns2
> > > # ip link add ns1eth1 netns ns1 type veth peer name ns2eth1 netns ns2
> > >
> > > Without this commit fccca038f300 ("veth: take into account device
> > > reconfiguration for xdp_features flag"), I don't have the issue.
> > >
> > > For more details about the issue detected by CIs validating our MPTCP
> > > tree, including kconfig and vmlinux if needed:
> > >
> > >   https://github.com/multipath-tcp/mptcp_net-next/issues/372
> > >
> > >
> > > Do you mind looking at this regression please? :)
> > >
> > >
> > > On our side, we will revert this patch in our tree for the moment to
> > > unblock our CI jobs.
> > >
> > > Cheers,
> > > Matt
> > > --
> > > Tessares | Belgium | Hybrid Access Solutions
> > > www.tessares.net
Eric Dumazet March 13, 2023, 4:37 p.m. UTC | #6
On Mon, Mar 13, 2023 at 9:36 AM Lorenzo Bianconi <lorenzo@kernel.org> wrote:
>
> > On Mon, Mar 13, 2023 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote:
> > >
> > > On Mon, Mar 13, 2023 at 7:15 AM Matthieu Baerts
> > > <matthieu.baerts@tessares.net> wrote:
> > > >
> > > > Hi Lorenzo,
> > > >
> > > > On 09/03/2023 13:25, Lorenzo Bianconi wrote:
> > > > > Take into account tx/rx queues reconfiguration setting device
> > > > > xdp_features flag. Moreover consider NETIF_F_GRO flag in order to enable
> > > > > ndo_xdp_xmit callback.
> > > > >
> > > > > Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
> > > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > >
> > > > Thank you for the modification.
> > > >
> > > > Unfortunately, 'git bisect' just told me this modification is the origin
> > > > of a new WARN when using veth in a netns:
> > > >
> > > >
> > > > ###################### 8< ######################
> > > >
> > > > =============================
> > > > WARNING: suspicious RCU usage
> > > > 6.3.0-rc1-00144-g064d70527aaa #149 Not tainted
> > > > -----------------------------
> > > > drivers/net/veth.c:1265 suspicious rcu_dereference_check() usage!
> > > >
> > > > other info that might help us debug this:
> > > >
> > >
> > > Same observation here, I am releasing a syzbot report with a repro.
> > >
> > >
> >
> > I guess a fix would be:
> >
>
> Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>

Can you submit a formal fix?

Thanks.
Lorenzo Bianconi March 13, 2023, 4:47 p.m. UTC | #7
> On Mon, Mar 13, 2023 at 9:36 AM Lorenzo Bianconi <lorenzo@kernel.org> wrote:
> >
> > > On Mon, Mar 13, 2023 at 8:50 AM Eric Dumazet <edumazet@google.com> wrote:
> > > >
> > > > On Mon, Mar 13, 2023 at 7:15 AM Matthieu Baerts
> > > > <matthieu.baerts@tessares.net> wrote:
> > > > >
> > > > > Hi Lorenzo,
> > > > >
> > > > > On 09/03/2023 13:25, Lorenzo Bianconi wrote:
> > > > > > Take into account tx/rx queues reconfiguration setting device
> > > > > > xdp_features flag. Moreover consider NETIF_F_GRO flag in order to enable
> > > > > > ndo_xdp_xmit callback.
> > > > > >
> > > > > > Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
> > > > > > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > > > >
> > > > > Thank you for the modification.
> > > > >
> > > > > Unfortunately, 'git bisect' just told me this modification is the origin
> > > > > of a new WARN when using veth in a netns:
> > > > >
> > > > >
> > > > > ###################### 8< ######################
> > > > >
> > > > > =============================
> > > > > WARNING: suspicious RCU usage
> > > > > 6.3.0-rc1-00144-g064d70527aaa #149 Not tainted
> > > > > -----------------------------
> > > > > drivers/net/veth.c:1265 suspicious rcu_dereference_check() usage!
> > > > >
> > > > > other info that might help us debug this:
> > > > >
> > > >
> > > > Same observation here, I am releasing a syzbot report with a repro.
> > > >
> > > >
> > >
> > > I guess a fix would be:
> > >
> >
> > Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
> 
> Can you submit a formal fix?

ack, will do.

Regards,
Lorenzo

> 
> Thanks.
Martin KaFai Lau April 6, 2023, 12:56 a.m. UTC | #8
On 3/9/23 4:25 AM, Lorenzo Bianconi wrote:
> +static void veth_set_xdp_features(struct net_device *dev)
> +{
> +	struct veth_priv *priv = netdev_priv(dev);
> +	struct net_device *peer;
> +
> +	peer = rcu_dereference(priv->peer);
> +	if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
> +		xdp_features_t val = NETDEV_XDP_ACT_BASIC |
> +				     NETDEV_XDP_ACT_REDIRECT |
> +				     NETDEV_XDP_ACT_RX_SG;
> +
> +		if (priv->_xdp_prog || veth_gro_requested(dev))
> +			val |= NETDEV_XDP_ACT_NDO_XMIT |
> +			       NETDEV_XDP_ACT_NDO_XMIT_SG;

This broke the xdp_do_redirect selftest. The bpf CI is consistently failing at:

test_xdp_do_redirect:FAIL:veth_src query_opts.feature_flags unexpected veth_src query_opts.feature_flags: actual 35 != expected 103

Please address it asap.
diff mbox series

Patch

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 1bb54de7124d..293dc3b2c84a 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1257,6 +1257,26 @@  static int veth_enable_range_safe(struct net_device *dev, int start, int end)
 	return 0;
 }
 
+static void veth_set_xdp_features(struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	struct net_device *peer;
+
+	peer = rcu_dereference(priv->peer);
+	if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
+		xdp_features_t val = NETDEV_XDP_ACT_BASIC |
+				     NETDEV_XDP_ACT_REDIRECT |
+				     NETDEV_XDP_ACT_RX_SG;
+
+		if (priv->_xdp_prog || veth_gro_requested(dev))
+			val |= NETDEV_XDP_ACT_NDO_XMIT |
+			       NETDEV_XDP_ACT_NDO_XMIT_SG;
+		xdp_set_features_flag(dev, val);
+	} else {
+		xdp_clear_features_flag(dev);
+	}
+}
+
 static int veth_set_channels(struct net_device *dev,
 			     struct ethtool_channels *ch)
 {
@@ -1323,6 +1343,12 @@  static int veth_set_channels(struct net_device *dev,
 		if (peer)
 			netif_carrier_on(peer);
 	}
+
+	/* update XDP supported features */
+	veth_set_xdp_features(dev);
+	if (peer)
+		veth_set_xdp_features(peer);
+
 	return err;
 
 revert:
@@ -1489,7 +1515,10 @@  static int veth_set_features(struct net_device *dev,
 		err = veth_napi_enable(dev);
 		if (err)
 			return err;
+
+		xdp_features_set_redirect_target(dev, true);
 	} else {
+		xdp_features_clear_redirect_target(dev);
 		veth_napi_del(dev);
 	}
 	return 0;
@@ -1570,10 +1599,15 @@  static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 			peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
 			peer->max_mtu = max_mtu;
 		}
+
+		xdp_features_set_redirect_target(dev, true);
 	}
 
 	if (old_prog) {
 		if (!prog) {
+			if (!veth_gro_requested(dev))
+				xdp_features_clear_redirect_target(dev);
+
 			if (dev->flags & IFF_UP)
 				veth_disable_xdp(dev);
 
@@ -1686,10 +1720,6 @@  static void veth_setup(struct net_device *dev)
 	dev->hw_enc_features = VETH_FEATURES;
 	dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
 	netif_set_tso_max_size(dev, GSO_MAX_SIZE);
-
-	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
-			    NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
-			    NETDEV_XDP_ACT_NDO_XMIT_SG;
 }
 
 /*
@@ -1857,6 +1887,10 @@  static int veth_newlink(struct net *src_net, struct net_device *dev,
 		goto err_queues;
 
 	veth_disable_gro(dev);
+	/* update XDP supported features */
+	veth_set_xdp_features(dev);
+	veth_set_xdp_features(peer);
+
 	return 0;
 
 err_queues: