Message ID | 20241108124051.415090-1-dmantipov@yandex.ru (mailing list archive) |
---|---|
State | Awaiting Upstream |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | mac802154: fix interface deletion | expand |
Hello Dmitry, > --- a/net/mac802154/iface.c > +++ b/net/mac802154/iface.c > @@ -669,7 +669,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, > goto err; > > mutex_lock(&local->iflist_mtx); > - list_add_tail_rcu(&sdata->list, &local->interfaces); > + list_add_tail(&sdata->list, &local->interfaces); > mutex_unlock(&local->iflist_mtx); > > return ndev; > @@ -683,11 +683,13 @@ void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata) > { > ASSERT_RTNL(); > > + if (test_and_set_bit(SDATA_STATE_REMOVED, &sdata->state)) > + return; > + > mutex_lock(&sdata->local->iflist_mtx); > - list_del_rcu(&sdata->list); > + list_del(&sdata->list); > mutex_unlock(&sdata->local->iflist_mtx); > > - synchronize_rcu(); > unregister_netdevice(sdata->dev); > } > > @@ -697,6 +699,8 @@ void ieee802154_remove_interfaces(struct ieee802154_local *local) > > mutex_lock(&local->iflist_mtx); > list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { > + if (test_and_set_bit(SDATA_STATE_REMOVED, &sdata->state)) > + continue; > list_del(&sdata->list); Why not just enclose this list_del() within a mutex_lock(iflist_mtx) like the others? Would probably make more sense and prevent the use of yet another protection mechanism? Is there anything preventing the use of this mutex here? Thanks, Miquèl
On 11/11/24 10:41 PM, Miquel Raynal wrote: > Why not just enclose this list_del() within a mutex_lock(iflist_mtx) > like the others? Would probably make more sense and prevent the use of > yet another protection mechanism? Is there anything preventing the use > of this mutex here? IIUC this will not work because 'ieee802154_if_remove()' may be called for 'sdata' which was previously removed via 'ieee802154_remove_interfaces()'. After the latter, 'sdata->list' is undefined (or poisoned if CONFIG_DEBUG_LIST is enabled), so re-entering 'list_del(&sdata->list)' in the former is a bug. Dmitry
On 11/11/24 10:41 PM, Miquel Raynal wrote: > Why not just enclose this list_del() within a mutex_lock(iflist_mtx) > like the others? Would probably make more sense and prevent the use of > yet another protection mechanism? Is there anything preventing the use > of this mutex here? Moreover, if we manage interfaces list with RCU and device status with an extra status bit, do we need 'iflist_mtx' at all? I've tried this in https://syzkaller.appspot.com/text?tag=Patch&x=17b1f4e8580000 and it looks good. Dmitry
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 08dd521a51a5..7afcea3447c1 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -40,9 +40,8 @@ struct ieee802154_local { int open_count; /* As in mac80211 slaves list is modified: - * 1) under the RTNL - * 2) protected by slaves_mtx; - * 3) in an RCU manner + * 1) under the RTNL; + * 2) protected by iflist_mtx. * * So atomic readers can use any of this protection methods. */ @@ -101,6 +100,7 @@ enum { enum ieee802154_sdata_state_bits { SDATA_STATE_RUNNING, + SDATA_STATE_REMOVED, }; /* Slave interface definition. diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index c0e2da5072be..700c80e94bb2 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -669,7 +669,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, goto err; mutex_lock(&local->iflist_mtx); - list_add_tail_rcu(&sdata->list, &local->interfaces); + list_add_tail(&sdata->list, &local->interfaces); mutex_unlock(&local->iflist_mtx); return ndev; @@ -683,11 +683,13 @@ void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata) { ASSERT_RTNL(); + if (test_and_set_bit(SDATA_STATE_REMOVED, &sdata->state)) + return; + mutex_lock(&sdata->local->iflist_mtx); - list_del_rcu(&sdata->list); + list_del(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx); - synchronize_rcu(); unregister_netdevice(sdata->dev); } @@ -697,6 +699,8 @@ void ieee802154_remove_interfaces(struct ieee802154_local *local) mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { + if (test_and_set_bit(SDATA_STATE_REMOVED, &sdata->state)) + continue; list_del(&sdata->list); unregister_netdevice(sdata->dev);
Syzbot has reported the following BUG: kernel BUG at lib/list_debug.c:58! Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 0 UID: 0 PID: 6277 Comm: syz-executor157 Not tainted 6.12.0-rc6-syzkaller-00005-g557329bcecc2 #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:__list_del_entry_valid_or_report+0xf4/0x140 lib/list_debug.c:56 Code: e8 a1 7e 00 07 90 0f 0b 48 c7 c7 e0 37 60 8c 4c 89 fe e8 8f 7e 00 07 90 0f 0b 48 c7 c7 40 38 60 8c 4c 89 fe e8 7d 7e 00 07 90 <0f> 0b 48 c7 c7 a0 38 60 8c 4c 89 fe e8 6b 7e 00 07 90 0f 0b 48 c7 RSP: 0018:ffffc9000490f3d0 EFLAGS: 00010246 RAX: 000000000000004e RBX: dead000000000122 RCX: d211eee56bb28d00 RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000 RBP: ffff88805b278dd8 R08: ffffffff8174a12c R09: 1ffffffff2852f0d R10: dffffc0000000000 R11: fffffbfff2852f0e R12: dffffc0000000000 R13: dffffc0000000000 R14: dead000000000100 R15: ffff88805b278cc0 FS: 0000555572f94380(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000056262e4a3000 CR3: 0000000078496000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> __list_del_entry_valid include/linux/list.h:124 [inline] __list_del_entry include/linux/list.h:215 [inline] list_del_rcu include/linux/rculist.h:157 [inline] ieee802154_if_remove+0x86/0x1e0 net/mac802154/iface.c:687 rdev_del_virtual_intf_deprecated net/ieee802154/rdev-ops.h:24 [inline] ieee802154_del_iface+0x2c0/0x5c0 net/ieee802154/nl-phy.c:323 genl_family_rcv_msg_doit net/netlink/genetlink.c:1115 [inline] genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0xb14/0xec0 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2551 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1331 [inline] netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1357 netlink_sendmsg+0x8e4/0xcb0 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:729 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:744 ____sys_sendmsg+0x52a/0x7e0 net/socket.c:2607 ___sys_sendmsg net/socket.c:2661 [inline] __sys_sendmsg+0x292/0x380 net/socket.c:2690 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fd094c32309 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 71 19 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffec50063a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fd094c32309 RDX: 0000000004000000 RSI: 0000000020000b00 RDI: 0000000000000004 RBP: 00000000000f4240 R08: 0000000000000000 R09: 00000000000000a0 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000000161b7 R13: 00007ffec50063bc R14: 00007ffec50063d0 R15: 00007ffec50063c0 </TASK> Since 'ieee802154_remove_interfaces()' and 'ieee802154_if_remove()' may try to process the same 'struct ieee802154_sub_if_data' object concurrently, there are two problems: 1) list of 'struct ieee802154_sub_if_data' objects linked via 'interfaces' field of 'struct ieee802154_local' should remain consistent; 2) 'unregister_netdevice()' should not be called for the same 'struct net_device' concurrently. IIUC RCU can guarantee 1) but not 2), so discard RCU quirks and prefer explicit SDATA_STATE_REMOVED flag used via atomic and fully-ordered 'test_and_set_bit()' to mark 'struct ieee802154_sub_if_data' instance which has entered the reclamation. Reported-by: syzbot+985f827280dc3a6e7e92@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=985f827280dc3a6e7e92 Fixes: b210b18747cb ("mac802154: move interface del handling in iface") Signed-off-by: Dmitry Antipov <dmantipov@yandex.ru> --- net/mac802154/ieee802154_i.h | 6 +++--- net/mac802154/iface.c | 10 +++++++--- 2 files changed, 10 insertions(+), 6 deletions(-)