Message ID | 20240830073130.29982-7-michaelgur@nvidia.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Support RDMA events monitoring through netlink | expand |
On Fri, Aug 30, 2024 at 10:31:29AM +0300, Michael Guralnik wrote: > From: Chiara Meiohas <cmeiohas@nvidia.com> > > Introduce a new netlink command to allow rdma event monitoring. > The rdma events supported now are IB device > registration/unregistration and net device attachment/detachment. > > Example output of rdma monitor and the commands which trigger > the events: > > $ rdma monitor > $ rmmod mlx5_ib > [UNREGISTER] dev 3 > [UNREGISTER] dev 0 > > $modprobe mlx5_ib > [REGISTER] dev 4 > [NETDEV_ATTACH] dev 4 port 1 netdev 4 > [REGISTER] dev 5 > [NETDEV_ATTACH] dev 5 port 1 netdev 5 > > $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev > [UNREGISTER] dev 4 > [REGISTER] dev 6 > [NETDEV_ATTACH] dev 6 port 6 netdev 4 > > $ echo 4 > /sys/class/net/eth2/device/sriov_numvfs > [NETDEV_ATTACH] dev 6 port 2 netdev 7 > [NETDEV_ATTACH] dev 6 port 3 netdev 8 > [NETDEV_ATTACH] dev 6 port 4 netdev 9 > [NETDEV_ATTACH] dev 6 port 5 netdev 10 > [REGISTER] dev 7 > [NETDEV_ATTACH] dev 7 port 1 netdev 11 > [REGISTER] dev 8 > [NETDEV_ATTACH] dev 8 port 1 netdev 12 > [REGISTER] dev 9 > [NETDEV_ATTACH] dev 9 port 1 netdev 13 > [REGISTER] dev 10 > [NETDEV_ATTACH] dev 10 port 1 netdev 14 > > $ echo 0 > /sys/class/net/eth2/device/sriov_numvfs > [UNREGISTER] dev 7 > [UNREGISTER] dev 8 > [UNREGISTER] dev 9 > [UNREGISTER] dev 10 > [NETDEV_DETACH] dev 6 port 2 > [NETDEV_DETACH] dev 6 port 3 > [NETDEV_DETACH] dev 6 port 4 > [NETDEV_DETACH] dev 6 port 5 > > Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com> > Signed-off-by: Michael Guralnik <michaelgur@nvidia.com> > Reviewed-by: Leon Romanovsky <leonro@nvidia.com> > --- > drivers/infiniband/core/device.c | 38 ++++++++++ > drivers/infiniband/core/netlink.c | 1 + > drivers/infiniband/core/nldev.c | 118 ++++++++++++++++++++++++++++++ > include/rdma/rdma_netlink.h | 12 +++ > include/uapi/rdma/rdma_netlink.h | 15 ++++ > 5 files changed, 184 insertions(+) This patch breaks RXE and the following splat can be reproduced with "sudo rdma link 
add rxe1 type rxe netdev eth1" command: [ 16.871877][ T344] rdma_rxe: loaded [ 17.057211][ T343] infiniband rxe1: set active [ 17.057493][ T343] infiniband rxe1: added eth1 [ 17.080757][ T343] [ 17.080891][ T343] ====================================================== [ 17.081170][ T343] WARNING: possible circular locking dependency detected [ 17.081465][ T343] 6.11.0-rc5+ #2367 Not tainted [ 17.081675][ T343] ------------------------------------------------------ [ 17.081886][ T343] rdma/343 is trying to acquire lock: [ 17.082048][ T343] ffff88800ef6d188 (&rxe->usdev_lock){+.+.}-{3:3}, at: rxe_query_port+0x41/0x170 [rdma_rxe] [ 17.082385][ T343] [ 17.082385][ T343] but task is already holding lock: [ 17.082628][ T343] ffff88800ef6ce90 (&device->compat_devs_mutex){+.+.}-{3:3}, at: add_one_compat_dev+0xe4/0x6e0 [ib_core] [ 17.083002][ T343] [ 17.083002][ T343] which lock already depends on the new lock. [ 17.083002][ T343] [ 17.083302][ T343] [ 17.083302][ T343] the existing dependency chain (in reverse order) is: [ 17.083580][ T343] [ 17.083580][ T343] -> #3 (&device->compat_devs_mutex){+.+.}-{3:3}: [ 17.083866][ T343] __mutex_lock+0x14a/0x1940 [ 17.084038][ T343] ib_device_rename+0x110/0x3b0 [ib_core] [ 17.084274][ T343] nldev_set_doit+0x2ef/0x3d0 [ib_core] [ 17.084500][ T343] rdma_nl_rcv_msg+0x2b0/0x4f0 [ib_core] [ 17.084715][ T343] rdma_nl_rcv_skb.constprop.0.isra.0+0x238/0x390 [ib_core] [ 17.084981][ T343] netlink_unicast+0x438/0x730 [ 17.085194][ T343] netlink_sendmsg+0x72a/0xbc0 [ 17.085438][ T343] __sock_sendmsg+0xc5/0x190 [ 17.085668][ T343] ____sys_sendmsg+0x52e/0x6a0 [ 17.085901][ T343] ___sys_sendmsg+0xdf/0x150 [ 17.086128][ T343] __sys_sendmsg+0x161/0x1d0 [ 17.086354][ T343] do_syscall_64+0x6d/0x140 [ 17.086584][ T343] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 17.086872][ T343] [ 17.086872][ T343] -> #2 (devices_rwsem){++++}-{3:3}: [ 17.087171][ T343] down_read+0x96/0x450 [ 17.087322][ T343] ib_device_set_netdev.part.0+0x36b/0x640 
[ib_core] [ 17.087554][ T343] ib_device_set_netdev+0xb7/0xe0 [ib_core] [ 17.087749][ T343] mlx5_netdev_event+0x428/0x990 [mlx5_ib] [ 17.087945][ T343] call_netdevice_register_net_notifiers+0xdb/0x290 [ 17.088113][ T343] __register_netdevice_notifier_net+0x4b/0x70 [ 17.088277][ T343] register_netdevice_notifier_dev_net+0x53/0x160 [ 17.088448][ T343] mlx5e_mdev_notifier_event+0x8a/0xf0 [mlx5_ib] [ 17.088630][ T343] notifier_call_chain+0x96/0x270 [ 17.088773][ T343] blocking_notifier_call_chain+0x60/0x80 [ 17.088970][ T343] mlx5_core_uplink_netdev_event_replay+0x4d/0x60 [mlx5_core] [ 17.089289][ T343] mlx5_ib_roce_init+0x1f5/0x720 [mlx5_ib] [ 17.089509][ T343] __mlx5_ib_add+0x6b/0x140 [mlx5_ib] [ 17.089727][ T343] mlx5r_probe+0x24f/0x5d0 [mlx5_ib] [ 17.089951][ T343] auxiliary_bus_probe+0x9d/0xe0 [ 17.090112][ T343] really_probe+0x1cf/0x8b0 [ 17.090278][ T343] __driver_probe_device+0x190/0x370 [ 17.090464][ T343] driver_probe_device+0x4a/0x120 [ 17.090614][ T343] __driver_attach+0x195/0x470 [ 17.090761][ T343] bus_for_each_dev+0xf0/0x170 [ 17.090928][ T343] bus_add_driver+0x21d/0x4d0 [ 17.091080][ T343] driver_register+0x1a1/0x350 [ 17.091238][ T343] __auxiliary_driver_register+0x14e/0x230 [ 17.091440][ T343] cm_dev_release+0xb7/0x170 [ib_cm] [ 17.091657][ T343] do_one_initcall+0xbf/0x390 [ 17.091830][ T343] do_init_module+0x22e/0x710 [ 17.091987][ T343] load_module+0x4e40/0x65a0 [ 17.092142][ T343] init_module_from_file+0xcf/0x120 [ 17.092305][ T343] idempotent_init_module+0x22d/0x720 [ 17.092510][ T343] __x64_sys_finit_module+0xc1/0x130 [ 17.092703][ T343] do_syscall_64+0x6d/0x140 [ 17.092876][ T343] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 17.093091][ T343] [ 17.093091][ T343] -> #1 (rtnl_mutex){+.+.}-{3:3}: [ 17.093338][ T343] __mutex_lock+0x14a/0x1940 [ 17.093506][ T343] ib_get_eth_speed+0xe8/0x9c0 [ib_core] [ 17.093735][ T343] rxe_query_port+0x56/0x170 [rdma_rxe] [ 17.093950][ T343] ib_query_port+0x338/0x670 [ib_core] [ 17.094178][ T343] 
rxe_port_immutable+0x10f/0x230 [rdma_rxe] [ 17.094388][ T343] ib_register_device+0x3a2/0xac0 [ib_core] [ 17.094618][ T343] rxe_register_device+0x2cd/0x3a0 [rdma_rxe] [ 17.094842][ T343] rxe_net_add+0xaf/0x100 [rdma_rxe] [ 17.095065][ T343] rxe_newlink+0x4f/0xe0 [rdma_rxe] [ 17.095231][ T343] nldev_newlink+0x29d/0x4b0 [ib_core] [ 17.095468][ T343] rdma_nl_rcv_msg+0x2b0/0x4f0 [ib_core] [ 17.095712][ T343] rdma_nl_rcv_skb.constprop.0.isra.0+0x238/0x390 [ib_core] [ 17.096026][ T343] netlink_unicast+0x438/0x730 [ 17.096206][ T343] netlink_sendmsg+0x72a/0xbc0 [ 17.096379][ T343] __sock_sendmsg+0xc5/0x190 [ 17.096570][ T343] __sys_sendto+0x25d/0x310 [ 17.096742][ T343] __x64_sys_sendto+0xdc/0x1b0 [ 17.096928][ T343] do_syscall_64+0x6d/0x140 [ 17.097138][ T343] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 17.097362][ T343] [ 17.097362][ T343] -> #0 (&rxe->usdev_lock){+.+.}-{3:3}: [ 17.097630][ T343] __lock_acquire+0x2be0/0x6490 [ 17.097803][ T343] lock_acquire+0x1b2/0x4e0 [ 17.097974][ T343] __mutex_lock+0x14a/0x1940 [ 17.098164][ T343] rxe_query_port+0x41/0x170 [rdma_rxe] [ 17.098397][ T343] ib_query_port+0x338/0x670 [ib_core] [ 17.098639][ T343] ib_setup_port_attrs+0x194/0x4b0 [ib_core] [ 17.098878][ T343] add_one_compat_dev+0x450/0x6e0 [ib_core] [ 17.099122][ T343] enable_device_and_get+0x2ae/0x330 [ib_core] [ 17.099363][ T343] ib_register_device+0x6c0/0xac0 [ib_core] [ 17.099598][ T343] rxe_register_device+0x2cd/0x3a0 [rdma_rxe] [ 17.099824][ T343] rxe_net_add+0xaf/0x100 [rdma_rxe] [ 17.100049][ T343] rxe_newlink+0x4f/0xe0 [rdma_rxe] [ 17.100272][ T343] nldev_newlink+0x29d/0x4b0 [ib_core] [ 17.100496][ T343] rdma_nl_rcv_msg+0x2b0/0x4f0 [ib_core] [ 17.100755][ T343] rdma_nl_rcv_skb.constprop.0.isra.0+0x238/0x390 [ib_core] [ 17.101023][ T343] netlink_unicast+0x438/0x730 [ 17.101204][ T343] netlink_sendmsg+0x72a/0xbc0 [ 17.101354][ T343] __sock_sendmsg+0xc5/0x190 [ 17.101511][ T343] __sys_sendto+0x25d/0x310 [ 17.101660][ T343] 
__x64_sys_sendto+0xdc/0x1b0 [ 17.101819][ T343] do_syscall_64+0x6d/0x140 [ 17.101969][ T343] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 17.102149][ T343] [ 17.102149][ T343] other info that might help us debug this: [ 17.102149][ T343] [ 17.102462][ T343] Chain exists of: [ 17.102462][ T343] &rxe->usdev_lock --> devices_rwsem --> &device->compat_devs_mutex [ 17.102462][ T343] [ 17.102835][ T343] Possible unsafe locking scenario: [ 17.102835][ T343] [ 17.103073][ T343] CPU0 CPU1 [ 17.103219][ T343] ---- ---- [ 17.103364][ T343] lock(&device->compat_devs_mutex); [ 17.103514][ T343] lock(devices_rwsem); [ 17.103692][ T343] lock(&device->compat_devs_mutex); [ 17.103909][ T343] lock(&rxe->usdev_lock); [ 17.104057][ T343] [ 17.104057][ T343] *** DEADLOCK *** [ 17.104057][ T343] [ 17.104290][ T343] 5 locks held by rdma/343: [ 17.104450][ T343] #0: ffffffffa06fcff8 (&rdma_nl_types[idx].sem){.+.+}-{3:3}, at: rdma_nl_rcv_msg+0x125/0x4f0 [ib_core] [ 17.104767][ T343] #1: ffffffffa06f4f50 (link_ops_rwsem){++++}-{3:3}, at: nldev_newlink+0x37f/0x4b0 [ib_core] [ 17.105109][ T343] #2: ffffffffa06e79d0 (devices_rwsem){++++}-{3:3}, at: enable_device_and_get+0xf9/0x330 [ib_core] [ 17.105472][ T343] #3: ffffffffa06e7750 (rdma_nets_rwsem){.+.+}-{3:3}, at: enable_device_and_get+0x250/0x330 [ib_core] [ 17.105874][ T343] #4: ffff88800ef6ce90 (&device->compat_devs_mutex){+.+.}-{3:3}, at: add_one_compat_dev+0xe4/0x6e0 [ib_core] [ 17.106365][ T343] [ 17.106365][ T343] stack backtrace: [ 17.106586][ T343] CPU: 3 UID: 0 PID: 343 Comm: rdma Not tainted 6.11.0-rc5+ #2367 [ 17.106828][ T343] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 17.107172][ T343] Call Trace: [ 17.107301][ T343] <TASK> [ 17.107397][ T343] dump_stack_lvl+0x57/0x80 [ 17.107560][ T343] check_noncircular+0x2f4/0x3d0 [ 17.107736][ T343] ? print_circular_bug+0x410/0x410 [ 17.107893][ T343] ? 
__fprop_add_percpu_max+0xb3/0x130 [ 17.108043][ T343] ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0 [ 17.108219][ T343] ? find_held_lock+0x2d/0x110 [ 17.108369][ T343] __lock_acquire+0x2be0/0x6490 [ 17.108519][ T343] ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0 [ 17.108700][ T343] ? lock_release+0x221/0x780 [ 17.108857][ T343] ? reacquire_held_locks+0x4a0/0x4a0 [ 17.109013][ T343] lock_acquire+0x1b2/0x4e0 [ 17.109172][ T343] ? rxe_query_port+0x41/0x170 [rdma_rxe] [ 17.109342][ T343] ? __lock_acquire+0x6490/0x6490 [ 17.109506][ T343] ? kernfs_add_one+0x397/0x490 [ 17.109671][ T343] ? kernfs_new_node+0x133/0x240 [ 17.109841][ T343] ? lock_is_held_type+0x81/0xe0 [ 17.110003][ T343] __mutex_lock+0x14a/0x1940 [ 17.110168][ T343] ? rxe_query_port+0x41/0x170 [rdma_rxe] [ 17.110339][ T343] ? rxe_query_port+0x41/0x170 [rdma_rxe] [ 17.110513][ T343] ? lock_release+0x221/0x780 [ 17.110679][ T343] ? kfree+0x167/0x2e0 [ 17.110814][ T343] ? mutex_lock_io_nested+0x16e0/0x16e0 [ 17.110978][ T343] ? kobject_add_internal+0x292/0x920 [ 17.111149][ T343] ? kobject_add+0x117/0x180 [ 17.111315][ T343] ? kset_create_and_add+0x160/0x160 [ 17.111484][ T343] ? rxe_query_port+0x41/0x170 [rdma_rxe] [ 17.111658][ T343] rxe_query_port+0x41/0x170 [rdma_rxe] [ 17.111836][ T343] ib_query_port+0x338/0x670 [ib_core] [ 17.112036][ T343] ib_setup_port_attrs+0x194/0x4b0 [ib_core] [ 17.112307][ T343] ? ib_free_port_attrs+0x3c0/0x3c0 [ib_core] [ 17.112539][ T343] ? __init_waitqueue_head+0xcb/0x150 [ 17.112710][ T343] add_one_compat_dev+0x450/0x6e0 [ib_core] [ 17.112950][ T343] enable_device_and_get+0x2ae/0x330 [ib_core] [ 17.113185][ T343] ? add_client_context+0x430/0x430 [ib_core] [ 17.113416][ T343] ? rdma_counter_init+0x139/0x390 [ib_core] [ 17.113656][ T343] ib_register_device+0x6c0/0xac0 [ib_core] [ 17.113894][ T343] ? ib_device_get_netdev+0x3a0/0x3a0 [ib_core] [ 17.114124][ T343] ? crypto_alg_mod_lookup+0x23b/0x3d0 [ 17.114289][ T343] ? 
crypto_alloc_tfm_node+0xd5/0x1e0 [ 17.114455][ T343] rxe_register_device+0x2cd/0x3a0 [rdma_rxe] [ 17.114667][ T343] rxe_net_add+0xaf/0x100 [rdma_rxe] [ 17.114846][ T343] rxe_newlink+0x4f/0xe0 [rdma_rxe] [ 17.115020][ T343] nldev_newlink+0x29d/0x4b0 [ib_core] [ 17.115216][ T343] ? nldev_port_get_dumpit+0x7a0/0x7a0 [ib_core] [ 17.115454][ T343] ? __lock_acquire+0x6490/0x6490 [ 17.115621][ T343] ? lock_release+0x221/0x780 [ 17.115795][ T343] ? lock_chain_count+0x20/0x20 [ 17.115965][ T343] ? security_capable+0x68/0xa0 [ 17.116130][ T343] rdma_nl_rcv_msg+0x2b0/0x4f0 [ib_core] [ 17.116321][ T343] ? rdma_nl_multicast+0xf0/0xf0 [ib_core] [ 17.116552][ T343] ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0 [ 17.116752][ T343] ? lock_acquire+0x1b2/0x4e0 [ 17.116921][ T343] ? find_held_lock+0x2d/0x110 [ 17.117098][ T343] ? __netlink_lookup+0x339/0x670 [ 17.117277][ T343] rdma_nl_rcv_skb.constprop.0.isra.0+0x238/0x390 [ib_core] [ 17.117560][ T343] ? rdma_nl_rcv_msg+0x4f0/0x4f0 [ib_core] [ 17.117809][ T343] ? lock_release+0x221/0x780 [ 17.117970][ T343] ? netlink_deliver_tap+0xcd/0xa20 [ 17.118127][ T343] ? netlink_deliver_tap+0x152/0xa20 [ 17.118283][ T343] netlink_unicast+0x438/0x730 [ 17.118434][ T343] ? netlink_attachskb+0x710/0x710 [ 17.118602][ T343] ? lock_acquire+0x1b2/0x4e0 [ 17.118772][ T343] netlink_sendmsg+0x72a/0xbc0 [ 17.118933][ T343] ? netlink_unicast+0x730/0x730 [ 17.119099][ T343] ? reacquire_held_locks+0x4a0/0x4a0 [ 17.119270][ T343] ? __might_fault+0xae/0x120 [ 17.119423][ T343] ? netlink_unicast+0x730/0x730 [ 17.119589][ T343] __sock_sendmsg+0xc5/0x190 [ 17.119757][ T343] ? _copy_from_user+0x56/0xa0 [ 17.119921][ T343] __sys_sendto+0x25d/0x310 [ 17.120085][ T343] ? __x64_sys_getpeername+0xb0/0xb0 [ 17.120273][ T343] ? move_addr_to_user+0x54/0x80 [ 17.120446][ T343] ? __sys_getsockname+0x19d/0x230 [ 17.120617][ T343] ? fd_install+0x1c4/0x510 [ 17.120798][ T343] ? 
__sys_setsockopt+0xdc/0x160 [ 17.120969][ T343] __x64_sys_sendto+0xdc/0x1b0 [ 17.121154][ T343] ? lockdep_hardirqs_on_prepare+0x268/0x3e0 [ 17.121358][ T343] do_syscall_64+0x6d/0x140 [ 17.121524][ T343] entry_SYSCALL_64_after_hwframe+0x4b/0x53 [ 17.121723][ T343] RIP: 0033:0x7f2aadc078b7 [ 17.121895][ T343] Code: c7 c0 ff ff ff ff eb be 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 80 3d 95 17 0d 00 00 41 89 ca 74 10 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 69 c3 55 48 89 e5 53 48 83 ec 38 44 89 4d d0 [ 17.122385][ T343] RSP: 002b:00007ffccd9b5638 EFLAGS: 00000202 ORIG_RAX: 000000000000002c [ 17.122651][ T343] RAX: ffffffffffffffda RBX: 00005625dd51d320 RCX: 00007f2aadc078b7 [ 17.122908][ T343] RDX: 0000000000000030 RSI: 00005625dd51c2a0 RDI: 0000000000000004 [ 17.123158][ T343] RBP: 00007ffccd9b5670 R08: 00007f2aadcec200 R09: 000000000000000c [ 17.123401][ T343] R10: 0000000000000000 R11: 0000000000000202 R12: 00007ffccd9b58b0 [ 17.123642][ T343] R13: 00007ffccd9b5644 R14: 0000000066d5ad8b R15: 0000000000000000 [ 17.123898][ T343] </TASK>
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b2fc5a13577c..2113eb7c7573 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1351,6 +1351,30 @@ static void prevent_dealloc_device(struct ib_device *ib_dev) { } +static void ib_device_notify_register(struct ib_device *device) +{ + struct net_device *netdev; + u32 port; + int ret; + + ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT); + if (ret) + return; + + rdma_for_each_port(device, port) { + netdev = ib_device_get_netdev(device, port); + if (!netdev) + continue; + + ret = rdma_nl_notify_event(device, port, + RDMA_NETDEV_ATTACH_EVENT); + dev_put(netdev); + if (ret) + return; + } + return; +} + /** * ib_register_device - Register an IB device with IB core * @device: Device to register @@ -1449,6 +1473,8 @@ int ib_register_device(struct ib_device *device, const char *name, dev_set_uevent_suppress(&device->dev, false); /* Mark for userspace that device is ready */ kobject_uevent(&device->dev.kobj, KOBJ_ADD); + + ib_device_notify_register(device); ib_device_put(device); return 0; @@ -1491,6 +1517,7 @@ static void __ib_unregister_device(struct ib_device *ib_dev) goto out; disable_device(ib_dev); + rdma_nl_notify_event(ib_dev, 0, RDMA_UNREGISTER_EVENT); /* Expedite removing unregistered pointers from the hash table */ free_netdevs(ib_dev); @@ -2159,6 +2186,7 @@ static void add_ndev_hash(struct ib_port_data *pdata) int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, u32 port) { + enum rdma_nl_notify_event_type etype; struct net_device *old_ndev; struct ib_port_data *pdata; unsigned long flags; @@ -2190,6 +2218,16 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, spin_unlock_irqrestore(&pdata->netdev_lock, flags); add_ndev_hash(pdata); + + down_read(&devices_rwsem); + if (xa_get_mark(&devices, ib_dev->index, DEVICE_REGISTERED) && + xa_load(&devices, ib_dev->index) == ib_dev) { + etype = 
ndev ? + RDMA_NETDEV_ATTACH_EVENT : RDMA_NETDEV_DETACH_EVENT; + rdma_nl_notify_event(ib_dev, port, etype); + } + up_read(&devices_rwsem); + return 0; } EXPORT_SYMBOL(ib_device_set_netdev); diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index ae2db0c70788..def14c54b648 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -311,6 +311,7 @@ int rdma_nl_net_init(struct rdma_dev_net *rnet) struct net *net = read_pnet(&rnet->net); struct netlink_kernel_cfg cfg = { .input = rdma_nl_rcv, + .flags = NL_CFG_F_NONROOT_RECV, }; struct sock *nls; diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 4d4a1f90e484..b0354bb8ba0d 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -170,6 +170,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING }, [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 }, + [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, @@ -2722,6 +2723,123 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { }, }; +static int fill_mon_netdev_association(struct sk_buff *msg, + struct ib_device *device, u32 port, + const struct net *net) +{ + struct net_device *netdev = ib_device_get_netdev(device, port); + int ret = 0; + + if (netdev && !net_eq(dev_net(netdev), net)) + goto out; + + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index); + if (ret) + goto out; + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port); + if (ret) + goto out; + if (netdev) + ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, + netdev->ifindex); + +out: + dev_put(netdev); + return ret; +} + +static int fill_mon_register(struct sk_buff *msg, struct ib_device *device, + const struct net *net) +{ + return 
nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index); +} + +static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num, + enum rdma_nl_notify_event_type type) +{ + struct net_device *netdev; + + switch (type) { + case RDMA_REGISTER_EVENT: + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor register device event\n"); + break; + case RDMA_UNREGISTER_EVENT: + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor unregister device event\n"); + break; + case RDMA_NETDEV_ATTACH_EVENT: + netdev = ib_device_get_netdev(device, port_num); + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n", + port_num, netdev->ifindex); + dev_put(netdev); + break; + case RDMA_NETDEV_DETACH_EVENT: + dev_warn_ratelimited(&device->dev, + "Failed to send RDMA monitor netdev detach event: port %d\n", + port_num); + default: + break; + }; +} + +int rdma_nl_notify_event(struct ib_device *device, u32 port_num, + enum rdma_nl_notify_event_type type) +{ + struct sk_buff *skb; + struct net *net; + int ret = 0; + void *nlh; + + net = read_pnet(&device->coredev.rdma_net); + if (!net) + return -EINVAL; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + nlh = nlmsg_put(skb, 0, 0, + RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR), + 0, 0); + + switch (type) { + case RDMA_REGISTER_EVENT: + case RDMA_UNREGISTER_EVENT: + ret = fill_mon_register(skb, device, net); + if (ret) + goto err_free; + break; + case RDMA_NETDEV_ATTACH_EVENT: + case RDMA_NETDEV_DETACH_EVENT: + ret = fill_mon_netdev_association(skb, device, + port_num, net); + if (ret) + goto err_free; + break; + default: + ret = -EINVAL; + goto err_free; + } + + ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type); + if (ret) + goto err_free; + + nlmsg_end(skb, nlh); + ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL); + if (ret && ret != -ESRCH) { + skb = NULL; /* skb is freed in the 
netlink send-op handling */ + goto err_free; + } + return 0; + +err_free: + rdma_nl_notify_err_msg(device, port_num, type); + nlmsg_free(skb); + return ret; +} + void __init nldev_init(void) { rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index c2a79aeee113..326deaf56d5d 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -6,6 +6,8 @@ #include <linux/netlink.h> #include <uapi/rdma/rdma_netlink.h> +struct ib_device; + enum { RDMA_NLDEV_ATTR_EMPTY_STRING = 1, RDMA_NLDEV_ATTR_ENTRY_STRLEN = 16, @@ -110,6 +112,16 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb, */ bool rdma_nl_chk_listeners(unsigned int group); +/** + * Prepare and send an event message + * @ib: the IB device which triggered the event + * @port_num: the port number which triggered the event - 0 if unused + * @type: the event type + * Returns 0 on success or a negative error code + */ +int rdma_nl_notify_event(struct ib_device *ib, u32 port_num, + enum rdma_nl_notify_event_type type); + struct rdma_link_ops { struct list_head list; const char *type; diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h index 2f37568f5556..5f9636d26050 100644 --- a/include/uapi/rdma/rdma_netlink.h +++ b/include/uapi/rdma/rdma_netlink.h @@ -15,6 +15,7 @@ enum { enum { RDMA_NL_GROUP_IWPM = 2, RDMA_NL_GROUP_LS, + RDMA_NL_GROUP_NOTIFY, RDMA_NL_NUM_GROUPS }; @@ -305,6 +306,8 @@ enum rdma_nldev_command { RDMA_NLDEV_CMD_DELDEV, + RDMA_NLDEV_CMD_MONITOR, + RDMA_NLDEV_NUM_OPS }; @@ -574,6 +577,8 @@ enum rdma_nldev_attr { RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, /* u8 */ + RDMA_NLDEV_ATTR_EVENT_TYPE, /* u8 */ + /* * Always the end */ @@ -624,4 +629,14 @@ enum rdma_nl_name_assign_type { RDMA_NAME_ASSIGN_TYPE_USER = 1, /* Provided by user-space */ }; +/* + * Supported rdma monitoring event types. 
+ */ +enum rdma_nl_notify_event_type { + RDMA_REGISTER_EVENT, + RDMA_UNREGISTER_EVENT, + RDMA_NETDEV_ATTACH_EVENT, + RDMA_NETDEV_DETACH_EVENT, +}; + #endif /* _UAPI_RDMA_NETLINK_H */