
[net-next,v4,07/12] net: hold netdev instance lock during ndo_bpf

Message ID 20250218020948.160643-8-sdf@fomichev.me (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series net: Hold netdev instance lock during ndo operations

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 40 this patch: 40
netdev/build_tools success Errors and warnings before: 26 (+1) this patch: 26 (+1)
netdev/cc_maintainers warning 19 maintainers not CCed: ast@kernel.org jonathan.lemon@gmail.com jolsa@kernel.org magnus.karlsson@intel.com john.fastabend@gmail.com bpf@vger.kernel.org andrew+netdev@lunn.ch daniel@iogearbox.net kpsingh@kernel.org yonghong.song@linux.dev horms@kernel.org andrii@kernel.org song@kernel.org bjorn@kernel.org haoluo@google.com eddyz87@gmail.com hawk@kernel.org maciej.fijalkowski@intel.com martin.lau@linux.dev
netdev/build_clang success Errors and warnings before: 541 this patch: 541
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4116 this patch: 4116
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 138 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 85 this patch: 85
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2025-02-18--12-00 (tests: 891)

Commit Message

Stanislav Fomichev Feb. 18, 2025, 2:09 a.m. UTC
Cover the paths that come in via the bpf system call and XSK bind.

Cc: Saeed Mahameed <saeed@kernel.org>
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
---
 include/linux/netdevice.h |  1 +
 kernel/bpf/offload.c      |  6 ++++--
 net/core/dev.c            | 13 +++++++++++--
 net/core/dev_api.c        | 12 ++++++++++++
 net/xdp/xsk.c             |  3 +++
 net/xdp/xsk_buff_pool.c   |  2 ++
 6 files changed, 33 insertions(+), 4 deletions(-)
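
With this change, ndo_bpf is invoked with the per-netdev instance lock held
on the XDP attach and XSK pool assignment paths, and the bpf offload map
path takes the same lock; the XDP install and XSK pool paths assert the
lock before calling into the driver. As a rough driver-side illustration
(a sketch only; example_ndo_bpf and example_setup_xdp are made-up names,
not part of this series), a handler could now assert the instance lock
rather than relying on rtnl alone:

static int example_ndo_bpf(struct net_device *dev, struct netdev_bpf *bpf)
{
	netdev_ops_assert_locked(dev);	/* held by the core on ndo_bpf paths */

	switch (bpf->command) {
	case XDP_SETUP_PROG:
		/* hypothetical driver helper */
		return example_setup_xdp(dev, bpf->prog, bpf->extack);
	default:
		return -EINVAL;
	}
}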

Comments

Jakub Kicinski Feb. 19, 2025, 3:02 a.m. UTC | #1
On Mon, 17 Feb 2025 18:09:43 -0800 Stanislav Fomichev wrote:
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -528,10 +528,10 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
>  		return ERR_PTR(-ENOMEM);
>  
>  	bpf_map_init_from_attr(&offmap->map, attr);
> -
>  	rtnl_lock();
> -	down_write(&bpf_devs_lock);
>  	offmap->netdev = __dev_get_by_index(net, attr->map_ifindex);
> +	netdev_lock_ops(offmap->netdev);
> +	down_write(&bpf_devs_lock);
>  	err = bpf_dev_offload_check(offmap->netdev);
>  	if (err)
>  		goto err_unlock;
> @@ -548,12 +548,14 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
>  
>  	list_add_tail(&offmap->offloads, &ondev->maps);
>  	up_write(&bpf_devs_lock);
> +	netdev_unlock_ops(offmap->netdev);
>  	rtnl_unlock();
>  
>  	return &offmap->map;
>  
>  err_unlock:
>  	up_write(&bpf_devs_lock);
> +	netdev_unlock_ops(offmap->netdev);
>  	rtnl_unlock();
>  	bpf_map_area_free(offmap);
>  	return ERR_PTR(err);

Any reason for this lock ordering? My intuition would be from biggest
to smallest, so rtnl_lock -> sem -> instance
Stanislav Fomichev Feb. 19, 2025, 4:54 a.m. UTC | #2
On 02/18, Jakub Kicinski wrote:
> On Mon, 17 Feb 2025 18:09:43 -0800 Stanislav Fomichev wrote:
> > --- a/kernel/bpf/offload.c
> > +++ b/kernel/bpf/offload.c
> > @@ -528,10 +528,10 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
> >  		return ERR_PTR(-ENOMEM);
> >  
> >  	bpf_map_init_from_attr(&offmap->map, attr);
> > -
> >  	rtnl_lock();
> > -	down_write(&bpf_devs_lock);
> >  	offmap->netdev = __dev_get_by_index(net, attr->map_ifindex);
> > +	netdev_lock_ops(offmap->netdev);
> > +	down_write(&bpf_devs_lock);
> >  	err = bpf_dev_offload_check(offmap->netdev);
> >  	if (err)
> >  		goto err_unlock;
> > @@ -548,12 +548,14 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
> >  
> >  	list_add_tail(&offmap->offloads, &ondev->maps);
> >  	up_write(&bpf_devs_lock);
> > +	netdev_unlock_ops(offmap->netdev);
> >  	rtnl_unlock();
> >  
> >  	return &offmap->map;
> >  
> >  err_unlock:
> >  	up_write(&bpf_devs_lock);
> > +	netdev_unlock_ops(offmap->netdev);
> >  	rtnl_unlock();
> >  	bpf_map_area_free(offmap);
> >  	return ERR_PTR(err);
> 
> Any reason for this lock ordering? My intuition would be from biggest
> to smallest, so rtnl_lock -> sem -> instance

From rtnl we take the following:

rtnl_newlink
  rtnl_lock
  do_setlink
    netdev_lock_ops
    dev_change_xdp_fd
      dev_xdp_attach
        bpf_offload_dev_match
          down_read(bpf_devs_lock)

So I made the bpf syscall path look similar:

map_create
  bpf_map_offload_map_alloc
    rtnl_lock
    netdev_lock_ops
    down_write(bpf_devs_lock)
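
Both chains end up with the same nesting; a simplified sketch of the
map-alloc path above (error handling omitted):

	rtnl_lock();
	netdev_lock_ops(dev);		/* per-netdev instance lock */
	down_write(&bpf_devs_lock);	/* down_read() on the do_setlink path */
	/* ... offload setup ... */
	up_write(&bpf_devs_lock);
	netdev_unlock_ops(dev);
	rtnl_unlock();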

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc707b2bbf4b..11873947527d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4238,6 +4238,7 @@  struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 
 int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 u8 dev_xdp_prog_count(struct net_device *dev);
+int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf);
 int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf);
 u8 dev_xdp_sb_prog_count(struct net_device *dev);
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 1a4fec330eaa..a10153c3be2d 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -528,10 +528,10 @@  struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 		return ERR_PTR(-ENOMEM);
 
 	bpf_map_init_from_attr(&offmap->map, attr);
-
 	rtnl_lock();
-	down_write(&bpf_devs_lock);
 	offmap->netdev = __dev_get_by_index(net, attr->map_ifindex);
+	netdev_lock_ops(offmap->netdev);
+	down_write(&bpf_devs_lock);
 	err = bpf_dev_offload_check(offmap->netdev);
 	if (err)
 		goto err_unlock;
@@ -548,12 +548,14 @@  struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
 
 	list_add_tail(&offmap->offloads, &ondev->maps);
 	up_write(&bpf_devs_lock);
+	netdev_unlock_ops(offmap->netdev);
 	rtnl_unlock();
 
 	return &offmap->map;
 
 err_unlock:
 	up_write(&bpf_devs_lock);
+	netdev_unlock_ops(offmap->netdev);
 	rtnl_unlock();
 	bpf_map_area_free(offmap);
 	return ERR_PTR(err);
diff --git a/net/core/dev.c b/net/core/dev.c
index 25696caa1071..b5a607bbf4a8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9637,7 +9637,7 @@  u8 dev_xdp_sb_prog_count(struct net_device *dev)
 	return count;
 }
 
-int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
+int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
 {
 	if (!dev->netdev_ops->ndo_bpf)
 		return -EOPNOTSUPP;
@@ -9657,7 +9657,6 @@  int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
 
 	return dev->netdev_ops->ndo_bpf(dev, bpf);
 }
-EXPORT_SYMBOL_GPL(dev_xdp_propagate);
 
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
 {
@@ -9687,6 +9686,8 @@  static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
 	struct netdev_bpf xdp;
 	int err;
 
+	netdev_ops_assert_locked(dev);
+
 	if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
 	    prog && !prog->aux->xdp_has_frags) {
 		NL_SET_ERR_MSG(extack, "unable to install XDP to device using tcp-data-split");
@@ -9919,7 +9920,9 @@  static void bpf_xdp_link_release(struct bpf_link *link)
 	 * already NULL, in which case link was already auto-detached
 	 */
 	if (xdp_link->dev) {
+		netdev_lock_ops(xdp_link->dev);
 		WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
+		netdev_unlock_ops(xdp_link->dev);
 		xdp_link->dev = NULL;
 	}
 
@@ -10001,10 +10004,12 @@  static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
 		goto out_unlock;
 	}
 
+	netdev_lock_ops(xdp_link->dev);
 	mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags);
 	bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode);
 	err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL,
 			      xdp_link->flags, new_prog);
+	netdev_unlock_ops(xdp_link->dev);
 	if (err)
 		goto out_unlock;
 
@@ -10790,7 +10795,9 @@  int register_netdevice(struct net_device *dev)
 	if (ret)
 		goto err_uninit_notify;
 
+	netdev_lock_ops(dev);
 	__netdev_update_features(dev);
+	netdev_unlock_ops(dev);
 
 	/*
 	 *	Default initial state at registry is that the
@@ -11729,7 +11736,9 @@  void unregister_netdevice_many_notify(struct list_head *head,
 		/* Shutdown queueing discipline. */
 		dev_shutdown(dev);
 		dev_tcx_uninstall(dev);
+		netdev_lock_ops(dev);
 		dev_xdp_uninstall(dev);
+		netdev_unlock_ops(dev);
 		bpf_dev_bound_netdev_unregister(dev);
 		dev_memory_provider_uninstall(dev);
 
diff --git a/net/core/dev_api.c b/net/core/dev_api.c
index 87a62022ef1c..0db20ed086d3 100644
--- a/net/core/dev_api.c
+++ b/net/core/dev_api.c
@@ -317,3 +317,15 @@  int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 	return ret;
 }
 EXPORT_SYMBOL(dev_set_mac_address);
+
+int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
+{
+	int ret;
+
+	netdev_lock_ops(dev);
+	ret = netif_xdp_propagate(dev, bpf);
+	netdev_unlock_ops(dev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_xdp_propagate);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 89d2bef96469..277c97c58c09 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1178,6 +1178,8 @@  static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		goto out_release;
 	}
 
+	netdev_lock_ops(dev);
+
 	if (!xs->rx && !xs->tx) {
 		err = -EINVAL;
 		goto out_unlock;
@@ -1312,6 +1314,7 @@  static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		smp_wmb();
 		WRITE_ONCE(xs->state, XSK_BOUND);
 	}
+	netdev_unlock_ops(dev);
 out_release:
 	mutex_unlock(&xs->mutex);
 	rtnl_unlock();
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index c263fb7a68dc..0e6ca568fdee 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -1,5 +1,6 @@ 
 // SPDX-License-Identifier: GPL-2.0
 
+#include <linux/netdevice.h>
 #include <net/xsk_buff_pool.h>
 #include <net/xdp_sock.h>
 #include <net/xdp_sock_drv.h>
@@ -219,6 +220,7 @@  int xp_assign_dev(struct xsk_buff_pool *pool,
 	bpf.xsk.pool = pool;
 	bpf.xsk.queue_id = queue_id;
 
+	netdev_ops_assert_locked(netdev);
 	err = netdev->netdev_ops->ndo_bpf(netdev, &bpf);
 	if (err)
 		goto err_unreg_pool;
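
For reference, the XSK bind path after this patch nests its locks roughly
as follows (a simplified sketch; socket and queue setup as well as error
paths omitted):

	/* xsk_bind() */
	rtnl_lock();
	mutex_lock(&xs->mutex);
	netdev_lock_ops(dev);		/* new: instance lock across pool setup */
	/* ... xp_assign_dev() asserts the lock and calls
	 *     netdev->netdev_ops->ndo_bpf() with XDP_SETUP_XSK_POOL ... */
	netdev_unlock_ops(dev);
	mutex_unlock(&xs->mutex);
	rtnl_unlock();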