diff mbox series

[v1,net-next,1/8] rtnetlink: Introduce struct rtnl_nets and helpers.

Message ID 20241105020514.41963-2-kuniyu@amazon.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series rtnetlink: Convert rtnl_newlink() to per-netns RTNL. | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 16 this patch: 16
netdev/build_tools success Errors and warnings before: 2 (+0) this patch: 2 (+0)
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 31 this patch: 31
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success Found: 'put_net(' was: 2 now: 2
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 2866 this patch: 2866
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 117 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 13 this patch: 13
netdev/source_inline success Was 0 now: 0
netdev/contest fail net-next-2024-11-05--09-00 (tests: 777)

Commit Message

Kuniyuki Iwashima Nov. 5, 2024, 2:05 a.m. UTC
rtnl_newlink() needs to hold 3 per-netns RTNL: 2 for a new device
and 1 for its peer.

We will add rtnl_nets_lock() later, which performs the nested locking
based on struct rtnl_nets, which has an array of struct net pointers.

rtnl_nets_add() adds a net pointer to the array and sorts it so that
rtnl_nets_lock() can simply acquire per-netns RTNL from array[0] to [2].

Before calling rtnl_nets_add(), get_net() must be called for the net,
and rtnl_nets_destroy() will call put_net() for each.

Let's apply the helpers to rtnl_newlink().

When CONFIG_DEBUG_NET_SMALL_RTNL is disabled, we do not call
rtnl_net_lock() thus do not care about the array order, so
rtnl_net_cmp_locks() returns -1 so that the loop in rtnl_nets_add()
can be optimised to NOP.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 include/net/rtnetlink.h | 10 +++++++
 net/core/rtnetlink.c    | 63 +++++++++++++++++++++++++++++++++++++++--
 2 files changed, 70 insertions(+), 3 deletions(-)

Comments

Eric Dumazet Nov. 5, 2024, 10:16 a.m. UTC | #1
On Tue, Nov 5, 2024 at 3:05 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> rtnl_newlink() needs to hold 3 per-netns RTNL: 2 for a new device
> and 1 for its peer.
>
> We will add rtnl_nets_lock() later, which performs the nested locking
> based on struct rtnl_nets, which has an array of struct net pointers.
>
> rtnl_nets_add() adds a net pointer to the array and sorts it so that
> rtnl_nets_lock() can simply acquire per-netns RTNL from array[0] to [2].
>
> Before calling rtnl_nets_add(), get_net() must be called for the net,
> and rtnl_nets_destroy() will call put_net() for each.
>
> Let's apply the helpers to rtnl_newlink().
>
> When CONFIG_DEBUG_NET_SMALL_RTNL is disabled, we do not call
> rtnl_net_lock() thus do not care about the array order, so
> rtnl_net_cmp_locks() returns -1 so that the loop in rtnl_nets_add()
> can be optimised to NOP.
>
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>

Reviewed-by: Eric Dumazet <edumazet@google.com>
Jakub Kicinski Nov. 6, 2024, 12:35 a.m. UTC | #2
On Mon, 4 Nov 2024 18:05:07 -0800 Kuniyuki Iwashima wrote:
> +EXPORT_SYMBOL(rtnl_nets_add);

Will you need the export later?
Current series doesn't use it in modules.
Kuniyuki Iwashima Nov. 6, 2024, 12:41 a.m. UTC | #3
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 5 Nov 2024 16:35:06 -0800
> On Mon, 4 Nov 2024 18:05:07 -0800 Kuniyuki Iwashima wrote:
> > +EXPORT_SYMBOL(rtnl_nets_add);
> 
> Will you need the export later?
> Current series doesn't use it in modules.

Ah, good catch.  I used it in each drivers in v0 but factorised
it to core, so the symbol no longer need to be exported.

Will remove in v2.

Thanks!
diff mbox series

Patch

diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index b260c0cc9671..814364367dd7 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -68,6 +68,16 @@  static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
 		return AF_UNSPEC;
 }
 
+struct rtnl_nets {
+	/* ->newlink() needs to freeze 3 netns at most;
+	 * 2 for the new device, 1 for its peer.
+	 */
+	struct net *net[3];
+	unsigned char len;
+};
+
+void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net);
+
 /**
  *	struct rtnl_link_ops - rtnetlink link operations
  *
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3b33810d92a8..f98706ad390a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -258,8 +258,60 @@  bool lockdep_rtnl_net_is_held(struct net *net)
 	return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex);
 }
 EXPORT_SYMBOL(lockdep_rtnl_net_is_held);
+#else
+static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
+{
+	/* No need to swap */
+	return -1;
+}
 #endif
 
+static void rtnl_nets_init(struct rtnl_nets *rtnl_nets)
+{
+	memset(rtnl_nets, 0, sizeof(*rtnl_nets));
+}
+
+static void rtnl_nets_destroy(struct rtnl_nets *rtnl_nets)
+{
+	int i;
+
+	for (i = 0; i < rtnl_nets->len; i++) {
+		put_net(rtnl_nets->net[i]);
+		rtnl_nets->net[i] = NULL;
+	}
+
+	rtnl_nets->len = 0;
+}
+
+/**
+ * rtnl_nets_add - Add netns to be locked before ->newlink().
+ *
+ * @rtnl_nets: rtnl_nets pointer passed to ->get_peer_net().
+ * @net: netns pointer with an extra refcnt held.
+ *
+ * The extra refcnt is released in rtnl_nets_destroy().
+ */
+void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net)
+{
+	int i;
+
+	DEBUG_NET_WARN_ON_ONCE(rtnl_nets->len == ARRAY_SIZE(rtnl_nets->net));
+
+	for (i = 0; i < rtnl_nets->len; i++) {
+		switch (rtnl_net_cmp_locks(rtnl_nets->net[i], net)) {
+		case 0:
+			put_net(net);
+			return;
+		case 1:
+			swap(rtnl_nets->net[i], net);
+		}
+	}
+
+	rtnl_nets->net[i] = net;
+	rtnl_nets->len++;
+}
+EXPORT_SYMBOL(rtnl_nets_add);
+
 static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
 
 static inline int rtm_msgindex(int msgtype)
@@ -3796,6 +3848,7 @@  static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net *tgt_net, *link_net = NULL;
 	struct rtnl_link_ops *ops = NULL;
 	struct rtnl_newlink_tbs *tbs;
+	struct rtnl_nets rtnl_nets;
 	int ops_srcu_index;
 	int ret;
 
@@ -3839,6 +3892,8 @@  static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 #endif
 	}
 
+	rtnl_nets_init(&rtnl_nets);
+
 	if (ops) {
 		if (ops->maxtype > RTNL_MAX_TYPE) {
 			ret = -EINVAL;
@@ -3868,6 +3923,8 @@  static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto put_ops;
 	}
 
+	rtnl_nets_add(&rtnl_nets, tgt_net);
+
 	if (tb[IFLA_LINK_NETNSID]) {
 		int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
 
@@ -3878,6 +3935,8 @@  static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 			goto put_net;
 		}
 
+		rtnl_nets_add(&rtnl_nets, link_net);
+
 		if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) {
 			ret = -EPERM;
 			goto put_net;
@@ -3887,9 +3946,7 @@  static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
 	ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, tbs, data, extack);
 
 put_net:
-	if (link_net)
-		put_net(link_net);
-	put_net(tgt_net);
+	rtnl_nets_destroy(&rtnl_nets);
 put_ops:
 	if (ops)
 		rtnl_link_ops_put(ops, ops_srcu_index);