diff mbox series

[net-next,v6,06/10] netdev-genl: Add netlink framework functions for napi

Message ID 169811123039.59034.9768807608201356902.stgit@anambiarhost.jf.intel.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series Introduce queue and NAPI support in netdev-genl (Was: Introduce NAPI queues support) | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1376 this patch: 1376
netdev/cc_maintainers warning 6 maintainers not CCed: liuhangbin@gmail.com edumazet@google.com gnault@redhat.com davem@davemloft.net lucien.xin@gmail.com daniel@iogearbox.net
netdev/build_clang success Errors and warnings before: 1389 this patch: 1389
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1401 this patch: 1401
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 162 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Nambiar, Amritha Oct. 24, 2023, 1:33 a.m. UTC
Implement the netdev netlink framework functions for
napi support. The netdev structure tracks all the napi
instances and napi fields. The napi instances and associated
parameters can be retrieved this way.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 net/core/dev.c         |    3 -
 net/core/dev.h         |    2 +
 net/core/netdev-genl.c |  116 +++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 117 insertions(+), 4 deletions(-)

Comments

Jakub Kicinski Oct. 24, 2023, 10:39 p.m. UTC | #1
On Mon, 23 Oct 2023 18:33:50 -0700 Amritha Nambiar wrote:
> +	rcu_read_lock();
> +
> +	napi = napi_by_id(napi_id);
> +	if (napi)
> +		err = netdev_nl_napi_fill_one(rsp, napi, info);
> +	else
> +		err = -EINVAL;
> +
> +	rcu_read_unlock();

Is rcu_read_lock always going to be sufficient here?
Reading of the thread, for example, without much locking could
potentially get problematic.
Nambiar, Amritha Oct. 25, 2023, 2:18 a.m. UTC | #2
On 10/24/2023 3:39 PM, Jakub Kicinski wrote:
> On Mon, 23 Oct 2023 18:33:50 -0700 Amritha Nambiar wrote:
>> +	rcu_read_lock();
>> +
>> +	napi = napi_by_id(napi_id);
>> +	if (napi)
>> +		err = netdev_nl_napi_fill_one(rsp, napi, info);
>> +	else
>> +		err = -EINVAL;
>> +
>> +	rcu_read_unlock();
> 
> Is rcu_read_lock always going to be sufficient here?
> Reading of the thread, for example, without much locking could
> potentially get problematic.

I think I was inspired by the napi_busy_loop code which works on 
napi->state under rcu_read_lock.

WRT napi_get_doit here (just thinking aloud), so if we have a reader 
thread retrieving napi from the global list for a napi_id, and a writer 
thread (maybe ethtool -L sort which is under rtnl_lock) that changes the 
napi list... Now, the reader may be working on a stale napi instance, or 
if the writer still holds lock until freeing memory containing the napi, 
then the reader may cause a crash... okay, all the more reason for the 
reader to hold rtnl_lock instead of rcu_read_lock.
diff mbox series

Patch

diff --git a/net/core/dev.c b/net/core/dev.c
index ba8fe3b6240b..d02c7a0ce4bc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -165,7 +165,6 @@  static int netif_rx_internal(struct sk_buff *skb);
 static int call_netdevice_notifiers_extack(unsigned long val,
 					   struct net_device *dev,
 					   struct netlink_ext_ack *extack);
-static struct napi_struct *napi_by_id(unsigned int napi_id);
 
 /*
  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -6167,7 +6166,7 @@  bool napi_complete_done(struct napi_struct *n, int work_done)
 EXPORT_SYMBOL(napi_complete_done);
 
 /* must be called under rcu_read_lock(), as we dont take a reference */
-static struct napi_struct *napi_by_id(unsigned int napi_id)
+struct napi_struct *napi_by_id(unsigned int napi_id)
 {
 	unsigned int hash = napi_id % HASH_SIZE(napi_hash);
 	struct napi_struct *napi;
diff --git a/net/core/dev.h b/net/core/dev.h
index fa2e9c5c4122..91147f3851d0 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -139,4 +139,6 @@  static inline void netif_set_gro_ipv4_max_size(struct net_device *dev,
 }
 
 int rps_cpumask_housekeeping(struct cpumask *mask);
+
+struct napi_struct *napi_by_id(unsigned int napi_id);
 #endif
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 27d7f9dff228..d6ef7fc093d0 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -7,13 +7,16 @@ 
 #include <net/sock.h>
 #include <net/xdp.h>
 #include <net/netdev_rx_queue.h>
+#include <net/busy_poll.h>
 
 #include "netdev-genl-gen.h"
+#include "dev.h"
 
 struct netdev_nl_dump_ctx {
 	unsigned long	ifindex;
 	unsigned int	rxq_idx;
 	unsigned int	txq_idx;
+	unsigned int	napi_id;
 };
 
 static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
@@ -144,14 +147,123 @@  int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+static int
+netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
+			const struct genl_info *info)
+{
+	void *hdr;
+
+	if (WARN_ON_ONCE(!napi->dev))
+		return -EINVAL;
+
+	hdr = genlmsg_iput(rsp, info);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (napi->napi_id >= MIN_NAPI_ID &&
+	    nla_put_u32(rsp, NETDEV_A_NAPI_NAPI_ID, napi->napi_id))
+		goto nla_put_failure;
+
+	if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex))
+		goto nla_put_failure;
+
+	genlmsg_end(rsp, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(rsp, hdr);
+	return -EMSGSIZE;
+}
+
 int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
 {
-	return -EOPNOTSUPP;
+	struct napi_struct *napi;
+	struct sk_buff *rsp;
+	u32 napi_id;
+	int err;
+
+	if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_NAPI_ID))
+		return -EINVAL;
+
+	napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_NAPI_ID]);
+
+	rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!rsp)
+		return -ENOMEM;
+
+	rcu_read_lock();
+
+	napi = napi_by_id(napi_id);
+	if (napi)
+		err = netdev_nl_napi_fill_one(rsp, napi, info);
+	else
+		err = -EINVAL;
+
+	rcu_read_unlock();
+
+	if (err)
+		goto err_free_msg;
+
+	return genlmsg_reply(rsp, info);
+
+err_free_msg:
+	nlmsg_free(rsp);
+	return err;
+}
+
+static int
+netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
+			const struct genl_info *info,
+			struct netdev_nl_dump_ctx *ctx)
+{
+	struct napi_struct *napi;
+	int err = 0;
+
+	list_for_each_entry(napi, &netdev->napi_list, dev_list) {
+		if (ctx->napi_id && napi->napi_id >= ctx->napi_id)
+			continue;
+
+		err = netdev_nl_napi_fill_one(rsp, napi, info);
+		if (err)
+			return err;
+		ctx->napi_id = napi->napi_id;
+	}
+	return err;
 }
 
 int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	return -EOPNOTSUPP;
+	struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
+	const struct genl_info *info = genl_info_dump(cb);
+	struct net *net = sock_net(skb->sk);
+	struct net_device *netdev;
+	u32 ifindex = 0;
+	int err = 0;
+
+	if (info->attrs[NETDEV_A_NAPI_IFINDEX])
+		ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);
+
+	rtnl_lock();
+	if (ifindex) {
+		netdev = __dev_get_by_index(net, ifindex);
+		if (netdev)
+			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
+		else
+			err = -ENODEV;
+	} else {
+		for_each_netdev_dump(net, netdev, ctx->ifindex) {
+			err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
+			if (err < 0)
+				break;
+			ctx->napi_id = 0;
+		}
+	}
+	rtnl_unlock();
+
+	if (err != -EMSGSIZE)
+		return err;
+
+	return skb->len;
 }
 
 static int