diff mbox series

[net-next] drop_monitor: remove quadratic behavior

Message ID 20220222220450.1154948-1-eric.dumazet@gmail.com (mailing list archive)
State Accepted
Commit b26ef81c46ed15d11ddddba9ba1cd52c749385ad
Delegated to: Netdev Maintainers
Headers show
Series [net-next] drop_monitor: remove quadratic behavior | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4827 this patch: 4827
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang success Errors and warnings before: 822 this patch: 822
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4982 this patch: 4982
netdev/checkpatch warning CHECK: Unnecessary parentheses around 'dev->stats.rx_dropped != stat->last_drop_val'
netdev/kdoc fail Errors and warnings before: 0 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

Eric Dumazet Feb. 22, 2022, 10:04 p.m. UTC
From: Eric Dumazet <edumazet@google.com>

drop_monitor is using an unique list on which all netdevices in
the host have an element, regardless of their netns.

This scales poorly, not only at device unregister time (what I
caught during my netns dismantle stress tests), but also at packet
processing time whenever trace_napi_poll_hit() is called.

If the intent was to avoid adding one pointer in 'struct net_device'
then surely we prefer O(1) behavior.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
---
 include/linux/netdevice.h |  4 +-
 net/core/drop_monitor.c   | 79 ++++++++++++---------------------------
 2 files changed, 27 insertions(+), 56 deletions(-)

Comments

patchwork-bot+netdevbpf@kernel.org Feb. 23, 2022, 12:50 p.m. UTC | #1
Hello:

This patch was applied to netdev/net-next.git (master)
by David S. Miller <davem@davemloft.net>:

On Tue, 22 Feb 2022 14:04:50 -0800 you wrote:
> From: Eric Dumazet <edumazet@google.com>
> 
> drop_monitor is using an unique list on which all netdevices in
> the host have an element, regardless of their netns.
> 
> This scales poorly, not only at device unregister time (what I
> caught during my netns dismantle stress tests), but also at packet
> processing time whenever trace_napi_poll_hit() is called.
> 
> [...]

Here is the summary with links:
  - [net-next] drop_monitor: remove quadratic behavior
    https://git.kernel.org/netdev/net-next/c/b26ef81c46ed

You are awesome, thank you!
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 93fc680b658f0f1b31c4d33a34e21b705097ad6f..c79ee22962961810f5f8f2f4529159fd308b8fc6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2236,7 +2236,9 @@  struct net_device {
 #if IS_ENABLED(CONFIG_MRP)
 	struct mrp_port __rcu	*mrp_port;
 #endif
-
+#if IS_ENABLED(CONFIG_NET_DROP_MONITOR)
+	struct dm_hw_stat_delta __rcu *dm_private;
+#endif
 	struct device		dev;
 	const struct attribute_group *sysfs_groups[4];
 	const struct attribute_group *sysfs_rx_queue_group;
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index d7d177f75d43c6fbcdbbf2facaa18c120a541ffb..b89e3e95bffc0f50d477e86959f2bb42af9dce46 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -64,7 +64,6 @@  static const char * const drop_reasons[] = {
 /* net_dm_mutex
  *
  * An overall lock guarding every operation coming from userspace.
- * It also guards the global 'hw_stats_list' list.
  */
 static DEFINE_MUTEX(net_dm_mutex);
 
@@ -100,11 +99,9 @@  struct per_cpu_dm_data {
 };
 
 struct dm_hw_stat_delta {
-	struct net_device *dev;
 	unsigned long last_rx;
-	struct list_head list;
-	struct rcu_head rcu;
 	unsigned long last_drop_val;
+	struct rcu_head rcu;
 };
 
 static struct genl_family net_drop_monitor_family;
@@ -115,7 +112,6 @@  static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
 static int dm_hit_limit = 64;
 static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
-static LIST_HEAD(hw_stats_list);
 
 static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
 static u32 net_dm_trunc_len;
@@ -287,33 +283,27 @@  static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
 static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 				int work, int budget)
 {
-	struct dm_hw_stat_delta *new_stat;
-
+	struct net_device *dev = napi->dev;
+	struct dm_hw_stat_delta *stat;
 	/*
 	 * Don't check napi structures with no associated device
 	 */
-	if (!napi->dev)
+	if (!dev)
 		return;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
-		struct net_device *dev;
-
+	stat = rcu_dereference(dev->dm_private);
+	if (stat) {
 		/*
 		 * only add a note to our monitor buffer if:
-		 * 1) this is the dev we received on
-		 * 2) its after the last_rx delta
-		 * 3) our rx_dropped count has gone up
+		 * 1) its after the last_rx delta
+		 * 2) our rx_dropped count has gone up
 		 */
-		/* Paired with WRITE_ONCE() in dropmon_net_event() */
-		dev = READ_ONCE(new_stat->dev);
-		if ((dev == napi->dev)  &&
-		    (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
-		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
+		if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) &&
+		    (dev->stats.rx_dropped != stat->last_drop_val)) {
 			trace_drop_common(NULL, NULL);
-			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
-			new_stat->last_rx = jiffies;
-			break;
+			stat->last_drop_val = dev->stats.rx_dropped;
+			stat->last_rx = jiffies;
 		}
 	}
 	rcu_read_unlock();
@@ -1198,7 +1188,6 @@  static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 
 static void net_dm_trace_off_set(void)
 {
-	struct dm_hw_stat_delta *new_stat, *temp;
 	const struct net_dm_alert_ops *ops;
 	int cpu;
 
@@ -1222,13 +1211,6 @@  static void net_dm_trace_off_set(void)
 			consume_skb(skb);
 	}
 
-	list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
-		if (new_stat->dev == NULL) {
-			list_del_rcu(&new_stat->list);
-			kfree_rcu(new_stat, rcu);
-		}
-	}
-
 	module_put(THIS_MODULE);
 }
 
@@ -1589,41 +1571,28 @@  static int dropmon_net_event(struct notifier_block *ev_block,
 			     unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct dm_hw_stat_delta *new_stat = NULL;
-	struct dm_hw_stat_delta *tmp;
+	struct dm_hw_stat_delta *stat;
 
 	switch (event) {
 	case NETDEV_REGISTER:
-		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);
+		if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private)))
+			break;
+		stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+		if (!stat)
+			break;
 
-		if (!new_stat)
-			goto out;
+		stat->last_rx = jiffies;
+		rcu_assign_pointer(dev->dm_private, stat);
 
-		new_stat->dev = dev;
-		new_stat->last_rx = jiffies;
-		mutex_lock(&net_dm_mutex);
-		list_add_rcu(&new_stat->list, &hw_stats_list);
-		mutex_unlock(&net_dm_mutex);
 		break;
 	case NETDEV_UNREGISTER:
-		mutex_lock(&net_dm_mutex);
-		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
-			if (new_stat->dev == dev) {
-
-				/* Paired with READ_ONCE() in trace_napi_poll_hit() */
-				WRITE_ONCE(new_stat->dev, NULL);
-
-				if (trace_state == TRACE_OFF) {
-					list_del_rcu(&new_stat->list);
-					kfree_rcu(new_stat, rcu);
-					break;
-				}
-			}
+		stat = rtnl_dereference(dev->dm_private);
+		if (stat) {
+			rcu_assign_pointer(dev->dm_private, NULL);
+			kfree_rcu(stat, rcu);
 		}
-		mutex_unlock(&net_dm_mutex);
 		break;
 	}
-out:
 	return NOTIFY_DONE;
 }