diff mbox series

[v2,net] net: Use this_cpu_inc() to increment net->core_stats

Message ID YmbO0pxgtKpCw4SY@linutronix.de (mailing list archive)
State Accepted
Commit 6510ea973d8d9d4a0cb2fb557b36bd1ab3eb49f6
Delegated to: Netdev Maintainers
Headers show
Series [v2,net] net: Use this_cpu_inc() to increment net->core_stats | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net, async
netdev/fixes_present fail Series targets non-next tree, but doesn't contain any Fixes tags
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4807 this patch: 4807
netdev/cc_maintainers warning 1 maintainers not CCed: petrm@nvidia.com
netdev/build_clang success Errors and warnings before: 1071 this patch: 1071
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4953 this patch: 4953
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Sebastian Andrzej Siewior April 25, 2022, 4:39 p.m. UTC
The macro dev_core_stats_##FIELD##_inc() disables preemption and invokes
netdev_core_stats_alloc() to return a per-CPU pointer.
netdev_core_stats_alloc() will allocate memory on its first invocation
which breaks on PREEMPT_RT because it requires non-atomic context for
memory allocation.

This can be avoided by enabling preemption in netdev_core_stats_alloc()
assuming the caller always disables preemption.

It might be better to replace local_inc() with this_cpu_inc() now that
dev_core_stats_##FIELD##_inc() gained a preempt-disable section and does
not rely on already disabled preemption. This results in less
instructions on x86-64:
local_inc:
|          incl %gs:__preempt_count(%rip)  # __preempt_count
|          movq    488(%rdi), %rax # _1->core_stats, _22
|          testq   %rax, %rax      # _22
|          je      .L585   #,
|          add %gs:this_cpu_off(%rip), %rax        # this_cpu_off, tcp_ptr__
|  .L586:
|          testq   %rax, %rax      # _27
|          je      .L587   #,
|          incq (%rax)            # _6->a.counter
|  .L587:
|          decl %gs:__preempt_count(%rip)  # __preempt_count

this_cpu_inc(), this patch:
|         movq    488(%rdi), %rax # _1->core_stats, _5
|         testq   %rax, %rax      # _5
|         je      .L591   #,
| .L585:
|         incq %gs:(%rax) # _18->rx_dropped

Use unsigned long as type for the counter. Use this_cpu_inc() to
increment the counter. Use a plain read of the counter.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
v1…v2:
	- Add missing __percpu annotation as noticed by Jakub + robot
	- Use READ_ONCE() in dev_get_stats() to avoid possible split
	  reads, noticed by Eric.

 include/linux/netdevice.h | 21 +++++++++------------
 net/core/dev.c            | 14 +++++---------
 2 files changed, 14 insertions(+), 21 deletions(-)

Comments

Eric Dumazet April 26, 2022, 3:43 a.m. UTC | #1
On Mon, Apr 25, 2022 at 9:39 AM Sebastian Andrzej Siewior
<bigeasy@linutronix.de> wrote:
>
> The macro dev_core_stats_##FIELD##_inc() disables preemption and invokes
> netdev_core_stats_alloc() to return a per-CPU pointer.
> netdev_core_stats_alloc() will allocate memory on its first invocation
> which breaks on PREEMPT_RT because it requires non-atomic context for
> memory allocation.
>
> This can be avoided by enabling preemption in netdev_core_stats_alloc()
> assuming the caller always disables preemption.
>
> Use unsigned long as type for the counter. Use this_cpu_inc() to
> increment the counter. Use a plain read of the counter.
>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
> v1…v2:
>         - Add missing __percpu annotation as noticed by Jakub + robot
>         - Use READ_ONCE() in dev_get_stats() to avoid possible split
>           reads, noticed by Eric.
>

SGTM, thanks.

Note this will cause a merge conflict in net-next

Reviewed-by: Eric Dumazet <edumazet@google.com>
patchwork-bot+netdevbpf@kernel.org April 27, 2022, 1 a.m. UTC | #2
Hello:

This patch was applied to netdev/net.git (master)
by Jakub Kicinski <kuba@kernel.org>:

On Mon, 25 Apr 2022 18:39:46 +0200 you wrote:
> The macro dev_core_stats_##FIELD##_inc() disables preemption and invokes
> netdev_core_stats_alloc() to return a per-CPU pointer.
> netdev_core_stats_alloc() will allocate memory on its first invocation
> which breaks on PREEMPT_RT because it requires non-atomic context for
> memory allocation.
> 
> This can be avoided by enabling preemption in netdev_core_stats_alloc()
> assuming the caller always disables preemption.
> 
> [...]

Here is the summary with links:
  - [v2,net] net: Use this_cpu_inc() to increment net->core_stats
    https://git.kernel.org/netdev/net/c/6510ea973d8d

You are awesome, thank you!
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 59e27a2b7bf04..b1fbe21650bb5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -199,10 +199,10 @@  struct net_device_stats {
  * Try to fit them in a single cache line, for dev_get_stats() sake.
  */
 struct net_device_core_stats {
-	local_t		rx_dropped;
-	local_t		tx_dropped;
-	local_t		rx_nohandler;
-} __aligned(4 * sizeof(local_t));
+	unsigned long	rx_dropped;
+	unsigned long	tx_dropped;
+	unsigned long	rx_nohandler;
+} __aligned(4 * sizeof(unsigned long));
 
 #include <linux/cache.h>
 #include <linux/skbuff.h>
@@ -3843,15 +3843,15 @@  static __always_inline bool __is_skb_forwardable(const struct net_device *dev,
 	return false;
 }
 
-struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev);
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev);
 
-static inline struct net_device_core_stats *dev_core_stats(struct net_device *dev)
+static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev)
 {
 	/* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
 	struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
 
 	if (likely(p))
-		return this_cpu_ptr(p);
+		return p;
 
 	return netdev_core_stats_alloc(dev);
 }
@@ -3859,14 +3859,11 @@  static inline struct net_device_core_stats *dev_core_stats(struct net_device *de
 #define DEV_CORE_STATS_INC(FIELD)						\
 static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev)		\
 {										\
-	struct net_device_core_stats *p;					\
+	struct net_device_core_stats __percpu *p;				\
 										\
-	preempt_disable();							\
 	p = dev_core_stats(dev);						\
-										\
 	if (p)									\
-		local_inc(&p->FIELD);						\
-	preempt_enable();							\
+		this_cpu_inc(p->FIELD);						\
 }
 DEV_CORE_STATS_INC(rx_dropped)
 DEV_CORE_STATS_INC(tx_dropped)
diff --git a/net/core/dev.c b/net/core/dev.c
index ae7f42f782e9f..19ef1006f0bf8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10304,7 +10304,7 @@  void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 }
 EXPORT_SYMBOL(netdev_stats_to_stats64);
 
-struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
 {
 	struct net_device_core_stats __percpu *p;
 
@@ -10315,11 +10315,7 @@  struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
 		free_percpu(p);
 
 	/* This READ_ONCE() pairs with the cmpxchg() above */
-	p = READ_ONCE(dev->core_stats);
-	if (!p)
-		return NULL;
-
-	return this_cpu_ptr(p);
+	return READ_ONCE(dev->core_stats);
 }
 EXPORT_SYMBOL(netdev_core_stats_alloc);
 
@@ -10356,9 +10352,9 @@  struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 
 		for_each_possible_cpu(i) {
 			core_stats = per_cpu_ptr(p, i);
-			storage->rx_dropped += local_read(&core_stats->rx_dropped);
-			storage->tx_dropped += local_read(&core_stats->tx_dropped);
-			storage->rx_nohandler += local_read(&core_stats->rx_nohandler);
+			storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
+			storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
+			storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
 		}
 	}
 	return storage;