diff mbox series

[RFC] net: optimise rps IPI sending

Message ID 1630058273-2400-1-git-send-email-lirongqing@baidu.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series [RFC] net: optimise rps IPI sending | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers warning 14 maintainers not CCed: bjorn@kernel.org alobakin@pm.me atenart@kernel.org masahiroy@kernel.org ap420073@gmail.com memxor@gmail.com daniel@iogearbox.net arnd@arndb.de davem@davemloft.net edumazet@google.com dvyukov@google.com maheshb@google.com kuba@kernel.org weiwan@google.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 4774 this patch: 4774
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning CHECK: Alignment should match open parenthesis WARNING: line length of 81 exceeds 80 columns WARNING: line length of 82 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 4838 this patch: 4838
netdev/header_inline success Link

Commit Message

Li RongQing Aug. 27, 2021, 9:57 a.m. UTC
In a virtualized setup, sending an IPI causes a VM exit and is
expensive, so sending IPIs one by one should be avoided when
running at the highest throughput.

smp_call_function_many() may use a PV IPI to send the IPI to
many CPUs at once.

Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 include/linux/netdevice.h  |  2 +-
 net/core/dev.c             | 32 +++++++++++++++++++++++++-------
 net/core/sysctl_net_core.c |  9 +++++++++
 3 files changed, 35 insertions(+), 8 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bd8d5b8e2de3..ccf9e3e7c33d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4137,7 +4137,7 @@  void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
 
 extern int		netdev_budget;
 extern unsigned int	netdev_budget_usecs;
-
+extern unsigned int rps_pv_send_ipi __read_mostly;
 /* Called by rtnetlink.c:rtnl_unlock() */
 void netdev_run_todo(void);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 88650791c360..e839de51b555 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -203,6 +203,8 @@  static unsigned int napi_gen_id = NR_CPUS;
 static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
 
 static DECLARE_RWSEM(devnet_rename_sem);
+unsigned int rps_pv_send_ipi __read_mostly;
+static DEFINE_PER_CPU(cpumask_var_t, rps_ipi_mask);
 
 static inline void dev_base_seq_inc(struct net *net)
 {
@@ -4529,9 +4531,9 @@  EXPORT_SYMBOL(rps_may_expire_flow);
 #endif /* CONFIG_RFS_ACCEL */
 
 /* Called from hardirq (IPI) context */
-static void rps_trigger_softirq(void *data)
+static void rps_trigger_softirq(void *data __maybe_unused)
 {
-	struct softnet_data *sd = data;
+	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
 
 	____napi_schedule(sd, &sd->backlog);
 	sd->received_rps++;
@@ -6364,12 +6366,26 @@  EXPORT_SYMBOL(__skb_gro_checksum_complete);
 static void net_rps_send_ipi(struct softnet_data *remsd)
 {
 #ifdef CONFIG_RPS
-	while (remsd) {
-		struct softnet_data *next = remsd->rps_ipi_next;
+	if (!rps_pv_send_ipi) {
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
+
+			if (cpu_online(remsd->cpu))
+				smp_call_function_single_async(remsd->cpu, &remsd->csd);
+			remsd = next;
+		}
+	} else {
+		struct cpumask *tmpmask = this_cpu_cpumask_var_ptr(rps_ipi_mask);
+
+		cpumask_clear(tmpmask);
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
 
-		if (cpu_online(remsd->cpu))
-			smp_call_function_single_async(remsd->cpu, &remsd->csd);
-		remsd = next;
+			if (cpu_online(remsd->cpu))
+				cpumask_set_cpu(remsd->cpu, tmpmask);
+			remsd = next;
+		}
+		smp_call_function_many(tmpmask, rps_trigger_softirq, NULL, false);
 	}
 #endif
 }
@@ -11627,6 +11643,8 @@  static int __init net_dev_init(void)
 #ifdef CONFIG_RPS
 		INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
 		sd->cpu = i;
+		zalloc_cpumask_var_node(&per_cpu(rps_ipi_mask, i),
+			GFP_KERNEL, cpu_to_node(i));
 #endif
 
 		init_gro_hash(&sd->backlog);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c8496c1142c9..dc807841d7c6 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -377,6 +377,15 @@  static struct ctl_table net_core_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_do_rss_key,
 	},
+	{
+		.procname	= "rps_pv_send_ipi",
+		.data		= &rps_pv_send_ipi,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 #ifdef CONFIG_BPF_JIT
 	{
 		.procname	= "bpf_jit_enable",