diff mbox series

[v2,net-next,1/2] net: make default_rps_mask a per netns attribute

Message ID 25427ebf3d3f533bca446f9df4794a1b7021f318.1676635317.git.pabeni@redhat.com (mailing list archive)
State Accepted
Delegated to: Netdev Maintainers
Headers show
Series net: default_rps_mask follow-up | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 4553 this patch: 4553
netdev/cc_maintainers warning 1 maintainers not CCed: wangyufen@huawei.com
netdev/build_clang success Errors and warnings before: 1077 this patch: 1077
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 4763 this patch: 4763
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Paolo Abeni Feb. 17, 2023, 12:28 p.m. UTC
The default_rps_mask sysctl was really meant to be a per-netns attribute from the beginning.

The idea is that once proper isolation is in place in the main
namespace, additional demux in the child namespaces will be redundant.
Let's make the default rps mask of child netns empty by default.

To avoid bloating the netns with a possibly large cpumask, allocate
it on demand during the first write operation.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v1 -> v2:
 - fix build issue for !CONFIG_CPUMASK_OFFSTACK build
---
 include/linux/netdevice.h  |  1 -
 include/net/netns/core.h   |  5 ++++
 net/core/net-sysfs.c       | 23 +++++++++++------
 net/core/sysctl_net_core.c | 51 ++++++++++++++++++++++++++++----------
 4 files changed, 59 insertions(+), 21 deletions(-)

Comments

Simon Horman Feb. 21, 2023, 3:45 p.m. UTC | #1
On Fri, Feb 17, 2023 at 01:28:49PM +0100, Paolo Abeni wrote:
> That really was meant to be a per netns attribute from the beginning.
> 
> The idea is that once proper isolation is in place in the main
> namespace, additional demux in the child namespaces will be redundant.
> Let's make child netns default rps mask empty by default.
> 
> To avoid bloating the netns with a possibly large cpumask, allocate
> it on-demand during the first write operation.
> 
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>

...

> diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
> index 7130e6d9e263..74842b453407 100644
> --- a/net/core/sysctl_net_core.c
> +++ b/net/core/sysctl_net_core.c
> @@ -74,24 +74,47 @@ static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
>  #endif
>  
>  #ifdef CONFIG_RPS
> -struct cpumask rps_default_mask;
> +
> +static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
> +{
> +	struct cpumask *rps_default_mask;
> +
> +	if (net->core.rps_default_mask)
> +		return net->core.rps_default_mask;
> +
> +	rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL);
> +	if (!rps_default_mask)
> +		return NULL;
> +
> +	/* pairs with READ_ONCE in rx_queue_default_mask() */
> +	WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
> +	return rps_default_mask;
> +}
>  
>  static int rps_default_mask_sysctl(struct ctl_table *table, int write,
>  				   void *buffer, size_t *lenp, loff_t *ppos)
>  {
> +	struct net *net = (struct net *)table->data;
>  	int err = 0;
>  
>  	rtnl_lock();
>  	if (write) {
> -		err = cpumask_parse(buffer, &rps_default_mask);
> +		struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net);
> +
> +		err = -ENOMEM;

nit: Would it be nicer to set err to -ENOMEM inside the if clause?
     I think that is the only path where it is used.

> +		if (!rps_default_mask)
> +			goto done;
> +
> +		err = cpumask_parse(buffer, rps_default_mask);
>  		if (err)
>  			goto done;
>  
> -		err = rps_cpumask_housekeeping(&rps_default_mask);
> +		err = rps_cpumask_housekeeping(rps_default_mask);
>  		if (err)
>  			goto done;
>  	} else {
> -		dump_cpumask(buffer, lenp, ppos, &rps_default_mask);
> +		dump_cpumask(buffer, lenp, ppos,
> +			     net->core.rps_default_mask ? : cpu_none_mask);
>  	}
>  
>  done:

...
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index efbee940bb03..6a14b7b11766 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -224,7 +224,6 @@  struct net_device_core_stats {
 #include <linux/static_key.h>
 extern struct static_key_false rps_needed;
 extern struct static_key_false rfs_needed;
-extern struct cpumask rps_default_mask;
 #endif
 
 struct neighbour;
diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 8249060cf5d0..a91ef9f8de60 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -6,6 +6,7 @@ 
 
 struct ctl_table_header;
 struct prot_inuse;
+struct cpumask;
 
 struct netns_core {
 	/* core sysctls */
@@ -17,6 +18,10 @@  struct netns_core {
 #ifdef CONFIG_PROC_FS
 	struct prot_inuse __percpu *prot_inuse;
 #endif
+
+#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
+	struct cpumask *rps_default_mask;
+#endif
 };
 
 #endif
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e20784b6f873..15e3f4606b5f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1060,6 +1060,18 @@  static const struct kobj_type rx_queue_ktype = {
 	.get_ownership = rx_queue_get_ownership,
 };
 
+static int rx_queue_default_mask(struct net_device *dev,
+				 struct netdev_rx_queue *queue)
+{
+#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
+	struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask);
+
+	if (rps_default_mask && !cpumask_empty(rps_default_mask))
+		return netdev_rx_queue_set_rps_mask(queue, rps_default_mask);
+#endif
+	return 0;
+}
+
 static int rx_queue_add_kobject(struct net_device *dev, int index)
 {
 	struct netdev_rx_queue *queue = dev->_rx + index;
@@ -1083,13 +1095,10 @@  static int rx_queue_add_kobject(struct net_device *dev, int index)
 			goto err;
 	}
 
-#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
-	if (!cpumask_empty(&rps_default_mask)) {
-		error = netdev_rx_queue_set_rps_mask(queue, &rps_default_mask);
-		if (error)
-			goto err;
-	}
-#endif
+	error = rx_queue_default_mask(dev, queue);
+	if (error)
+		goto err;
+
 	kobject_uevent(kobj, KOBJ_ADD);
 
 	return error;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 7130e6d9e263..74842b453407 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -74,24 +74,47 @@  static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
 #endif
 
 #ifdef CONFIG_RPS
-struct cpumask rps_default_mask;
+
+static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
+{
+	struct cpumask *rps_default_mask;
+
+	if (net->core.rps_default_mask)
+		return net->core.rps_default_mask;
+
+	rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL);
+	if (!rps_default_mask)
+		return NULL;
+
+	/* pairs with READ_ONCE in rx_queue_default_mask() */
+	WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
+	return rps_default_mask;
+}
 
 static int rps_default_mask_sysctl(struct ctl_table *table, int write,
 				   void *buffer, size_t *lenp, loff_t *ppos)
 {
+	struct net *net = (struct net *)table->data;
 	int err = 0;
 
 	rtnl_lock();
 	if (write) {
-		err = cpumask_parse(buffer, &rps_default_mask);
+		struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net);
+
+		err = -ENOMEM;
+		if (!rps_default_mask)
+			goto done;
+
+		err = cpumask_parse(buffer, rps_default_mask);
 		if (err)
 			goto done;
 
-		err = rps_cpumask_housekeeping(&rps_default_mask);
+		err = rps_cpumask_housekeeping(rps_default_mask);
 		if (err)
 			goto done;
 	} else {
-		dump_cpumask(buffer, lenp, ppos, &rps_default_mask);
+		dump_cpumask(buffer, lenp, ppos,
+			     net->core.rps_default_mask ? : cpu_none_mask);
 	}
 
 done:
@@ -508,11 +531,6 @@  static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= rps_sock_flow_sysctl
 	},
-	{
-		.procname	= "rps_default_mask",
-		.mode		= 0644,
-		.proc_handler	= rps_default_mask_sysctl
-	},
 #endif
 #ifdef CONFIG_NET_FLOW_LIMIT
 	{
@@ -639,6 +657,14 @@  static struct ctl_table net_core_table[] = {
 };
 
 static struct ctl_table netns_core_table[] = {
+#if IS_ENABLED(CONFIG_RPS)
+	{
+		.procname	= "rps_default_mask",
+		.data		= &init_net,
+		.mode		= 0644,
+		.proc_handler	= rps_default_mask_sysctl
+	},
+#endif
 	{
 		.procname	= "somaxconn",
 		.data		= &init_net.core.sysctl_somaxconn,
@@ -706,6 +732,9 @@  static __net_exit void sysctl_core_net_exit(struct net *net)
 	tbl = net->core.sysctl_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->core.sysctl_hdr);
 	BUG_ON(tbl == netns_core_table);
+#if IS_ENABLED(CONFIG_RPS)
+	kfree(net->core.rps_default_mask);
+#endif
 	kfree(tbl);
 }
 
@@ -716,10 +745,6 @@  static __net_initdata struct pernet_operations sysctl_core_ops = {
 
 static __init int sysctl_core_init(void)
 {
-#if IS_ENABLED(CONFIG_RPS)
-	cpumask_copy(&rps_default_mask, cpu_none_mask);
-#endif
-
 	register_net_sysctl(&init_net, "net/core", net_core_table);
 	return register_pernet_subsys(&sysctl_core_ops);
 }