diff mbox series

[net-next,v6,1/4] netdevsim: allow two netdevsim ports to be connected

Message ID 20240126012357.535494-2-dw@davidwei.uk (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series netdevsim: link and forward skbs between ports | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1064 this patch: 1064
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 0 of 0 maintainers
netdev/build_clang success Errors and warnings before: 1081 this patch: 1081
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api warning Found: 'dev_put(' was: 0 now: 2; 'put_net(' was: 0 now: 2
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1081 this patch: 1081
netdev/checkpatch warning WARNING: line length of 100 exceeds 80 columns WARNING: line length of 104 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

David Wei Jan. 26, 2024, 1:23 a.m. UTC
Add a netdevsim bus attribute to sysfs:
/sys/bus/netdevsim/link_device

Writing "A M B N" to this file will link the netdevsim with ifindex M in
netnsid A to the netdevsim with ifindex N in netnsid B.

rtnl_lock is taken to ensure nothing changes during the linking.

Signed-off-by: David Wei <dw@davidwei.uk>
---
 drivers/net/netdevsim/bus.c       | 72 +++++++++++++++++++++++++++++++
 drivers/net/netdevsim/netdev.c    | 11 +++++
 drivers/net/netdevsim/netdevsim.h |  2 +
 3 files changed, 85 insertions(+)

Comments

Jakub Kicinski Jan. 26, 2024, 2:24 a.m. UTC | #1
On Thu, 25 Jan 2024 17:23:54 -0800 David Wei wrote:
> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
> index bcbc1e19edde..be8ac2e60c69 100644
> --- a/drivers/net/netdevsim/bus.c
> +++ b/drivers/net/netdevsim/bus.c
> @@ -232,9 +232,81 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count)
>  }
>  static BUS_ATTR_WO(del_device);
>  
> +static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count)
> +{
> +	unsigned int netnsid_a, netnsid_b, ifidx_a, ifidx_b;
> +	struct netdevsim *nsim_a, *nsim_b;
> +	struct net_device *dev_a, *dev_b;
> +	struct net *ns_a, *ns_b;
> +	int err;
> +
> +	err = sscanf(buf, "%u %u %u %u", &netnsid_a, &ifidx_a, &netnsid_b, &ifidx_b);

I'd go for "%u:%u %u:%u" to make the 'grouping' of netns and ifindex
more obvious. But no strong feelings.

> +	if (err != 4) {
> +		pr_err("Format for linking two devices is \"netnsid_a ifidx_a netnsid_b ifidx_b\" (uint uint unit uint).\n");
> +		return -EINVAL;
> +	}
> +
> +	err = -EINVAL;
> +	rtnl_lock();
> +	ns_a = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_a);
> +	if (!ns_a) {
> +		pr_err("Could not find netns with id: %d\n", netnsid_a);
> +		goto out_unlock_rtnl;
> +	}
> +
> +	dev_a = dev_get_by_index(ns_a, ifidx_a);

since you're under rtnl_lock you can use __dev_get_by_index(),
it doesn't increase the refcount so you won't have to worry about
releasing it.

> +	if (!dev_a) {
> +		pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_a, netnsid_a);
> +		goto out_put_netns_a;
> +	}
> +
> +	if (!netdev_is_nsim(dev_a)) {
> +		pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_a, netnsid_a);
> +		goto out_put_dev_a;
> +	}
> +
> +	ns_b = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_b);
> +	if (!ns_b) {
> +		pr_err("Could not find netns with id: %d\n", netnsid_b);
> +		goto out_put_dev_a;
> +	}
> +
> +	dev_b = dev_get_by_index(ns_b, ifidx_b);
> +	if (!dev_b) {
> +		pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_b, netnsid_b);
> +		goto out_put_netns_b;
> +	}
> +
> +	if (!netdev_is_nsim(dev_b)) {
> +		pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_b, netnsid_b);
> +		goto out_put_dev_b;
> +	}
> +
> +	err = 0;
> +	nsim_a = netdev_priv(dev_a);
> +	nsim_b = netdev_priv(dev_b);
> +	rcu_assign_pointer(nsim_a->peer, nsim_b);
> +	rcu_assign_pointer(nsim_b->peer, nsim_a);

Shouldn't we check if peer is NULL? Otherwise we can get into weird
situations where we link A<>B then B<>C and then the pointers look like
this A->B<>C. When B gets freed A's pointer won't get cleared.

> +out_put_dev_b:
> +	dev_put(dev_b);
> +out_put_netns_b:
> +	put_net(ns_b);
> +out_put_dev_a:
> +	dev_put(dev_a);
> +out_put_netns_a:
> +	put_net(ns_a);
> +out_unlock_rtnl:
> +	rtnl_unlock();
> +
> +	return !err ? count : err;
> +}
> +static BUS_ATTR_WO(link_device);
David Wei Jan. 26, 2024, 6:54 p.m. UTC | #2
On 2024-01-25 18:24, Jakub Kicinski wrote:
> On Thu, 25 Jan 2024 17:23:54 -0800 David Wei wrote:
>> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
>> index bcbc1e19edde..be8ac2e60c69 100644
>> --- a/drivers/net/netdevsim/bus.c
>> +++ b/drivers/net/netdevsim/bus.c
>> @@ -232,9 +232,81 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count)
>>  }
>>  static BUS_ATTR_WO(del_device);
>>  
>> +static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count)
>> +{
>> +	unsigned int netnsid_a, netnsid_b, ifidx_a, ifidx_b;
>> +	struct netdevsim *nsim_a, *nsim_b;
>> +	struct net_device *dev_a, *dev_b;
>> +	struct net *ns_a, *ns_b;
>> +	int err;
>> +
>> +	err = sscanf(buf, "%u %u %u %u", &netnsid_a, &ifidx_a, &netnsid_b, &ifidx_b);
> 
> I'd go for "%u:%u %u:%u" to make the 'grouping' of netns and ifindex
> more obvious. But no strong feelings.

Also no strong feelings so I will go with your feelings.

> 
>> +	if (err != 4) {
>> +		pr_err("Format for linking two devices is \"netnsid_a ifidx_a netnsid_b ifidx_b\" (uint uint unit uint).\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	err = -EINVAL;
>> +	rtnl_lock();
>> +	ns_a = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_a);
>> +	if (!ns_a) {
>> +		pr_err("Could not find netns with id: %d\n", netnsid_a);
>> +		goto out_unlock_rtnl;
>> +	}
>> +
>> +	dev_a = dev_get_by_index(ns_a, ifidx_a);
> 
> since you're under rtnl_lock you can use __dev_get_by_index(),
> it doesn't increase the refcount so you won't have to worry about
> releasing it.

Ah, I will change this. Is this true in general i.e. if I hold some big
lock then I can use versions of functions that do not modify refcounts?

> 
>> +	if (!dev_a) {
>> +		pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_a, netnsid_a);
>> +		goto out_put_netns_a;
>> +	}
>> +
>> +	if (!netdev_is_nsim(dev_a)) {
>> +		pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_a, netnsid_a);
>> +		goto out_put_dev_a;
>> +	}
>> +
>> +	ns_b = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_b);
>> +	if (!ns_b) {
>> +		pr_err("Could not find netns with id: %d\n", netnsid_b);
>> +		goto out_put_dev_a;
>> +	}
>> +
>> +	dev_b = dev_get_by_index(ns_b, ifidx_b);
>> +	if (!dev_b) {
>> +		pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_b, netnsid_b);
>> +		goto out_put_netns_b;
>> +	}
>> +
>> +	if (!netdev_is_nsim(dev_b)) {
>> +		pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_b, netnsid_b);
>> +		goto out_put_dev_b;
>> +	}
>> +
>> +	err = 0;
>> +	nsim_a = netdev_priv(dev_a);
>> +	nsim_b = netdev_priv(dev_b);
>> +	rcu_assign_pointer(nsim_a->peer, nsim_b);
>> +	rcu_assign_pointer(nsim_b->peer, nsim_a);
> 
> Shouldn't we check if peer is NULL? Otherwise we can get into weird
> situations where we link A<>B then B<>C and then the pointers look like
> this A->B<>C. When B gets freed A's pointer won't get cleared.

Yep, that's an oversight from me. Will address.

> 
>> +out_put_dev_b:
>> +	dev_put(dev_b);
>> +out_put_netns_b:
>> +	put_net(ns_b);
>> +out_put_dev_a:
>> +	dev_put(dev_a);
>> +out_put_netns_a:
>> +	put_net(ns_a);
>> +out_unlock_rtnl:
>> +	rtnl_unlock();
>> +
>> +	return !err ? count : err;
>> +}
>> +static BUS_ATTR_WO(link_device);
Jakub Kicinski Jan. 26, 2024, 7:17 p.m. UTC | #3
On Fri, 26 Jan 2024 10:54:35 -0800 David Wei wrote:
> > since you're under rtnl_lock you can use __dev_get_by_index(),
> > it doesn't increase the refcount so you won't have to worry about
> > releasing it.  
> 
> Ah, I will change this. Is this true in general i.e. if I hold some big
> lock then I can use versions of functions that do not modify refcounts?

I don't think so. Generally, you can ignore refcounts if you're holding
the lock protecting the table in which the object is registered while
it is alive, and you just looked it up in that table... if that makes
sense.

netdev lifetime is a bit unusual in how much the rtnl_lock protects.
diff mbox series

Patch

diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c
index bcbc1e19edde..be8ac2e60c69 100644
--- a/drivers/net/netdevsim/bus.c
+++ b/drivers/net/netdevsim/bus.c
@@ -232,9 +232,81 @@  del_device_store(const struct bus_type *bus, const char *buf, size_t count)
 }
 static BUS_ATTR_WO(del_device);
 
+static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count)
+{
+	unsigned int netnsid_a, netnsid_b, ifidx_a, ifidx_b;
+	struct netdevsim *nsim_a, *nsim_b;
+	struct net_device *dev_a, *dev_b;
+	struct net *ns_a, *ns_b;
+	int err;
+
+	err = sscanf(buf, "%u %u %u %u", &netnsid_a, &ifidx_a, &netnsid_b, &ifidx_b);
+	if (err != 4) {
+		pr_err("Format for linking two devices is \"netnsid_a ifidx_a netnsid_b ifidx_b\" (uint uint unit uint).\n");
+		return -EINVAL;
+	}
+
+	err = -EINVAL;
+	rtnl_lock();
+	ns_a = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_a);
+	if (!ns_a) {
+		pr_err("Could not find netns with id: %d\n", netnsid_a);
+		goto out_unlock_rtnl;
+	}
+
+	dev_a = dev_get_by_index(ns_a, ifidx_a);
+	if (!dev_a) {
+		pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_a, netnsid_a);
+		goto out_put_netns_a;
+	}
+
+	if (!netdev_is_nsim(dev_a)) {
+		pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_a, netnsid_a);
+		goto out_put_dev_a;
+	}
+
+	ns_b = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_b);
+	if (!ns_b) {
+		pr_err("Could not find netns with id: %d\n", netnsid_b);
+		goto out_put_dev_a;
+	}
+
+	dev_b = dev_get_by_index(ns_b, ifidx_b);
+	if (!dev_b) {
+		pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_b, netnsid_b);
+		goto out_put_netns_b;
+	}
+
+	if (!netdev_is_nsim(dev_b)) {
+		pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_b, netnsid_b);
+		goto out_put_dev_b;
+	}
+
+	err = 0;
+	nsim_a = netdev_priv(dev_a);
+	nsim_b = netdev_priv(dev_b);
+	rcu_assign_pointer(nsim_a->peer, nsim_b);
+	rcu_assign_pointer(nsim_b->peer, nsim_a);
+
+out_put_dev_b:
+	dev_put(dev_b);
+out_put_netns_b:
+	put_net(ns_b);
+out_put_dev_a:
+	dev_put(dev_a);
+out_put_netns_a:
+	put_net(ns_a);
+out_unlock_rtnl:
+	rtnl_unlock();
+
+	return !err ? count : err;
+}
+static BUS_ATTR_WO(link_device);
+
 static struct attribute *nsim_bus_attrs[] = {
 	&bus_attr_new_device.attr,
 	&bus_attr_del_device.attr,
+	&bus_attr_link_device.attr,
 	NULL
 };
 ATTRIBUTE_GROUPS(nsim_bus);
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index 77e8250282a5..969248ffeca8 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -394,6 +394,7 @@  nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
 	ns->nsim_dev = nsim_dev;
 	ns->nsim_dev_port = nsim_dev_port;
 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
+	RCU_INIT_POINTER(ns->peer, NULL);
 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
 	nsim_ethtool_init(ns);
@@ -413,8 +414,13 @@  nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
 void nsim_destroy(struct netdevsim *ns)
 {
 	struct net_device *dev = ns->netdev;
+	struct netdevsim *peer;
 
 	rtnl_lock();
+	peer = rtnl_dereference(ns->peer);
+	if (peer)
+		RCU_INIT_POINTER(peer->peer, NULL);
+	RCU_INIT_POINTER(ns->peer, NULL);
 	unregister_netdevice(dev);
 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
 		nsim_macsec_teardown(ns);
@@ -427,6 +433,11 @@  void nsim_destroy(struct netdevsim *ns)
 	free_netdev(dev);
 }
 
+bool netdev_is_nsim(struct net_device *dev)
+{
+	return dev->netdev_ops == &nsim_netdev_ops;
+}
+
 static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
 			 struct netlink_ext_ack *extack)
 {
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 028c825b86db..c8b45b0d955e 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -125,11 +125,13 @@  struct netdevsim {
 	} udp_ports;
 
 	struct nsim_ethtool ethtool;
+	struct netdevsim __rcu *peer;
 };
 
 struct netdevsim *
 nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port);
 void nsim_destroy(struct netdevsim *ns);
+bool netdev_is_nsim(struct net_device *dev);
 
 void nsim_ethtool_init(struct netdevsim *ns);