Message ID | 20240126012357.535494-2-dw@davidwei.uk (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | netdevsim: link and forward skbs between ports | expand |
On Thu, 25 Jan 2024 17:23:54 -0800 David Wei wrote: > diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c > index bcbc1e19edde..be8ac2e60c69 100644 > --- a/drivers/net/netdevsim/bus.c > +++ b/drivers/net/netdevsim/bus.c > @@ -232,9 +232,81 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count) > } > static BUS_ATTR_WO(del_device); > > +static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count) > +{ > + unsigned int netnsid_a, netnsid_b, ifidx_a, ifidx_b; > + struct netdevsim *nsim_a, *nsim_b; > + struct net_device *dev_a, *dev_b; > + struct net *ns_a, *ns_b; > + int err; > + > + err = sscanf(buf, "%u %u %u %u", &netnsid_a, &ifidx_a, &netnsid_b, &ifidx_b); I'd go for "%u:%u %u:%u" to make the 'grouping' of netns and ifindex more obvious. But no strong feelings. > + if (err != 4) { > + pr_err("Format for linking two devices is \"netnsid_a ifidx_a netnsid_b ifidx_b\" (uint uint unit uint).\n"); > + return -EINVAL; > + } > + > + err = -EINVAL; > + rtnl_lock(); > + ns_a = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_a); > + if (!ns_a) { > + pr_err("Could not find netns with id: %d\n", netnsid_a); > + goto out_unlock_rtnl; > + } > + > + dev_a = dev_get_by_index(ns_a, ifidx_a); since you're under rtnl_lock you can use __dev_get_by_index(), it doesn't increase the refcount so you won't have to worry about releasing it. 
> + if (!dev_a) { > + pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_a, netnsid_a); > + goto out_put_netns_a; > + } > + > + if (!netdev_is_nsim(dev_a)) { > + pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_a, netnsid_a); > + goto out_put_dev_a; > + } > + > + ns_b = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_b); > + if (!ns_b) { > + pr_err("Could not find netns with id: %d\n", netnsid_b); > + goto out_put_dev_a; > + } > + > + dev_b = dev_get_by_index(ns_b, ifidx_b); > + if (!dev_b) { > + pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_b, netnsid_b); > + goto out_put_netns_b; > + } > + > + if (!netdev_is_nsim(dev_b)) { > + pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_b, netnsid_b); > + goto out_put_dev_b; > + } > + > + err = 0; > + nsim_a = netdev_priv(dev_a); > + nsim_b = netdev_priv(dev_b); > + rcu_assign_pointer(nsim_a->peer, nsim_b); > + rcu_assign_pointer(nsim_b->peer, nsim_a); Shouldn't we check if peer is NULL? Otherwise we can get into weird situations where we link A<>B then B<>C and then the pointers look like this A->B<>C. When B gets freed A's pointer won't get cleared. > +out_put_dev_b: > + dev_put(dev_b); > +out_put_netns_b: > + put_net(ns_b); > +out_put_dev_a: > + dev_put(dev_a); > +out_put_netns_a: > + put_net(ns_a); > +out_unlock_rtnl: > + rtnl_unlock(); > + > + return !err ? count : err; > +} > +static BUS_ATTR_WO(link_device);
On 2024-01-25 18:24, Jakub Kicinski wrote: > On Thu, 25 Jan 2024 17:23:54 -0800 David Wei wrote: >> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c >> index bcbc1e19edde..be8ac2e60c69 100644 >> --- a/drivers/net/netdevsim/bus.c >> +++ b/drivers/net/netdevsim/bus.c >> @@ -232,9 +232,81 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count) >> } >> static BUS_ATTR_WO(del_device); >> >> +static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count) >> +{ >> + unsigned int netnsid_a, netnsid_b, ifidx_a, ifidx_b; >> + struct netdevsim *nsim_a, *nsim_b; >> + struct net_device *dev_a, *dev_b; >> + struct net *ns_a, *ns_b; >> + int err; >> + >> + err = sscanf(buf, "%u %u %u %u", &netnsid_a, &ifidx_a, &netnsid_b, &ifidx_b); > > I'd go for "%u:%u %u:%u" to make the 'grouping' of netns and ifindex > more obvious. But no strong feelings. Also no strong feelings so I will go with your feelings. > >> + if (err != 4) { >> + pr_err("Format for linking two devices is \"netnsid_a ifidx_a netnsid_b ifidx_b\" (uint uint unit uint).\n"); >> + return -EINVAL; >> + } >> + >> + err = -EINVAL; >> + rtnl_lock(); >> + ns_a = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_a); >> + if (!ns_a) { >> + pr_err("Could not find netns with id: %d\n", netnsid_a); >> + goto out_unlock_rtnl; >> + } >> + >> + dev_a = dev_get_by_index(ns_a, ifidx_a); > > since you're under rtnl_lock you can use __dev_get_by_index(), > it doesn't increase the refcount so you won't have to worry about > releasing it. Ah, I will change this. Is this true in general i.e. if I hold some big lock then I can use versions of functions that do not modify refcounts? 
> >> + if (!dev_a) { >> + pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_a, netnsid_a); >> + goto out_put_netns_a; >> + } >> + >> + if (!netdev_is_nsim(dev_a)) { >> + pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_a, netnsid_a); >> + goto out_put_dev_a; >> + } >> + >> + ns_b = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_b); >> + if (!ns_b) { >> + pr_err("Could not find netns with id: %d\n", netnsid_b); >> + goto out_put_dev_a; >> + } >> + >> + dev_b = dev_get_by_index(ns_b, ifidx_b); >> + if (!dev_b) { >> + pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_b, netnsid_b); >> + goto out_put_netns_b; >> + } >> + >> + if (!netdev_is_nsim(dev_b)) { >> + pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_b, netnsid_b); >> + goto out_put_dev_b; >> + } >> + >> + err = 0; >> + nsim_a = netdev_priv(dev_a); >> + nsim_b = netdev_priv(dev_b); >> + rcu_assign_pointer(nsim_a->peer, nsim_b); >> + rcu_assign_pointer(nsim_b->peer, nsim_a); > > Shouldn't we check if peer is NULL? Otherwise we can get into weird > situations where we link A<>B then B<>C and then the pointers look like > this A->B<>C. When B gets freed A's pointer won't get cleared. Yep, that's an oversight from me. Will address. > >> +out_put_dev_b: >> + dev_put(dev_b); >> +out_put_netns_b: >> + put_net(ns_b); >> +out_put_dev_a: >> + dev_put(dev_a); >> +out_put_netns_a: >> + put_net(ns_a); >> +out_unlock_rtnl: >> + rtnl_unlock(); >> + >> + return !err ? count : err; >> +} >> +static BUS_ATTR_WO(link_device);
On Fri, 26 Jan 2024 10:54:35 -0800 David Wei wrote: > > since you're under rtnl_lock you can use __dev_get_by_index(), > > it doesn't increase the refcount so you won't have to worry about > > releasing it. > > Ah, I will change this. Is this true in general i.e. if I hold some big > lock then I can use versions of functions that do not modify refcounts? I don't think so. Generally you can ignore refcounts if you're holding the lock protecting the table in which the object is registered while it is alive, and you just looked it up in that table... if that makes sense. netdev lifetime is a bit unusual in how much the rtnl_lock protects.
diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index bcbc1e19edde..be8ac2e60c69 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -232,9 +232,81 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count) } static BUS_ATTR_WO(del_device); +static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count) +{ + unsigned int netnsid_a, netnsid_b, ifidx_a, ifidx_b; + struct netdevsim *nsim_a, *nsim_b; + struct net_device *dev_a, *dev_b; + struct net *ns_a, *ns_b; + int err; + + err = sscanf(buf, "%u %u %u %u", &netnsid_a, &ifidx_a, &netnsid_b, &ifidx_b); + if (err != 4) { + pr_err("Format for linking two devices is \"netnsid_a ifidx_a netnsid_b ifidx_b\" (uint uint unit uint).\n"); + return -EINVAL; + } + + err = -EINVAL; + rtnl_lock(); + ns_a = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_a); + if (!ns_a) { + pr_err("Could not find netns with id: %d\n", netnsid_a); + goto out_unlock_rtnl; + } + + dev_a = dev_get_by_index(ns_a, ifidx_a); + if (!dev_a) { + pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_a, netnsid_a); + goto out_put_netns_a; + } + + if (!netdev_is_nsim(dev_a)) { + pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_a, netnsid_a); + goto out_put_dev_a; + } + + ns_b = get_net_ns_by_id(current->nsproxy->net_ns, netnsid_b); + if (!ns_b) { + pr_err("Could not find netns with id: %d\n", netnsid_b); + goto out_put_dev_a; + } + + dev_b = dev_get_by_index(ns_b, ifidx_b); + if (!dev_b) { + pr_err("Could not find device with ifindex %d in netnsid %d\n", ifidx_b, netnsid_b); + goto out_put_netns_b; + } + + if (!netdev_is_nsim(dev_b)) { + pr_err("Device with ifindex %d in netnsid %d is not a netdevsim\n", ifidx_b, netnsid_b); + goto out_put_dev_b; + } + + err = 0; + nsim_a = netdev_priv(dev_a); + nsim_b = netdev_priv(dev_b); + rcu_assign_pointer(nsim_a->peer, nsim_b); + rcu_assign_pointer(nsim_b->peer, nsim_a); + 
+out_put_dev_b: + dev_put(dev_b); +out_put_netns_b: + put_net(ns_b); +out_put_dev_a: + dev_put(dev_a); +out_put_netns_a: + put_net(ns_a); +out_unlock_rtnl: + rtnl_unlock(); + + return !err ? count : err; +} +static BUS_ATTR_WO(link_device); + static struct attribute *nsim_bus_attrs[] = { &bus_attr_new_device.attr, &bus_attr_del_device.attr, + &bus_attr_link_device.attr, NULL }; ATTRIBUTE_GROUPS(nsim_bus); diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 77e8250282a5..969248ffeca8 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -394,6 +394,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) ns->nsim_dev = nsim_dev; ns->nsim_dev_port = nsim_dev_port; ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; + RCU_INIT_POINTER(ns->peer, NULL); SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); nsim_ethtool_init(ns); @@ -413,8 +414,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) void nsim_destroy(struct netdevsim *ns) { struct net_device *dev = ns->netdev; + struct netdevsim *peer; rtnl_lock(); + peer = rtnl_dereference(ns->peer); + if (peer) + RCU_INIT_POINTER(peer->peer, NULL); + RCU_INIT_POINTER(ns->peer, NULL); unregister_netdevice(dev); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { nsim_macsec_teardown(ns); @@ -427,6 +433,11 @@ void nsim_destroy(struct netdevsim *ns) free_netdev(dev); } +bool netdev_is_nsim(struct net_device *dev) +{ + return dev->netdev_ops == &nsim_netdev_ops; +} + static int nsim_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 028c825b86db..c8b45b0d955e 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -125,11 +125,13 @@ struct netdevsim { } udp_ports; struct nsim_ethtool ethtool; + struct netdevsim __rcu 
*peer; }; struct netdevsim * nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port); void nsim_destroy(struct netdevsim *ns); +bool netdev_is_nsim(struct net_device *dev); void nsim_ethtool_init(struct netdevsim *ns);
Add a netdevsim bus attribute to sysfs: /sys/bus/netdevsim/link_device Writing "A M B N" to this file will link the netdevsim with ifindex M in netnsid A to the netdevsim with ifindex N in netnsid B. rtnl_lock is taken to ensure nothing changes during the linking. Signed-off-by: David Wei <dw@davidwei.uk> --- drivers/net/netdevsim/bus.c | 72 +++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdev.c | 11 +++++ drivers/net/netdevsim/netdevsim.h | 2 + 3 files changed, 85 insertions(+)