Message ID | 20231214212443.3638210-2-dw@davidwei.uk (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | netdevsim: link and forward skbs between ports | expand |
Thu, Dec 14, 2023 at 10:24:40PM CET, dw@davidwei.uk wrote: >Add a debugfs file in >/sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer > >Writing "M B" to this file will link port A of netdevsim N with port B of >netdevsim M. > >Reading this file will return the linked netdevsim id and port, if any. > >Signed-off-by: David Wei <dw@davidwei.uk> >--- > drivers/net/netdevsim/bus.c | 17 ++++++ > drivers/net/netdevsim/dev.c | 88 +++++++++++++++++++++++++++++++ > drivers/net/netdevsim/netdev.c | 6 +++ > drivers/net/netdevsim/netdevsim.h | 3 ++ > 4 files changed, 114 insertions(+) > >diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c >index bcbc1e19edde..1ef95661a3f5 100644 >--- a/drivers/net/netdevsim/bus.c >+++ b/drivers/net/netdevsim/bus.c >@@ -323,6 +323,23 @@ static struct device_driver nsim_driver = { > .owner = THIS_MODULE, > }; > >+struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id) This sounds definitelly incorrect. You should not need to touch bus.c code. It arranges the bus and devices on it. The fact that a device is probed or not is parallel to this. I think you need to maintain a separate list/xarray of netdevsim devices probed by nsim_drv_probe() >+{ >+ struct nsim_bus_dev *nsim_bus_dev; >+ >+ mutex_lock(&nsim_bus_dev_list_lock); >+ list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) { >+ if (nsim_bus_dev->dev.id == id) { >+ get_device(&nsim_bus_dev->dev); >+ mutex_unlock(&nsim_bus_dev_list_lock); >+ return nsim_bus_dev; >+ } >+ } >+ mutex_unlock(&nsim_bus_dev_list_lock); >+ >+ return NULL; >+} >+ > int nsim_bus_init(void) > { > int err; >diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c >index b4d3b9cde8bd..034145ba1861 100644 >--- a/drivers/net/netdevsim/dev.c >+++ b/drivers/net/netdevsim/dev.c >@@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = { > .owner = THIS_MODULE, > }; > >+static ssize_t nsim_dev_peer_read(struct file *file, char __user *data, >+ size_t count, loff_t *ppos) >+{ >+ struct nsim_dev_port *nsim_dev_port; >+ struct netdevsim *peer; >+ unsigned int id, port; >+ char buf[23]; >+ ssize_t len; >+ >+ nsim_dev_port = file->private_data; >+ rcu_read_lock(); >+ peer = rcu_dereference(nsim_dev_port->ns->peer); >+ if (!peer) { >+ rcu_read_unlock(); >+ return 0; >+ } >+ >+ id = peer->nsim_bus_dev->dev.id; >+ port = peer->nsim_dev_port->port_index; >+ len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port); >+ >+ rcu_read_unlock(); >+ return simple_read_from_buffer(data, count, ppos, buf, len); >+} >+ >+static ssize_t nsim_dev_peer_write(struct file *file, >+ const char __user *data, >+ size_t count, loff_t *ppos) >+{ >+ struct nsim_dev_port *nsim_dev_port, *peer_dev_port; >+ struct nsim_bus_dev *peer_bus_dev; >+ struct nsim_dev *peer_dev; >+ unsigned int id, port; >+ char buf[22]; >+ ssize_t ret; >+ >+ if (count >= sizeof(buf)) >+ return -ENOSPC; >+ >+ ret = copy_from_user(buf, data, count); >+ if (ret) >+ return -EFAULT; >+ buf[count] = '\0'; >+ >+ ret = sscanf(buf, "%u %u", &id, &port); >+ if (ret != 2) { >+ pr_err("Format for adding a peer is \"id port\" (uint uint)"); >+ return -EINVAL; >+ } >+ >+ /* invalid netdevsim id */ >+ peer_bus_dev = nsim_bus_dev_get(id); >+ if (!peer_bus_dev) >+ return -EINVAL; >+ >+ ret = -EINVAL; >+ /* cannot link to self */ >+ nsim_dev_port = file->private_data; >+ if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev && >+ nsim_dev_port->port_index == port) >+ goto out; >+ >+ peer_dev = dev_get_drvdata(&peer_bus_dev->dev); Again, no bus touching should be needed. (btw, this could be null is dev is not probed) >+ list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) { >+ if (peer_dev_port->port_index != port) >+ continue; >+ rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); >+ rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); What is stopping another cpu from setting different peer for the same port here, making a mess? >+ ret = count; >+ goto out; >+ } >+ >+out: >+ put_device(&peer_bus_dev->dev); >+ return ret; >+} >+ >+static const struct file_operations nsim_dev_peer_fops = { >+ .open = simple_open, >+ .read = nsim_dev_peer_read, >+ .write = nsim_dev_peer_write, >+ .llseek = generic_file_llseek, >+ .owner = THIS_MODULE, >+}; >+ > static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, > struct nsim_dev_port *nsim_dev_port) > { >@@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, > } > debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); > >+ debugfs_create_file("peer", 0600, nsim_dev_port->ddir, >+ nsim_dev_port, &nsim_dev_peer_fops); >+ > return 0; > } > >diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c >index aecaf5f44374..e290c54b0e70 100644 >--- a/drivers/net/netdevsim/netdev.c >+++ b/drivers/net/netdevsim/netdev.c >@@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) > ns->nsim_dev = nsim_dev; > ns->nsim_dev_port = nsim_dev_port; > ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; >+ RCU_INIT_POINTER(ns->peer, NULL); > SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); > SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); > nsim_ethtool_init(ns); >@@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) > void nsim_destroy(struct netdevsim *ns) > { > struct net_device *dev = ns->netdev; >+ struct netdevsim *peer; > > rtnl_lock(); >+ peer = rtnl_dereference(ns->peer); >+ RCU_INIT_POINTER(ns->peer, NULL); > unregister_netdevice(dev); >+ if (peer) >+ RCU_INIT_POINTER(peer->peer, NULL); What is stopping the another CPU from setting this back to this "ns"? Or what is stopping another netdevsim port from setting this ns while going away? Do you rely on RTNL_LOCK in any way (other then synchronize_net() in unlock())? If yes, looks wrong. This ns->peer update locking looks very broken to me :/ > if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { > nsim_macsec_teardown(ns); > nsim_ipsec_teardown(ns); >diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h >index 028c825b86db..61ac3a80cf9a 100644 >--- a/drivers/net/netdevsim/netdevsim.h >+++ b/drivers/net/netdevsim/netdevsim.h >@@ -125,6 +125,7 @@ struct netdevsim { > } udp_ports; > > struct nsim_ethtool ethtool; >+ struct netdevsim __rcu *peer; > }; > > struct netdevsim * >@@ -415,5 +416,7 @@ struct nsim_bus_dev { > bool init; > }; > >+struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id); >+ > int nsim_bus_init(void); > void nsim_bus_exit(void); >-- >2.39.3 >
On 2023-12-15 03:11, Jiri Pirko wrote: > Thu, Dec 14, 2023 at 10:24:40PM CET, dw@davidwei.uk wrote: >> Add a debugfs file in >> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer >> >> Writing "M B" to this file will link port A of netdevsim N with port B of >> netdevsim M. >> >> Reading this file will return the linked netdevsim id and port, if any. >> >> Signed-off-by: David Wei <dw@davidwei.uk> >> --- >> drivers/net/netdevsim/bus.c | 17 ++++++ >> drivers/net/netdevsim/dev.c | 88 +++++++++++++++++++++++++++++++ >> drivers/net/netdevsim/netdev.c | 6 +++ >> drivers/net/netdevsim/netdevsim.h | 3 ++ >> 4 files changed, 114 insertions(+) >> >> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c >> index bcbc1e19edde..1ef95661a3f5 100644 >> --- a/drivers/net/netdevsim/bus.c >> +++ b/drivers/net/netdevsim/bus.c >> @@ -323,6 +323,23 @@ static struct device_driver nsim_driver = { >> .owner = THIS_MODULE, >> }; >> >> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id) > > This sounds definitelly incorrect. You should not need to touch bus.c > code. It arranges the bus and devices on it. The fact that a device is > probed or not is parallel to this. > > I think you need to maintain a separate list/xarray of netdevsim devices > probed by nsim_drv_probe() There is a 1:1 relationship between bus devices (nsim_bus_dev) and nsim devices (nsim_dev). Adding a separate list for nsim devices seemed redundant to me when there is already a list for bus devices. > > >> +{ >> + struct nsim_bus_dev *nsim_bus_dev; >> + >> + mutex_lock(&nsim_bus_dev_list_lock); >> + list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) { >> + if (nsim_bus_dev->dev.id == id) { >> + get_device(&nsim_bus_dev->dev); >> + mutex_unlock(&nsim_bus_dev_list_lock); >> + return nsim_bus_dev; >> + } >> + } >> + mutex_unlock(&nsim_bus_dev_list_lock); >> + >> + return NULL; >> +} >> + >> int nsim_bus_init(void) >> { >> int err; >> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c >> index b4d3b9cde8bd..034145ba1861 100644 >> --- a/drivers/net/netdevsim/dev.c >> +++ b/drivers/net/netdevsim/dev.c >> @@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = { >> .owner = THIS_MODULE, >> }; >> >> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data, >> + size_t count, loff_t *ppos) >> +{ >> + struct nsim_dev_port *nsim_dev_port; >> + struct netdevsim *peer; >> + unsigned int id, port; >> + char buf[23]; >> + ssize_t len; >> + >> + nsim_dev_port = file->private_data; >> + rcu_read_lock(); >> + peer = rcu_dereference(nsim_dev_port->ns->peer); >> + if (!peer) { >> + rcu_read_unlock(); >> + return 0; >> + } >> + >> + id = peer->nsim_bus_dev->dev.id; >> + port = peer->nsim_dev_port->port_index; >> + len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port); >> + >> + rcu_read_unlock(); >> + return simple_read_from_buffer(data, count, ppos, buf, len); >> +} >> + >> +static ssize_t nsim_dev_peer_write(struct file *file, >> + const char __user *data, >> + size_t count, loff_t *ppos) >> +{ >> + struct nsim_dev_port *nsim_dev_port, *peer_dev_port; >> + struct nsim_bus_dev *peer_bus_dev; >> + struct nsim_dev *peer_dev; >> + unsigned int id, port; >> + char buf[22]; >> + ssize_t ret; >> + >> + if (count >= sizeof(buf)) >> + return -ENOSPC; >> + >> + ret = copy_from_user(buf, data, count); >> + if (ret) >> + return -EFAULT; >> + buf[count] = '\0'; >> + >> + ret = sscanf(buf, "%u %u", &id, &port); >> + if (ret != 2) { >> + pr_err("Format for adding a peer is \"id port\" (uint uint)"); >> + return -EINVAL; >> + } >> + >> + /* invalid netdevsim id */ >> + peer_bus_dev = nsim_bus_dev_get(id); >> + if (!peer_bus_dev) >> + return -EINVAL; >> + >> + ret = -EINVAL; >> + /* cannot link to self */ >> + nsim_dev_port = file->private_data; >> + if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev && >> + nsim_dev_port->port_index == port) >> + goto out; >> + >> + peer_dev = dev_get_drvdata(&peer_bus_dev->dev); > > Again, no bus touching should be needed. (btw, this could be null is dev > is not probed) That's fair, I can do a null check. > > >> + list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) { >> + if (peer_dev_port->port_index != port) >> + continue; >> + rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); >> + rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); > > What is stopping another cpu from setting different peer for the same > port here, making a mess? Looking into RCU a bit more, you're right that it does not protect from multiple writers. Would adding a lock (spinlock?) to nsim_dev and taking that be sufficient here? Or what if I took rtnl_lock()? > > >> + ret = count; >> + goto out; >> + } >> + >> +out: >> + put_device(&peer_bus_dev->dev); >> + return ret; >> +} >> + >> +static const struct file_operations nsim_dev_peer_fops = { >> + .open = simple_open, >> + .read = nsim_dev_peer_read, >> + .write = nsim_dev_peer_write, >> + .llseek = generic_file_llseek, >> + .owner = THIS_MODULE, >> +}; >> + >> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, >> struct nsim_dev_port *nsim_dev_port) >> { >> @@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, >> } >> debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); >> >> + debugfs_create_file("peer", 0600, nsim_dev_port->ddir, >> + nsim_dev_port, &nsim_dev_peer_fops); >> + >> return 0; >> } >> >> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c >> index aecaf5f44374..e290c54b0e70 100644 >> --- a/drivers/net/netdevsim/netdev.c >> +++ b/drivers/net/netdevsim/netdev.c >> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) >> ns->nsim_dev = nsim_dev; >> ns->nsim_dev_port = nsim_dev_port; >> ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; >> + RCU_INIT_POINTER(ns->peer, NULL); >> SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); >> SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); >> nsim_ethtool_init(ns); >> @@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) >> void nsim_destroy(struct netdevsim *ns) >> { >> struct net_device *dev = ns->netdev; >> + struct netdevsim *peer; >> >> rtnl_lock(); >> + peer = rtnl_dereference(ns->peer); >> + RCU_INIT_POINTER(ns->peer, NULL); >> unregister_netdevice(dev); >> + if (peer) >> + RCU_INIT_POINTER(peer->peer, NULL); > > What is stopping the another CPU from setting this back to this "ns"? > Or what is stopping another netdevsim port from setting this ns while > going away? > > Do you rely on RTNL_LOCK in any way (other then synchronize_net() in > unlock())? If yes, looks wrong. > > This ns->peer update locking looks very broken to me :/ As above, would a spinlock on nsim_dev or taking rtnl_lock() in nsim_dev_peer_write() resolve this? > > > > >> if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { >> nsim_macsec_teardown(ns); >> nsim_ipsec_teardown(ns); >> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h >> index 028c825b86db..61ac3a80cf9a 100644 >> --- a/drivers/net/netdevsim/netdevsim.h >> +++ b/drivers/net/netdevsim/netdevsim.h >> @@ -125,6 +125,7 @@ struct netdevsim { >> } udp_ports; >> >> struct nsim_ethtool ethtool; >> + struct netdevsim __rcu *peer; >> }; >> >> struct netdevsim * >> @@ -415,5 +416,7 @@ struct nsim_bus_dev { >> bool init; >> }; >> >> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id); >> + >> int nsim_bus_init(void); >> void nsim_bus_exit(void); >> -- >> 2.39.3 >>
Fri, Dec 15, 2023 at 08:13:45PM CET, dw@davidwei.uk wrote: >On 2023-12-15 03:11, Jiri Pirko wrote: >> Thu, Dec 14, 2023 at 10:24:40PM CET, dw@davidwei.uk wrote: >>> Add a debugfs file in >>> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer >>> >>> Writing "M B" to this file will link port A of netdevsim N with port B of >>> netdevsim M. >>> >>> Reading this file will return the linked netdevsim id and port, if any. >>> >>> Signed-off-by: David Wei <dw@davidwei.uk> >>> --- >>> drivers/net/netdevsim/bus.c | 17 ++++++ >>> drivers/net/netdevsim/dev.c | 88 +++++++++++++++++++++++++++++++ >>> drivers/net/netdevsim/netdev.c | 6 +++ >>> drivers/net/netdevsim/netdevsim.h | 3 ++ >>> 4 files changed, 114 insertions(+) >>> >>> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c >>> index bcbc1e19edde..1ef95661a3f5 100644 >>> --- a/drivers/net/netdevsim/bus.c >>> +++ b/drivers/net/netdevsim/bus.c >>> @@ -323,6 +323,23 @@ static struct device_driver nsim_driver = { >>> .owner = THIS_MODULE, >>> }; >>> >>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id) >> >> This sounds definitelly incorrect. You should not need to touch bus.c >> code. It arranges the bus and devices on it. The fact that a device is >> probed or not is parallel to this. >> >> I think you need to maintain a separate list/xarray of netdevsim devices >> probed by nsim_drv_probe() > >There is a 1:1 relationship between bus devices (nsim_bus_dev) and nsim devices Of course it is not. I thought I exaplained that. If you unbind (or not bind at all), it is still in this list, however not probed. >(nsim_dev). Adding a separate list for nsim devices seemed redundant to me when >there is already a list for bus devices. Again, please don't call into bus.c here. > >> >> >>> +{ >>> + struct nsim_bus_dev *nsim_bus_dev; >>> + >>> + mutex_lock(&nsim_bus_dev_list_lock); >>> + list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) { >>> + if (nsim_bus_dev->dev.id == id) { >>> + get_device(&nsim_bus_dev->dev); >>> + mutex_unlock(&nsim_bus_dev_list_lock); >>> + return nsim_bus_dev; >>> + } >>> + } >>> + mutex_unlock(&nsim_bus_dev_list_lock); >>> + >>> + return NULL; >>> +} >>> + >>> int nsim_bus_init(void) >>> { >>> int err; >>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c >>> index b4d3b9cde8bd..034145ba1861 100644 >>> --- a/drivers/net/netdevsim/dev.c >>> +++ b/drivers/net/netdevsim/dev.c >>> @@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = { >>> .owner = THIS_MODULE, >>> }; >>> >>> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data, >>> + size_t count, loff_t *ppos) >>> +{ >>> + struct nsim_dev_port *nsim_dev_port; >>> + struct netdevsim *peer; >>> + unsigned int id, port; >>> + char buf[23]; >>> + ssize_t len; >>> + >>> + nsim_dev_port = file->private_data; >>> + rcu_read_lock(); >>> + peer = rcu_dereference(nsim_dev_port->ns->peer); >>> + if (!peer) { >>> + rcu_read_unlock(); >>> + return 0; >>> + } >>> + >>> + id = peer->nsim_bus_dev->dev.id; >>> + port = peer->nsim_dev_port->port_index; >>> + len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port); >>> + >>> + rcu_read_unlock(); >>> + return simple_read_from_buffer(data, count, ppos, buf, len); >>> +} >>> + >>> +static ssize_t nsim_dev_peer_write(struct file *file, >>> + const char __user *data, >>> + size_t count, loff_t *ppos) >>> +{ >>> + struct nsim_dev_port *nsim_dev_port, *peer_dev_port; >>> + struct nsim_bus_dev *peer_bus_dev; >>> + struct nsim_dev *peer_dev; >>> + unsigned int id, port; >>> + char buf[22]; >>> + ssize_t ret; >>> + >>> + if (count >= sizeof(buf)) >>> + return -ENOSPC; >>> + >>> + ret = copy_from_user(buf, data, count); >>> + if (ret) >>> + return -EFAULT; >>> + buf[count] = '\0'; >>> + >>> + ret = sscanf(buf, "%u %u", &id, &port); >>> + if (ret != 2) { >>> + pr_err("Format for adding a peer is \"id port\" (uint uint)"); >>> + return -EINVAL; >>> + } >>> + >>> + /* invalid netdevsim id */ >>> + peer_bus_dev = nsim_bus_dev_get(id); >>> + if (!peer_bus_dev) >>> + return -EINVAL; >>> + >>> + ret = -EINVAL; >>> + /* cannot link to self */ >>> + nsim_dev_port = file->private_data; >>> + if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev && >>> + nsim_dev_port->port_index == port) >>> + goto out; >>> + >>> + peer_dev = dev_get_drvdata(&peer_bus_dev->dev); >> >> Again, no bus touching should be needed. (btw, this could be null is dev >> is not probed) > >That's fair, I can do a null check. > >> >> >>> + list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) { >>> + if (peer_dev_port->port_index != port) >>> + continue; >>> + rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); >>> + rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); >> >> What is stopping another cpu from setting different peer for the same >> port here, making a mess? > >Looking into RCU a bit more, you're right that it does not protect from >multiple writers. Would adding a lock (spinlock?) to nsim_dev and taking that >be sufficient here? > >Or what if I took rtnl_lock()? You have multiple choices how to handle this. > >> >> >>> + ret = count; >>> + goto out; >>> + } >>> + >>> +out: >>> + put_device(&peer_bus_dev->dev); >>> + return ret; >>> +} >>> + >>> +static const struct file_operations nsim_dev_peer_fops = { >>> + .open = simple_open, >>> + .read = nsim_dev_peer_read, >>> + .write = nsim_dev_peer_write, >>> + .llseek = generic_file_llseek, >>> + .owner = THIS_MODULE, >>> +}; >>> + >>> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, >>> struct nsim_dev_port *nsim_dev_port) >>> { >>> @@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, >>> } >>> debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); >>> >>> + debugfs_create_file("peer", 0600, nsim_dev_port->ddir, >>> + nsim_dev_port, &nsim_dev_peer_fops); >>> + >>> return 0; >>> } >>> >>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c >>> index aecaf5f44374..e290c54b0e70 100644 >>> --- a/drivers/net/netdevsim/netdev.c >>> +++ b/drivers/net/netdevsim/netdev.c >>> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) >>> ns->nsim_dev = nsim_dev; >>> ns->nsim_dev_port = nsim_dev_port; >>> ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; >>> + RCU_INIT_POINTER(ns->peer, NULL); >>> SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); >>> SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); >>> nsim_ethtool_init(ns); >>> @@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) >>> void nsim_destroy(struct netdevsim *ns) >>> { >>> struct net_device *dev = ns->netdev; >>> + struct netdevsim *peer; >>> >>> rtnl_lock(); >>> + peer = rtnl_dereference(ns->peer); >>> + RCU_INIT_POINTER(ns->peer, NULL); >>> unregister_netdevice(dev); >>> + if (peer) >>> + RCU_INIT_POINTER(peer->peer, NULL); >> >> What is stopping the another CPU from setting this back to this "ns"? >> Or what is stopping another netdevsim port from setting this ns while >> going away? >> >> Do you rely on RTNL_LOCK in any way (other then synchronize_net() in >> unlock())? If yes, looks wrong. >> >> This ns->peer update locking looks very broken to me :/ > >As above, would a spinlock on nsim_dev or taking rtnl_lock() in >nsim_dev_peer_write() resolve this? > >> >> >> >> >>> if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { >>> nsim_macsec_teardown(ns); >>> nsim_ipsec_teardown(ns); >>> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h >>> index 028c825b86db..61ac3a80cf9a 100644 >>> --- a/drivers/net/netdevsim/netdevsim.h >>> +++ b/drivers/net/netdevsim/netdevsim.h >>> @@ -125,6 +125,7 @@ struct netdevsim { >>> } udp_ports; >>> >>> struct nsim_ethtool ethtool; >>> + struct netdevsim __rcu *peer; >>> }; >>> >>> struct netdevsim * >>> @@ -415,5 +416,7 @@ struct nsim_bus_dev { >>> bool init; >>> }; >>> >>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id); >>> + >>> int nsim_bus_init(void); >>> void nsim_bus_exit(void); >>> -- >>> 2.39.3 >>>
On 2023-12-16 01:21, Jiri Pirko wrote: > Fri, Dec 15, 2023 at 08:13:45PM CET, dw@davidwei.uk wrote: >> On 2023-12-15 03:11, Jiri Pirko wrote: >>> Thu, Dec 14, 2023 at 10:24:40PM CET, dw@davidwei.uk wrote: >>>> Add a debugfs file in >>>> /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer >>>> >>>> Writing "M B" to this file will link port A of netdevsim N with port B of >>>> netdevsim M. >>>> >>>> Reading this file will return the linked netdevsim id and port, if any. >>>> >>>> Signed-off-by: David Wei <dw@davidwei.uk> >>>> --- >>>> drivers/net/netdevsim/bus.c | 17 ++++++ >>>> drivers/net/netdevsim/dev.c | 88 +++++++++++++++++++++++++++++++ >>>> drivers/net/netdevsim/netdev.c | 6 +++ >>>> drivers/net/netdevsim/netdevsim.h | 3 ++ >>>> 4 files changed, 114 insertions(+) >>>> >>>> diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c >>>> index bcbc1e19edde..1ef95661a3f5 100644 >>>> --- a/drivers/net/netdevsim/bus.c >>>> +++ b/drivers/net/netdevsim/bus.c >>>> @@ -323,6 +323,23 @@ static struct device_driver nsim_driver = { >>>> .owner = THIS_MODULE, >>>> }; >>>> >>>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id) >>> >>> This sounds definitelly incorrect. You should not need to touch bus.c >>> code. It arranges the bus and devices on it. The fact that a device is >>> probed or not is parallel to this. >>> >>> I think you need to maintain a separate list/xarray of netdevsim devices >>> probed by nsim_drv_probe() >> >> There is a 1:1 relationship between bus devices (nsim_bus_dev) and nsim devices > > Of course it is not. I thought I exaplained that. If you unbind (or not > bind at all), it is still in this list, however not probed. Right, I understand now thanks. The 1:1 relationship is initially true, but devices can be removed by unbinding their drivers. I tried manually unbinding using /sys/bus/netdevsim/drivers/netdevsim/unbind which removes a device but keeps the bus device. The current implementation is not resilient to this and I can trigger crashes e.g. by unbinding then trying to add a port. > > >> (nsim_dev). Adding a separate list for nsim devices seemed redundant to me when >> there is already a list for bus devices. > > Again, please don't call into bus.c here. Got it, I will maintain a separate list of bound devices. > > >> >>> >>> >>>> +{ >>>> + struct nsim_bus_dev *nsim_bus_dev; >>>> + >>>> + mutex_lock(&nsim_bus_dev_list_lock); >>>> + list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) { >>>> + if (nsim_bus_dev->dev.id == id) { >>>> + get_device(&nsim_bus_dev->dev); >>>> + mutex_unlock(&nsim_bus_dev_list_lock); >>>> + return nsim_bus_dev; >>>> + } >>>> + } >>>> + mutex_unlock(&nsim_bus_dev_list_lock); >>>> + >>>> + return NULL; >>>> +} >>>> + >>>> int nsim_bus_init(void) >>>> { >>>> int err; >>>> diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c >>>> index b4d3b9cde8bd..034145ba1861 100644 >>>> --- a/drivers/net/netdevsim/dev.c >>>> +++ b/drivers/net/netdevsim/dev.c >>>> @@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = { >>>> .owner = THIS_MODULE, >>>> }; >>>> >>>> +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data, >>>> + size_t count, loff_t *ppos) >>>> +{ >>>> + struct nsim_dev_port *nsim_dev_port; >>>> + struct netdevsim *peer; >>>> + unsigned int id, port; >>>> + char buf[23]; >>>> + ssize_t len; >>>> + >>>> + nsim_dev_port = file->private_data; >>>> + rcu_read_lock(); >>>> + peer = rcu_dereference(nsim_dev_port->ns->peer); >>>> + if (!peer) { >>>> + rcu_read_unlock(); >>>> + return 0; >>>> + } >>>> + >>>> + id = peer->nsim_bus_dev->dev.id; >>>> + port = peer->nsim_dev_port->port_index; >>>> + len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port); >>>> + >>>> + rcu_read_unlock(); >>>> + return simple_read_from_buffer(data, count, ppos, buf, len); >>>> +} >>>> + >>>> +static ssize_t nsim_dev_peer_write(struct file *file, >>>> + const char __user *data, >>>> + size_t count, loff_t *ppos) >>>> +{ >>>> + struct nsim_dev_port *nsim_dev_port, *peer_dev_port; >>>> + struct nsim_bus_dev *peer_bus_dev; >>>> + struct nsim_dev *peer_dev; >>>> + unsigned int id, port; >>>> + char buf[22]; >>>> + ssize_t ret; >>>> + >>>> + if (count >= sizeof(buf)) >>>> + return -ENOSPC; >>>> + >>>> + ret = copy_from_user(buf, data, count); >>>> + if (ret) >>>> + return -EFAULT; >>>> + buf[count] = '\0'; >>>> + >>>> + ret = sscanf(buf, "%u %u", &id, &port); >>>> + if (ret != 2) { >>>> + pr_err("Format for adding a peer is \"id port\" (uint uint)"); >>>> + return -EINVAL; >>>> + } >>>> + >>>> + /* invalid netdevsim id */ >>>> + peer_bus_dev = nsim_bus_dev_get(id); >>>> + if (!peer_bus_dev) >>>> + return -EINVAL; >>>> + >>>> + ret = -EINVAL; >>>> + /* cannot link to self */ >>>> + nsim_dev_port = file->private_data; >>>> + if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev && >>>> + nsim_dev_port->port_index == port) >>>> + goto out; >>>> + >>>> + peer_dev = dev_get_drvdata(&peer_bus_dev->dev); >>> >>> Again, no bus touching should be needed. (btw, this could be null is dev >>> is not probed) >> >> That's fair, I can do a null check. >> >>> >>> >>>> + list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) { >>>> + if (peer_dev_port->port_index != port) >>>> + continue; >>>> + rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); >>>> + rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); >>> >>> What is stopping another cpu from setting different peer for the same >>> port here, making a mess? >> >> Looking into RCU a bit more, you're right that it does not protect from >> multiple writers. Would adding a lock (spinlock?) to nsim_dev and taking that >> be sufficient here? >> >> Or what if I took rtnl_lock()? > > You have multiple choices how to handle this. I'll start with a spinlock but it would be good to know what the 'best' option is in this case. I don't have a well calibrated sense for it yet. > >> >>> >>> >>>> + ret = count; >>>> + goto out; >>>> + } >>>> + >>>> +out: >>>> + put_device(&peer_bus_dev->dev); >>>> + return ret; >>>> +} >>>> + >>>> +static const struct file_operations nsim_dev_peer_fops = { >>>> + .open = simple_open, >>>> + .read = nsim_dev_peer_read, >>>> + .write = nsim_dev_peer_write, >>>> + .llseek = generic_file_llseek, >>>> + .owner = THIS_MODULE, >>>> +}; >>>> + >>>> static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, >>>> struct nsim_dev_port *nsim_dev_port) >>>> { >>>> @@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, >>>> } >>>> debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); >>>> >>>> + debugfs_create_file("peer", 0600, nsim_dev_port->ddir, >>>> + nsim_dev_port, &nsim_dev_peer_fops); >>>> + >>>> return 0; >>>> } >>>> >>>> diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c >>>> index aecaf5f44374..e290c54b0e70 100644 >>>> --- a/drivers/net/netdevsim/netdev.c >>>> +++ b/drivers/net/netdevsim/netdev.c >>>> @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) >>>> ns->nsim_dev = nsim_dev; >>>> ns->nsim_dev_port = nsim_dev_port; >>>> ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; >>>> + RCU_INIT_POINTER(ns->peer, NULL); >>>> SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); >>>> SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); >>>> nsim_ethtool_init(ns); >>>> @@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) >>>> void nsim_destroy(struct netdevsim *ns) >>>> { >>>> struct net_device *dev = ns->netdev; >>>> + struct netdevsim *peer; >>>> >>>> rtnl_lock(); >>>> + peer = rtnl_dereference(ns->peer); >>>> + RCU_INIT_POINTER(ns->peer, NULL); >>>> unregister_netdevice(dev); >>>> + if (peer) >>>> + RCU_INIT_POINTER(peer->peer, NULL); >>> >>> What is stopping the another CPU from setting this back to this "ns"? >>> Or what is stopping another netdevsim port from setting this ns while >>> going away? >>> >>> Do you rely on RTNL_LOCK in any way (other then synchronize_net() in >>> unlock())? If yes, looks wrong. >>> >>> This ns->peer update locking looks very broken to me :/ >> >> As above, would a spinlock on nsim_dev or taking rtnl_lock() in >> nsim_dev_peer_write() resolve this? >> >>> >>> >>> >>> >>>> if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { >>>> nsim_macsec_teardown(ns); >>>> nsim_ipsec_teardown(ns); >>>> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h >>>> index 028c825b86db..61ac3a80cf9a 100644 >>>> --- a/drivers/net/netdevsim/netdevsim.h >>>> +++ b/drivers/net/netdevsim/netdevsim.h >>>> @@ -125,6 +125,7 @@ struct netdevsim { >>>> } udp_ports; >>>> >>>> struct nsim_ethtool ethtool; >>>> + struct netdevsim __rcu *peer; >>>> }; >>>> >>>> struct netdevsim * >>>> @@ -415,5 +416,7 @@ struct nsim_bus_dev { >>>> bool init; >>>> }; >>>> >>>> +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id); >>>> + >>>> int nsim_bus_init(void); >>>> void nsim_bus_exit(void); >>>> -- >>>> 2.39.3 >>>>
diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index bcbc1e19edde..1ef95661a3f5 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -323,6 +323,23 @@ static struct device_driver nsim_driver = { .owner = THIS_MODULE, }; +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id) +{ + struct nsim_bus_dev *nsim_bus_dev; + + mutex_lock(&nsim_bus_dev_list_lock); + list_for_each_entry(nsim_bus_dev, &nsim_bus_dev_list, list) { + if (nsim_bus_dev->dev.id == id) { + get_device(&nsim_bus_dev->dev); + mutex_unlock(&nsim_bus_dev_list_lock); + return nsim_bus_dev; + } + } + mutex_unlock(&nsim_bus_dev_list_lock); + + return NULL; +} + int nsim_bus_init(void) { int err; diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index b4d3b9cde8bd..034145ba1861 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -388,6 +388,91 @@ static const struct file_operations nsim_dev_rate_parent_fops = { .owner = THIS_MODULE, }; +static ssize_t nsim_dev_peer_read(struct file *file, char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev_port *nsim_dev_port; + struct netdevsim *peer; + unsigned int id, port; + char buf[23]; + ssize_t len; + + nsim_dev_port = file->private_data; + rcu_read_lock(); + peer = rcu_dereference(nsim_dev_port->ns->peer); + if (!peer) { + rcu_read_unlock(); + return 0; + } + + id = peer->nsim_bus_dev->dev.id; + port = peer->nsim_dev_port->port_index; + len = scnprintf(buf, sizeof(buf), "%u %u\n", id, port); + + rcu_read_unlock(); + return simple_read_from_buffer(data, count, ppos, buf, len); +} + +static ssize_t nsim_dev_peer_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) +{ + struct nsim_dev_port *nsim_dev_port, *peer_dev_port; + struct nsim_bus_dev *peer_bus_dev; + struct nsim_dev *peer_dev; + unsigned int id, port; + char buf[22]; + ssize_t ret; + + if (count >= sizeof(buf)) + return -ENOSPC; + + ret = copy_from_user(buf, data, count); + if (ret) + return -EFAULT; + buf[count] = '\0'; + + ret = sscanf(buf, "%u %u", &id, &port); + if (ret != 2) { + pr_err("Format for adding a peer is \"id port\" (uint uint)"); + return -EINVAL; + } + + /* invalid netdevsim id */ + peer_bus_dev = nsim_bus_dev_get(id); + if (!peer_bus_dev) + return -EINVAL; + + ret = -EINVAL; + /* cannot link to self */ + nsim_dev_port = file->private_data; + if (nsim_dev_port->ns->nsim_bus_dev == peer_bus_dev && + nsim_dev_port->port_index == port) + goto out; + + peer_dev = dev_get_drvdata(&peer_bus_dev->dev); + list_for_each_entry(peer_dev_port, &peer_dev->port_list, list) { + if (peer_dev_port->port_index != port) + continue; + rcu_assign_pointer(nsim_dev_port->ns->peer, peer_dev_port->ns); + rcu_assign_pointer(peer_dev_port->ns->peer, nsim_dev_port->ns); + ret = count; + goto out; + } + +out: + put_device(&peer_bus_dev->dev); + return ret; +} + +static const struct file_operations nsim_dev_peer_fops = { + .open = simple_open, + .read = nsim_dev_peer_read, + .write = nsim_dev_peer_write, + .llseek = generic_file_llseek, + .owner = THIS_MODULE, +}; + static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) { @@ -418,6 +503,9 @@ static int nsim_dev_port_debugfs_init(struct nsim_dev *nsim_dev, } debugfs_create_symlink("dev", nsim_dev_port->ddir, dev_link_name); + debugfs_create_file("peer", 0600, nsim_dev_port->ddir, + nsim_dev_port, &nsim_dev_peer_fops); + return 0; } diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index aecaf5f44374..e290c54b0e70 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -388,6 +388,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) ns->nsim_dev = nsim_dev; ns->nsim_dev_port = nsim_dev_port; ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; + RCU_INIT_POINTER(ns->peer, NULL); SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev); SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port); nsim_ethtool_init(ns); @@ -407,9 +408,14 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) void nsim_destroy(struct netdevsim *ns) { struct net_device *dev = ns->netdev; + struct netdevsim *peer; rtnl_lock(); + peer = rtnl_dereference(ns->peer); + RCU_INIT_POINTER(ns->peer, NULL); unregister_netdevice(dev); + if (peer) + RCU_INIT_POINTER(peer->peer, NULL); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { nsim_macsec_teardown(ns); nsim_ipsec_teardown(ns); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 028c825b86db..61ac3a80cf9a 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -125,6 +125,7 @@ struct netdevsim { } udp_ports; struct nsim_ethtool ethtool; + struct netdevsim __rcu *peer; }; struct netdevsim * @@ -415,5 +416,7 @@ struct nsim_bus_dev { bool init; }; +struct nsim_bus_dev *nsim_bus_dev_get(unsigned int id); + int nsim_bus_init(void); void nsim_bus_exit(void);
Add a debugfs file in /sys/kernel/debug/netdevsim/netdevsimN/ports/A/peer Writing "M B" to this file will link port A of netdevsim N with port B of netdevsim M. Reading this file will return the linked netdevsim id and port, if any. Signed-off-by: David Wei <dw@davidwei.uk> --- drivers/net/netdevsim/bus.c | 17 ++++++ drivers/net/netdevsim/dev.c | 88 +++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdev.c | 6 +++ drivers/net/netdevsim/netdevsim.h | 3 ++ 4 files changed, 114 insertions(+)