Message ID | 20230405063323.36270-1-glipus@gmail.com (mailing list archive) |
---|---|
State | RFC |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [RFC,v3,1/5] Add NDOs for hardware timestamp get/set | expand |
On Wed, Apr 05, 2023 at 12:33:23AM -0600, Maxim Georgiev wrote: > This patch makes VLAN subsystem to use the newly introduced > ndo_hwtstamp_get/set API to pass hw timestamp requests to > underlying NIC drivers in case if these drivers implement > ndo_hwtstamp_get/set functions. Otherwise VLAN┬Ěsubsystem Strange symbols (┬Ě). > falls back to calling ndo_eth_ioctl. > > Suggested-by: Vladimir Oltean <vladimir.oltean@nxp.com> > Signed-off-by: Maxim Georgiev <glipus@gmail.com> > --- > net/8021q/vlan_dev.c | 42 +++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 41 insertions(+), 1 deletion(-) > > diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c > index 5920544e93e8..66d54c610aa5 100644 > --- a/net/8021q/vlan_dev.c > +++ b/net/8021q/vlan_dev.c > @@ -353,6 +353,44 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p) > return 0; > } > > +static int vlan_dev_hwtstamp(struct net_device *dev, struct ifreq *ifr, int cmd) > +{ > + const struct net_device_ops *ops = dev->netdev_ops; > + struct kernel_hwtstamp_config kernel_config = {}; > + struct hwtstamp_config config; > + int err; > + > + if (!netif_device_present(dev)) > + return -ENODEV; > + > + if ((cmd == SIOCSHWTSTAMP && !ops->ndo_hwtstamp_set) || > + (cmd == SIOCGHWTSTAMP && !ops->ndo_hwtstamp_get)) { > + if (ops->ndo_eth_ioctl) { > + return ops->ndo_eth_ioctl(real_dev, &ifr, cmd); > + else > + return -EOPNOTSUPP; > + } > + > + kernel_config.ifr = ifr; > + if (cmd == SIOCSHWTSTAMP) { > + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) > + return -EFAULT; > + > + hwtstamp_config_to_kernel(&kernel_config, &config); > + err = ops->ndo_hwtstamp_set(dev, &kernel_config, NULL); > + } else if (cmd == SIOCGHWTSTAMP) { > + err = ops->ndo_hwtstamp_get(dev, &kernel_config, NULL); > + } > + > + if (err) > + return err; > + > + hwtstamp_kernel_to_config(&config, &kernel_config); > + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) > + return -EFAULT; > + return 0; > +} > + > static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) > { > struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; > @@ -368,10 +406,12 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) > if (!net_eq(dev_net(dev), dev_net(real_dev))) > break; > fallthrough; > + case SIOCGHWTSTAMP: > + err = vlan_dev_hwtstamp(real_dev, &ifrr, cmd); > + break; > case SIOCGMIIPHY: > case SIOCGMIIREG: > case SIOCSMIIREG: > - case SIOCGHWTSTAMP: I would recommend also making vlan_dev_hwtstamp() be called from the VLAN driver's ndo_hwtstamp_set() rather than from ndo_eth_ioctl(). My understanding of Jakub's suggestion to (temporarily) stuff ifr inside kernel_config was to do that from top-level net/core/dev_ioctl.c, not from the VLAN driver. > if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) > err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd); > break; > -- > 2.39.2 >
On Wed, Apr 5, 2023 at 6:26 AM Vladimir Oltean <vladimir.oltean@nxp.com> wrote: > > On Wed, Apr 05, 2023 at 12:33:23AM -0600, Maxim Georgiev wrote: > > This patch makes VLAN subsystem to use the newly introduced > > ndo_hwtstamp_get/set API to pass hw timestamp requests to > > underlying NIC drivers in case if these drivers implement > > ndo_hwtstamp_get/set functions. Otherwise VLAN┬Ěsubsystem > > Strange symbols (┬Ě). Bad copy-paste, sorry. Fixed. > > > falls back to calling ndo_eth_ioctl. > > > > Suggested-by: Vladimir Oltean <vladimir.oltean@nxp.com> > > Signed-off-by: Maxim Georgiev <glipus@gmail.com> > > --- > > net/8021q/vlan_dev.c | 42 +++++++++++++++++++++++++++++++++++++++++- > > 1 file changed, 41 insertions(+), 1 deletion(-) > > > > diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c > > index 5920544e93e8..66d54c610aa5 100644 > > --- a/net/8021q/vlan_dev.c > > +++ b/net/8021q/vlan_dev.c > > @@ -353,6 +353,44 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p) > > return 0; > > } > > > > +static int vlan_dev_hwtstamp(struct net_device *dev, struct ifreq *ifr, int cmd) > > +{ > > + const struct net_device_ops *ops = dev->netdev_ops; > > + struct kernel_hwtstamp_config kernel_config = {}; > > + struct hwtstamp_config config; > > + int err; > > + > > + if (!netif_device_present(dev)) > > + return -ENODEV; > > + > > + if ((cmd == SIOCSHWTSTAMP && !ops->ndo_hwtstamp_set) || > > + (cmd == SIOCGHWTSTAMP && !ops->ndo_hwtstamp_get)) { > > + if (ops->ndo_eth_ioctl) { > > + return ops->ndo_eth_ioctl(real_dev, &ifr, cmd); > > + else > > + return -EOPNOTSUPP; > > + } > > + > > + kernel_config.ifr = ifr; > > + if (cmd == SIOCSHWTSTAMP) { > > + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) > > + return -EFAULT; > > + > > + hwtstamp_config_to_kernel(&kernel_config, &config); > > + err = ops->ndo_hwtstamp_set(dev, &kernel_config, NULL); > > + } else if (cmd == SIOCGHWTSTAMP) { > > + err = ops->ndo_hwtstamp_get(dev, &kernel_config, NULL); > > + } > > + > > + if (err) > > + return err; > > + > > + hwtstamp_kernel_to_config(&config, &kernel_config); > > + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) > > + return -EFAULT; > > + return 0; > > +} > > + > > static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) > > { > > struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; > > @@ -368,10 +406,12 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) > > if (!net_eq(dev_net(dev), dev_net(real_dev))) > > break; > > fallthrough; > > + case SIOCGHWTSTAMP: > > + err = vlan_dev_hwtstamp(real_dev, &ifrr, cmd); > > + break; > > case SIOCGMIIPHY: > > case SIOCGMIIREG: > > case SIOCSMIIREG: > > - case SIOCGHWTSTAMP: > > I would recommend also making vlan_dev_hwtstamp() be called from the > VLAN driver's ndo_hwtstamp_set() rather than from ndo_eth_ioctl(). Vladimir, could you please elaborate here a bit? Are you saying that I should go all the way with vlan NDO conversion, implement ndo_hwtstamp_get/set() for vlan, and stop handling SIOCGHWTSTAMP/SIOCSHWTSTAMP in vlan_dev_ioctl()? > > My understanding of Jakub's suggestion to (temporarily) stuff ifr > inside kernel_config was to do that from top-level net/core/dev_ioctl.c, > not from the VLAN driver. [RFC PATCH v3 2/5] in this patch stack changes net/core/dev_ioctl.c to insert ifr inside kernel_config. I assumed that I should do it here too so underlying drivers could rely on ifr pointer in kernel_config being always initialized. If the plan is to stop supporting SIOCGHWTSTAMP/SIOCSHWTSTAMP in vlan_dev_ioctl() all together and move the hw timestamp handling logic to vlan_get/set_hwtstamp() functions, then this ifr initialization code will be removed from net/8021q/vlan_dev.c anyway. > > > if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) > > err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd); > > break; > > -- > > 2.39.2 > >
On Wed, Apr 05, 2023 at 10:19:11AM -0600, Max Georgiev wrote: > > I would recommend also making vlan_dev_hwtstamp() be called from the > > VLAN driver's ndo_hwtstamp_set() rather than from ndo_eth_ioctl(). > > Vladimir, could you please elaborate here a bit? > Are you saying that I should go all the way with vlan NDO conversion, > implement ndo_hwtstamp_get/set() for vlan, and stop handling > SIOCGHWTSTAMP/SIOCSHWTSTAMP in vlan_dev_ioctl()? Yes, sorry for being unclear. > > My understanding of Jakub's suggestion to (temporarily) stuff ifr > > inside kernel_config was to do that from top-level net/core/dev_ioctl.c, > > not from the VLAN driver. > > [RFC PATCH v3 2/5] in this patch stack changes net/core/dev_ioctl.c > to insert ifr inside kernel_config. I assumed that I should do it here too > so underlying drivers could rely on ifr pointer in kernel_config being > always initialized. > If the plan is to stop supporting SIOCGHWTSTAMP/SIOCSHWTSTAMP > in vlan_dev_ioctl() all together and move the hw timestamp handling > logic to vlan_get/set_hwtstamp() functions, then this ifr initialization > code will be removed from net/8021q/vlan_dev.c anyway. Yes, correct, dev_set_hwtstamp() should provide it. There's a small thing I don't like about stuffing "ifr" inside struct kernel_hwtstamp_config, and that's that some drivers keep the last configuration privately using memcpy(). If we put "ifr" there, they will practically have access to a stale pointer, because "ifr" loses meaning once the ioctl syscall is over. Since we don't know how long it will take until the ndo_hwtstamp_set() conversion is complete (experience says: possibly indefinitely), it would be good if this wasn't possible, because who knows what ideas people might get to do with it. Options to avoid it are: - keep doing what you're doing - let drivers memcpy() the struct hwtstamp_config and not the struct kernel_hwtstamp_config. - pass the ifr as yet another argument to ndo_hwtstamp_set(), and don't stuff it inside struct kernel_hwtstamp_config. Neither of these is particularly great, because at the end of the conversion, some extra cleanup will be required to fix up the API again (either to stop all drivers from using struct hwtstamp_config, or to stop passing the argument which is no longer used by anybody). I think, personally, I'd opt for the first bullet, keep doing what you're doing. It should require a bit less cleanup, since not all drivers do the memcpy() thing.
On Wed, 5 Apr 2023 00:33:23 -0600 Maxim Georgiev wrote: > +static int vlan_dev_hwtstamp(struct net_device *dev, struct ifreq *ifr, int cmd) > +{ > + const struct net_device_ops *ops = dev->netdev_ops; > + struct kernel_hwtstamp_config kernel_config = {}; > + struct hwtstamp_config config; > + int err; > + > + if (!netif_device_present(dev)) > + return -ENODEV; > + > + if ((cmd == SIOCSHWTSTAMP && !ops->ndo_hwtstamp_set) || > + (cmd == SIOCGHWTSTAMP && !ops->ndo_hwtstamp_get)) { > + if (ops->ndo_eth_ioctl) { > + return ops->ndo_eth_ioctl(real_dev, &ifr, cmd); > + else > + return -EOPNOTSUPP; > + } > + > + kernel_config.ifr = ifr; > + if (cmd == SIOCSHWTSTAMP) { > + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) > + return -EFAULT; > + > + hwtstamp_config_to_kernel(&kernel_config, &config); > + err = ops->ndo_hwtstamp_set(dev, &kernel_config, NULL); > + } else if (cmd == SIOCGHWTSTAMP) { > + err = ops->ndo_hwtstamp_get(dev, &kernel_config, NULL); > + } > + > + if (err) > + return err; > + > + hwtstamp_kernel_to_config(&config, &kernel_config); > + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) > + return -EFAULT; > + return 0; > +} This needs to live in the core. I think the real_dev is a lower of the vlan device? All the vlan driver should do is attach the generic helper: .ndo_hwtstamp_get = generic_hwtstamp_get_lower, and the same for set. No?
On Wed, Apr 05, 2023 at 09:42:10AM -0700, Jakub Kicinski wrote: > This needs to live in the core. I think the real_dev is a lower of the > vlan device? All the vlan driver should do is attach the generic helper: > > .ndo_hwtstamp_get = generic_hwtstamp_get_lower, > > and the same for set. No? The goal would be for macvlan and bonding to use the same generic_hwtstamp_get_lower()? How would the generic helper get to bond_option_active_slave_get_rcu(), vlan_dev_priv(dev)->real_dev, macvlan_dev_real_dev(dev)? Perhaps a generic_hwtstamp_get_lower() that takes the lower as argument, and 3 small wrappers in vlan, macvlan, bonding which identify that lower?
On Wed, 5 Apr 2023 20:03:22 +0300 Vladimir Oltean wrote: > The goal would be for macvlan and bonding to use the same generic_hwtstamp_get_lower()? > How would the generic helper get to bond_option_active_slave_get_rcu(), > vlan_dev_priv(dev)->real_dev, macvlan_dev_real_dev(dev)? > > Perhaps a generic_hwtstamp_get_lower() that takes the lower as argument, > and 3 small wrappers in vlan, macvlan, bonding which identify that lower? The bonding situation is probably more complex, I haven't looked, but for *vlans we can just get the lower from netdev linkage, no? Sure the drivers have their own pointers for convenience and with their own lifetime rules but under rtnl lock lower/upper should work...
On Wed, Apr 05, 2023 at 10:13:23AM -0700, Jakub Kicinski wrote: > On Wed, 5 Apr 2023 20:03:22 +0300 Vladimir Oltean wrote: > > The goal would be for macvlan and bonding to use the same generic_hwtstamp_get_lower()? > > How would the generic helper get to bond_option_active_slave_get_rcu(), > > vlan_dev_priv(dev)->real_dev, macvlan_dev_real_dev(dev)? > > > > Perhaps a generic_hwtstamp_get_lower() that takes the lower as argument, > > and 3 small wrappers in vlan, macvlan, bonding which identify that lower? > > The bonding situation is probably more complex, I haven't looked, > but for *vlans we can just get the lower from netdev linkage, no? > Sure the drivers have their own pointers for convenience and with > their own lifetime rules but under rtnl lock lower/upper should work... So what do you suggest doing with bonding, then? Not use the generic helper at all? It's not that more complex, btw. Here are the differences: - it changes ifrr.ifr_name with real_dev->name for a reason I can't really determine or find in commit 94dd016ae538 ("bond: pass get_ts_info and SIOC[SG]HWTSTAMP ioctl to active device"). Since vlan and macvlan don't do it, and operate with lower drivers from the same pool as bonding, I'd imagine it's not needed. - it requires cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX to be set in SET requests - it sets cfg.flags | HWTSTAMP_FLAG_BONDED_PHC_INDEX in GET responses Notably, something I would have expected it does but it doesn't do is it doesn't apply the same hwtstamping config to the lower interface that isn't active, when the switchover happens. Presumably user space does that. So it's not that much different.
On Wed, Apr 05, 2023 at 08:28:40PM +0300, Vladimir Oltean wrote: > - it changes ifrr.ifr_name with real_dev->name for a reason I can't > really determine or find in commit 94dd016ae538 ("bond: pass > get_ts_info and SIOC[SG]HWTSTAMP ioctl to active device"). Since vlan > and macvlan don't do it, and operate with lower drivers from the same > pool as bonding, I'd imagine it's not needed. Ah, they do, they do, my bad. So it's down to 2 differences, which can be handled easily with the wrapper function model - the bonding wrapper checks, or sets, the HWTSTAMP_FLAG_BONDED_PHC_INDEX flag.
On Wed, 5 Apr 2023 20:28:40 +0300 Vladimir Oltean wrote: > So what do you suggest doing with bonding, then? Not use the generic > helper at all? It'd seem most natural to me to split the generic "descend" helper into two functions, one which retrieves the lower and one which does the appropriate calling dance (ndo vs ioctl, and DSA, which I guess is now gone?). The latter could be used for the first descend as well I'd presume. And it can be exported for the use of more complex drivers like bonding which want to walk the lowers themselves. > - it requires cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX to be set in > SET requests > > - it sets cfg.flags | HWTSTAMP_FLAG_BONDED_PHC_INDEX in GET responses IIRC that was to indicate to user space that the real PHC may change for this netdev so it needs to pay attention to netlink notifications. Shouldn't apply to *vlans? > Notably, something I would have expected it does but it doesn't do is it > doesn't apply the same hwtstamping config to the lower interface that > isn't active, when the switchover happens. Presumably user space does that. Yes, user space must be involved anyway, because the entire clock will change. IMHO implementing the pass thru for timestamping requests on bonding is checkbox engineering, kernel can't make it work transparently. But nobody else spoke up when it was proposed so... > So it's not that much different.
On Wed, Apr 05, 2023 at 10:42:53AM -0700, Jakub Kicinski wrote: > On Wed, 5 Apr 2023 20:28:40 +0300 Vladimir Oltean wrote: > > So what do you suggest doing with bonding, then? Not use the generic > > helper at all? > > It'd seem most natural to me to split the generic "descend" helper into > two functions, one which retrieves the lower and one which does the > appropriate calling dance (ndo vs ioctl, and DSA, which I guess is now > gone?). There's nothing DSA-related to be done. DSA masters can't be lowers of any other virtual interface kinds except bridge or bonding/team, and: - bridge doesn't support hwtstamping - bonding is also DSA master when it has a DSA master as lower, so the DSA master restriction has already run once - on the bonding device itself > The latter could be used for the first descend as well I'd presume. > And it can be exported for the use of more complex drivers like > bonding which want to walk the lowers themselves. > > > - it requires cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX to be set in > > SET requests > > > > - it sets cfg.flags | HWTSTAMP_FLAG_BONDED_PHC_INDEX in GET responses > > IIRC that was to indicate to user space that the real PHC may change > for this netdev so it needs to pay attention to netlink notifications. > Shouldn't apply to *vlans? No, this shouldn't apply to *vlans, but I didn't suggest that it should. I don't think my proposal was clear enough, so here's some code (untested, written in email client). static int macvlan_hwtstamp_get(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { struct net_device *real_dev = macvlan_dev_real_dev(dev); return generic_hwtstamp_get_lower(real_dev, cfg, extack); } static int macvlan_hwtstamp_set(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { struct net_device *real_dev = macvlan_dev_real_dev(dev); return generic_hwtstamp_set_lower(real_dev, cfg, extack); } static int vlan_hwtstamp_get(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; return generic_hwtstamp_get_lower(real_dev, cfg, extack); } static int vlan_hwtstamp_set(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; return generic_hwtstamp_set_lower(real_dev, cfg, extack); } static int bond_hwtstamp_get(struct net_device *bond_dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); struct net_device *real_dev = bond_option_active_slave_get_rcu(bond); int err; if (!real_dev) return -EOPNOTSUPP; err = generic_hwtstamp_get_lower(real_dev, cfg, extack); if (err) return err; /* Set the BOND_PHC_INDEX flag to notify user space */ cfg->flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX; return 0; } static int bond_hwtstamp_set(struct net_device *bond_dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); struct net_device *real_dev = bond_option_active_slave_get_rcu(bond); int err; if (!real_dev) return -EOPNOTSUPP; if (!(cfg->flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX)) return -EOPNOTSUPP; return generic_hwtstamp_set_lower(real_dev, cfg, extack); } Doesn't seem in any way necessary to complicate things with the netdev adjacence lists? > Yes, user space must be involved anyway, because the entire clock will > change. IMHO implementing the pass thru for timestamping requests on > bonding is checkbox engineering, kernel can't make it work > transparently. But nobody else spoke up when it was proposed so... ok, but that's a bit beside the point here.
On Wed, 5 Apr 2023 21:01:21 +0300 Vladimir Oltean wrote: > - bonding is also DSA master when it has a DSA master as lower, so the > DSA master restriction has already run once - on the bonding device > itself Huh, didn't know that. > > The latter could be used for the first descend as well I'd presume. > > And it can be exported for the use of more complex drivers like > > bonding which want to walk the lowers themselves. > > > > > - it requires cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX to be set in > > > SET requests > > > > > > - it sets cfg.flags | HWTSTAMP_FLAG_BONDED_PHC_INDEX in GET responses > > > > IIRC that was to indicate to user space that the real PHC may change > > for this netdev so it needs to pay attention to netlink notifications. > > Shouldn't apply to *vlans? > > No, this shouldn't apply to *vlans, but I didn't suggest that it should. Good, so if we just target *vlans we don't have to worry. > I don't think my proposal was clear enough, so here's some code > (untested, written in email client). > > static int macvlan_hwtstamp_get(struct net_device *dev, > struct kernel_hwtstamp_config *cfg, > struct netlink_ext_ack *extack) > { > struct net_device *real_dev = macvlan_dev_real_dev(dev); > > return generic_hwtstamp_get_lower(real_dev, cfg, extack); > } > > static int macvlan_hwtstamp_set(struct net_device *dev, > struct kernel_hwtstamp_config *cfg, > struct netlink_ext_ack *extack) > { > struct net_device *real_dev = macvlan_dev_real_dev(dev); > > return generic_hwtstamp_set_lower(real_dev, cfg, extack); > } > > static int vlan_hwtstamp_get(struct net_device *dev, > struct kernel_hwtstamp_config *cfg, > struct netlink_ext_ack *extack) > { > struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; > > return generic_hwtstamp_get_lower(real_dev, cfg, extack); > } > > static int vlan_hwtstamp_set(struct net_device *dev, > struct kernel_hwtstamp_config *cfg, > struct netlink_ext_ack *extack) > { > struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; > > return generic_hwtstamp_set_lower(real_dev, cfg, extack); > } I got that, but why wouldn't this not be better, as it avoids the 3 driver stubs? (also written in the MUA) int net_lower_hwtstamp_set(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack) { struct list_head *iter = dev->adj_list.lower.next; struct net_device *lower; lower = netdev_lower_get_next(dev, &iter); return generic_hwtstamp_set_lower(lower, cfg, extack); } > static int bond_hwtstamp_get(struct net_device *bond_dev, > struct kernel_hwtstamp_config *cfg, > struct netlink_ext_ack *extack) > { > struct bonding *bond = netdev_priv(bond_dev); > struct net_device *real_dev = bond_option_active_slave_get_rcu(bond); > int err; > > if (!real_dev) > return -EOPNOTSUPP; > > err = generic_hwtstamp_get_lower(real_dev, cfg, extack); > if (err) > return err; > > /* Set the BOND_PHC_INDEX flag to notify user space */ > cfg->flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX; > > return 0; > } > > static int bond_hwtstamp_set(struct net_device *bond_dev, > struct kernel_hwtstamp_config *cfg, > struct netlink_ext_ack *extack) > { > struct bonding *bond = netdev_priv(bond_dev); > struct net_device *real_dev = bond_option_active_slave_get_rcu(bond); > int err; > > if (!real_dev) > return -EOPNOTSUPP; > > if (!(cfg->flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX)) > return -EOPNOTSUPP; > > return generic_hwtstamp_set_lower(real_dev, cfg, extack); > } > > Doesn't seem in any way necessary to complicate things with the netdev > adjacence lists? What is the complication? We can add a "get first" helper maybe to hide the oddities of the linking. > > Yes, user space must be involved anyway, because the entire clock will > > change. IMHO implementing the pass thru for timestamping requests on > > bonding is checkbox engineering, kernel can't make it work > > transparently. But nobody else spoke up when it was proposed so... > > ok, but that's a bit beside the point here. You cut off the quote it was responding to so IDK if it is.
On Wed, Apr 5, 2023 at 6:00 PM Jakub Kicinski <kuba@kernel.org> wrote: > > On Wed, 5 Apr 2023 21:01:21 +0300 Vladimir Oltean wrote: > > - bonding is also DSA master when it has a DSA master as lower, so the > > DSA master restriction has already run once - on the bonding device > > itself > > Huh, didn't know that. > > > > The latter could be used for the first descend as well I'd presume. > > > And it can be exported for the use of more complex drivers like > > > bonding which want to walk the lowers themselves. > > > > > > > - it requires cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX to be set in > > > > SET requests > > > > > > > > - it sets cfg.flags | HWTSTAMP_FLAG_BONDED_PHC_INDEX in GET responses > > > > > > IIRC that was to indicate to user space that the real PHC may change > > > for this netdev so it needs to pay attention to netlink notifications. > > > Shouldn't apply to *vlans? > > > > No, this shouldn't apply to *vlans, but I didn't suggest that it should. > > Good, so if we just target *vlans we don't have to worry. > > > I don't think my proposal was clear enough, so here's some code > > (untested, written in email client). > > > > static int macvlan_hwtstamp_get(struct net_device *dev, > > struct kernel_hwtstamp_config *cfg, > > struct netlink_ext_ack *extack) > > { > > struct net_device *real_dev = macvlan_dev_real_dev(dev); > > > > return generic_hwtstamp_get_lower(real_dev, cfg, extack); > > } > > > > static int macvlan_hwtstamp_set(struct net_device *dev, > > struct kernel_hwtstamp_config *cfg, > > struct netlink_ext_ack *extack) > > { > > struct net_device *real_dev = macvlan_dev_real_dev(dev); > > > > return generic_hwtstamp_set_lower(real_dev, cfg, extack); > > } > > > > static int vlan_hwtstamp_get(struct net_device *dev, > > struct kernel_hwtstamp_config *cfg, > > struct netlink_ext_ack *extack) > > { > > struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; > > > > return generic_hwtstamp_get_lower(real_dev, cfg, extack); > > } > > > > static int vlan_hwtstamp_set(struct net_device *dev, > > struct kernel_hwtstamp_config *cfg, > > struct netlink_ext_ack *extack) > > { > > struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; > > > > return generic_hwtstamp_set_lower(real_dev, cfg, extack); > > } > > I got that, but why wouldn't this not be better, as it avoids > the 3 driver stubs? (also written in the MUA) > > int net_lower_hwtstamp_set(struct net_device *dev, > struct kernel_hwtstamp_config *cfg, > struct netlink_ext_ack *extack) > { > struct list_head *iter = dev->adj_list.lower.next; > struct net_device *lower; > > lower = netdev_lower_get_next(dev, &iter); > return generic_hwtstamp_set_lower(lower, cfg, extack); > } > > > static int bond_hwtstamp_get(struct net_device *bond_dev, > > struct kernel_hwtstamp_config *cfg, > > struct netlink_ext_ack *extack) > > { > > struct bonding *bond = netdev_priv(bond_dev); > > struct net_device *real_dev = bond_option_active_slave_get_rcu(bond); > > int err; > > > > if (!real_dev) > > return -EOPNOTSUPP; > > > > err = generic_hwtstamp_get_lower(real_dev, cfg, extack); > > if (err) > > return err; > > > > /* Set the BOND_PHC_INDEX flag to notify user space */ > > cfg->flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX; > > > > return 0; > > } > > > > static int bond_hwtstamp_set(struct net_device *bond_dev, > > struct kernel_hwtstamp_config *cfg, > > struct netlink_ext_ack *extack) > > { > > struct bonding *bond = netdev_priv(bond_dev); > > struct net_device *real_dev = bond_option_active_slave_get_rcu(bond); > > int err; > > > > if (!real_dev) > > return -EOPNOTSUPP; > > > > if (!(cfg->flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX)) > > return -EOPNOTSUPP; > > > > return generic_hwtstamp_set_lower(real_dev, cfg, extack); > > } > > > > Doesn't seem in any way necessary to complicate things with the netdev > > adjacence lists? > > What is the complication? We can add a "get first" helper maybe to hide > the oddities of the linking. > > > > Yes, user space must be involved anyway, because the entire clock will > > > change. IMHO implementing the pass thru for timestamping requests on > > > bonding is checkbox engineering, kernel can't make it work > > > transparently. But nobody else spoke up when it was proposed so... > > > > ok, but that's a bit beside the point here. > > You cut off the quote it was responding to so IDK if it is. I tried my best to follow the discussion, and convert it to compilable code. Here is what I have in mind for generic_hwtstamp_get_lower(): int generic_hwtstamp_get_lower(struct net_dev *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; struct hwtstamp_config cfg; int err; if (!netif_device_present(dev)) return -ENODEV; if (ops->ndo_hwtstamp_get) return ops->ndo_hwtstamp_get(dev, cfg, extack); if (!cfg->ifr) return -EOPNOTSUPP; err = dev_eth_ioctl(dev, cfg->ifr, SIOCGHWTSTAMP); if (err) return err; if (copy_from_user(&cfg, cfg->ifr->ifr_data, sizeof(cfg))) return -EFAULT; hwtstamp_config_to_kernel(kernel_cfg, &cfg); return 0; } It looks like there is a possibility that the returned hwtstamp_config structure will be copied twice to ifr and copied once from ifr on the return path in case if the underlying driver does not implement ndo_hwtstamp_get(): - the underlying driver calls copy_to_user() inside its ndo_eth_ioctl() implementation to return the data to generic_hwtstamp_get_lower(); - then generic_hwtstamp_get_lower() calls copy_from_user() to copy it back out of the ifr to kernel_hwtstamp_config structure; - then dev_get_hwtstamp() calls copy_to_user() again to update the same ifr with the same data the ifr already contains. Should we consider this acceptable?
On Thu, Apr 06, 2023 at 12:21:37AM -0600, Max Georgiev wrote: > I tried my best to follow the discussion, and convert it to compilable code. > Here is what I have in mind for generic_hwtstamp_get_lower(): > > int generic_hwtstamp_get_lower(struct net_dev *dev, > struct kernel_hwtstamp_config *kernel_cfg, > struct netlink_ext_ack *extack) > { > const struct net_device_ops *ops = dev->netdev_ops; > struct hwtstamp_config cfg; > int err; > > if (!netif_device_present(dev)) > return -ENODEV; > > if (ops->ndo_hwtstamp_get) > return ops->ndo_hwtstamp_get(dev, cfg, extack); > > if (!cfg->ifr) > return -EOPNOTSUPP; > > err = dev_eth_ioctl(dev, cfg->ifr, SIOCGHWTSTAMP); > if (err) > return err; > > if (copy_from_user(&cfg, cfg->ifr->ifr_data, sizeof(cfg))) > return -EFAULT; > > hwtstamp_config_to_kernel(kernel_cfg, &cfg); > > return 0; > } Side note, it doesn't look like this code is particularly compilable either - "cfg" is used in some places instead of "kernel_cfg". > > It looks like there is a possibility that the returned hwtstamp_config structure > will be copied twice to ifr and copied once from ifr on the return path > in case if the underlying driver does not implement ndo_hwtstamp_get(): > - the underlying driver calls copy_to_user() inside its ndo_eth_ioctl() > implementation to return the data to generic_hwtstamp_get_lower(); > - then generic_hwtstamp_get_lower() calls copy_from_user() to copy it > back out of the ifr to kernel_hwtstamp_config structure; > - then dev_get_hwtstamp() calls copy_to_user() again to update > the same ifr with the same data the ifr already contains. > > Should we consider this acceptable? Thanks for laying this out. I guess with a table it's going to be clearer, so to summarize, I believe this is the status: Assuming we convert *vlan to ndo_hwtstamp_set(): =================== If the vlan driver is converted to ndo_hwtstamp_set() and the real_dev driver uses ndo_eth_ioctl(), we have: - one copy_from_user() in dev_set_hwtstamp() - one copy_from_user() in the real_dev's ndo_eth_ioctl() - one copy_to_user() in the real_dev's ndo_eth_ioctl() - one copy_from_user() in generic_hwtstamp_get_lower() - one copy_to_user() in dev_set_hwtstamp() If the vlan driver is converted to ndo_hwtstamp_set() and the real_dev is converted too, we have: - one copy_from_user() in dev_set_hwtstamp() - one copy_to_user() in dev_set_hwtstamp() =================== Assuming we don't convert *vlan to ndo_hwtstamp_set(): =================== If the vlan driver isn't converted to ndo_hwtstamp_set() and the real_dev driver isn't converted either, we have: - one copy_from_user() in dev_set_hwtstamp() - one copy_from_user() in the real_dev's ndo_eth_ioctl() - one copy_to_user() in the real_dev's ndo_eth_ioctl() If the vlan driver isn't converted to ndo_hwtstamp_set(), but the real_dev driver is, we have: - one copy_from_user() in dev_set_hwtstamp() - one copy_from_user() in the vlan's ndo_eth_ioctl() - one copy_to_user() in the vlan's ndo_eth_ioctl() =================== So between converting and not converting the *vlans to ndo_hwtstamp_set(), the worst case is going to be worse (with a mix of new API in *vlan and old API in real_dev) and the best case is going to be better (with new API in both *vlan and real_dev). OTOH, with old API in *vlan, the number of copies to/from the user buffer is going to be constant at 3, which is not the best, not the worst. I guess the data indicates that we should convert the *vlans to ndo_hwtstamp_set() at the very end of the process, and for now, just make them compatible with a real_dev that uses the new API? Note that I haven't done the math for the "get" operation yet, but I believe it to be similar.
On Thu, Apr 6, 2023 at 9:02 AM Vladimir Oltean <vladimir.oltean@nxp.com> wrote: > > On Thu, Apr 06, 2023 at 12:21:37AM -0600, Max Georgiev wrote: > > I tried my best to follow the discussion, and convert it to compilable code. > > Here is what I have in mind for generic_hwtstamp_get_lower(): > > > > int generic_hwtstamp_get_lower(struct net_dev *dev, > > struct kernel_hwtstamp_config *kernel_cfg, > > struct netlink_ext_ack *extack) > > { > > const struct net_device_ops *ops = dev->netdev_ops; > > struct hwtstamp_config cfg; > > int err; > > > > if (!netif_device_present(dev)) > > return -ENODEV; > > > > if (ops->ndo_hwtstamp_get) > > return ops->ndo_hwtstamp_get(dev, cfg, extack); > > > > if (!cfg->ifr) > > return -EOPNOTSUPP; > > > > err = dev_eth_ioctl(dev, cfg->ifr, SIOCGHWTSTAMP); > > if (err) > > return err; > > > > if (copy_from_user(&cfg, cfg->ifr->ifr_data, sizeof(cfg))) > > return -EFAULT; > > > > hwtstamp_config_to_kernel(kernel_cfg, &cfg); > > > > return 0; > > } > > Side note, it doesn't look like this code is particularly compilable > either - "cfg" is used in some places instead of "kernel_cfg". That's true, this code wouldn't compile. I copied it here to illustrate the potentially concerning logic. Thank you for pointing this out though! > > > > > It looks like there is a possibility that the returned hwtstamp_config structure > > will be copied twice to ifr and copied once from ifr on the return path > > in case if the underlying driver does not implement ndo_hwtstamp_get(): > > - the underlying driver calls copy_to_user() inside its ndo_eth_ioctl() > > implementation to return the data to generic_hwtstamp_get_lower(); > > - then generic_hwtstamp_get_lower() calls copy_from_user() to copy it > > back out of the ifr to kernel_hwtstamp_config structure; > > - then dev_get_hwtstamp() calls copy_to_user() again to update > > the same ifr with the same data the ifr already contains. > > > > Should we consider this acceptable? > > Thanks for laying this out. I guess with a table it's going to be > clearer, so to summarize, I believe this is the status: > > Assuming we convert *vlan to ndo_hwtstamp_set(): > > =================== > > If the vlan driver is converted to ndo_hwtstamp_set() and the real_dev > driver uses ndo_eth_ioctl(), we have: > - one copy_from_user() in dev_set_hwtstamp() > - one copy_from_user() in the real_dev's ndo_eth_ioctl() > - one copy_to_user() in the real_dev's ndo_eth_ioctl() > - one copy_from_user() in generic_hwtstamp_get_lower() > - one copy_to_user() in dev_set_hwtstamp() > > If the vlan driver is converted to ndo_hwtstamp_set() and the real_dev > is converted too, we have: > - one copy_from_user() in dev_set_hwtstamp() > - one copy_to_user() in dev_set_hwtstamp() > > =================== > > Assuming we don't convert *vlan to ndo_hwtstamp_set(): > > =================== > > If the vlan driver isn't converted to ndo_hwtstamp_set() and the > real_dev driver isn't converted either, we have: > - one copy_from_user() in dev_set_hwtstamp() > - one copy_from_user() in the real_dev's ndo_eth_ioctl() > - one copy_to_user() in the real_dev's ndo_eth_ioctl() > > If the vlan driver isn't converted to ndo_hwtstamp_set(), but the > real_dev driver is, we have: > - one copy_from_user() in dev_set_hwtstamp() > - one copy_from_user() in the vlan's ndo_eth_ioctl() > - one copy_to_user() in the vlan's ndo_eth_ioctl() > > =================== > > So between converting and not converting the *vlans to ndo_hwtstamp_set(), > the worst case is going to be worse (with a mix of new API in *vlan and > old API in real_dev) and the best case is going to be better (with new > API in both *vlan and real_dev). OTOH, with old API in *vlan, the number > of copies to/from the user buffer is going to be constant at 3, which is > not the best, not the worst. > > I guess the data indicates that we should convert the *vlans to > ndo_hwtstamp_set() at the very end of the process, and for now, just > make them compatible with a real_dev that uses the new API? > > Note that I haven't done the math for the "get" operation yet, but I > believe it to be similar. Thank you for putting this overhead tracking table together! Here is my guess on how this accounting would look like for the "get" codepath: Assuming we convert *vlan to ndo_hwtstamp_set(): =================== If the vlan driver is converted to ndo_hwtstamp_get() and the real_dev driver uses ndo_eth_ioctl(), we have: - one copy_to_user() in the real_dev's ndo_eth_ioctl() - one copy_from_user() in generic_hwtstamp_get_lower() - one copy_to_user() in dev_get_hwtstamp() If the vlan driver is converted to ndo_hwtstamp_get() and the real_dev is converted too, we have: - one copy_to_user() in dev_get_hwtstamp() =================== Assuming we don't convert *vlan to ndo_hwtstamp_get(): =================== If the vlan driver isn't converted to ndo_hwtstamp_get() and the real_dev driver isn't converted either, we have: - one copy_to_user() in the real_dev's ndo_eth_ioctl() If the vlan driver isn't converted to ndo_hwtstamp_get(), but the real_dev driver is, we have: - one copy_to_user() in the vlan's ndo_eth_ioctl() =================== Adding real_dev->ndo_hwtstamp_get/set() support to vlan_eth_ioctl() and holding back with ndo_hwtstamp_get/set() implementation in vlan code looks like a winner again. If I may, there are other ways to work around this inefficiency. Since kernel_hwtstamp_config was meant to be easily extendable, we can abuse it and add a flag field there. One of the flag values can indicate that the operation result structure was already copied to kernel_config->ifr by the function that received this kernel_config instance as a parameter, and that the content of the hwtstamp_config-related fields in kernel_config structure must be ignored when the function returns. It would complicate the implementation logic, but we'd avoid some unnecessary copy operations while converting *vlan components to the newer interface. Would it be a completely unreasonable approach?
On Thu, Apr 06, 2023 at 10:18:36AM -0600, Max Georgiev wrote: > If I may, there are other ways to work around this inefficiency. > Since kernel_hwtstamp_config was meant to be easily extendable, > we can abuse it and add a flag field there. One of the flag values > can indicate that the operation result structure was already copied > to kernel_config->ifr by the function that received this kernel_config > instance as a parameter, and that the content of the > hwtstamp_config-related fields in kernel_config structure must > be ignored when the function returns. It would complicate the > implementation logic, but we'd avoid some unnecessary copy > operations while converting *vlan components to the newer interface. > Would it be a completely unreasonable approach? No, I think that's fair game and a good idea. It would make the best case better (SET request on a converted real_dev drops from 3 copies to 2), while keeping the worst case the same (SET request on an unconverted real_dev remains at 3 copies).
On Thu, Apr 06, 2023 at 12:21:37AM -0600, Max Georgiev wrote: > It looks like there is a possibility that the returned hwtstamp_config structure > will be copied twice to ifr and copied once from ifr on the return path > in case if the underlying driver does not implement ndo_hwtstamp_get(): > - the underlying driver calls copy_to_user() inside its ndo_eth_ioctl() > implementation to return the data to generic_hwtstamp_get_lower(); > - then generic_hwtstamp_get_lower() calls copy_from_user() to copy it > back out of the ifr to kernel_hwtstamp_config structure; > - then dev_get_hwtstamp() calls copy_to_user() again to update > the same ifr with the same data the ifr already contains. > > Should we consider this acceptable? This is a slow path so copying a small structure is not a concern. Thanks, Richard
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 5920544e93e8..66d54c610aa5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -353,6 +353,44 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p) return 0; } +static int vlan_dev_hwtstamp(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct kernel_hwtstamp_config kernel_config = {}; + struct hwtstamp_config config; + int err; + + if (!netif_device_present(dev)) + return -ENODEV; + + if ((cmd == SIOCSHWTSTAMP && !ops->ndo_hwtstamp_set) || + (cmd == SIOCGHWTSTAMP && !ops->ndo_hwtstamp_get)) { + if (ops->ndo_eth_ioctl) { + return ops->ndo_eth_ioctl(real_dev, &ifr, cmd); + else + return -EOPNOTSUPP; + } + + kernel_config.ifr = ifr; + if (cmd == SIOCSHWTSTAMP) { + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + hwtstamp_config_to_kernel(&kernel_config, &config); + err = ops->ndo_hwtstamp_set(dev, &kernel_config, NULL); + } else if (cmd == SIOCGHWTSTAMP) { + err = ops->ndo_hwtstamp_get(dev, &kernel_config, NULL); + } + + if (err) + return err; + + hwtstamp_kernel_to_config(&config, &kernel_config); + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + return 0; +} + static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct net_device *real_dev = vlan_dev_priv(dev)->real_dev; @@ -368,10 +406,12 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (!net_eq(dev_net(dev), dev_net(real_dev))) break; fallthrough; + case SIOCGHWTSTAMP: + err = vlan_dev_hwtstamp(real_dev, &ifrr, cmd); + break; case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - case SIOCGHWTSTAMP: if (netif_device_present(real_dev) && ops->ndo_eth_ioctl) err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd); break;
This patch makes VLAN subsystem to use the newly introduced ndo_hwtstamp_get/set API to pass hw timestamp requests to underlying NIC drivers in case if these drivers implement ndo_hwtstamp_get/set functions. Otherwise VLAN·subsystem falls back to calling ndo_eth_ioctl. Suggested-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: Maxim Georgiev <glipus@gmail.com> --- net/8021q/vlan_dev.c | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-)