diff mbox series

[RFC,v3,3/5] Add ndo_hwtstamp_get/set support to vlan code path

Message ID 20230405063323.36270-1-glipus@gmail.com (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series [RFC,v3,1/5] Add NDOs for hardware timestamp get/set | expand

Checks

Context Check Description
netdev/series_format warning Series does not have a cover letter; Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 142 this patch: 27
netdev/cc_maintainers warning 3 maintainers not CCed: edumazet@google.com pabeni@redhat.com davem@davemloft.net
netdev/build_clang fail Errors and warnings before: 107 this patch: 44
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 144 this patch: 27
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 57 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Max Georgiev April 5, 2023, 6:33 a.m. UTC
This patch makes VLAN subsystem to use the newly introduced
ndo_hwtstamp_get/set API to pass hw timestamp requests to
underlying NIC drivers in case if these drivers implement
ndo_hwtstamp_get/set functions. Otherwise VLAN·subsystem
falls back to calling ndo_eth_ioctl.

Suggested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Maxim Georgiev <glipus@gmail.com>
---
 net/8021q/vlan_dev.c | 42 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

Comments

Vladimir Oltean April 5, 2023, 12:26 p.m. UTC | #1
On Wed, Apr 05, 2023 at 12:33:23AM -0600, Maxim Georgiev wrote:
> This patch makes VLAN subsystem to use the newly introduced
> ndo_hwtstamp_get/set API to pass hw timestamp requests to
> underlying NIC drivers in case if these drivers implement
> ndo_hwtstamp_get/set functions. Otherwise VLAN┬Ěsubsystem

Strange symbols (┬Ě).

> falls back to calling ndo_eth_ioctl.
> 
> Suggested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> Signed-off-by: Maxim Georgiev <glipus@gmail.com>
> ---
>  net/8021q/vlan_dev.c | 42 +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 41 insertions(+), 1 deletion(-)
> 
> diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
> index 5920544e93e8..66d54c610aa5 100644
> --- a/net/8021q/vlan_dev.c
> +++ b/net/8021q/vlan_dev.c
> @@ -353,6 +353,44 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
>  	return 0;
>  }
>  
> +static int vlan_dev_hwtstamp(struct net_device *dev, struct ifreq *ifr, int cmd)
> +{
> +	const struct net_device_ops *ops = dev->netdev_ops;
> +	struct kernel_hwtstamp_config kernel_config = {};
> +	struct hwtstamp_config config;
> +	int err;
> +
> +	if (!netif_device_present(dev))
> +		return -ENODEV;
> +
> +	if ((cmd == SIOCSHWTSTAMP && !ops->ndo_hwtstamp_set) ||
> +	    (cmd == SIOCGHWTSTAMP && !ops->ndo_hwtstamp_get)) {
> +		if (ops->ndo_eth_ioctl) {
> +			return ops->ndo_eth_ioctl(real_dev, &ifr, cmd);
> +		else
> +			return -EOPNOTSUPP;
> +	}
> +
> +	kernel_config.ifr = ifr;
> +	if (cmd == SIOCSHWTSTAMP) {
> +		if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
> +			return -EFAULT;
> +
> +		hwtstamp_config_to_kernel(&kernel_config, &config);
> +		err = ops->ndo_hwtstamp_set(dev, &kernel_config, NULL);
> +	} else if (cmd == SIOCGHWTSTAMP) {
> +		err = ops->ndo_hwtstamp_get(dev, &kernel_config, NULL);
> +	}
> +
> +	if (err)
> +		return err;
> +
> +	hwtstamp_kernel_to_config(&config, &kernel_config);
> +	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
> +		return -EFAULT;
> +	return 0;
> +}
> +
>  static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
>  {
>  	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
> @@ -368,10 +406,12 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
>  		if (!net_eq(dev_net(dev), dev_net(real_dev)))
>  			break;
>  		fallthrough;
> +	case SIOCGHWTSTAMP:
> +		err = vlan_dev_hwtstamp(real_dev, &ifrr, cmd);
> +		break;
>  	case SIOCGMIIPHY:
>  	case SIOCGMIIREG:
>  	case SIOCSMIIREG:
> -	case SIOCGHWTSTAMP:

I would recommend also making vlan_dev_hwtstamp() be called from the
VLAN driver's ndo_hwtstamp_set() rather than from ndo_eth_ioctl().

My understanding of Jakub's suggestion to (temporarily) stuff ifr
inside kernel_config was to do that from top-level net/core/dev_ioctl.c,
not from the VLAN driver.

>  		if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
>  			err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
>  		break;
> -- 
> 2.39.2
>
Max Georgiev April 5, 2023, 4:19 p.m. UTC | #2
On Wed, Apr 5, 2023 at 6:26 AM Vladimir Oltean <vladimir.oltean@nxp.com> wrote:
>
> On Wed, Apr 05, 2023 at 12:33:23AM -0600, Maxim Georgiev wrote:
> > This patch makes VLAN subsystem to use the newly introduced
> > ndo_hwtstamp_get/set API to pass hw timestamp requests to
> > underlying NIC drivers in case if these drivers implement
> > ndo_hwtstamp_get/set functions. Otherwise VLAN┬Ěsubsystem
>
> Strange symbols (┬Ě).

Bad copy-paste, sorry. Fixed.

>
> > falls back to calling ndo_eth_ioctl.
> >
> > Suggested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> > Signed-off-by: Maxim Georgiev <glipus@gmail.com>
> > ---
> >  net/8021q/vlan_dev.c | 42 +++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 41 insertions(+), 1 deletion(-)
> >
> > diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
> > index 5920544e93e8..66d54c610aa5 100644
> > --- a/net/8021q/vlan_dev.c
> > +++ b/net/8021q/vlan_dev.c
> > @@ -353,6 +353,44 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
> >       return 0;
> >  }
> >
> > +static int vlan_dev_hwtstamp(struct net_device *dev, struct ifreq *ifr, int cmd)
> > +{
> > +     const struct net_device_ops *ops = dev->netdev_ops;
> > +     struct kernel_hwtstamp_config kernel_config = {};
> > +     struct hwtstamp_config config;
> > +     int err;
> > +
> > +     if (!netif_device_present(dev))
> > +             return -ENODEV;
> > +
> > +     if ((cmd == SIOCSHWTSTAMP && !ops->ndo_hwtstamp_set) ||
> > +         (cmd == SIOCGHWTSTAMP && !ops->ndo_hwtstamp_get)) {
> > +             if (ops->ndo_eth_ioctl) {
> > +                     return ops->ndo_eth_ioctl(real_dev, &ifr, cmd);
> > +             else
> > +                     return -EOPNOTSUPP;
> > +     }
> > +
> > +     kernel_config.ifr = ifr;
> > +     if (cmd == SIOCSHWTSTAMP) {
> > +             if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
> > +                     return -EFAULT;
> > +
> > +             hwtstamp_config_to_kernel(&kernel_config, &config);
> > +             err = ops->ndo_hwtstamp_set(dev, &kernel_config, NULL);
> > +     } else if (cmd == SIOCGHWTSTAMP) {
> > +             err = ops->ndo_hwtstamp_get(dev, &kernel_config, NULL);
> > +     }
> > +
> > +     if (err)
> > +             return err;
> > +
> > +     hwtstamp_kernel_to_config(&config, &kernel_config);
> > +     if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
> > +             return -EFAULT;
> > +     return 0;
> > +}
> > +
> >  static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
> >  {
> >       struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
> > @@ -368,10 +406,12 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
> >               if (!net_eq(dev_net(dev), dev_net(real_dev)))
> >                       break;
> >               fallthrough;
> > +     case SIOCGHWTSTAMP:
> > +             err = vlan_dev_hwtstamp(real_dev, &ifrr, cmd);
> > +             break;
> >       case SIOCGMIIPHY:
> >       case SIOCGMIIREG:
> >       case SIOCSMIIREG:
> > -     case SIOCGHWTSTAMP:
>
> I would recommend also making vlan_dev_hwtstamp() be called from the
> VLAN driver's ndo_hwtstamp_set() rather than from ndo_eth_ioctl().

Vladimir, could you please elaborate here a bit?
Are you saying that I should go all the way with vlan NDO conversion,
implement ndo_hwtstamp_get/set() for vlan, and stop handling
SIOCGHWTSTAMP/SIOCSHWTSTAMP in vlan_dev_ioctl()?

>
> My understanding of Jakub's suggestion to (temporarily) stuff ifr
> inside kernel_config was to do that from top-level net/core/dev_ioctl.c,
> not from the VLAN driver.

[RFC PATCH v3 2/5] in this patch stack changes net/core/dev_ioctl.c
to insert ifr inside kernel_config. I assumed that I should do it here too
so underlying drivers could rely on ifr pointer in kernel_config being
always initialized.
If the plan is to stop supporting SIOCGHWTSTAMP/SIOCSHWTSTAMP
in vlan_dev_ioctl() all together and move the hw timestamp handling
logic to vlan_get/set_hwtstamp() functions, then this ifr initialization
code will be removed from net/8021q/vlan_dev.c anyway.

>
> >               if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
> >                       err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
> >               break;
> > --
> > 2.39.2
> >
Vladimir Oltean April 5, 2023, 4:28 p.m. UTC | #3
On Wed, Apr 05, 2023 at 10:19:11AM -0600, Max Georgiev wrote:
> > I would recommend also making vlan_dev_hwtstamp() be called from the
> > VLAN driver's ndo_hwtstamp_set() rather than from ndo_eth_ioctl().
> 
> Vladimir, could you please elaborate here a bit?
> Are you saying that I should go all the way with vlan NDO conversion,
> implement ndo_hwtstamp_get/set() for vlan, and stop handling
> SIOCGHWTSTAMP/SIOCSHWTSTAMP in vlan_dev_ioctl()?

Yes, sorry for being unclear.

> > My understanding of Jakub's suggestion to (temporarily) stuff ifr
> > inside kernel_config was to do that from top-level net/core/dev_ioctl.c,
> > not from the VLAN driver.
> 
> [RFC PATCH v3 2/5] in this patch stack changes net/core/dev_ioctl.c
> to insert ifr inside kernel_config. I assumed that I should do it here too
> so underlying drivers could rely on ifr pointer in kernel_config being
> always initialized.
> If the plan is to stop supporting SIOCGHWTSTAMP/SIOCSHWTSTAMP
> in vlan_dev_ioctl() all together and move the hw timestamp handling
> logic to vlan_get/set_hwtstamp() functions, then this ifr initialization
> code will be removed from net/8021q/vlan_dev.c anyway.

Yes, correct, dev_set_hwtstamp() should provide it.

There's a small thing I don't like about stuffing "ifr" inside struct
kernel_hwtstamp_config, and that's that some drivers keep the last
configuration privately using memcpy(). If we put "ifr" there, they will
practically have access to a stale pointer, because "ifr" loses meaning
once the ioctl syscall is over.

Since we don't know how long it will take until the ndo_hwtstamp_set()
conversion is complete (experience says: possibly indefinitely), it
would be good if this wasn't possible, because who knows what ideas
people might get to do with it.

Options to avoid it are:
- keep doing what you're doing - let drivers memcpy() the struct
  hwtstamp_config and not the struct kernel_hwtstamp_config.
- pass the ifr as yet another argument to ndo_hwtstamp_set(), and don't
  stuff it inside struct kernel_hwtstamp_config.

Neither of these is particularly great, because at the end of the
conversion, some extra cleanup will be required to fix up the API again
(either to stop all drivers from using struct hwtstamp_config, or to
stop passing the argument which is no longer used by anybody).

I think, personally, I'd opt for the first bullet, keep doing what
you're doing. It should require a bit less cleanup, since not all
drivers do the memcpy() thing.
Jakub Kicinski April 5, 2023, 4:42 p.m. UTC | #4
On Wed,  5 Apr 2023 00:33:23 -0600 Maxim Georgiev wrote:
> +static int vlan_dev_hwtstamp(struct net_device *dev, struct ifreq *ifr, int cmd)
> +{
> +	const struct net_device_ops *ops = dev->netdev_ops;
> +	struct kernel_hwtstamp_config kernel_config = {};
> +	struct hwtstamp_config config;
> +	int err;
> +
> +	if (!netif_device_present(dev))
> +		return -ENODEV;
> +
> +	if ((cmd == SIOCSHWTSTAMP && !ops->ndo_hwtstamp_set) ||
> +	    (cmd == SIOCGHWTSTAMP && !ops->ndo_hwtstamp_get)) {
> +		if (ops->ndo_eth_ioctl) {
> +			return ops->ndo_eth_ioctl(real_dev, &ifr, cmd);
> +		else
> +			return -EOPNOTSUPP;
> +	}
> +
> +	kernel_config.ifr = ifr;
> +	if (cmd == SIOCSHWTSTAMP) {
> +		if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
> +			return -EFAULT;
> +
> +		hwtstamp_config_to_kernel(&kernel_config, &config);
> +		err = ops->ndo_hwtstamp_set(dev, &kernel_config, NULL);
> +	} else if (cmd == SIOCGHWTSTAMP) {
> +		err = ops->ndo_hwtstamp_get(dev, &kernel_config, NULL);
> +	}
> +
> +	if (err)
> +		return err;
> +
> +	hwtstamp_kernel_to_config(&config, &kernel_config);
> +	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
> +		return -EFAULT;
> +	return 0;
> +}

This needs to live in the core. I think the real_dev is a lower of the
vlan device? All the vlan driver should do is attach the generic helper:

	.ndo_hwtstamp_get = generic_hwtstamp_get_lower,

and the same for set. No?
Vladimir Oltean April 5, 2023, 5:03 p.m. UTC | #5
On Wed, Apr 05, 2023 at 09:42:10AM -0700, Jakub Kicinski wrote:
> This needs to live in the core. I think the real_dev is a lower of the
> vlan device? All the vlan driver should do is attach the generic helper:
> 
> 	.ndo_hwtstamp_get = generic_hwtstamp_get_lower,
> 
> and the same for set. No?

The goal would be for macvlan and bonding to use the same generic_hwtstamp_get_lower()?
How would the generic helper get to bond_option_active_slave_get_rcu(),
vlan_dev_priv(dev)->real_dev, macvlan_dev_real_dev(dev)?

Perhaps a generic_hwtstamp_get_lower() that takes the lower as argument,
and 3 small wrappers in vlan, macvlan, bonding which identify that lower?
Jakub Kicinski April 5, 2023, 5:13 p.m. UTC | #6
On Wed, 5 Apr 2023 20:03:22 +0300 Vladimir Oltean wrote:
> The goal would be for macvlan and bonding to use the same generic_hwtstamp_get_lower()?
> How would the generic helper get to bond_option_active_slave_get_rcu(),
> vlan_dev_priv(dev)->real_dev, macvlan_dev_real_dev(dev)?
> 
> Perhaps a generic_hwtstamp_get_lower() that takes the lower as argument,
> and 3 small wrappers in vlan, macvlan, bonding which identify that lower?

The bonding situation is probably more complex, I haven't looked,
but for *vlans we can just get the lower from netdev linkage, no?
Sure the drivers have their own pointers for convenience and with
their own lifetime rules but under rtnl lock lower/upper should work...
Vladimir Oltean April 5, 2023, 5:28 p.m. UTC | #7
On Wed, Apr 05, 2023 at 10:13:23AM -0700, Jakub Kicinski wrote:
> On Wed, 5 Apr 2023 20:03:22 +0300 Vladimir Oltean wrote:
> > The goal would be for macvlan and bonding to use the same generic_hwtstamp_get_lower()?
> > How would the generic helper get to bond_option_active_slave_get_rcu(),
> > vlan_dev_priv(dev)->real_dev, macvlan_dev_real_dev(dev)?
> > 
> > Perhaps a generic_hwtstamp_get_lower() that takes the lower as argument,
> > and 3 small wrappers in vlan, macvlan, bonding which identify that lower?
> 
> The bonding situation is probably more complex, I haven't looked,
> but for *vlans we can just get the lower from netdev linkage, no?
> Sure the drivers have their own pointers for convenience and with
> their own lifetime rules but under rtnl lock lower/upper should work...

So what do you suggest doing with bonding, then? Not use the generic
helper at all? It's not that more complex, btw. Here are the differences:

- it changes ifrr.ifr_name with real_dev->name for a reason I can't
  really determine or find in commit 94dd016ae538 ("bond: pass
  get_ts_info and SIOC[SG]HWTSTAMP ioctl to active device"). Since vlan
  and macvlan don't do it, and operate with lower drivers from the same
  pool as bonding, I'd imagine it's not needed.

- it requires cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX to be set in
  SET requests

- it sets cfg.flags | HWTSTAMP_FLAG_BONDED_PHC_INDEX in GET responses

Notably, something I would have expected it does but it doesn't do is it
doesn't apply the same hwtstamping config to the lower interface that
isn't active, when the switchover happens. Presumably user space does that.

So it's not that much different.
Vladimir Oltean April 5, 2023, 5:34 p.m. UTC | #8
On Wed, Apr 05, 2023 at 08:28:40PM +0300, Vladimir Oltean wrote:
> - it changes ifrr.ifr_name with real_dev->name for a reason I can't
>   really determine or find in commit 94dd016ae538 ("bond: pass
>   get_ts_info and SIOC[SG]HWTSTAMP ioctl to active device"). Since vlan
>   and macvlan don't do it, and operate with lower drivers from the same
>   pool as bonding, I'd imagine it's not needed.

Ah, they do, they do, my bad. So it's down to 2 differences, which can
be handled easily with the wrapper function model - the bonding wrapper
checks, or sets, the HWTSTAMP_FLAG_BONDED_PHC_INDEX flag.
Jakub Kicinski April 5, 2023, 5:42 p.m. UTC | #9
On Wed, 5 Apr 2023 20:28:40 +0300 Vladimir Oltean wrote:
> So what do you suggest doing with bonding, then? Not use the generic
> helper at all?

It'd seem most natural to me to split the generic "descend" helper into
two functions, one which retrieves the lower and one which does the
appropriate calling dance (ndo vs ioctl, and DSA, which I guess is now
gone?). The latter could be used for the first descend as well I'd
presume. And it can be exported for the use of more complex drivers
like bonding which want to walk the lowers themselves.

> - it requires cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX to be set in
>   SET requests
> 
> - it sets cfg.flags | HWTSTAMP_FLAG_BONDED_PHC_INDEX in GET responses

IIRC that was to indicate to user space that the real PHC may change
for this netdev so it needs to pay attention to netlink notifications.
Shouldn't apply to *vlans?

> Notably, something I would have expected it does but it doesn't do is it
> doesn't apply the same hwtstamping config to the lower interface that
> isn't active, when the switchover happens. Presumably user space does that.

Yes, user space must be involved anyway, because the entire clock will
change. IMHO implementing the pass thru for timestamping requests on
bonding is checkbox engineering, kernel can't make it work
transparently. But nobody else spoke up when it was proposed so...

> So it's not that much different.
Vladimir Oltean April 5, 2023, 6:01 p.m. UTC | #10
On Wed, Apr 05, 2023 at 10:42:53AM -0700, Jakub Kicinski wrote:
> On Wed, 5 Apr 2023 20:28:40 +0300 Vladimir Oltean wrote:
> > So what do you suggest doing with bonding, then? Not use the generic
> > helper at all?
> 
> It'd seem most natural to me to split the generic "descend" helper into
> two functions, one which retrieves the lower and one which does the
> appropriate calling dance (ndo vs ioctl, and DSA, which I guess is now
> gone?).

There's nothing DSA-related to be done. DSA masters can't be lowers of
any other virtual interface kinds except bridge or bonding/team, and:
- bridge doesn't support hwtstamping
- bonding is also DSA master when it has a DSA master as lower, so the
  DSA master restriction has already run once - on the bonding device
  itself

> The latter could be used for the first descend as well I'd presume.
> And it can be exported for the use of more complex drivers like
> bonding which want to walk the lowers themselves.
> 
> > - it requires cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX to be set in
> >   SET requests
> > 
> > - it sets cfg.flags | HWTSTAMP_FLAG_BONDED_PHC_INDEX in GET responses
> 
> IIRC that was to indicate to user space that the real PHC may change
> for this netdev so it needs to pay attention to netlink notifications.
> Shouldn't apply to *vlans?

No, this shouldn't apply to *vlans, but I didn't suggest that it should.
I don't think my proposal was clear enough, so here's some code
(untested, written in email client).

static int macvlan_hwtstamp_get(struct net_device *dev,
				struct kernel_hwtstamp_config *cfg,
				struct netlink_ext_ack *extack)
{
	struct net_device *real_dev = macvlan_dev_real_dev(dev);

	return generic_hwtstamp_get_lower(real_dev, cfg, extack);
}

static int macvlan_hwtstamp_set(struct net_device *dev,
				struct kernel_hwtstamp_config *cfg,
				struct netlink_ext_ack *extack)
{
	struct net_device *real_dev = macvlan_dev_real_dev(dev);

	return generic_hwtstamp_set_lower(real_dev, cfg, extack);
}

static int vlan_hwtstamp_get(struct net_device *dev,
			     struct kernel_hwtstamp_config *cfg,
			     struct netlink_ext_ack *extack)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;

	return generic_hwtstamp_get_lower(real_dev, cfg, extack);
}

static int vlan_hwtstamp_set(struct net_device *dev,
			     struct kernel_hwtstamp_config *cfg,
			     struct netlink_ext_ack *extack)
{
	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;

	return generic_hwtstamp_set_lower(real_dev, cfg, extack);
}

static int bond_hwtstamp_get(struct net_device *bond_dev,
			     struct kernel_hwtstamp_config *cfg,
			     struct netlink_ext_ack *extack)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct net_device *real_dev = bond_option_active_slave_get_rcu(bond);
	int err;

	if (!real_dev)
		return -EOPNOTSUPP;

	err = generic_hwtstamp_get_lower(real_dev, cfg, extack);
	if (err)
		return err;

	/* Set the BOND_PHC_INDEX flag to notify user space */
	cfg->flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX;

	return 0;
}

static int bond_hwtstamp_set(struct net_device *bond_dev,
			     struct kernel_hwtstamp_config *cfg,
			     struct netlink_ext_ack *extack)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct net_device *real_dev = bond_option_active_slave_get_rcu(bond);
	int err;

	if (!real_dev)
		return -EOPNOTSUPP;

	if (!(cfg->flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX))
		return -EOPNOTSUPP;

	return generic_hwtstamp_set_lower(real_dev, cfg, extack);
}

Doesn't seem in any way necessary to complicate things with the netdev
adjacence lists?

> Yes, user space must be involved anyway, because the entire clock will
> change. IMHO implementing the pass thru for timestamping requests on
> bonding is checkbox engineering, kernel can't make it work
> transparently. But nobody else spoke up when it was proposed so...

ok, but that's a bit beside the point here.
Jakub Kicinski April 6, 2023, midnight UTC | #11
On Wed, 5 Apr 2023 21:01:21 +0300 Vladimir Oltean wrote:
> - bonding is also DSA master when it has a DSA master as lower, so the
>   DSA master restriction has already run once - on the bonding device
>   itself

Huh, didn't know that.

> > The latter could be used for the first descend as well I'd presume.
> > And it can be exported for the use of more complex drivers like
> > bonding which want to walk the lowers themselves.
> >   
> > > - it requires cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX to be set in
> > >   SET requests
> > > 
> > > - it sets cfg.flags | HWTSTAMP_FLAG_BONDED_PHC_INDEX in GET responses  
> > 
> > IIRC that was to indicate to user space that the real PHC may change
> > for this netdev so it needs to pay attention to netlink notifications.
> > Shouldn't apply to *vlans?  
> 
> No, this shouldn't apply to *vlans, but I didn't suggest that it should.

Good, so if we just target *vlans we don't have to worry.

> I don't think my proposal was clear enough, so here's some code
> (untested, written in email client).
> 
> static int macvlan_hwtstamp_get(struct net_device *dev,
> 				struct kernel_hwtstamp_config *cfg,
> 				struct netlink_ext_ack *extack)
> {
> 	struct net_device *real_dev = macvlan_dev_real_dev(dev);
> 
> 	return generic_hwtstamp_get_lower(real_dev, cfg, extack);
> }
> 
> static int macvlan_hwtstamp_set(struct net_device *dev,
> 				struct kernel_hwtstamp_config *cfg,
> 				struct netlink_ext_ack *extack)
> {
> 	struct net_device *real_dev = macvlan_dev_real_dev(dev);
> 
> 	return generic_hwtstamp_set_lower(real_dev, cfg, extack);
> }
> 
> static int vlan_hwtstamp_get(struct net_device *dev,
> 			     struct kernel_hwtstamp_config *cfg,
> 			     struct netlink_ext_ack *extack)
> {
> 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
> 
> 	return generic_hwtstamp_get_lower(real_dev, cfg, extack);
> }
> 
> static int vlan_hwtstamp_set(struct net_device *dev,
> 			     struct kernel_hwtstamp_config *cfg,
> 			     struct netlink_ext_ack *extack)
> {
> 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
> 
> 	return generic_hwtstamp_set_lower(real_dev, cfg, extack);
> }

I got that, but why wouldn't this not be better, as it avoids 
the 3 driver stubs? (also written in the MUA)

int net_lower_hwtstamp_set(struct net_device *dev,
			   struct kernel_hwtstamp_config *cfg,
			   struct netlink_ext_ack *extack)
{
	struct list_head *iter = dev->adj_list.lower.next;
	struct net_device *lower;
	
	lower = netdev_lower_get_next(dev, &iter);
	return generic_hwtstamp_set_lower(lower, cfg, extack);
}

> static int bond_hwtstamp_get(struct net_device *bond_dev,
> 			     struct kernel_hwtstamp_config *cfg,
> 			     struct netlink_ext_ack *extack)
> {
> 	struct bonding *bond = netdev_priv(bond_dev);
> 	struct net_device *real_dev = bond_option_active_slave_get_rcu(bond);
> 	int err;
> 
> 	if (!real_dev)
> 		return -EOPNOTSUPP;
> 
> 	err = generic_hwtstamp_get_lower(real_dev, cfg, extack);
> 	if (err)
> 		return err;
> 
> 	/* Set the BOND_PHC_INDEX flag to notify user space */
> 	cfg->flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX;
> 
> 	return 0;
> }
> 
> static int bond_hwtstamp_set(struct net_device *bond_dev,
> 			     struct kernel_hwtstamp_config *cfg,
> 			     struct netlink_ext_ack *extack)
> {
> 	struct bonding *bond = netdev_priv(bond_dev);
> 	struct net_device *real_dev = bond_option_active_slave_get_rcu(bond);
> 	int err;
> 
> 	if (!real_dev)
> 		return -EOPNOTSUPP;
> 
> 	if (!(cfg->flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX))
> 		return -EOPNOTSUPP;
> 
> 	return generic_hwtstamp_set_lower(real_dev, cfg, extack);
> }
> 
> Doesn't seem in any way necessary to complicate things with the netdev
> adjacence lists?

What is the complication? We can add a "get first" helper maybe to hide
the oddities of the linking.

> > Yes, user space must be involved anyway, because the entire clock will
> > change. IMHO implementing the pass thru for timestamping requests on
> > bonding is checkbox engineering, kernel can't make it work
> > transparently. But nobody else spoke up when it was proposed so...  
> 
> ok, but that's a bit beside the point here.

You cut off the quote it was responding to so IDK if it is.
Max Georgiev April 6, 2023, 6:21 a.m. UTC | #12
On Wed, Apr 5, 2023 at 6:00 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Wed, 5 Apr 2023 21:01:21 +0300 Vladimir Oltean wrote:
> > - bonding is also DSA master when it has a DSA master as lower, so the
> >   DSA master restriction has already run once - on the bonding device
> >   itself
>
> Huh, didn't know that.
>
> > > The latter could be used for the first descend as well I'd presume.
> > > And it can be exported for the use of more complex drivers like
> > > bonding which want to walk the lowers themselves.
> > >
> > > > - it requires cfg.flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX to be set in
> > > >   SET requests
> > > >
> > > > - it sets cfg.flags | HWTSTAMP_FLAG_BONDED_PHC_INDEX in GET responses
> > >
> > > IIRC that was to indicate to user space that the real PHC may change
> > > for this netdev so it needs to pay attention to netlink notifications.
> > > Shouldn't apply to *vlans?
> >
> > No, this shouldn't apply to *vlans, but I didn't suggest that it should.
>
> Good, so if we just target *vlans we don't have to worry.
>
> > I don't think my proposal was clear enough, so here's some code
> > (untested, written in email client).
> >
> > static int macvlan_hwtstamp_get(struct net_device *dev,
> >                               struct kernel_hwtstamp_config *cfg,
> >                               struct netlink_ext_ack *extack)
> > {
> >       struct net_device *real_dev = macvlan_dev_real_dev(dev);
> >
> >       return generic_hwtstamp_get_lower(real_dev, cfg, extack);
> > }
> >
> > static int macvlan_hwtstamp_set(struct net_device *dev,
> >                               struct kernel_hwtstamp_config *cfg,
> >                               struct netlink_ext_ack *extack)
> > {
> >       struct net_device *real_dev = macvlan_dev_real_dev(dev);
> >
> >       return generic_hwtstamp_set_lower(real_dev, cfg, extack);
> > }
> >
> > static int vlan_hwtstamp_get(struct net_device *dev,
> >                            struct kernel_hwtstamp_config *cfg,
> >                            struct netlink_ext_ack *extack)
> > {
> >       struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
> >
> >       return generic_hwtstamp_get_lower(real_dev, cfg, extack);
> > }
> >
> > static int vlan_hwtstamp_set(struct net_device *dev,
> >                            struct kernel_hwtstamp_config *cfg,
> >                            struct netlink_ext_ack *extack)
> > {
> >       struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
> >
> >       return generic_hwtstamp_set_lower(real_dev, cfg, extack);
> > }
>
> I got that, but why wouldn't this not be better, as it avoids
> the 3 driver stubs? (also written in the MUA)
>
> int net_lower_hwtstamp_set(struct net_device *dev,
>                            struct kernel_hwtstamp_config *cfg,
>                            struct netlink_ext_ack *extack)
> {
>         struct list_head *iter = dev->adj_list.lower.next;
>         struct net_device *lower;
>
>         lower = netdev_lower_get_next(dev, &iter);
>         return generic_hwtstamp_set_lower(lower, cfg, extack);
> }
>
> > static int bond_hwtstamp_get(struct net_device *bond_dev,
> >                            struct kernel_hwtstamp_config *cfg,
> >                            struct netlink_ext_ack *extack)
> > {
> >       struct bonding *bond = netdev_priv(bond_dev);
> >       struct net_device *real_dev = bond_option_active_slave_get_rcu(bond);
> >       int err;
> >
> >       if (!real_dev)
> >               return -EOPNOTSUPP;
> >
> >       err = generic_hwtstamp_get_lower(real_dev, cfg, extack);
> >       if (err)
> >               return err;
> >
> >       /* Set the BOND_PHC_INDEX flag to notify user space */
> >       cfg->flags |= HWTSTAMP_FLAG_BONDED_PHC_INDEX;
> >
> >       return 0;
> > }
> >
> > static int bond_hwtstamp_set(struct net_device *bond_dev,
> >                            struct kernel_hwtstamp_config *cfg,
> >                            struct netlink_ext_ack *extack)
> > {
> >       struct bonding *bond = netdev_priv(bond_dev);
> >       struct net_device *real_dev = bond_option_active_slave_get_rcu(bond);
> >       int err;
> >
> >       if (!real_dev)
> >               return -EOPNOTSUPP;
> >
> >       if (!(cfg->flags & HWTSTAMP_FLAG_BONDED_PHC_INDEX))
> >               return -EOPNOTSUPP;
> >
> >       return generic_hwtstamp_set_lower(real_dev, cfg, extack);
> > }
> >
> > Doesn't seem in any way necessary to complicate things with the netdev
> > adjacence lists?
>
> What is the complication? We can add a "get first" helper maybe to hide
> the oddities of the linking.
>
> > > Yes, user space must be involved anyway, because the entire clock will
> > > change. IMHO implementing the pass thru for timestamping requests on
> > > bonding is checkbox engineering, kernel can't make it work
> > > transparently. But nobody else spoke up when it was proposed so...
> >
> > ok, but that's a bit beside the point here.
>
> You cut off the quote it was responding to so IDK if it is.

I tried my best to follow the discussion, and convert it to compilable code.
Here is what I have in mind for generic_hwtstamp_get_lower():

int generic_hwtstamp_get_lower(struct net_dev *dev,
                           struct kernel_hwtstamp_config *kernel_cfg,
                           struct netlink_ext_ack *extack)
{
        const struct net_device_ops *ops = dev->netdev_ops;
        struct hwtstamp_config cfg;
        int err;

        if (!netif_device_present(dev))
                return -ENODEV;

        if (ops->ndo_hwtstamp_get)
                return ops->ndo_hwtstamp_get(dev, cfg, extack);

        if (!cfg->ifr)
                return -EOPNOTSUPP;

        err = dev_eth_ioctl(dev, cfg->ifr, SIOCGHWTSTAMP);
        if (err)
            return err;

        if (copy_from_user(&cfg, cfg->ifr->ifr_data, sizeof(cfg)))
                return -EFAULT;

        hwtstamp_config_to_kernel(kernel_cfg, &cfg);

        return 0;
}

It looks like there is a possibility that the returned hwtstamp_config structure
will be copied twice to ifr and copied once from ifr on the return path
in case if the underlying driver does not implement ndo_hwtstamp_get():
- the underlying driver calls copy_to_user() inside its ndo_eth_ioctl()
  implementation to return the data to generic_hwtstamp_get_lower();
- then generic_hwtstamp_get_lower() calls copy_from_user() to copy it
  back out of the ifr to kernel_hwtstamp_config structure;
- then dev_get_hwtstamp() calls copy_to_user() again to update
  the same ifr with the same data the ifr already contains.

Should we consider this acceptable?
Vladimir Oltean April 6, 2023, 3:01 p.m. UTC | #13
On Thu, Apr 06, 2023 at 12:21:37AM -0600, Max Georgiev wrote:
> I tried my best to follow the discussion, and convert it to compilable code.
> Here is what I have in mind for generic_hwtstamp_get_lower():
> 
> int generic_hwtstamp_get_lower(struct net_dev *dev,
>                            struct kernel_hwtstamp_config *kernel_cfg,
>                            struct netlink_ext_ack *extack)
> {
>         const struct net_device_ops *ops = dev->netdev_ops;
>         struct hwtstamp_config cfg;
>         int err;
> 
>         if (!netif_device_present(dev))
>                 return -ENODEV;
> 
>         if (ops->ndo_hwtstamp_get)
>                 return ops->ndo_hwtstamp_get(dev, cfg, extack);
> 
>         if (!cfg->ifr)
>                 return -EOPNOTSUPP;
> 
>         err = dev_eth_ioctl(dev, cfg->ifr, SIOCGHWTSTAMP);
>         if (err)
>             return err;
> 
>         if (copy_from_user(&cfg, cfg->ifr->ifr_data, sizeof(cfg)))
>                 return -EFAULT;
> 
>         hwtstamp_config_to_kernel(kernel_cfg, &cfg);
> 
>         return 0;
> }

Side note, it doesn't look like this code is particularly compilable
either - "cfg" is used in some places instead of "kernel_cfg".

> 
> It looks like there is a possibility that the returned hwtstamp_config structure
> will be copied twice to ifr and copied once from ifr on the return path
> in case if the underlying driver does not implement ndo_hwtstamp_get():
> - the underlying driver calls copy_to_user() inside its ndo_eth_ioctl()
>   implementation to return the data to generic_hwtstamp_get_lower();
> - then generic_hwtstamp_get_lower() calls copy_from_user() to copy it
>   back out of the ifr to kernel_hwtstamp_config structure;
> - then dev_get_hwtstamp() calls copy_to_user() again to update
>   the same ifr with the same data the ifr already contains.
> 
> Should we consider this acceptable?

Thanks for laying this out. I guess with a table it's going to be
clearer, so to summarize, I believe this is the status:

Assuming we convert *vlan to ndo_hwtstamp_set():

===================

If the vlan driver is converted to ndo_hwtstamp_set() and the real_dev
driver uses ndo_eth_ioctl(), we have:
- one copy_from_user() in dev_set_hwtstamp()
- one copy_from_user() in the real_dev's ndo_eth_ioctl()
- one copy_to_user() in the real_dev's ndo_eth_ioctl()
- one copy_from_user() in generic_hwtstamp_get_lower()
- one copy_to_user() in dev_set_hwtstamp()

If the vlan driver is converted to ndo_hwtstamp_set() and the real_dev
is converted too, we have:
- one copy_from_user() in dev_set_hwtstamp()
- one copy_to_user() in dev_set_hwtstamp()

===================

Assuming we don't convert *vlan to ndo_hwtstamp_set():

===================

If the vlan driver isn't converted to ndo_hwtstamp_set() and the
real_dev driver isn't converted either, we have:
- one copy_from_user() in dev_set_hwtstamp()
- one copy_from_user() in the real_dev's ndo_eth_ioctl()
- one copy_to_user() in the real_dev's ndo_eth_ioctl()

If the vlan driver isn't converted to ndo_hwtstamp_set(), but the
real_dev driver is, we have:
- one copy_from_user() in dev_set_hwtstamp()
- one copy_from_user() in the vlan's ndo_eth_ioctl()
- one copy_to_user() in the vlan's ndo_eth_ioctl()

===================

So between converting and not converting the *vlans to ndo_hwtstamp_set(),
the worst case is going to be worse (with a mix of new API in *vlan and
old API in real_dev) and the best case is going to be better (with new
API in both *vlan and real_dev). OTOH, with old API in *vlan, the number
of copies to/from the user buffer is going to be constant at 3, which is
not the best, not the worst.

I guess the data indicates that we should convert the *vlans to
ndo_hwtstamp_set() at the very end of the process, and for now, just
make them compatible with a real_dev that uses the new API?

Note that I haven't done the math for the "get" operation yet, but I
believe it to be similar.
Max Georgiev April 6, 2023, 4:18 p.m. UTC | #14
On Thu, Apr 6, 2023 at 9:02 AM Vladimir Oltean <vladimir.oltean@nxp.com> wrote:
>
> On Thu, Apr 06, 2023 at 12:21:37AM -0600, Max Georgiev wrote:
> > I tried my best to follow the discussion, and convert it to compilable code.
> > Here is what I have in mind for generic_hwtstamp_get_lower():
> >
> > int generic_hwtstamp_get_lower(struct net_dev *dev,
> >                            struct kernel_hwtstamp_config *kernel_cfg,
> >                            struct netlink_ext_ack *extack)
> > {
> >         const struct net_device_ops *ops = dev->netdev_ops;
> >         struct hwtstamp_config cfg;
> >         int err;
> >
> >         if (!netif_device_present(dev))
> >                 return -ENODEV;
> >
> >         if (ops->ndo_hwtstamp_get)
> >                 return ops->ndo_hwtstamp_get(dev, cfg, extack);
> >
> >         if (!cfg->ifr)
> >                 return -EOPNOTSUPP;
> >
> >         err = dev_eth_ioctl(dev, cfg->ifr, SIOCGHWTSTAMP);
> >         if (err)
> >             return err;
> >
> >         if (copy_from_user(&cfg, cfg->ifr->ifr_data, sizeof(cfg)))
> >                 return -EFAULT;
> >
> >         hwtstamp_config_to_kernel(kernel_cfg, &cfg);
> >
> >         return 0;
> > }
>
> Side note, it doesn't look like this code is particularly compilable
> either - "cfg" is used in some places instead of "kernel_cfg".

That's true, this code wouldn't compile. I copied it here to
illustrate the potentially concerning logic.
Thank you for pointing this out though!

>
> >
> > It looks like there is a possibility that the returned hwtstamp_config structure
> > will be copied twice to ifr and copied once from ifr on the return path
> > in case if the underlying driver does not implement ndo_hwtstamp_get():
> > - the underlying driver calls copy_to_user() inside its ndo_eth_ioctl()
> >   implementation to return the data to generic_hwtstamp_get_lower();
> > - then generic_hwtstamp_get_lower() calls copy_from_user() to copy it
> >   back out of the ifr to kernel_hwtstamp_config structure;
> > - then dev_get_hwtstamp() calls copy_to_user() again to update
> >   the same ifr with the same data the ifr already contains.
> >
> > Should we consider this acceptable?
>
> Thanks for laying this out. I guess with a table it's going to be
> clearer, so to summarize, I believe this is the status:
>
> Assuming we convert *vlan to ndo_hwtstamp_set():
>
> ===================
>
> If the vlan driver is converted to ndo_hwtstamp_set() and the real_dev
> driver uses ndo_eth_ioctl(), we have:
> - one copy_from_user() in dev_set_hwtstamp()
> - one copy_from_user() in the real_dev's ndo_eth_ioctl()
> - one copy_to_user() in the real_dev's ndo_eth_ioctl()
> - one copy_from_user() in generic_hwtstamp_get_lower()
> - one copy_to_user() in dev_set_hwtstamp()
>
> If the vlan driver is converted to ndo_hwtstamp_set() and the real_dev
> is converted too, we have:
> - one copy_from_user() in dev_set_hwtstamp()
> - one copy_to_user() in dev_set_hwtstamp()
>
> ===================
>
> Assuming we don't convert *vlan to ndo_hwtstamp_set():
>
> ===================
>
> If the vlan driver isn't converted to ndo_hwtstamp_set() and the
> real_dev driver isn't converted either, we have:
> - one copy_from_user() in dev_set_hwtstamp()
> - one copy_from_user() in the real_dev's ndo_eth_ioctl()
> - one copy_to_user() in the real_dev's ndo_eth_ioctl()
>
> If the vlan driver isn't converted to ndo_hwtstamp_set(), but the
> real_dev driver is, we have:
> - one copy_from_user() in dev_set_hwtstamp()
> - one copy_from_user() in the vlan's ndo_eth_ioctl()
> - one copy_to_user() in the vlan's ndo_eth_ioctl()
>
> ===================
>
> So between converting and not converting the *vlans to ndo_hwtstamp_set(),
> the worst case is going to be worse (with a mix of new API in *vlan and
> old API in real_dev) and the best case is going to be better (with new
> API in both *vlan and real_dev). OTOH, with old API in *vlan, the number
> of copies to/from the user buffer is going to be constant at 3, which is
> not the best, not the worst.
>
> I guess the data indicates that we should convert the *vlans to
> ndo_hwtstamp_set() at the very end of the process, and for now, just
> make them compatible with a real_dev that uses the new API?
>
> Note that I haven't done the math for the "get" operation yet, but I
> believe it to be similar.

Thank you for putting this overhead tracking table together!

Here is my guess on how this accounting would look like
for the "get" codepath:

Assuming we convert *vlan to ndo_hwtstamp_set():

===================

If the vlan driver is converted to ndo_hwtstamp_get() and the real_dev
driver uses ndo_eth_ioctl(), we have:
- one copy_to_user() in the real_dev's ndo_eth_ioctl()
- one copy_from_user() in generic_hwtstamp_get_lower()
- one copy_to_user() in dev_get_hwtstamp()

If the vlan driver is converted to ndo_hwtstamp_get() and the real_dev
is converted too, we have:
- one copy_to_user() in dev_get_hwtstamp()

===================

Assuming we don't convert *vlan to ndo_hwtstamp_get():

===================

If the vlan driver isn't converted to ndo_hwtstamp_get() and the
real_dev driver isn't converted either, we have:
- one copy_to_user() in the real_dev's ndo_eth_ioctl()

If the vlan driver isn't converted to ndo_hwtstamp_get(), but the
real_dev driver is, we have:
- one copy_to_user() in the vlan's ndo_eth_ioctl()

===================

Adding real_dev->ndo_hwtstamp_get/set() support to vlan_eth_ioctl()
and holding back with ndo_hwtstamp_get/set() implementation in vlan
code looks like a winner again.

If I may, there are other ways to work around this inefficiency.
Since kernel_hwtstamp_config was meant to be easily extendable,
we can abuse it and add a flag field there. One of the flag values
can indicate that the operation result structure was already copied
to kernel_config->ifr by the function that received this kernel_config
instance as a parameter, and that the content of the
hwtstamp_config-related fields in kernel_config structure must
be ignored when the function returns. It would complicate the
implementation logic, but we'd avoid some unnecessary copy
operations while converting *vlan components to the newer interface.
Would it be a completely unreasonable approach?
Vladimir Oltean April 6, 2023, 4:50 p.m. UTC | #15
On Thu, Apr 06, 2023 at 10:18:36AM -0600, Max Georgiev wrote:
> If I may, there are other ways to work around this inefficiency.
> Since kernel_hwtstamp_config was meant to be easily extendable,
> we can abuse it and add a flag field there. One of the flag values
> can indicate that the operation result structure was already copied
> to kernel_config->ifr by the function that received this kernel_config
> instance as a parameter, and that the content of the
> hwtstamp_config-related fields in kernel_config structure must
> be ignored when the function returns. It would complicate the
> implementation logic, but we'd avoid some unnecessary copy
> operations while converting *vlan components to the newer interface.
> Would it be a completely unreasonable approach?

No, I think that's fair game and a good idea. It would make the best
case better (SET request on a converted real_dev drops from 3 copies to 2),
while keeping the worst case the same (SET request on an unconverted
real_dev remains at 3 copies).
Richard Cochran April 8, 2023, 1:56 p.m. UTC | #16
On Thu, Apr 06, 2023 at 12:21:37AM -0600, Max Georgiev wrote:

> It looks like there is a possibility that the returned hwtstamp_config structure
> will be copied twice to ifr and copied once from ifr on the return path
> in case if the underlying driver does not implement ndo_hwtstamp_get():
> - the underlying driver calls copy_to_user() inside its ndo_eth_ioctl()
>   implementation to return the data to generic_hwtstamp_get_lower();
> - then generic_hwtstamp_get_lower() calls copy_from_user() to copy it
>   back out of the ifr to kernel_hwtstamp_config structure;
> - then dev_get_hwtstamp() calls copy_to_user() again to update
>   the same ifr with the same data the ifr already contains.
> 
> Should we consider this acceptable?

This is a slow path so copying a small structure is not a concern.

Thanks,
Richard
diff mbox series

Patch

diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 5920544e93e8..66d54c610aa5 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -353,6 +353,44 @@  static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 	return 0;
 }
 
+static int vlan_dev_hwtstamp(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	struct kernel_hwtstamp_config kernel_config = {};
+	struct hwtstamp_config config;
+	int err;
+
+	if (!netif_device_present(dev))
+		return -ENODEV;
+
+	if ((cmd == SIOCSHWTSTAMP && !ops->ndo_hwtstamp_set) ||
+	    (cmd == SIOCGHWTSTAMP && !ops->ndo_hwtstamp_get)) {
+		if (ops->ndo_eth_ioctl) {
+			return ops->ndo_eth_ioctl(real_dev, &ifr, cmd);
+		else
+			return -EOPNOTSUPP;
+	}
+
+	kernel_config.ifr = ifr;
+	if (cmd == SIOCSHWTSTAMP) {
+		if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+			return -EFAULT;
+
+		hwtstamp_config_to_kernel(&kernel_config, &config);
+		err = ops->ndo_hwtstamp_set(dev, &kernel_config, NULL);
+	} else if (cmd == SIOCGHWTSTAMP) {
+		err = ops->ndo_hwtstamp_get(dev, &kernel_config, NULL);
+	}
+
+	if (err)
+		return err;
+
+	hwtstamp_kernel_to_config(&config, &kernel_config);
+	if (copy_to_user(ifr->ifr_data, &config, sizeof(config)))
+		return -EFAULT;
+	return 0;
+}
+
 static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
@@ -368,10 +406,12 @@  static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 		if (!net_eq(dev_net(dev), dev_net(real_dev)))
 			break;
 		fallthrough;
+	case SIOCGHWTSTAMP:
+		err = vlan_dev_hwtstamp(real_dev, &ifrr, cmd);
+		break;
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
-	case SIOCGHWTSTAMP:
 		if (netif_device_present(real_dev) && ops->ndo_eth_ioctl)
 			err = ops->ndo_eth_ioctl(real_dev, &ifrr, cmd);
 		break;