diff mbox series

[net] net: fix dev_ifsioc_locked() race condition

Message ID 20210124013049.132571-1-xiyou.wangcong@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net] net: fix dev_ifsioc_locked() race condition | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net
netdev/subject_prefix success Link
netdev/cc_maintainers warning 14 maintainers not CCed: gustavoars@kernel.org colin.king@canonical.com andriin@fb.com f.fainelli@gmail.com andrew@lunn.ch daniel@iogearbox.net bjorn@kernel.org edumazet@google.com ap420073@gmail.com jiri@mellanox.com mkubecek@suse.cz davem@davemloft.net ast@kernel.org kuba@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 7180 this patch: 7180
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 104 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 7585 this patch: 7585
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Cong Wang Jan. 24, 2021, 1:30 a.m. UTC
From: Cong Wang <cong.wang@bytedance.com>

dev_ifsioc_locked() is called with only RCU read lock, so when
there is a parallel writer changing the mac address, it could
get a partially updated mac address, as shown below:

Thread 1			Thread 2
// eth_commit_mac_addr_change()
memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
				// dev_ifsioc_locked()
				memcpy(ifr->ifr_hwaddr.sa_data,
					dev->dev_addr,...);

Close this race condition by guarding them with a RW semaphore,
like netdev_get_name(). The writers take RTNL anyway, so this
will not affect the slow path.

Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 39 ++++++++++++++++++++++++++++++++++++---
 net/core/dev_ioctl.c      | 18 ++++++------------
 3 files changed, 43 insertions(+), 15 deletions(-)

Comments

Gong, Sishuai Jan. 25, 2021, 7:43 p.m. UTC | #1
Hi,

We also found another pair of writer and reader that may suffer the same problem.

A data race may also happen on the variable netdev->dev_addr when functions e1000_set_mac() and packet_getname() run in parallel, eventually it could return a partially updated MAC address to the user, as shown below:

Thread 1								Thread 2
									// packet_getname()
									memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
//e1000_set_mac()
memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);

And the calling stacks are:

Writer calling trace
- __sys_sendmsg
-- ___sys_sendmsg
--- sock_sendmsg
---- netlink_unicast
----- netlink_rcv_skb 
------ __rtnl_newlink
------- do_setlink
-------- dev_set_mac_address

Reader calling trace
- __sys_getsockname

Since e1000_set_mac() is also called by dev_set_mac_address(), the writer can be protected already with this patch. The reader, however, needs to grab the same semaphore to close the race condition.


Thanks,
Sishuai

> On Jan 23, 2021, at 8:30 PM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> 
> From: Cong Wang <cong.wang@bytedance.com>
> 
> dev_ifsioc_locked() is called with only RCU read lock, so when
> there is a parallel writer changing the mac address, it could
> get a partially updated mac address, as shown below:
> 
> Thread 1			Thread 2
> // eth_commit_mac_addr_change()
> memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> 				// dev_ifsioc_locked()
> 				memcpy(ifr->ifr_hwaddr.sa_data,
> 					dev->dev_addr,...);
> 
> Close this race condition by guarding them with a RW semaphore,
> like netdev_get_name(). The writers take RTNL anyway, so this
> will not affect the slow path.
> 
> Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> Cc: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> ---
> include/linux/netdevice.h |  1 +
> net/core/dev.c            | 39 ++++++++++++++++++++++++++++++++++++---
> net/core/dev_ioctl.c      | 18 ++++++------------
> 3 files changed, 43 insertions(+), 15 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 259be67644e3..7a871f2dcc03 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -3918,6 +3918,7 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
> 			      struct netlink_ext_ack *extack);
> int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
> 			struct netlink_ext_ack *extack);
> +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
> int dev_change_carrier(struct net_device *, bool new_carrier);
> int dev_get_phys_port_id(struct net_device *dev,
> 			 struct netdev_phys_item_id *ppid);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index a979b86dbacd..55c0db7704c7 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -8709,6 +8709,8 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
> }
> EXPORT_SYMBOL(dev_pre_changeaddr_notify);
> 
> +static DECLARE_RWSEM(dev_addr_sem);
> +
> /**
>  *	dev_set_mac_address - Change Media Access Control Address
>  *	@dev: device
> @@ -8729,19 +8731,50 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
> 		return -EINVAL;
> 	if (!netif_device_present(dev))
> 		return -ENODEV;
> +
> +	down_write(&dev_addr_sem);
> 	err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
> 	if (err)
> -		return err;
> +		goto out;
> 	err = ops->ndo_set_mac_address(dev, sa);
> 	if (err)
> -		return err;
> +		goto out;
> 	dev->addr_assign_type = NET_ADDR_SET;
> 	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
> 	add_device_randomness(dev->dev_addr, dev->addr_len);
> -	return 0;
> +out:
> +	up_write(&dev_addr_sem);
> +	return err;
> }
> EXPORT_SYMBOL(dev_set_mac_address);
> 
> +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
> +{
> +	size_t size = sizeof(sa->sa_data);
> +	struct net_device *dev;
> +	int ret = 0;
> +
> +	down_read(&dev_addr_sem);
> +	rcu_read_lock();
> +
> +	dev = dev_get_by_name_rcu(net, dev_name);
> +	if (!dev) {
> +		ret = -ENODEV;
> +		goto unlock;
> +	}
> +	if (!dev->addr_len)
> +		memset(sa->sa_data, 0, size);
> +	else
> +		memcpy(sa->sa_data, dev->dev_addr,
> +		       min_t(size_t, size, dev->addr_len));
> +	sa->sa_family = dev->type;
> +
> +unlock:
> +	rcu_read_unlock();
> +	up_read(&dev_addr_sem);
> +	return ret;
> +}
> +
> /**
>  *	dev_change_carrier - Change device carrier
>  *	@dev: device
> diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
> index db8a0ff86f36..bfa0dbd3d36f 100644
> --- a/net/core/dev_ioctl.c
> +++ b/net/core/dev_ioctl.c
> @@ -123,17 +123,6 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
> 		ifr->ifr_mtu = dev->mtu;
> 		return 0;
> 
> -	case SIOCGIFHWADDR:
> -		if (!dev->addr_len)
> -			memset(ifr->ifr_hwaddr.sa_data, 0,
> -			       sizeof(ifr->ifr_hwaddr.sa_data));
> -		else
> -			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
> -			       min(sizeof(ifr->ifr_hwaddr.sa_data),
> -				   (size_t)dev->addr_len));
> -		ifr->ifr_hwaddr.sa_family = dev->type;
> -		return 0;
> -
> 	case SIOCGIFSLAVE:
> 		err = -EINVAL;
> 		break;
> @@ -418,6 +407,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
> 	 */
> 
> 	switch (cmd) {
> +	case SIOCGIFHWADDR:
> +		dev_load(net, ifr->ifr_name);
> +		ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name);
> +		if (colon)
> +			*colon = ':';
> +		return ret;
> 	/*
> 	 *	These ioctl calls:
> 	 *	- can be done by all.
> @@ -427,7 +422,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
> 	case SIOCGIFFLAGS:
> 	case SIOCGIFMETRIC:
> 	case SIOCGIFMTU:
> -	case SIOCGIFHWADDR:
> 	case SIOCGIFSLAVE:
> 	case SIOCGIFMAP:
> 	case SIOCGIFINDEX:
> -- 
> 2.25.1
>
Cong Wang Jan. 28, 2021, 1:42 a.m. UTC | #2
On Mon, Jan 25, 2021 at 11:43 AM Gong, Sishuai <sishuai@purdue.edu> wrote:
>
> Hi,
>
> We also found another pair of writer and reader that may suffer the same problem.
>
> A data race may also happen on the variable netdev->dev_addr when functions e1000_set_mac() and packet_getname() run in parallel, eventually it could return a partially updated MAC address to the user, as shown below:

Yeah, this one requires a separate patch, because at least
it uses an index instead of a name to lookup the devices.

Thanks.
Jakub Kicinski Jan. 28, 2021, 8:55 p.m. UTC | #3
On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>
> 
> dev_ifsioc_locked() is called with only RCU read lock, so when
> there is a parallel writer changing the mac address, it could
> get a partially updated mac address, as shown below:
> 
> Thread 1			Thread 2
> // eth_commit_mac_addr_change()
> memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> 				// dev_ifsioc_locked()
> 				memcpy(ifr->ifr_hwaddr.sa_data,
> 					dev->dev_addr,...);
> 
> Close this race condition by guarding them with a RW semaphore,
> like netdev_get_name(). The writers take RTNL anyway, so this
> will not affect the slow path.
> 
> Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> Cc: Eric Dumazet <eric.dumazet@gmail.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>

The addition of the write lock scares me a little for a fix, there's a
lot of code which can potentially run under the callbacks and notifiers
there.

What about using a seqlock?

> +static DECLARE_RWSEM(dev_addr_sem);
> +
>  /**
>   *	dev_set_mac_address - Change Media Access Control Address
>   *	@dev: device
> @@ -8729,19 +8731,50 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
>  		return -EINVAL;
>  	if (!netif_device_present(dev))
>  		return -ENODEV;
> +
> +	down_write(&dev_addr_sem);
>  	err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
>  	if (err)
> -		return err;
> +		goto out;
>  	err = ops->ndo_set_mac_address(dev, sa);
>  	if (err)
> -		return err;
> +		goto out;
>  	dev->addr_assign_type = NET_ADDR_SET;
>  	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
>  	add_device_randomness(dev->dev_addr, dev->addr_len);
> -	return 0;
> +out:
> +	up_write(&dev_addr_sem);
> +	return err;
>  }
Cong Wang Jan. 29, 2021, 5:08 a.m. UTC | #4
On Thu, Jan 28, 2021 at 12:55 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:
> > From: Cong Wang <cong.wang@bytedance.com>
> >
> > dev_ifsioc_locked() is called with only RCU read lock, so when
> > there is a parallel writer changing the mac address, it could
> > get a partially updated mac address, as shown below:
> >
> > Thread 1                      Thread 2
> > // eth_commit_mac_addr_change()
> > memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> >                               // dev_ifsioc_locked()
> >                               memcpy(ifr->ifr_hwaddr.sa_data,
> >                                       dev->dev_addr,...);
> >
> > Close this race condition by guarding them with a RW semaphore,
> > like netdev_get_name(). The writers take RTNL anyway, so this
> > will not affect the slow path.
> >
> > Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> > Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> > Cc: Eric Dumazet <eric.dumazet@gmail.com>
> > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
>
> The addition of the write lock scares me a little for a fix, there's a
> lot of code which can potentially run under the callbacks and notifiers
> there.
>
> What about using a seqlock?

Actually I did use seqlock in my initial version (not posted), it does not
allow blocking inside write_seqlock() protection, so I have to change
to rwsem.

Thanks.
Jakub Kicinski Jan. 29, 2021, 5:21 a.m. UTC | #5
On Thu, 28 Jan 2021 21:08:05 -0800 Cong Wang wrote:
> On Thu, Jan 28, 2021 at 12:55 PM Jakub Kicinski <kuba@kernel.org> wrote:
> >
> > On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:  
> > > From: Cong Wang <cong.wang@bytedance.com>
> > >
> > > dev_ifsioc_locked() is called with only RCU read lock, so when
> > > there is a parallel writer changing the mac address, it could
> > > get a partially updated mac address, as shown below:
> > >
> > > Thread 1                      Thread 2
> > > // eth_commit_mac_addr_change()
> > > memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> > >                               // dev_ifsioc_locked()
> > >                               memcpy(ifr->ifr_hwaddr.sa_data,
> > >                                       dev->dev_addr,...);
> > >
> > > Close this race condition by guarding them with a RW semaphore,
> > > like netdev_get_name(). The writers take RTNL anyway, so this
> > > will not affect the slow path.
> > >
> > > Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> > > Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> > > Cc: Eric Dumazet <eric.dumazet@gmail.com>
> > > Signed-off-by: Cong Wang <cong.wang@bytedance.com>  
> >
> > The addition of the write lock scares me a little for a fix, there's a
> > lot of code which can potentially run under the callbacks and notifiers
> > there.
> >
> > What about using a seqlock?  
> 
> Actually I did use seqlock in my initial version (not posted), it does not
> allow blocking inside write_seqlock() protection, so I have to change
> to rwsem.

Argh, you're right. No way we can construct something that tries to
read once and if it fails falls back to waiting for RTNL?
Cong Wang Jan. 29, 2021, 5:47 a.m. UTC | #6
On Thu, Jan 28, 2021 at 9:21 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Thu, 28 Jan 2021 21:08:05 -0800 Cong Wang wrote:
> > On Thu, Jan 28, 2021 at 12:55 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > >
> > > On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:
> > > > From: Cong Wang <cong.wang@bytedance.com>
> > > >
> > > > dev_ifsioc_locked() is called with only RCU read lock, so when
> > > > there is a parallel writer changing the mac address, it could
> > > > get a partially updated mac address, as shown below:
> > > >
> > > > Thread 1                      Thread 2
> > > > // eth_commit_mac_addr_change()
> > > > memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> > > >                               // dev_ifsioc_locked()
> > > >                               memcpy(ifr->ifr_hwaddr.sa_data,
> > > >                                       dev->dev_addr,...);
> > > >
> > > > Close this race condition by guarding them with a RW semaphore,
> > > > like netdev_get_name(). The writers take RTNL anyway, so this
> > > > will not affect the slow path.
> > > >
> > > > Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> > > > Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> > > > Cc: Eric Dumazet <eric.dumazet@gmail.com>
> > > > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> > >
> > > The addition of the write lock scares me a little for a fix, there's a
> > > lot of code which can potentially run under the callbacks and notifiers
> > > there.
> > >
> > > What about using a seqlock?
> >
> > Actually I did use seqlock in my initial version (not posted), it does not
> > allow blocking inside write_seqlock() protection, so I have to change
> > to rwsem.
>
> Argh, you're right. No way we can construct something that tries to
> read once and if it fails falls back to waiting for RTNL?

I don't think there is any way to tell whether the read fails, a partially
updated address can not be detected without additional flags etc..

And devnet_rename_sem is already there, pretty much similar to this
one.

Thanks.
Jakub Kicinski Jan. 29, 2021, 6 p.m. UTC | #7
On Thu, 28 Jan 2021 21:47:04 -0800 Cong Wang wrote:
> On Thu, Jan 28, 2021 at 9:21 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > On Thu, 28 Jan 2021 21:08:05 -0800 Cong Wang wrote:  
> > > On Thu, Jan 28, 2021 at 12:55 PM Jakub Kicinski <kuba@kernel.org> wrote:  
> > > >
> > > > On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:  
> > > > > From: Cong Wang <cong.wang@bytedance.com>
> > > > >
> > > > > dev_ifsioc_locked() is called with only RCU read lock, so when
> > > > > there is a parallel writer changing the mac address, it could
> > > > > get a partially updated mac address, as shown below:
> > > > >
> > > > > Thread 1                      Thread 2
> > > > > // eth_commit_mac_addr_change()
> > > > > memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> > > > >                               // dev_ifsioc_locked()
> > > > >                               memcpy(ifr->ifr_hwaddr.sa_data,
> > > > >                                       dev->dev_addr,...);
> > > > >
> > > > > Close this race condition by guarding them with a RW semaphore,
> > > > > like netdev_get_name(). The writers take RTNL anyway, so this
> > > > > will not affect the slow path.
> > > > >
> > > > > Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> > > > > Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> > > > > Cc: Eric Dumazet <eric.dumazet@gmail.com>
> > > > > Signed-off-by: Cong Wang <cong.wang@bytedance.com>  
> > > >
> > > > The addition of the write lock scares me a little for a fix, there's a
> > > > lot of code which can potentially run under the callbacks and notifiers
> > > > there.
> > > >
> > > > What about using a seqlock?  
> > >
> > > Actually I did use seqlock in my initial version (not posted), it does not
> > > allow blocking inside write_seqlock() protection, so I have to change
> > > to rwsem.  
> >
> > Argh, you're right. No way we can construct something that tries to
> > read once and if it fails falls back to waiting for RTNL?  
> 
> I don't think there is any way to tell whether the read fails, a partially
> updated address can not be detected without additional flags etc..

Let me pseudo code it, I can't English that well:

void reader(obj)
{
	unsigned int seq;

	seq = READ_ONCE(seqcnt);
	if (seq & 1)
		goto slow_path;
	smb_rmb();

	obj = read_the_thing();

	smb_rmb();
	if (seq == READ_ONCE(seqcnt))
		return;

slow_path:
	rtnl_lock();
	obj = read_the_thing();
	rtnl_unlock();
}

void writer()
{
	ASSERT_RNTL();

	seqcnt++;
	smb_wmb();

	modify_the_thing();

	smb_wmb();
	seqcnt++;
}


I think raw_seqcount helpers should do here?

> And devnet_rename_sem is already there, pretty much similar to this
> one.

Ack, I don't see rename triggering cascading notifications, tho.
I think you've seen the recent patch for locking in team, that's 
pretty much what I'm afraid will happen here. 

But if I'm missing something about the seqcount or you strongly prefer
the rwlock, we can do that, too. Although I'd rather take this patch 
to net-next in that case.
Cong Wang Jan. 29, 2021, 11:12 p.m. UTC | #8
On Fri, Jan 29, 2021 at 10:00 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Thu, 28 Jan 2021 21:47:04 -0800 Cong Wang wrote:
> > On Thu, Jan 28, 2021 at 9:21 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > > On Thu, 28 Jan 2021 21:08:05 -0800 Cong Wang wrote:
> > > > On Thu, Jan 28, 2021 at 12:55 PM Jakub Kicinski <kuba@kernel.org> wrote:
> > > > >
> > > > > On Sat, 23 Jan 2021 17:30:49 -0800 Cong Wang wrote:
> > > > > > From: Cong Wang <cong.wang@bytedance.com>
> > > > > >
> > > > > > dev_ifsioc_locked() is called with only RCU read lock, so when
> > > > > > there is a parallel writer changing the mac address, it could
> > > > > > get a partially updated mac address, as shown below:
> > > > > >
> > > > > > Thread 1                      Thread 2
> > > > > > // eth_commit_mac_addr_change()
> > > > > > memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
> > > > > >                               // dev_ifsioc_locked()
> > > > > >                               memcpy(ifr->ifr_hwaddr.sa_data,
> > > > > >                                       dev->dev_addr,...);
> > > > > >
> > > > > > Close this race condition by guarding them with a RW semaphore,
> > > > > > like netdev_get_name(). The writers take RTNL anyway, so this
> > > > > > will not affect the slow path.
> > > > > >
> > > > > > Fixes: 3710becf8a58 ("net: RCU locking for simple ioctl()")
> > > > > > Reported-by: "Gong, Sishuai" <sishuai@purdue.edu>
> > > > > > Cc: Eric Dumazet <eric.dumazet@gmail.com>
> > > > > > Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> > > > >
> > > > > The addition of the write lock scares me a little for a fix, there's a
> > > > > lot of code which can potentially run under the callbacks and notifiers
> > > > > there.
> > > > >
> > > > > What about using a seqlock?
> > > >
> > > > Actually I did use seqlock in my initial version (not posted), it does not
> > > > allow blocking inside write_seqlock() protection, so I have to change
> > > > to rwsem.
> > >
> > > Argh, you're right. No way we can construct something that tries to
> > > read once and if it fails falls back to waiting for RTNL?
> >
> > I don't think there is any way to tell whether the read fails, a partially
> > updated address can not be detected without additional flags etc..
>
> Let me pseudo code it, I can't English that well:
>
> void reader(obj)
> {
>         unsigned int seq;
>
>         seq = READ_ONCE(seqcnt);
>         if (seq & 1)
>                 goto slow_path;
>         smb_rmb();
>
>         obj = read_the_thing();
>
>         smb_rmb();
>         if (seq == READ_ONCE(seqcnt))
>                 return;
>
> slow_path:
>         rtnl_lock();
>         obj = read_the_thing();
>         rtnl_unlock();
> }
>
> void writer()
> {
>         ASSERT_RNTL();
>
>         seqcnt++;
>         smb_wmb();
>
>         modify_the_thing();
>
>         smb_wmb();
>         seqcnt++;
> }
>
>
> I think raw_seqcount helpers should do here?

Not sure why you want to kinda rewrite seqlock here. Please see below.

>
> > And devnet_rename_sem is already there, pretty much similar to this
> > one.
>
> Ack, I don't see rename triggering cascading notifications, tho.
> I think you've seen the recent patch for locking in team, that's
> pretty much what I'm afraid will happen here.

Right, I should make only user-facing callers (ioctl, rtnetlink) use this
writer semaphore, and leave other callers of dev_set_mac_address()
untouched. Something like:

dev_set_mac_address_locked(...)
{
  down_write(&dev_addr_sem);
  dev_set_mac_address(...);
  up_write(&dev_addr_sem);
  ...
}

With this, we don't need to reinvent seqlock.

>
> But if I'm missing something about the seqcount or you strongly prefer
> the rwlock, we can do that, too. Although I'd rather take this patch
> to net-next in that case.

I have no problem with applying to net-next. I will send v2 targeting
net-next instead.

Thanks.
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 259be67644e3..7a871f2dcc03 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3918,6 +3918,7 @@  int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
 			      struct netlink_ext_ack *extack);
 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 			struct netlink_ext_ack *extack);
+int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_item_id *ppid);
diff --git a/net/core/dev.c b/net/core/dev.c
index a979b86dbacd..55c0db7704c7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8709,6 +8709,8 @@  int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
 }
 EXPORT_SYMBOL(dev_pre_changeaddr_notify);
 
+static DECLARE_RWSEM(dev_addr_sem);
+
 /**
  *	dev_set_mac_address - Change Media Access Control Address
  *	@dev: device
@@ -8729,19 +8731,50 @@  int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
 		return -EINVAL;
 	if (!netif_device_present(dev))
 		return -ENODEV;
+
+	down_write(&dev_addr_sem);
 	err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
 	if (err)
-		return err;
+		goto out;
 	err = ops->ndo_set_mac_address(dev, sa);
 	if (err)
-		return err;
+		goto out;
 	dev->addr_assign_type = NET_ADDR_SET;
 	call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
 	add_device_randomness(dev->dev_addr, dev->addr_len);
-	return 0;
+out:
+	up_write(&dev_addr_sem);
+	return err;
 }
 EXPORT_SYMBOL(dev_set_mac_address);
 
+int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
+{
+	size_t size = sizeof(sa->sa_data);
+	struct net_device *dev;
+	int ret = 0;
+
+	down_read(&dev_addr_sem);
+	rcu_read_lock();
+
+	dev = dev_get_by_name_rcu(net, dev_name);
+	if (!dev) {
+		ret = -ENODEV;
+		goto unlock;
+	}
+	if (!dev->addr_len)
+		memset(sa->sa_data, 0, size);
+	else
+		memcpy(sa->sa_data, dev->dev_addr,
+		       min_t(size_t, size, dev->addr_len));
+	sa->sa_family = dev->type;
+
+unlock:
+	rcu_read_unlock();
+	up_read(&dev_addr_sem);
+	return ret;
+}
+
 /**
  *	dev_change_carrier - Change device carrier
  *	@dev: device
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index db8a0ff86f36..bfa0dbd3d36f 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -123,17 +123,6 @@  static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm
 		ifr->ifr_mtu = dev->mtu;
 		return 0;
 
-	case SIOCGIFHWADDR:
-		if (!dev->addr_len)
-			memset(ifr->ifr_hwaddr.sa_data, 0,
-			       sizeof(ifr->ifr_hwaddr.sa_data));
-		else
-			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
-			       min(sizeof(ifr->ifr_hwaddr.sa_data),
-				   (size_t)dev->addr_len));
-		ifr->ifr_hwaddr.sa_family = dev->type;
-		return 0;
-
 	case SIOCGIFSLAVE:
 		err = -EINVAL;
 		break;
@@ -418,6 +407,12 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	 */
 
 	switch (cmd) {
+	case SIOCGIFHWADDR:
+		dev_load(net, ifr->ifr_name);
+		ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name);
+		if (colon)
+			*colon = ':';
+		return ret;
 	/*
 	 *	These ioctl calls:
 	 *	- can be done by all.
@@ -427,7 +422,6 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	case SIOCGIFFLAGS:
 	case SIOCGIFMETRIC:
 	case SIOCGIFMTU:
-	case SIOCGIFHWADDR:
 	case SIOCGIFSLAVE:
 	case SIOCGIFMAP:
 	case SIOCGIFINDEX: