diff mbox series

[net,v3] can: raw: fix receiver memory leak

Message ID 20230711011737.1969582-1-william.xuanziyang@huawei.com (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series [net,v3] can: raw: fix receiver memory leak | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1341 this patch: 1341
netdev/cc_maintainers fail 2 blamed authors not CCed: penguin-kernel@i-love.sakura.ne.jp tkhai@ya.ru; 2 maintainers not CCed: penguin-kernel@i-love.sakura.ne.jp tkhai@ya.ru
netdev/build_clang success Errors and warnings before: 1364 this patch: 1364
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 1364 this patch: 1364
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 154 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Ziyang Xuan (William) July 11, 2023, 1:17 a.m. UTC
Got kmemleak errors with the following ltp can_filter testcase:

for ((i=1; i<=100; i++))
do
        ./can_filter &
        sleep 0.1
done

==============================================================
[<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
[<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
[<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
[<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
[<00000000fd468496>] do_syscall_64+0x33/0x40
[<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6

It's a bug in the concurrent scenario of unregister_netdevice_many()
and raw_release() as following:

             cpu0                                        cpu1
unregister_netdevice_many(can_dev)
  unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
  net_set_todo(can_dev)
						raw_release(can_socket)
						  dev = dev_get_by_index(, ro->ifindex); // dev == NULL
						  if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
						    raw_disable_allfilters(, dev, );
						    dev_put(dev);
						  }
						  ...
						  ro->bound = 0;
						  ...

call_netdevice_notifiers(NETDEV_UNREGISTER, )
  raw_notify(, NETDEV_UNREGISTER, )
    if (ro->bound) // invalid because ro->bound has been set 0
      raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed

Add a net_device pointer member in struct raw_sock to record bound can_dev,
and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
dev_rcv_lists.

Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
---
v3:
  - Remove unnecessary coding style changes.
  - Add Reviewed-by and Acked-by tags.
v2:
  - Do not hold idev anyway firstly.
---
 net/can/raw.c | 57 ++++++++++++++++++++++-----------------------------
 1 file changed, 24 insertions(+), 33 deletions(-)

Comments

Oliver Hartkopp July 11, 2023, 6:45 a.m. UTC | #1
On 11.07.23 03:17, Ziyang Xuan wrote:
> Got kmemleak errors with the following ltp can_filter testcase:
> 
> for ((i=1; i<=100; i++))
> do
>          ./can_filter &
>          sleep 0.1
> done
> 
> ==============================================================
> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
> [<00000000fd468496>] do_syscall_64+0x33/0x40
> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
> 
> It's a bug in the concurrent scenario of unregister_netdevice_many()
> and raw_release() as following:
> 
>               cpu0                                        cpu1
> unregister_netdevice_many(can_dev)
>    unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
>    net_set_todo(can_dev)
> 						raw_release(can_socket)
> 						  dev = dev_get_by_index(, ro->ifindex); // dev == NULL
> 						  if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
> 						    raw_disable_allfilters(, dev, );
> 						    dev_put(dev);
> 						  }
> 						  ...
> 						  ro->bound = 0;
> 						  ...
> 
> call_netdevice_notifiers(NETDEV_UNREGISTER, )
>    raw_notify(, NETDEV_UNREGISTER, )
>      if (ro->bound) // invalid because ro->bound has been set 0
>        raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
> 
> Add a net_device pointer member in struct raw_sock to record bound can_dev,
> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
> dev_rcv_lists.
> 
> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
> ---
> v3:
>    - Remove unnecessary coding style changes.
>    - Add Reviewed-by and Acked-by tags.
> v2:
>    - Do not hold idev anyway firstly.

Just a nitpick:

The change for v2 was:

- Fix the case for a bound socket for "all" CAN interfaces (ifindex == 
0) in raw_bind().

The rest is ok now, thanks!

@Marc Kleine-Budde: Please remove the patch history or change the v2 
description - as you like. Thx!

Best regards,
Oliver

> ---
>   net/can/raw.c | 57 ++++++++++++++++++++++-----------------------------
>   1 file changed, 24 insertions(+), 33 deletions(-)
> 
> diff --git a/net/can/raw.c b/net/can/raw.c
> index 15c79b079184..2302e4882967 100644
> --- a/net/can/raw.c
> +++ b/net/can/raw.c
> @@ -84,6 +84,7 @@ struct raw_sock {
>   	struct sock sk;
>   	int bound;
>   	int ifindex;
> +	struct net_device *dev;
>   	struct list_head notifier;
>   	int loopback;
>   	int recv_own_msgs;
> @@ -277,7 +278,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
>   	if (!net_eq(dev_net(dev), sock_net(sk)))
>   		return;
>   
> -	if (ro->ifindex != dev->ifindex)
> +	if (ro->dev != dev)
>   		return;
>   
>   	switch (msg) {
> @@ -292,6 +293,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
>   
>   		ro->ifindex = 0;
>   		ro->bound = 0;
> +		ro->dev = NULL;
>   		ro->count = 0;
>   		release_sock(sk);
>   
> @@ -337,6 +339,7 @@ static int raw_init(struct sock *sk)
>   
>   	ro->bound            = 0;
>   	ro->ifindex          = 0;
> +	ro->dev              = NULL;
>   
>   	/* set default filter to single entry dfilter */
>   	ro->dfilter.can_id   = 0;
> @@ -385,19 +388,13 @@ static int raw_release(struct socket *sock)
>   
>   	lock_sock(sk);
>   
> +	rtnl_lock();
>   	/* remove current filters & unregister */
>   	if (ro->bound) {
> -		if (ro->ifindex) {
> -			struct net_device *dev;
> -
> -			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
> -			if (dev) {
> -				raw_disable_allfilters(dev_net(dev), dev, sk);
> -				dev_put(dev);
> -			}
> -		} else {
> +		if (ro->dev)
> +			raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
> +		else
>   			raw_disable_allfilters(sock_net(sk), NULL, sk);
> -		}
>   	}
>   
>   	if (ro->count > 1)
> @@ -405,8 +402,10 @@ static int raw_release(struct socket *sock)
>   
>   	ro->ifindex = 0;
>   	ro->bound = 0;
> +	ro->dev = NULL;
>   	ro->count = 0;
>   	free_percpu(ro->uniq);
> +	rtnl_unlock();
>   
>   	sock_orphan(sk);
>   	sock->sk = NULL;
> @@ -422,6 +421,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>   	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
>   	struct sock *sk = sock->sk;
>   	struct raw_sock *ro = raw_sk(sk);
> +	struct net_device *dev = NULL;
>   	int ifindex;
>   	int err = 0;
>   	int notify_enetdown = 0;
> @@ -431,14 +431,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>   	if (addr->can_family != AF_CAN)
>   		return -EINVAL;
>   
> +	rtnl_lock();
>   	lock_sock(sk);
>   
>   	if (ro->bound && addr->can_ifindex == ro->ifindex)
>   		goto out;
>   
>   	if (addr->can_ifindex) {
> -		struct net_device *dev;
> -
>   		dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
>   		if (!dev) {
>   			err = -ENODEV;
> @@ -467,26 +466,20 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
>   	if (!err) {
>   		if (ro->bound) {
>   			/* unregister old filters */
> -			if (ro->ifindex) {
> -				struct net_device *dev;
> -
> -				dev = dev_get_by_index(sock_net(sk),
> -						       ro->ifindex);
> -				if (dev) {
> -					raw_disable_allfilters(dev_net(dev),
> -							       dev, sk);
> -					dev_put(dev);
> -				}
> -			} else {
> +			if (ro->dev)
> +				raw_disable_allfilters(dev_net(ro->dev),
> +						       ro->dev, sk);
> +			else
>   				raw_disable_allfilters(sock_net(sk), NULL, sk);
> -			}
>   		}
>   		ro->ifindex = ifindex;
>   		ro->bound = 1;
> +		ro->dev = dev;
>   	}
>   
>    out:
>   	release_sock(sk);
> +	rtnl_unlock();
>   
>   	if (notify_enetdown) {
>   		sk->sk_err = ENETDOWN;
> @@ -553,9 +546,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>   		rtnl_lock();
>   		lock_sock(sk);
>   
> -		if (ro->bound && ro->ifindex) {
> -			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
> -			if (!dev) {
> +		dev = ro->dev;
> +		if (ro->bound && dev) {
> +			if (dev->reg_state != NETREG_REGISTERED) {
>   				if (count > 1)
>   					kfree(filter);
>   				err = -ENODEV;
> @@ -596,7 +589,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>   		ro->count  = count;
>   
>    out_fil:
> -		dev_put(dev);
>   		release_sock(sk);
>   		rtnl_unlock();
>   
> @@ -614,9 +606,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>   		rtnl_lock();
>   		lock_sock(sk);
>   
> -		if (ro->bound && ro->ifindex) {
> -			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
> -			if (!dev) {
> +		dev = ro->dev;
> +		if (ro->bound && dev) {
> +			if (dev->reg_state != NETREG_REGISTERED) {
>   				err = -ENODEV;
>   				goto out_err;
>   			}
> @@ -640,7 +632,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
>   		ro->err_mask = err_mask;
>   
>    out_err:
> -		dev_put(dev);
>   		release_sock(sk);
>   		rtnl_unlock();
>
Marc Kleine-Budde July 17, 2023, 7:27 a.m. UTC | #2
On 11.07.2023 09:17:37, Ziyang Xuan wrote:
> Got kmemleak errors with the following ltp can_filter testcase:
> 
> for ((i=1; i<=100; i++))
> do
>         ./can_filter &
>         sleep 0.1
> done
> 
> ==============================================================
> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
> [<00000000fd468496>] do_syscall_64+0x33/0x40
> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
> 
> It's a bug in the concurrent scenario of unregister_netdevice_many()
> and raw_release() as following:
> 
>              cpu0                                        cpu1
> unregister_netdevice_many(can_dev)
>   unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
>   net_set_todo(can_dev)
> 						raw_release(can_socket)
> 						  dev = dev_get_by_index(, ro->ifindex); // dev == NULL
> 						  if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
> 						    raw_disable_allfilters(, dev, );
> 						    dev_put(dev);
> 						  }
> 						  ...
> 						  ro->bound = 0;
> 						  ...
> 
> call_netdevice_notifiers(NETDEV_UNREGISTER, )
>   raw_notify(, NETDEV_UNREGISTER, )
>     if (ro->bound) // invalid because ro->bound has been set 0
>       raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
> 
> Add a net_device pointer member in struct raw_sock to record bound can_dev,
> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
> dev_rcv_lists.
> 
> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>

Added to linux-can/testing.

regards,
Marc
Eric Dumazet July 19, 2023, 3:31 a.m. UTC | #3
On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote:
>
> On 11.07.2023 09:17:37, Ziyang Xuan wrote:
> > Got kmemleak errors with the following ltp can_filter testcase:
> >
> > for ((i=1; i<=100; i++))
> > do
> >         ./can_filter &
> >         sleep 0.1
> > done
> >
> > ==============================================================
> > [<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
> > [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
> > [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
> > [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
> > [<00000000fd468496>] do_syscall_64+0x33/0x40
> > [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
> >
> > It's a bug in the concurrent scenario of unregister_netdevice_many()
> > and raw_release() as following:
> >
> >              cpu0                                        cpu1
> > unregister_netdevice_many(can_dev)
> >   unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
> >   net_set_todo(can_dev)
> >                                               raw_release(can_socket)
> >                                                 dev = dev_get_by_index(, ro->ifindex); // dev == NULL
> >                                                 if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
> >                                                   raw_disable_allfilters(, dev, );
> >                                                   dev_put(dev);
> >                                                 }
> >                                                 ...
> >                                                 ro->bound = 0;
> >                                                 ...
> >
> > call_netdevice_notifiers(NETDEV_UNREGISTER, )
> >   raw_notify(, NETDEV_UNREGISTER, )
> >     if (ro->bound) // invalid because ro->bound has been set 0
> >       raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
> >
> > Add a net_device pointer member in struct raw_sock to record bound can_dev,
> > and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
> > raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
> > dev_rcv_lists.
> >
> > Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
> > Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> > Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
> > Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
>
> Added to linux-can/testing.
>

This patch causes three syzbot LOCKDEP reports so far.

I suspect we need something like the following patch.

If nobody objects, I will submit this formally soon.

diff --git a/net/can/raw.c b/net/can/raw.c
index 2302e48829677334f8b2d74a479e5a9cbb5ce03c..ba6b52b1d7767fdd7b57d1b8e5519495340c572c
100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -386,9 +386,9 @@ static int raw_release(struct socket *sock)
        list_del(&ro->notifier);
        spin_unlock(&raw_notifier_lock);

+       rtnl_lock();
        lock_sock(sk);

-       rtnl_lock();
        /* remove current filters & unregister */
        if (ro->bound) {
                if (ro->dev)
@@ -405,12 +405,13 @@ static int raw_release(struct socket *sock)
        ro->dev = NULL;
        ro->count = 0;
        free_percpu(ro->uniq);
-       rtnl_unlock();

        sock_orphan(sk);
        sock->sk = NULL;

        release_sock(sk);
+       rtnl_unlock();
+
        sock_put(sk);

        return 0;
Ziyang Xuan (William) July 19, 2023, 4:41 a.m. UTC | #4
> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote:
>>
>> On 11.07.2023 09:17:37, Ziyang Xuan wrote:
>>> Got kmemleak errors with the following ltp can_filter testcase:
>>>
>>> for ((i=1; i<=100; i++))
>>> do
>>>         ./can_filter &
>>>         sleep 0.1
>>> done
>>>
>>> ==============================================================
>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
>>> [<00000000fd468496>] do_syscall_64+0x33/0x40
>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
>>>
>>> It's a bug in the concurrent scenario of unregister_netdevice_many()
>>> and raw_release() as following:
>>>
>>>              cpu0                                        cpu1
>>> unregister_netdevice_many(can_dev)
>>>   unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
>>>   net_set_todo(can_dev)
>>>                                               raw_release(can_socket)
>>>                                                 dev = dev_get_by_index(, ro->ifindex); // dev == NULL
>>>                                                 if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
>>>                                                   raw_disable_allfilters(, dev, );
>>>                                                   dev_put(dev);
>>>                                                 }
>>>                                                 ...
>>>                                                 ro->bound = 0;
>>>                                                 ...
>>>
>>> call_netdevice_notifiers(NETDEV_UNREGISTER, )
>>>   raw_notify(, NETDEV_UNREGISTER, )
>>>     if (ro->bound) // invalid because ro->bound has been set 0
>>>       raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
>>>
>>> Add a net_device pointer member in struct raw_sock to record bound can_dev,
>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
>>> dev_rcv_lists.
>>>
>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
>>
>> Added to linux-can/testing.
>>
> 
> This patch causes three syzbot LOCKDEP reports so far.

Hello Eric,

Is there reproducer? I want to understand the specific root cause.

Thanks,
William Xuan

> 
> I suspect we need something like the following patch.
> 
> If nobody objects, I will submit this formally soon.
> 
> diff --git a/net/can/raw.c b/net/can/raw.c
> index 2302e48829677334f8b2d74a479e5a9cbb5ce03c..ba6b52b1d7767fdd7b57d1b8e5519495340c572c
> 100644
> --- a/net/can/raw.c
> +++ b/net/can/raw.c
> @@ -386,9 +386,9 @@ static int raw_release(struct socket *sock)
>         list_del(&ro->notifier);
>         spin_unlock(&raw_notifier_lock);
> 
> +       rtnl_lock();
>         lock_sock(sk);
> 
> -       rtnl_lock();
>         /* remove current filters & unregister */
>         if (ro->bound) {
>                 if (ro->dev)
> @@ -405,12 +405,13 @@ static int raw_release(struct socket *sock)
>         ro->dev = NULL;
>         ro->count = 0;
>         free_percpu(ro->uniq);
> -       rtnl_unlock();
> 
>         sock_orphan(sk);
>         sock->sk = NULL;
> 
>         release_sock(sk);
> +       rtnl_unlock();
> +
>         sock_put(sk);
> 
>         return 0;
> .
>
Eric Dumazet July 19, 2023, 5:04 a.m. UTC | #5
On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William)
<william.xuanziyang@huawei.com> wrote:
>
> > On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote:
> >>
> >> On 11.07.2023 09:17:37, Ziyang Xuan wrote:
> >>> Got kmemleak errors with the following ltp can_filter testcase:
> >>>
> >>> for ((i=1; i<=100; i++))
> >>> do
> >>>         ./can_filter &
> >>>         sleep 0.1
> >>> done
> >>>
> >>> ==============================================================
> >>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
> >>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
> >>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
> >>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
> >>> [<00000000fd468496>] do_syscall_64+0x33/0x40
> >>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
> >>>
> >>> It's a bug in the concurrent scenario of unregister_netdevice_many()
> >>> and raw_release() as following:
> >>>
> >>>              cpu0                                        cpu1
> >>> unregister_netdevice_many(can_dev)
> >>>   unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
> >>>   net_set_todo(can_dev)
> >>>                                               raw_release(can_socket)
> >>>                                                 dev = dev_get_by_index(, ro->ifindex); // dev == NULL
> >>>                                                 if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
> >>>                                                   raw_disable_allfilters(, dev, );
> >>>                                                   dev_put(dev);
> >>>                                                 }
> >>>                                                 ...
> >>>                                                 ro->bound = 0;
> >>>                                                 ...
> >>>
> >>> call_netdevice_notifiers(NETDEV_UNREGISTER, )
> >>>   raw_notify(, NETDEV_UNREGISTER, )
> >>>     if (ro->bound) // invalid because ro->bound has been set 0
> >>>       raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
> >>>
> >>> Add a net_device pointer member in struct raw_sock to record bound can_dev,
> >>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
> >>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
> >>> dev_rcv_lists.
> >>>
> >>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
> >>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> >>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
> >>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
> >>
> >> Added to linux-can/testing.
> >>
> >
> > This patch causes three syzbot LOCKDEP reports so far.
>
> Hello Eric,
>
> Is there reproducer? I want to understand the specific root cause.
>

No repro yet, but simply look at other functions in net/can/raw.c

You must always take locks in the same order.

raw_bind(), raw_setsockopt() use:

rtnl_lock();
lock_sock(sk);

Therefore, raw_release() must _also_ use the same order, or risk deadlock.

Please build a LOCKDEP enabled kernel, and run your tests ?
Ziyang Xuan (William) July 19, 2023, 7:49 a.m. UTC | #6
在 2023/7/19 13:04, Eric Dumazet 写道:
> On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William)
> <william.xuanziyang@huawei.com> wrote:
>>
>>> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote:
>>>>
>>>> On 11.07.2023 09:17:37, Ziyang Xuan wrote:
>>>>> Got kmemleak errors with the following ltp can_filter testcase:
>>>>>
>>>>> for ((i=1; i<=100; i++))
>>>>> do
>>>>>         ./can_filter &
>>>>>         sleep 0.1
>>>>> done
>>>>>
>>>>> ==============================================================
>>>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
>>>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
>>>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
>>>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
>>>>> [<00000000fd468496>] do_syscall_64+0x33/0x40
>>>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
>>>>>
>>>>> It's a bug in the concurrent scenario of unregister_netdevice_many()
>>>>> and raw_release() as following:
>>>>>
>>>>>              cpu0                                        cpu1
>>>>> unregister_netdevice_many(can_dev)
>>>>>   unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
>>>>>   net_set_todo(can_dev)
>>>>>                                               raw_release(can_socket)
>>>>>                                                 dev = dev_get_by_index(, ro->ifindex); // dev == NULL
>>>>>                                                 if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
>>>>>                                                   raw_disable_allfilters(, dev, );
>>>>>                                                   dev_put(dev);
>>>>>                                                 }
>>>>>                                                 ...
>>>>>                                                 ro->bound = 0;
>>>>>                                                 ...
>>>>>
>>>>> call_netdevice_notifiers(NETDEV_UNREGISTER, )
>>>>>   raw_notify(, NETDEV_UNREGISTER, )
>>>>>     if (ro->bound) // invalid because ro->bound has been set 0
>>>>>       raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
>>>>>
>>>>> Add a net_device pointer member in struct raw_sock to record bound can_dev,
>>>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
>>>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
>>>>> dev_rcv_lists.
>>>>>
>>>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
>>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
>>>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
>>>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
>>>>
>>>> Added to linux-can/testing.
>>>>
>>>
>>> This patch causes three syzbot LOCKDEP reports so far.
>>
>> Hello Eric,
>>
>> Is there reproducer? I want to understand the specific root cause.
>>
> 
> No repro yet, but simply look at other functions in net/can/raw.c
> 
> You must always take locks in the same order.
> 
> raw_bind(), raw_setsockopt() use:
> 
> rtnl_lock();
> lock_sock(sk);
> 
> Therefore, raw_release() must _also_ use the same order, or risk deadlock.
> 
> Please build a LOCKDEP enabled kernel, and run your tests ?

I know now. This needs raw_bind() and raw_setsockopt() concurrent with raw_release().
And there is not the scenario in my current testcase. I did not get it. I will try to
reproduce it and add the testcase.

Thank you for your patient explanation.

William Xuan.
> .
>
Eric Dumazet Aug. 2, 2023, 5:48 p.m. UTC | #7
On Wed, Jul 19, 2023 at 9:49 AM Ziyang Xuan (William)
<william.xuanziyang@huawei.com> wrote:
>
>
>
> 在 2023/7/19 13:04, Eric Dumazet 写道:
> > On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William)
> > <william.xuanziyang@huawei.com> wrote:
> >>
> >>> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote:
> >>>>
> >>>> On 11.07.2023 09:17:37, Ziyang Xuan wrote:
> >>>>> Got kmemleak errors with the following ltp can_filter testcase:
> >>>>>
> >>>>> for ((i=1; i<=100; i++))
> >>>>> do
> >>>>>         ./can_filter &
> >>>>>         sleep 0.1
> >>>>> done
> >>>>>
> >>>>> ==============================================================
> >>>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
> >>>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
> >>>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
> >>>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
> >>>>> [<00000000fd468496>] do_syscall_64+0x33/0x40
> >>>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
> >>>>>
> >>>>> It's a bug in the concurrent scenario of unregister_netdevice_many()
> >>>>> and raw_release() as following:
> >>>>>
> >>>>>              cpu0                                        cpu1
> >>>>> unregister_netdevice_many(can_dev)
> >>>>>   unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
> >>>>>   net_set_todo(can_dev)
> >>>>>                                               raw_release(can_socket)
> >>>>>                                                 dev = dev_get_by_index(, ro->ifindex); // dev == NULL
> >>>>>                                                 if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
> >>>>>                                                   raw_disable_allfilters(, dev, );
> >>>>>                                                   dev_put(dev);
> >>>>>                                                 }
> >>>>>                                                 ...
> >>>>>                                                 ro->bound = 0;
> >>>>>                                                 ...
> >>>>>
> >>>>> call_netdevice_notifiers(NETDEV_UNREGISTER, )
> >>>>>   raw_notify(, NETDEV_UNREGISTER, )
> >>>>>     if (ro->bound) // invalid because ro->bound has been set 0
> >>>>>       raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
> >>>>>
> >>>>> Add a net_device pointer member in struct raw_sock to record bound can_dev,
> >>>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
> >>>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
> >>>>> dev_rcv_lists.
> >>>>>
> >>>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
> >>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> >>>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
> >>>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
> >>>>
> >>>> Added to linux-can/testing.
> >>>>
> >>>
> >>> This patch causes three syzbot LOCKDEP reports so far.
> >>
> >> Hello Eric,
> >>
> >> Is there reproducer? I want to understand the specific root cause.
> >>
> >
> > No repro yet, but simply look at other functions in net/can/raw.c
> >
> > You must always take locks in the same order.
> >
> > raw_bind(), raw_setsockopt() use:
> >
> > rtnl_lock();
> > lock_sock(sk);
> >
> > Therefore, raw_release() must _also_ use the same order, or risk deadlock.
> >
> > Please build a LOCKDEP enabled kernel, and run your tests ?
>
> I know now. This needs raw_bind() and raw_setsockopt() concurrent with raw_release().
> And there is not the scenario in my current testcase. I did not get it. I will try to
> reproduce it and add the testcase.
>
> Thank you for your patient explanation.

Another syzbot report is firing because of your patch

Apparently we store in ro->dev a pointer to a netdev without holding a
refcount on it.
Ziyang Xuan (William) Aug. 3, 2023, 12:43 a.m. UTC | #8
>>> On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William)
>>> <william.xuanziyang@huawei.com> wrote:
>>>>
>>>>> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote:
>>>>>>
>>>>>> On 11.07.2023 09:17:37, Ziyang Xuan wrote:
>>>>>>> Got kmemleak errors with the following ltp can_filter testcase:
>>>>>>>
>>>>>>> for ((i=1; i<=100; i++))
>>>>>>> do
>>>>>>>         ./can_filter &
>>>>>>>         sleep 0.1
>>>>>>> done
>>>>>>>
>>>>>>> ==============================================================
>>>>>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
>>>>>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
>>>>>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
>>>>>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
>>>>>>> [<00000000fd468496>] do_syscall_64+0x33/0x40
>>>>>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
>>>>>>>
>>>>>>> It's a bug in the concurrent scenario of unregister_netdevice_many()
>>>>>>> and raw_release() as following:
>>>>>>>
>>>>>>>              cpu0                                        cpu1
>>>>>>> unregister_netdevice_many(can_dev)
>>>>>>>   unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
>>>>>>>   net_set_todo(can_dev)
>>>>>>>                                               raw_release(can_socket)
>>>>>>>                                                 dev = dev_get_by_index(, ro->ifindex); // dev == NULL
>>>>>>>                                                 if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
>>>>>>>                                                   raw_disable_allfilters(, dev, );
>>>>>>>                                                   dev_put(dev);
>>>>>>>                                                 }
>>>>>>>                                                 ...
>>>>>>>                                                 ro->bound = 0;
>>>>>>>                                                 ...
>>>>>>>
>>>>>>> call_netdevice_notifiers(NETDEV_UNREGISTER, )
>>>>>>>   raw_notify(, NETDEV_UNREGISTER, )
>>>>>>>     if (ro->bound) // invalid because ro->bound has been set 0
>>>>>>>       raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
>>>>>>>
>>>>>>> Add a net_device pointer member in struct raw_sock to record bound can_dev,
>>>>>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
>>>>>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
>>>>>>> dev_rcv_lists.
>>>>>>>
>>>>>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
>>>>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
>>>>>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
>>>>>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
>>>>>>
>>>>>> Added to linux-can/testing.
>>>>>>
>>>>>
>>>>> This patch causes three syzbot LOCKDEP reports so far.
>>>>
>>>> Hello Eric,
>>>>
>>>> Is there reproducer? I want to understand the specific root cause.
>>>>
>>>
>>> No repro yet, but simply look at other functions in net/can/raw.c
>>>
>>> You must always take locks in the same order.
>>>
>>> raw_bind(), raw_setsockopt() use:
>>>
>>> rtnl_lock();
>>> lock_sock(sk);
>>>
>>> Therefore, raw_release() must _also_ use the same order, or risk deadlock.
>>>
>>> Please build a LOCKDEP enabled kernel, and run your tests ?
>>
>> I know now. This needs raw_bind() and raw_setsockopt() concurrent with raw_release().
>> And there is not the scenario in my current testcase. I did not get it. I will try to
>> reproduce it and add the testcase.
>>
>> Thank you for your patient explanation.
> 
> Another syzbot report is firing because of your patch
> 
> Apparently we store in ro->dev a pointer to a netdev without holding a
> refcount on it.
> .
Hello Eric,

Is there a syzbot link or reproducer can be provided?

Thank you!
William Xuan
>
Eric Dumazet Aug. 3, 2023, 7:11 a.m. UTC | #9
On Thu, Aug 3, 2023 at 2:44 AM Ziyang Xuan (William)
<william.xuanziyang@huawei.com> wrote:
>
> >>> On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William)
> >>> <william.xuanziyang@huawei.com> wrote:
> >>>>
> >>>>> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote:
> >>>>>>
> >>>>>> On 11.07.2023 09:17:37, Ziyang Xuan wrote:
> >>>>>>> Got kmemleak errors with the following ltp can_filter testcase:
> >>>>>>>
> >>>>>>> for ((i=1; i<=100; i++))
> >>>>>>> do
> >>>>>>>         ./can_filter &
> >>>>>>>         sleep 0.1
> >>>>>>> done
> >>>>>>>
> >>>>>>> ==============================================================
> >>>>>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
> >>>>>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
> >>>>>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
> >>>>>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
> >>>>>>> [<00000000fd468496>] do_syscall_64+0x33/0x40
> >>>>>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
> >>>>>>>
> >>>>>>> It's a bug in the concurrent scenario of unregister_netdevice_many()
> >>>>>>> and raw_release() as following:
> >>>>>>>
> >>>>>>>              cpu0                                        cpu1
> >>>>>>> unregister_netdevice_many(can_dev)
> >>>>>>>   unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
> >>>>>>>   net_set_todo(can_dev)
> >>>>>>>                                               raw_release(can_socket)
> >>>>>>>                                                 dev = dev_get_by_index(, ro->ifindex); // dev == NULL
> >>>>>>>                                                 if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
> >>>>>>>                                                   raw_disable_allfilters(, dev, );
> >>>>>>>                                                   dev_put(dev);
> >>>>>>>                                                 }
> >>>>>>>                                                 ...
> >>>>>>>                                                 ro->bound = 0;
> >>>>>>>                                                 ...
> >>>>>>>
> >>>>>>> call_netdevice_notifiers(NETDEV_UNREGISTER, )
> >>>>>>>   raw_notify(, NETDEV_UNREGISTER, )
> >>>>>>>     if (ro->bound) // invalid because ro->bound has been set 0
> >>>>>>>       raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
> >>>>>>>
> >>>>>>> Add a net_device pointer member in struct raw_sock to record bound can_dev,
> >>>>>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
> >>>>>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
> >>>>>>> dev_rcv_lists.
> >>>>>>>
> >>>>>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
> >>>>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
> >>>>>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
> >>>>>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
> >>>>>>
> >>>>>> Added to linux-can/testing.
> >>>>>>
> >>>>>
> >>>>> This patch causes three syzbot LOCKDEP reports so far.
> >>>>
> >>>> Hello Eric,
> >>>>
> >>>> Is there reproducer? I want to understand the specific root cause.
> >>>>
> >>>
> >>> No repro yet, but simply look at other functions in net/can/raw.c
> >>>
> >>> You must always take locks in the same order.
> >>>
> >>> raw_bind(), raw_setsockopt() use:
> >>>
> >>> rtnl_lock();
> >>> lock_sock(sk);
> >>>
> >>> Therefore, raw_release() must _also_ use the same order, or risk deadlock.
> >>>
> >>> Please build a LOCKDEP enabled kernel, and run your tests ?
> >>
> >> I know now. This needs raw_bind() and raw_setsockopt() concurrent with raw_release().
> >> And there is not the scenario in my current testcase. I did not get it. I will try to
> >> reproduce it and add the testcase.
> >>
> >> Thank you for your patient explanation.
> >
> > Another syzbot report is firing because of your patch
> >
> > Apparently we store in ro->dev a pointer to a netdev without holding a
> > refcount on it.
> > .
> Hello Eric,
>
> Is there a syzbot link or reproducer can be provided?
>

Not yet, but really we should not cache a dev pointer without making
sure it does not disappear later.

Caching the ifindex is fine though.

BUG: KASAN: use-after-free in read_pnet include/net/net_namespace.h:383 [inline]
BUG: KASAN: use-after-free in dev_net include/linux/netdevice.h:2590 [inline]
BUG: KASAN: use-after-free in raw_release+0x960/0x9b0 net/can/raw.c:395
Read of size 8 at addr ffff88802b9b8670 by task syz-executor.1/7705

CPU: 0 PID: 7705 Comm: syz-executor.1 Not tainted
6.5.0-rc3-syzkaller-00176-g13d2618b48f1 #0
Hardware name: Google Google Compute Engine/Google Compute Engine,
BIOS Google 07/12/2023
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
print_address_description mm/kasan/report.c:364 [inline]
print_report+0xc4/0x620 mm/kasan/report.c:475
kasan_report+0xda/0x110 mm/kasan/report.c:588
read_pnet include/net/net_namespace.h:383 [inline]
dev_net include/linux/netdevice.h:2590 [inline]
raw_release+0x960/0x9b0 net/can/raw.c:395
__sock_release+0xcd/0x290 net/socket.c:654
sock_close+0x1c/0x20 net/socket.c:1386
__fput+0x3fd/0xac0 fs/file_table.c:384
task_work_run+0x14d/0x240 kernel/task_work.c:179
resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
exit_to_user_mode_prepare+0x210/0x240 kernel/entry/common.c:204
__syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline]
syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:297
do_syscall_64+0x44/0xb0 arch/x86/entry/common.c:86
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7fb7e927b9da
Code: 48 3d 00 f0 ff ff 77 48 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89
7c 24 0c e8 03 7f 02 00 8b 7c 24 0c 89 c2 b8 03 00 00 00 0f 05 <48> 3d
00 f0 ff ff 77 36 89 d7 89 44 24 0c e8 63 7f 02 00 8b 44 24
RSP: 002b:00007ffd15e54c90 EFLAGS: 00000293 ORIG_RAX: 0000000000000003
RAX: 0000000000000000 RBX: 000000000000000a RCX: 00007fb7e927b9da
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009
RBP: 00007fb7e939d980 R08: 0000001b2fb20000 R09: 000000000000040e
R10: 000000008190df57 R11: 0000000000000293 R12: 000000000011244a
R13: ffffffffffffffff R14: 00007fb7e8e00000 R15: 0000000000112109
</TASK>

The buggy address belongs to the physical page:
page:ffffea0000ae6e00 refcount:0 mapcount:0 mapping:0000000000000000
index:0x0 pfn:0x2b9b8
flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff)
page_type: 0xffffffff()
raw: 00fff00000000000 ffffea0001e32208 ffffea0000ae7c08 0000000000000000
raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
page dumped because: kasan: bad access detected
page_owner tracks the page as freed
page last allocated via order 3, migratetype Unmovable, gfp_mask
0x446dc0(GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_COMP|__GFP_ZERO),
pid 5067, tgid 5067 (syz-executor.1), ts 153775196806, free_ts
1123753306319
set_page_owner include/linux/page_owner.h:31 [inline]
post_alloc_hook+0x2d2/0x350 mm/page_alloc.c:1570
prep_new_page mm/page_alloc.c:1577 [inline]
get_page_from_freelist+0x10a9/0x31e0 mm/page_alloc.c:3221
__alloc_pages+0x1d0/0x4a0 mm/page_alloc.c:4477
__alloc_pages_node include/linux/gfp.h:237 [inline]
alloc_pages_node include/linux/gfp.h:260 [inline]
__kmalloc_large_node+0x87/0x1c0 mm/slab_common.c:1126
__do_kmalloc_node mm/slab_common.c:973 [inline]
__kmalloc_node.cold+0x5/0xdd mm/slab_common.c:992
kmalloc_node include/linux/slab.h:602 [inline]
kvmalloc_node+0x6f/0x1a0 mm/util.c:604
kvmalloc include/linux/slab.h:720 [inline]
kvzalloc include/linux/slab.h:728 [inline]
alloc_netdev_mqs+0x9b/0x1240 net/core/dev.c:10594
rtnl_create_link+0xc9c/0xfd0 net/core/rtnetlink.c:3336
rtnl_newlink_create net/core/rtnetlink.c:3462 [inline]
__rtnl_newlink+0x1075/0x18c0 net/core/rtnetlink.c:3689
rtnl_newlink+0x67/0xa0 net/core/rtnetlink.c:3702
rtnetlink_rcv_msg+0x439/0xd30 net/core/rtnetlink.c:6428
netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2549
netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
netlink_unicast+0x539/0x800 net/netlink/af_netlink.c:1365
netlink_sendmsg+0x93c/0xe30 net/netlink/af_netlink.c:1914
sock_sendmsg_nosec net/socket.c:725 [inline]
sock_sendmsg+0xd9/0x180 net/socket.c:748
__sys_sendto+0x255/0x340 net/socket.c:2134
page last free stack trace:
reset_page_owner include/linux/page_owner.h:24 [inline]
free_pages_prepare mm/page_alloc.c:1161 [inline]
free_unref_page_prepare+0x508/0xb90 mm/page_alloc.c:2348
free_unref_page+0x33/0x3b0 mm/page_alloc.c:2443
kvfree+0x47/0x50 mm/util.c:650
device_release+0xa1/0x240 drivers/base/core.c:2484
kobject_cleanup lib/kobject.c:682 [inline]
kobject_release lib/kobject.c:713 [inline]
kref_put include/linux/kref.h:65 [inline]
kobject_put+0x1f7/0x5b0 lib/kobject.c:730
netdev_run_todo+0x7dd/0x11d0 net/core/dev.c:10366
rtnl_unlock net/core/rtnetlink.c:151 [inline]
rtnetlink_rcv_msg+0x446/0xd30 net/core/rtnetlink.c:6429
netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2549
netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
netlink_unicast+0x539/0x800 net/netlink/af_netlink.c:1365
netlink_sendmsg+0x93c/0xe30 net/netlink/af_netlink.c:1914
sock_sendmsg_nosec net/socket.c:725 [inline]
sock_sendmsg+0xd9/0x180 net/socket.c:748
____sys_sendmsg+0x6ac/0x940 net/socket.c:2494
___sys_sendmsg+0x135/0x1d0 net/socket.c:2548
__sys_sendmsg+0x117/0x1e0 net/socket.c:2577
do_syscall_x64 arch/x86/entry/common.c:50 [inline]
do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
entry_SYSCALL_64_after_hwframe+0x63/0xcd

Memory state around the buggy address:
ffff88802b9b8500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
ffff88802b9b8580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>ffff88802b9b8600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
^
ffff88802b9b8680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
ffff88802b9b8700: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
==================================================================
Oliver Hartkopp Aug. 4, 2023, 11:47 a.m. UTC | #10
On 03.08.23 09:11, Eric Dumazet wrote:
> On Thu, Aug 3, 2023 at 2:44 AM Ziyang Xuan (William)
> <william.xuanziyang@huawei.com> wrote:
>>
>>>>> On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William)
>>>>> <william.xuanziyang@huawei.com> wrote:
>>>>>>
>>>>>>> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote:
>>>>>>>>
>>>>>>>> On 11.07.2023 09:17:37, Ziyang Xuan wrote:
>>>>>>>>> Got kmemleak errors with the following ltp can_filter testcase:
>>>>>>>>>
>>>>>>>>> for ((i=1; i<=100; i++))
>>>>>>>>> do
>>>>>>>>>          ./can_filter &
>>>>>>>>>          sleep 0.1
>>>>>>>>> done
>>>>>>>>>
>>>>>>>>> ==============================================================
>>>>>>>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can]
>>>>>>>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw]
>>>>>>>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0
>>>>>>>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70
>>>>>>>>> [<00000000fd468496>] do_syscall_64+0x33/0x40
>>>>>>>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6
>>>>>>>>>
>>>>>>>>> It's a bug in the concurrent scenario of unregister_netdevice_many()
>>>>>>>>> and raw_release() as following:
>>>>>>>>>
>>>>>>>>>               cpu0                                        cpu1
>>>>>>>>> unregister_netdevice_many(can_dev)
>>>>>>>>>    unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this
>>>>>>>>>    net_set_todo(can_dev)
>>>>>>>>>                                                raw_release(can_socket)
>>>>>>>>>                                                  dev = dev_get_by_index(, ro->ifindex); // dev == NULL
>>>>>>>>>                                                  if (dev) { // receivers in dev_rcv_lists not free because dev is NULL
>>>>>>>>>                                                    raw_disable_allfilters(, dev, );
>>>>>>>>>                                                    dev_put(dev);
>>>>>>>>>                                                  }
>>>>>>>>>                                                  ...
>>>>>>>>>                                                  ro->bound = 0;
>>>>>>>>>                                                  ...
>>>>>>>>>
>>>>>>>>> call_netdevice_notifiers(NETDEV_UNREGISTER, )
>>>>>>>>>    raw_notify(, NETDEV_UNREGISTER, )
>>>>>>>>>      if (ro->bound) // invalid because ro->bound has been set 0
>>>>>>>>>        raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed
>>>>>>>>>
>>>>>>>>> Add a net_device pointer member in struct raw_sock to record bound can_dev,
>>>>>>>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(),
>>>>>>>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in
>>>>>>>>> dev_rcv_lists.
>>>>>>>>>
>>>>>>>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier")
>>>>>>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
>>>>>>>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net>
>>>>>>>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
>>>>>>>>
>>>>>>>> Added to linux-can/testing.
>>>>>>>>
>>>>>>>
>>>>>>> This patch causes three syzbot LOCKDEP reports so far.
>>>>>>
>>>>>> Hello Eric,
>>>>>>
>>>>>> Is there reproducer? I want to understand the specific root cause.
>>>>>>
>>>>>
>>>>> No repro yet, but simply look at other functions in net/can/raw.c
>>>>>
>>>>> You must always take locks in the same order.
>>>>>
>>>>> raw_bind(), raw_setsockopt() use:
>>>>>
>>>>> rtnl_lock();
>>>>> lock_sock(sk);
>>>>>
>>>>> Therefore, raw_release() must _also_ use the same order, or risk deadlock.
>>>>>
>>>>> Please build a LOCKDEP enabled kernel, and run your tests ?
>>>>
>>>> I know now. This needs raw_bind() and raw_setsockopt() concurrent with raw_release().
>>>> And there is not the scenario in my current testcase. I did not get it. I will try to
>>>> reproduce it and add the testcase.
>>>>
>>>> Thank you for your patient explanation.
>>>
>>> Another syzbot report is firing because of your patch
>>>
>>> Apparently we store in ro->dev a pointer to a netdev without holding a
>>> refcount on it.
>>> .

I've sent a patch addressing this issue by holding the dev refcount as 
long as it is needed.

https://lore.kernel.org/linux-can/20230804112811.42259-1-socketcan@hartkopp.net/T/#u

Another review is appreciated.

Many thanks,
Oliver

>> Hello Eric,
>>
>> Is there a syzbot link or reproducer can be provided?
>>
> 
> Not yet, but really we should not cache a dev pointer without making
> sure it does not disappear later.
> 
> Caching the ifindex is fine though.
> 
> BUG: KASAN: use-after-free in read_pnet include/net/net_namespace.h:383 [inline]
> BUG: KASAN: use-after-free in dev_net include/linux/netdevice.h:2590 [inline]
> BUG: KASAN: use-after-free in raw_release+0x960/0x9b0 net/can/raw.c:395
> Read of size 8 at addr ffff88802b9b8670 by task syz-executor.1/7705
> 
> CPU: 0 PID: 7705 Comm: syz-executor.1 Not tainted
> 6.5.0-rc3-syzkaller-00176-g13d2618b48f1 #0
> Hardware name: Google Google Compute Engine/Google Compute Engine,
> BIOS Google 07/12/2023
> Call Trace:
> <TASK>
> __dump_stack lib/dump_stack.c:88 [inline]
> dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106
> print_address_description mm/kasan/report.c:364 [inline]
> print_report+0xc4/0x620 mm/kasan/report.c:475
> kasan_report+0xda/0x110 mm/kasan/report.c:588
> read_pnet include/net/net_namespace.h:383 [inline]
> dev_net include/linux/netdevice.h:2590 [inline]
> raw_release+0x960/0x9b0 net/can/raw.c:395
> __sock_release+0xcd/0x290 net/socket.c:654
> sock_close+0x1c/0x20 net/socket.c:1386
> __fput+0x3fd/0xac0 fs/file_table.c:384
> task_work_run+0x14d/0x240 kernel/task_work.c:179
> resume_user_mode_work include/linux/resume_user_mode.h:49 [inline]
> exit_to_user_mode_loop kernel/entry/common.c:171 [inline]
> exit_to_user_mode_prepare+0x210/0x240 kernel/entry/common.c:204
> __syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline]
> syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:297
> do_syscall_64+0x44/0xb0 arch/x86/entry/common.c:86
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
> RIP: 0033:0x7fb7e927b9da
> Code: 48 3d 00 f0 ff ff 77 48 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89
> 7c 24 0c e8 03 7f 02 00 8b 7c 24 0c 89 c2 b8 03 00 00 00 0f 05 <48> 3d
> 00 f0 ff ff 77 36 89 d7 89 44 24 0c e8 63 7f 02 00 8b 44 24
> RSP: 002b:00007ffd15e54c90 EFLAGS: 00000293 ORIG_RAX: 0000000000000003
> RAX: 0000000000000000 RBX: 000000000000000a RCX: 00007fb7e927b9da
> RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009
> RBP: 00007fb7e939d980 R08: 0000001b2fb20000 R09: 000000000000040e
> R10: 000000008190df57 R11: 0000000000000293 R12: 000000000011244a
> R13: ffffffffffffffff R14: 00007fb7e8e00000 R15: 0000000000112109
> </TASK>
> 
> The buggy address belongs to the physical page:
> page:ffffea0000ae6e00 refcount:0 mapcount:0 mapping:0000000000000000
> index:0x0 pfn:0x2b9b8
> flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff)
> page_type: 0xffffffff()
> raw: 00fff00000000000 ffffea0001e32208 ffffea0000ae7c08 0000000000000000
> raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
> page dumped because: kasan: bad access detected
> page_owner tracks the page as freed
> page last allocated via order 3, migratetype Unmovable, gfp_mask
> 0x446dc0(GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_COMP|__GFP_ZERO),
> pid 5067, tgid 5067 (syz-executor.1), ts 153775196806, free_ts
> 1123753306319
> set_page_owner include/linux/page_owner.h:31 [inline]
> post_alloc_hook+0x2d2/0x350 mm/page_alloc.c:1570
> prep_new_page mm/page_alloc.c:1577 [inline]
> get_page_from_freelist+0x10a9/0x31e0 mm/page_alloc.c:3221
> __alloc_pages+0x1d0/0x4a0 mm/page_alloc.c:4477
> __alloc_pages_node include/linux/gfp.h:237 [inline]
> alloc_pages_node include/linux/gfp.h:260 [inline]
> __kmalloc_large_node+0x87/0x1c0 mm/slab_common.c:1126
> __do_kmalloc_node mm/slab_common.c:973 [inline]
> __kmalloc_node.cold+0x5/0xdd mm/slab_common.c:992
> kmalloc_node include/linux/slab.h:602 [inline]
> kvmalloc_node+0x6f/0x1a0 mm/util.c:604
> kvmalloc include/linux/slab.h:720 [inline]
> kvzalloc include/linux/slab.h:728 [inline]
> alloc_netdev_mqs+0x9b/0x1240 net/core/dev.c:10594
> rtnl_create_link+0xc9c/0xfd0 net/core/rtnetlink.c:3336
> rtnl_newlink_create net/core/rtnetlink.c:3462 [inline]
> __rtnl_newlink+0x1075/0x18c0 net/core/rtnetlink.c:3689
> rtnl_newlink+0x67/0xa0 net/core/rtnetlink.c:3702
> rtnetlink_rcv_msg+0x439/0xd30 net/core/rtnetlink.c:6428
> netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2549
> netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
> netlink_unicast+0x539/0x800 net/netlink/af_netlink.c:1365
> netlink_sendmsg+0x93c/0xe30 net/netlink/af_netlink.c:1914
> sock_sendmsg_nosec net/socket.c:725 [inline]
> sock_sendmsg+0xd9/0x180 net/socket.c:748
> __sys_sendto+0x255/0x340 net/socket.c:2134
> page last free stack trace:
> reset_page_owner include/linux/page_owner.h:24 [inline]
> free_pages_prepare mm/page_alloc.c:1161 [inline]
> free_unref_page_prepare+0x508/0xb90 mm/page_alloc.c:2348
> free_unref_page+0x33/0x3b0 mm/page_alloc.c:2443
> kvfree+0x47/0x50 mm/util.c:650
> device_release+0xa1/0x240 drivers/base/core.c:2484
> kobject_cleanup lib/kobject.c:682 [inline]
> kobject_release lib/kobject.c:713 [inline]
> kref_put include/linux/kref.h:65 [inline]
> kobject_put+0x1f7/0x5b0 lib/kobject.c:730
> netdev_run_todo+0x7dd/0x11d0 net/core/dev.c:10366
> rtnl_unlock net/core/rtnetlink.c:151 [inline]
> rtnetlink_rcv_msg+0x446/0xd30 net/core/rtnetlink.c:6429
> netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2549
> netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
> netlink_unicast+0x539/0x800 net/netlink/af_netlink.c:1365
> netlink_sendmsg+0x93c/0xe30 net/netlink/af_netlink.c:1914
> sock_sendmsg_nosec net/socket.c:725 [inline]
> sock_sendmsg+0xd9/0x180 net/socket.c:748
> ____sys_sendmsg+0x6ac/0x940 net/socket.c:2494
> ___sys_sendmsg+0x135/0x1d0 net/socket.c:2548
> __sys_sendmsg+0x117/0x1e0 net/socket.c:2577
> do_syscall_x64 arch/x86/entry/common.c:50 [inline]
> do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80
> entry_SYSCALL_64_after_hwframe+0x63/0xcd
> 
> Memory state around the buggy address:
> ffff88802b9b8500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> ffff88802b9b8580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
>> ffff88802b9b8600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> ^
> ffff88802b9b8680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> ffff88802b9b8700: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
> ==================================================================
diff mbox series

Patch

diff --git a/net/can/raw.c b/net/can/raw.c
index 15c79b079184..2302e4882967 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -84,6 +84,7 @@  struct raw_sock {
 	struct sock sk;
 	int bound;
 	int ifindex;
+	struct net_device *dev;
 	struct list_head notifier;
 	int loopback;
 	int recv_own_msgs;
@@ -277,7 +278,7 @@  static void raw_notify(struct raw_sock *ro, unsigned long msg,
 	if (!net_eq(dev_net(dev), sock_net(sk)))
 		return;
 
-	if (ro->ifindex != dev->ifindex)
+	if (ro->dev != dev)
 		return;
 
 	switch (msg) {
@@ -292,6 +293,7 @@  static void raw_notify(struct raw_sock *ro, unsigned long msg,
 
 		ro->ifindex = 0;
 		ro->bound = 0;
+		ro->dev = NULL;
 		ro->count = 0;
 		release_sock(sk);
 
@@ -337,6 +339,7 @@  static int raw_init(struct sock *sk)
 
 	ro->bound            = 0;
 	ro->ifindex          = 0;
+	ro->dev              = NULL;
 
 	/* set default filter to single entry dfilter */
 	ro->dfilter.can_id   = 0;
@@ -385,19 +388,13 @@  static int raw_release(struct socket *sock)
 
 	lock_sock(sk);
 
+	rtnl_lock();
 	/* remove current filters & unregister */
 	if (ro->bound) {
-		if (ro->ifindex) {
-			struct net_device *dev;
-
-			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
-			if (dev) {
-				raw_disable_allfilters(dev_net(dev), dev, sk);
-				dev_put(dev);
-			}
-		} else {
+		if (ro->dev)
+			raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
+		else
 			raw_disable_allfilters(sock_net(sk), NULL, sk);
-		}
 	}
 
 	if (ro->count > 1)
@@ -405,8 +402,10 @@  static int raw_release(struct socket *sock)
 
 	ro->ifindex = 0;
 	ro->bound = 0;
+	ro->dev = NULL;
 	ro->count = 0;
 	free_percpu(ro->uniq);
+	rtnl_unlock();
 
 	sock_orphan(sk);
 	sock->sk = NULL;
@@ -422,6 +421,7 @@  static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
 	struct sock *sk = sock->sk;
 	struct raw_sock *ro = raw_sk(sk);
+	struct net_device *dev = NULL;
 	int ifindex;
 	int err = 0;
 	int notify_enetdown = 0;
@@ -431,14 +431,13 @@  static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	if (addr->can_family != AF_CAN)
 		return -EINVAL;
 
+	rtnl_lock();
 	lock_sock(sk);
 
 	if (ro->bound && addr->can_ifindex == ro->ifindex)
 		goto out;
 
 	if (addr->can_ifindex) {
-		struct net_device *dev;
-
 		dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
 		if (!dev) {
 			err = -ENODEV;
@@ -467,26 +466,20 @@  static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 	if (!err) {
 		if (ro->bound) {
 			/* unregister old filters */
-			if (ro->ifindex) {
-				struct net_device *dev;
-
-				dev = dev_get_by_index(sock_net(sk),
-						       ro->ifindex);
-				if (dev) {
-					raw_disable_allfilters(dev_net(dev),
-							       dev, sk);
-					dev_put(dev);
-				}
-			} else {
+			if (ro->dev)
+				raw_disable_allfilters(dev_net(ro->dev),
+						       ro->dev, sk);
+			else
 				raw_disable_allfilters(sock_net(sk), NULL, sk);
-			}
 		}
 		ro->ifindex = ifindex;
 		ro->bound = 1;
+		ro->dev = dev;
 	}
 
  out:
 	release_sock(sk);
+	rtnl_unlock();
 
 	if (notify_enetdown) {
 		sk->sk_err = ENETDOWN;
@@ -553,9 +546,9 @@  static int raw_setsockopt(struct socket *sock, int level, int optname,
 		rtnl_lock();
 		lock_sock(sk);
 
-		if (ro->bound && ro->ifindex) {
-			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
-			if (!dev) {
+		dev = ro->dev;
+		if (ro->bound && dev) {
+			if (dev->reg_state != NETREG_REGISTERED) {
 				if (count > 1)
 					kfree(filter);
 				err = -ENODEV;
@@ -596,7 +589,6 @@  static int raw_setsockopt(struct socket *sock, int level, int optname,
 		ro->count  = count;
 
  out_fil:
-		dev_put(dev);
 		release_sock(sk);
 		rtnl_unlock();
 
@@ -614,9 +606,9 @@  static int raw_setsockopt(struct socket *sock, int level, int optname,
 		rtnl_lock();
 		lock_sock(sk);
 
-		if (ro->bound && ro->ifindex) {
-			dev = dev_get_by_index(sock_net(sk), ro->ifindex);
-			if (!dev) {
+		dev = ro->dev;
+		if (ro->bound && dev) {
+			if (dev->reg_state != NETREG_REGISTERED) {
 				err = -ENODEV;
 				goto out_err;
 			}
@@ -640,7 +632,6 @@  static int raw_setsockopt(struct socket *sock, int level, int optname,
 		ro->err_mask = err_mask;
 
  out_err:
-		dev_put(dev);
 		release_sock(sk);
 		rtnl_unlock();