Message ID | 20230711011737.1969582-1-william.xuanziyang@huawei.com (mailing list archive) |
---|---|
State | Awaiting Upstream |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net,v3] can: raw: fix receiver memory leak | expand |
On 11.07.23 03:17, Ziyang Xuan wrote: > Got kmemleak errors with the following ltp can_filter testcase: > > for ((i=1; i<=100; i++)) > do > ./can_filter & > sleep 0.1 > done > > ============================================================== > [<00000000db4a4943>] can_rx_register+0x147/0x360 [can] > [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] > [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 > [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 > [<00000000fd468496>] do_syscall_64+0x33/0x40 > [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 > > It's a bug in the concurrent scenario of unregister_netdevice_many() > and raw_release() as following: > > cpu0 cpu1 > unregister_netdevice_many(can_dev) > unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this > net_set_todo(can_dev) > raw_release(can_socket) > dev = dev_get_by_index(, ro->ifindex); // dev == NULL > if (dev) { // receivers in dev_rcv_lists not free because dev is NULL > raw_disable_allfilters(, dev, ); > dev_put(dev); > } > ... > ro->bound = 0; > ... > > call_netdevice_notifiers(NETDEV_UNREGISTER, ) > raw_notify(, NETDEV_UNREGISTER, ) > if (ro->bound) // invalid because ro->bound has been set 0 > raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed > > Add a net_device pointer member in struct raw_sock to record bound can_dev, > and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(), > raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in > dev_rcv_lists. > > Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") > Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> > Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net> > Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> > --- > v3: > - Remove unnecessary coding style changes. > - Add Reviewed-by and Acked-by tags. > v2: > - Do not hold idev anyway firstly. Just a nitpick: The change for v2 was: - Fix the case for a bound socket for "all" CAN interfaces (ifindex == 0) in raw_bind(). The rest is ok now, thanks! @Marc Kleine-Budde: Please remove the patch history or change the v2 description - as you like. Thx! Best regards, Oliver > --- > net/can/raw.c | 57 ++++++++++++++++++++++----------------------------- > 1 file changed, 24 insertions(+), 33 deletions(-) > > diff --git a/net/can/raw.c b/net/can/raw.c > index 15c79b079184..2302e4882967 100644 > --- a/net/can/raw.c > +++ b/net/can/raw.c > @@ -84,6 +84,7 @@ struct raw_sock { > struct sock sk; > int bound; > int ifindex; > + struct net_device *dev; > struct list_head notifier; > int loopback; > int recv_own_msgs; > @@ -277,7 +278,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg, > if (!net_eq(dev_net(dev), sock_net(sk))) > return; > > - if (ro->ifindex != dev->ifindex) > + if (ro->dev != dev) > return; > > switch (msg) { > @@ -292,6 +293,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg, > > ro->ifindex = 0; > ro->bound = 0; > + ro->dev = NULL; > ro->count = 0; > release_sock(sk); > > @@ -337,6 +339,7 @@ static int raw_init(struct sock *sk) > > ro->bound = 0; > ro->ifindex = 0; > + ro->dev = NULL; > > /* set default filter to single entry dfilter */ > ro->dfilter.can_id = 0; > @@ -385,19 +388,13 @@ static int raw_release(struct socket *sock) > > lock_sock(sk); > > + rtnl_lock(); > /* remove current filters & unregister */ > if (ro->bound) { > - if (ro->ifindex) { > - struct net_device *dev; > - > - dev = dev_get_by_index(sock_net(sk), ro->ifindex); > - if (dev) { > - raw_disable_allfilters(dev_net(dev), dev, sk); > - dev_put(dev); > - } > - } else { > + if (ro->dev) > + raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk); > + else > raw_disable_allfilters(sock_net(sk), NULL, sk); > - } > } > > if (ro->count > 1) > @@ -405,8 +402,10 @@ static int raw_release(struct socket *sock) > > ro->ifindex = 0; > ro->bound = 0; > + ro->dev = NULL; > ro->count = 0; > free_percpu(ro->uniq); > + rtnl_unlock(); > > sock_orphan(sk); > sock->sk = NULL; > @@ -422,6 +421,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) > struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; > struct sock *sk = sock->sk; > struct raw_sock *ro = raw_sk(sk); > + struct net_device *dev = NULL; > int ifindex; > int err = 0; > int notify_enetdown = 0; > @@ -431,14 +431,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) > if (addr->can_family != AF_CAN) > return -EINVAL; > > + rtnl_lock(); > lock_sock(sk); > > if (ro->bound && addr->can_ifindex == ro->ifindex) > goto out; > > if (addr->can_ifindex) { > - struct net_device *dev; > - > dev = dev_get_by_index(sock_net(sk), addr->can_ifindex); > if (!dev) { > err = -ENODEV; > @@ -467,26 +466,20 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) > if (!err) { > if (ro->bound) { > /* unregister old filters */ > - if (ro->ifindex) { > - struct net_device *dev; > - > - dev = dev_get_by_index(sock_net(sk), > - ro->ifindex); > - if (dev) { > - raw_disable_allfilters(dev_net(dev), > - dev, sk); > - dev_put(dev); > - } > - } else { > + if (ro->dev) > + raw_disable_allfilters(dev_net(ro->dev), > + ro->dev, sk); > + else > raw_disable_allfilters(sock_net(sk), NULL, sk); > - } > } > ro->ifindex = ifindex; > ro->bound = 1; > + ro->dev = dev; > } > > out: > release_sock(sk); > + rtnl_unlock(); > > if (notify_enetdown) { > sk->sk_err = ENETDOWN; > @@ -553,9 +546,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, > rtnl_lock(); > lock_sock(sk); > > - if (ro->bound && ro->ifindex) { > - dev = dev_get_by_index(sock_net(sk), ro->ifindex); > - if (!dev) { > + dev = ro->dev; > + if (ro->bound && dev) { > + if (dev->reg_state != NETREG_REGISTERED) { > if (count > 1) > kfree(filter); > err = -ENODEV; > @@ -596,7 +589,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, > ro->count = count; > > out_fil: > - dev_put(dev); > release_sock(sk); > rtnl_unlock(); > > @@ -614,9 +606,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, > rtnl_lock(); > lock_sock(sk); > > - if (ro->bound && ro->ifindex) { > - dev = dev_get_by_index(sock_net(sk), ro->ifindex); > - if (!dev) { > + dev = ro->dev; > + if (ro->bound && dev) { > + if (dev->reg_state != NETREG_REGISTERED) { > err = -ENODEV; > goto out_err; > } > @@ -640,7 +632,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, > ro->err_mask = err_mask; > > out_err: > - dev_put(dev); > release_sock(sk); > rtnl_unlock(); >
On 11.07.2023 09:17:37, Ziyang Xuan wrote: > Got kmemleak errors with the following ltp can_filter testcase: > > for ((i=1; i<=100; i++)) > do > ./can_filter & > sleep 0.1 > done > > ============================================================== > [<00000000db4a4943>] can_rx_register+0x147/0x360 [can] > [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] > [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 > [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 > [<00000000fd468496>] do_syscall_64+0x33/0x40 > [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 > > It's a bug in the concurrent scenario of unregister_netdevice_many() > and raw_release() as following: > > cpu0 cpu1 > unregister_netdevice_many(can_dev) > unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this > net_set_todo(can_dev) > raw_release(can_socket) > dev = dev_get_by_index(, ro->ifindex); // dev == NULL > if (dev) { // receivers in dev_rcv_lists not free because dev is NULL > raw_disable_allfilters(, dev, ); > dev_put(dev); > } > ... > ro->bound = 0; > ... > > call_netdevice_notifiers(NETDEV_UNREGISTER, ) > raw_notify(, NETDEV_UNREGISTER, ) > if (ro->bound) // invalid because ro->bound has been set 0 > raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed > > Add a net_device pointer member in struct raw_sock to record bound can_dev, > and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(), > raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in > dev_rcv_lists. > > Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") > Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> > Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net> > Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> Added to linux-can/testing. regards, Marc
On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote: > > On 11.07.2023 09:17:37, Ziyang Xuan wrote: > > Got kmemleak errors with the following ltp can_filter testcase: > > > > for ((i=1; i<=100; i++)) > > do > > ./can_filter & > > sleep 0.1 > > done > > > > ============================================================== > > [<00000000db4a4943>] can_rx_register+0x147/0x360 [can] > > [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] > > [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 > > [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 > > [<00000000fd468496>] do_syscall_64+0x33/0x40 > > [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 > > > > It's a bug in the concurrent scenario of unregister_netdevice_many() > > and raw_release() as following: > > > > cpu0 cpu1 > > unregister_netdevice_many(can_dev) > > unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this > > net_set_todo(can_dev) > > raw_release(can_socket) > > dev = dev_get_by_index(, ro->ifindex); // dev == NULL > > if (dev) { // receivers in dev_rcv_lists not free because dev is NULL > > raw_disable_allfilters(, dev, ); > > dev_put(dev); > > } > > ... > > ro->bound = 0; > > ... > > > > call_netdevice_notifiers(NETDEV_UNREGISTER, ) > > raw_notify(, NETDEV_UNREGISTER, ) > > if (ro->bound) // invalid because ro->bound has been set 0 > > raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed > > > > Add a net_device pointer member in struct raw_sock to record bound can_dev, > > and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(), > > raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in > > dev_rcv_lists. > > > > Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") > > Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> > > Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net> > > Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> > > Added to linux-can/testing. > This patch causes three syzbot LOCKDEP reports so far. I suspect we need something like the following patch. If nobody objects, I will submit this formally soon. diff --git a/net/can/raw.c b/net/can/raw.c index 2302e48829677334f8b2d74a479e5a9cbb5ce03c..ba6b52b1d7767fdd7b57d1b8e5519495340c572c 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -386,9 +386,9 @@ static int raw_release(struct socket *sock) list_del(&ro->notifier); spin_unlock(&raw_notifier_lock); + rtnl_lock(); lock_sock(sk); - rtnl_lock(); /* remove current filters & unregister */ if (ro->bound) { if (ro->dev) @@ -405,12 +405,13 @@ static int raw_release(struct socket *sock) ro->dev = NULL; ro->count = 0; free_percpu(ro->uniq); - rtnl_unlock(); sock_orphan(sk); sock->sk = NULL; release_sock(sk); + rtnl_unlock(); + sock_put(sk); return 0;
> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote: >> >> On 11.07.2023 09:17:37, Ziyang Xuan wrote: >>> Got kmemleak errors with the following ltp can_filter testcase: >>> >>> for ((i=1; i<=100; i++)) >>> do >>> ./can_filter & >>> sleep 0.1 >>> done >>> >>> ============================================================== >>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can] >>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] >>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 >>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 >>> [<00000000fd468496>] do_syscall_64+0x33/0x40 >>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 >>> >>> It's a bug in the concurrent scenario of unregister_netdevice_many() >>> and raw_release() as following: >>> >>> cpu0 cpu1 >>> unregister_netdevice_many(can_dev) >>> unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this >>> net_set_todo(can_dev) >>> raw_release(can_socket) >>> dev = dev_get_by_index(, ro->ifindex); // dev == NULL >>> if (dev) { // receivers in dev_rcv_lists not free because dev is NULL >>> raw_disable_allfilters(, dev, ); >>> dev_put(dev); >>> } >>> ... >>> ro->bound = 0; >>> ... >>> >>> call_netdevice_notifiers(NETDEV_UNREGISTER, ) >>> raw_notify(, NETDEV_UNREGISTER, ) >>> if (ro->bound) // invalid because ro->bound has been set 0 >>> raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed >>> >>> Add a net_device pointer member in struct raw_sock to record bound can_dev, >>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(), >>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in >>> dev_rcv_lists. >>> >>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") >>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> >>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net> >>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> >> >> Added to linux-can/testing. >> > > This patch causes three syzbot LOCKDEP reports so far. Hello Eric, Is there reproducer? I want to understand the specific root cause. Thanks, William Xuan > > I suspect we need something like the following patch. > > If nobody objects, I will submit this formally soon. > > diff --git a/net/can/raw.c b/net/can/raw.c > index 2302e48829677334f8b2d74a479e5a9cbb5ce03c..ba6b52b1d7767fdd7b57d1b8e5519495340c572c > 100644 > --- a/net/can/raw.c > +++ b/net/can/raw.c > @@ -386,9 +386,9 @@ static int raw_release(struct socket *sock) > list_del(&ro->notifier); > spin_unlock(&raw_notifier_lock); > > + rtnl_lock(); > lock_sock(sk); > > - rtnl_lock(); > /* remove current filters & unregister */ > if (ro->bound) { > if (ro->dev) > @@ -405,12 +405,13 @@ static int raw_release(struct socket *sock) > ro->dev = NULL; > ro->count = 0; > free_percpu(ro->uniq); > - rtnl_unlock(); > > sock_orphan(sk); > sock->sk = NULL; > > release_sock(sk); > + rtnl_unlock(); > + > sock_put(sk); > > return 0; > . >
On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William) <william.xuanziyang@huawei.com> wrote: > > > On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote: > >> > >> On 11.07.2023 09:17:37, Ziyang Xuan wrote: > >>> Got kmemleak errors with the following ltp can_filter testcase: > >>> > >>> for ((i=1; i<=100; i++)) > >>> do > >>> ./can_filter & > >>> sleep 0.1 > >>> done > >>> > >>> ============================================================== > >>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can] > >>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] > >>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 > >>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 > >>> [<00000000fd468496>] do_syscall_64+0x33/0x40 > >>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 > >>> > >>> It's a bug in the concurrent scenario of unregister_netdevice_many() > >>> and raw_release() as following: > >>> > >>> cpu0 cpu1 > >>> unregister_netdevice_many(can_dev) > >>> unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this > >>> net_set_todo(can_dev) > >>> raw_release(can_socket) > >>> dev = dev_get_by_index(, ro->ifindex); // dev == NULL > >>> if (dev) { // receivers in dev_rcv_lists not free because dev is NULL > >>> raw_disable_allfilters(, dev, ); > >>> dev_put(dev); > >>> } > >>> ... > >>> ro->bound = 0; > >>> ... > >>> > >>> call_netdevice_notifiers(NETDEV_UNREGISTER, ) > >>> raw_notify(, NETDEV_UNREGISTER, ) > >>> if (ro->bound) // invalid because ro->bound has been set 0 > >>> raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed > >>> > >>> Add a net_device pointer member in struct raw_sock to record bound can_dev, > >>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(), > >>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in > >>> dev_rcv_lists. > >>> > >>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") > >>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> > >>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net> > >>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> > >> > >> Added to linux-can/testing. > >> > > > > This patch causes three syzbot LOCKDEP reports so far. > > Hello Eric, > > Is there reproducer? I want to understand the specific root cause. > No repro yet, but simply look at other functions in net/can/raw.c You must always take locks in the same order. raw_bind(), raw_setsockopt() use: rtnl_lock(); lock_sock(sk); Therefore, raw_release() must _also_ use the same order, or risk deadlock. Please build a LOCKDEP enabled kernel, and run your tests ?
在 2023/7/19 13:04, Eric Dumazet 写道: > On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William) > <william.xuanziyang@huawei.com> wrote: >> >>> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote: >>>> >>>> On 11.07.2023 09:17:37, Ziyang Xuan wrote: >>>>> Got kmemleak errors with the following ltp can_filter testcase: >>>>> >>>>> for ((i=1; i<=100; i++)) >>>>> do >>>>> ./can_filter & >>>>> sleep 0.1 >>>>> done >>>>> >>>>> ============================================================== >>>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can] >>>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] >>>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 >>>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 >>>>> [<00000000fd468496>] do_syscall_64+0x33/0x40 >>>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 >>>>> >>>>> It's a bug in the concurrent scenario of unregister_netdevice_many() >>>>> and raw_release() as following: >>>>> >>>>> cpu0 cpu1 >>>>> unregister_netdevice_many(can_dev) >>>>> unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this >>>>> net_set_todo(can_dev) >>>>> raw_release(can_socket) >>>>> dev = dev_get_by_index(, ro->ifindex); // dev == NULL >>>>> if (dev) { // receivers in dev_rcv_lists not free because dev is NULL >>>>> raw_disable_allfilters(, dev, ); >>>>> dev_put(dev); >>>>> } >>>>> ... >>>>> ro->bound = 0; >>>>> ... >>>>> >>>>> call_netdevice_notifiers(NETDEV_UNREGISTER, ) >>>>> raw_notify(, NETDEV_UNREGISTER, ) >>>>> if (ro->bound) // invalid because ro->bound has been set 0 >>>>> raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed >>>>> >>>>> Add a net_device pointer member in struct raw_sock to record bound can_dev, >>>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(), >>>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in >>>>> dev_rcv_lists. >>>>> >>>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") >>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> >>>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net> >>>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> >>>> >>>> Added to linux-can/testing. >>>> >>> >>> This patch causes three syzbot LOCKDEP reports so far. >> >> Hello Eric, >> >> Is there reproducer? I want to understand the specific root cause. >> > > No repro yet, but simply look at other functions in net/can/raw.c > > You must always take locks in the same order. > > raw_bind(), raw_setsockopt() use: > > rtnl_lock(); > lock_sock(sk); > > Therefore, raw_release() must _also_ use the same order, or risk deadlock. > > Please build a LOCKDEP enabled kernel, and run your tests ? I know now. This needs raw_bind() and raw_setsockopt() concurrent with raw_release(). And there is not the scenario in my current testcase. I did not get it. I will try to reproduce it and add the testcase. Thank you for your patient explanation. William Xuan. > . >
On Wed, Jul 19, 2023 at 9:49 AM Ziyang Xuan (William) <william.xuanziyang@huawei.com> wrote: > > > > 在 2023/7/19 13:04, Eric Dumazet 写道: > > On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William) > > <william.xuanziyang@huawei.com> wrote: > >> > >>> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote: > >>>> > >>>> On 11.07.2023 09:17:37, Ziyang Xuan wrote: > >>>>> Got kmemleak errors with the following ltp can_filter testcase: > >>>>> > >>>>> for ((i=1; i<=100; i++)) > >>>>> do > >>>>> ./can_filter & > >>>>> sleep 0.1 > >>>>> done > >>>>> > >>>>> ============================================================== > >>>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can] > >>>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] > >>>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 > >>>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 > >>>>> [<00000000fd468496>] do_syscall_64+0x33/0x40 > >>>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 > >>>>> > >>>>> It's a bug in the concurrent scenario of unregister_netdevice_many() > >>>>> and raw_release() as following: > >>>>> > >>>>> cpu0 cpu1 > >>>>> unregister_netdevice_many(can_dev) > >>>>> unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this > >>>>> net_set_todo(can_dev) > >>>>> raw_release(can_socket) > >>>>> dev = dev_get_by_index(, ro->ifindex); // dev == NULL > >>>>> if (dev) { // receivers in dev_rcv_lists not free because dev is NULL > >>>>> raw_disable_allfilters(, dev, ); > >>>>> dev_put(dev); > >>>>> } > >>>>> ... > >>>>> ro->bound = 0; > >>>>> ... > >>>>> > >>>>> call_netdevice_notifiers(NETDEV_UNREGISTER, ) > >>>>> raw_notify(, NETDEV_UNREGISTER, ) > >>>>> if (ro->bound) // invalid because ro->bound has been set 0 > >>>>> raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed > >>>>> > >>>>> Add a net_device pointer member in struct raw_sock to record bound can_dev, > >>>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(), > >>>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in > >>>>> dev_rcv_lists. > >>>>> > >>>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") > >>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> > >>>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net> > >>>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> > >>>> > >>>> Added to linux-can/testing. > >>>> > >>> > >>> This patch causes three syzbot LOCKDEP reports so far. > >> > >> Hello Eric, > >> > >> Is there reproducer? I want to understand the specific root cause. > >> > > > > No repro yet, but simply look at other functions in net/can/raw.c > > > > You must always take locks in the same order. > > > > raw_bind(), raw_setsockopt() use: > > > > rtnl_lock(); > > lock_sock(sk); > > > > Therefore, raw_release() must _also_ use the same order, or risk deadlock. > > > > Please build a LOCKDEP enabled kernel, and run your tests ? > > I know now. This needs raw_bind() and raw_setsockopt() concurrent with raw_release(). > And there is not the scenario in my current testcase. I did not get it. I will try to > reproduce it and add the testcase. > > Thank you for your patient explanation. Another syzbot report is firing because of your patch Apparently we store in ro->dev a pointer to a netdev without holding a refcount on it.
>>> On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William) >>> <william.xuanziyang@huawei.com> wrote: >>>> >>>>> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote: >>>>>> >>>>>> On 11.07.2023 09:17:37, Ziyang Xuan wrote: >>>>>>> Got kmemleak errors with the following ltp can_filter testcase: >>>>>>> >>>>>>> for ((i=1; i<=100; i++)) >>>>>>> do >>>>>>> ./can_filter & >>>>>>> sleep 0.1 >>>>>>> done >>>>>>> >>>>>>> ============================================================== >>>>>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can] >>>>>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] >>>>>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 >>>>>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 >>>>>>> [<00000000fd468496>] do_syscall_64+0x33/0x40 >>>>>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 >>>>>>> >>>>>>> It's a bug in the concurrent scenario of unregister_netdevice_many() >>>>>>> and raw_release() as following: >>>>>>> >>>>>>> cpu0 cpu1 >>>>>>> unregister_netdevice_many(can_dev) >>>>>>> unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this >>>>>>> net_set_todo(can_dev) >>>>>>> raw_release(can_socket) >>>>>>> dev = dev_get_by_index(, ro->ifindex); // dev == NULL >>>>>>> if (dev) { // receivers in dev_rcv_lists not free because dev is NULL >>>>>>> raw_disable_allfilters(, dev, ); >>>>>>> dev_put(dev); >>>>>>> } >>>>>>> ... >>>>>>> ro->bound = 0; >>>>>>> ... >>>>>>> >>>>>>> call_netdevice_notifiers(NETDEV_UNREGISTER, ) >>>>>>> raw_notify(, NETDEV_UNREGISTER, ) >>>>>>> if (ro->bound) // invalid because ro->bound has been set 0 >>>>>>> raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed >>>>>>> >>>>>>> Add a net_device pointer member in struct raw_sock to record bound can_dev, >>>>>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(), >>>>>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in >>>>>>> dev_rcv_lists. >>>>>>> >>>>>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") >>>>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> >>>>>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net> >>>>>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> >>>>>> >>>>>> Added to linux-can/testing. >>>>>> >>>>> >>>>> This patch causes three syzbot LOCKDEP reports so far. >>>> >>>> Hello Eric, >>>> >>>> Is there reproducer? I want to understand the specific root cause. >>>> >>> >>> No repro yet, but simply look at other functions in net/can/raw.c >>> >>> You must always take locks in the same order. >>> >>> raw_bind(), raw_setsockopt() use: >>> >>> rtnl_lock(); >>> lock_sock(sk); >>> >>> Therefore, raw_release() must _also_ use the same order, or risk deadlock. >>> >>> Please build a LOCKDEP enabled kernel, and run your tests ? >> >> I know now. This needs raw_bind() and raw_setsockopt() concurrent with raw_release(). >> And there is not the scenario in my current testcase. I did not get it. I will try to >> reproduce it and add the testcase. >> >> Thank you for your patient explanation. > > Another syzbot report is firing because of your patch > > Apparently we store in ro->dev a pointer to a netdev without holding a > refcount on it. > . Hello Eric, Is there a syzbot link or reproducer can be provided? Thank you! William Xuan >
On Thu, Aug 3, 2023 at 2:44 AM Ziyang Xuan (William) <william.xuanziyang@huawei.com> wrote: > > >>> On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William) > >>> <william.xuanziyang@huawei.com> wrote: > >>>> > >>>>> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote: > >>>>>> > >>>>>> On 11.07.2023 09:17:37, Ziyang Xuan wrote: > >>>>>>> Got kmemleak errors with the following ltp can_filter testcase: > >>>>>>> > >>>>>>> for ((i=1; i<=100; i++)) > >>>>>>> do > >>>>>>> ./can_filter & > >>>>>>> sleep 0.1 > >>>>>>> done > >>>>>>> > >>>>>>> ============================================================== > >>>>>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can] > >>>>>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] > >>>>>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 > >>>>>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 > >>>>>>> [<00000000fd468496>] do_syscall_64+0x33/0x40 > >>>>>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 > >>>>>>> > >>>>>>> It's a bug in the concurrent scenario of unregister_netdevice_many() > >>>>>>> and raw_release() as following: > >>>>>>> > >>>>>>> cpu0 cpu1 > >>>>>>> unregister_netdevice_many(can_dev) > >>>>>>> unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this > >>>>>>> net_set_todo(can_dev) > >>>>>>> raw_release(can_socket) > >>>>>>> dev = dev_get_by_index(, ro->ifindex); // dev == NULL > >>>>>>> if (dev) { // receivers in dev_rcv_lists not free because dev is NULL > >>>>>>> raw_disable_allfilters(, dev, ); > >>>>>>> dev_put(dev); > >>>>>>> } > >>>>>>> ... > >>>>>>> ro->bound = 0; > >>>>>>> ... > >>>>>>> > >>>>>>> call_netdevice_notifiers(NETDEV_UNREGISTER, ) > >>>>>>> raw_notify(, NETDEV_UNREGISTER, ) > >>>>>>> if (ro->bound) // invalid because ro->bound has been set 0 > >>>>>>> raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed > >>>>>>> > >>>>>>> Add a net_device pointer member in struct raw_sock to record bound can_dev, > >>>>>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(), > >>>>>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in > >>>>>>> dev_rcv_lists. > >>>>>>> > >>>>>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") > >>>>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> > >>>>>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net> > >>>>>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> > >>>>>> > >>>>>> Added to linux-can/testing. > >>>>>> > >>>>> > >>>>> This patch causes three syzbot LOCKDEP reports so far. > >>>> > >>>> Hello Eric, > >>>> > >>>> Is there reproducer? I want to understand the specific root cause. > >>>> > >>> > >>> No repro yet, but simply look at other functions in net/can/raw.c > >>> > >>> You must always take locks in the same order. > >>> > >>> raw_bind(), raw_setsockopt() use: > >>> > >>> rtnl_lock(); > >>> lock_sock(sk); > >>> > >>> Therefore, raw_release() must _also_ use the same order, or risk deadlock. > >>> > >>> Please build a LOCKDEP enabled kernel, and run your tests ? > >> > >> I know now. This needs raw_bind() and raw_setsockopt() concurrent with raw_release(). > >> And there is not the scenario in my current testcase. I did not get it. I will try to > >> reproduce it and add the testcase. > >> > >> Thank you for your patient explanation. > > > > Another syzbot report is firing because of your patch > > > > Apparently we store in ro->dev a pointer to a netdev without holding a > > refcount on it. > > . > Hello Eric, > > Is there a syzbot link or reproducer can be provided? > Not yet, but really we should not cache a dev pointer without making sure it does not disappear later. Caching the ifindex is fine though. BUG: KASAN: use-after-free in read_pnet include/net/net_namespace.h:383 [inline] BUG: KASAN: use-after-free in dev_net include/linux/netdevice.h:2590 [inline] BUG: KASAN: use-after-free in raw_release+0x960/0x9b0 net/can/raw.c:395 Read of size 8 at addr ffff88802b9b8670 by task syz-executor.1/7705 CPU: 0 PID: 7705 Comm: syz-executor.1 Not tainted 6.5.0-rc3-syzkaller-00176-g13d2618b48f1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2023 Call Trace: <TASK> __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 print_address_description mm/kasan/report.c:364 [inline] print_report+0xc4/0x620 mm/kasan/report.c:475 kasan_report+0xda/0x110 mm/kasan/report.c:588 read_pnet include/net/net_namespace.h:383 [inline] dev_net include/linux/netdevice.h:2590 [inline] raw_release+0x960/0x9b0 net/can/raw.c:395 __sock_release+0xcd/0x290 net/socket.c:654 sock_close+0x1c/0x20 net/socket.c:1386 __fput+0x3fd/0xac0 fs/file_table.c:384 task_work_run+0x14d/0x240 kernel/task_work.c:179 resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x210/0x240 kernel/entry/common.c:204 __syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline] syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:297 do_syscall_64+0x44/0xb0 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fb7e927b9da Code: 48 3d 00 f0 ff ff 77 48 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89 7c 24 0c e8 03 7f 02 00 8b 7c 24 0c 89 c2 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 36 89 d7 89 44 24 0c e8 63 7f 02 00 8b 44 24 RSP: 002b:00007ffd15e54c90 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 RAX: 0000000000000000 RBX: 000000000000000a RCX: 00007fb7e927b9da RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009 RBP: 00007fb7e939d980 R08: 0000001b2fb20000 R09: 000000000000040e R10: 000000008190df57 R11: 0000000000000293 R12: 000000000011244a R13: ffffffffffffffff R14: 00007fb7e8e00000 R15: 0000000000112109 </TASK> The buggy address belongs to the physical page: page:ffffea0000ae6e00 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x2b9b8 flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff) page_type: 0xffffffff() raw: 00fff00000000000 ffffea0001e32208 ffffea0000ae7c08 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as freed page last allocated via order 3, migratetype Unmovable, gfp_mask 0x446dc0(GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_COMP|__GFP_ZERO), pid 5067, tgid 5067 (syz-executor.1), ts 153775196806, free_ts 1123753306319 set_page_owner include/linux/page_owner.h:31 [inline] post_alloc_hook+0x2d2/0x350 mm/page_alloc.c:1570 prep_new_page mm/page_alloc.c:1577 [inline] get_page_from_freelist+0x10a9/0x31e0 mm/page_alloc.c:3221 __alloc_pages+0x1d0/0x4a0 mm/page_alloc.c:4477 __alloc_pages_node include/linux/gfp.h:237 [inline] alloc_pages_node include/linux/gfp.h:260 [inline] __kmalloc_large_node+0x87/0x1c0 mm/slab_common.c:1126 __do_kmalloc_node mm/slab_common.c:973 [inline] __kmalloc_node.cold+0x5/0xdd mm/slab_common.c:992 kmalloc_node include/linux/slab.h:602 [inline] kvmalloc_node+0x6f/0x1a0 mm/util.c:604 kvmalloc include/linux/slab.h:720 [inline] kvzalloc include/linux/slab.h:728 [inline] alloc_netdev_mqs+0x9b/0x1240 net/core/dev.c:10594 rtnl_create_link+0xc9c/0xfd0 net/core/rtnetlink.c:3336 rtnl_newlink_create net/core/rtnetlink.c:3462 [inline] __rtnl_newlink+0x1075/0x18c0 net/core/rtnetlink.c:3689 rtnl_newlink+0x67/0xa0 net/core/rtnetlink.c:3702 rtnetlink_rcv_msg+0x439/0xd30 net/core/rtnetlink.c:6428 netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2549 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x539/0x800 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x93c/0xe30 net/netlink/af_netlink.c:1914 sock_sendmsg_nosec net/socket.c:725 [inline] sock_sendmsg+0xd9/0x180 net/socket.c:748 __sys_sendto+0x255/0x340 net/socket.c:2134 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1161 [inline] free_unref_page_prepare+0x508/0xb90 mm/page_alloc.c:2348 free_unref_page+0x33/0x3b0 mm/page_alloc.c:2443 kvfree+0x47/0x50 mm/util.c:650 device_release+0xa1/0x240 drivers/base/core.c:2484 kobject_cleanup lib/kobject.c:682 [inline] kobject_release lib/kobject.c:713 [inline] kref_put include/linux/kref.h:65 [inline] kobject_put+0x1f7/0x5b0 lib/kobject.c:730 netdev_run_todo+0x7dd/0x11d0 net/core/dev.c:10366 rtnl_unlock net/core/rtnetlink.c:151 [inline] rtnetlink_rcv_msg+0x446/0xd30 net/core/rtnetlink.c:6429 netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2549 netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] netlink_unicast+0x539/0x800 net/netlink/af_netlink.c:1365 netlink_sendmsg+0x93c/0xe30 net/netlink/af_netlink.c:1914 sock_sendmsg_nosec net/socket.c:725 [inline] sock_sendmsg+0xd9/0x180 net/socket.c:748 ____sys_sendmsg+0x6ac/0x940 net/socket.c:2494 ___sys_sendmsg+0x135/0x1d0 net/socket.c:2548 __sys_sendmsg+0x117/0x1e0 net/socket.c:2577 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Memory state around the buggy address: ffff88802b9b8500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88802b9b8580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff88802b9b8600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff88802b9b8680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88802b9b8700: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ==================================================================
On 03.08.23 09:11, Eric Dumazet wrote: > On Thu, Aug 3, 2023 at 2:44 AM Ziyang Xuan (William) > <william.xuanziyang@huawei.com> wrote: >> >>>>> On Wed, Jul 19, 2023 at 6:41 AM Ziyang Xuan (William) >>>>> <william.xuanziyang@huawei.com> wrote: >>>>>> >>>>>>> On Mon, Jul 17, 2023 at 9:27 AM Marc Kleine-Budde <mkl@pengutronix.de> wrote: >>>>>>>> >>>>>>>> On 11.07.2023 09:17:37, Ziyang Xuan wrote: >>>>>>>>> Got kmemleak errors with the following ltp can_filter testcase: >>>>>>>>> >>>>>>>>> for ((i=1; i<=100; i++)) >>>>>>>>> do >>>>>>>>> ./can_filter & >>>>>>>>> sleep 0.1 >>>>>>>>> done >>>>>>>>> >>>>>>>>> ============================================================== >>>>>>>>> [<00000000db4a4943>] can_rx_register+0x147/0x360 [can] >>>>>>>>> [<00000000a289549d>] raw_setsockopt+0x5ef/0x853 [can_raw] >>>>>>>>> [<000000006d3d9ebd>] __sys_setsockopt+0x173/0x2c0 >>>>>>>>> [<00000000407dbfec>] __x64_sys_setsockopt+0x61/0x70 >>>>>>>>> [<00000000fd468496>] do_syscall_64+0x33/0x40 >>>>>>>>> [<00000000b7e47d51>] entry_SYSCALL_64_after_hwframe+0x61/0xc6 >>>>>>>>> >>>>>>>>> It's a bug in the concurrent scenario of unregister_netdevice_many() >>>>>>>>> and raw_release() as following: >>>>>>>>> >>>>>>>>> cpu0 cpu1 >>>>>>>>> unregister_netdevice_many(can_dev) >>>>>>>>> unlist_netdevice(can_dev) // dev_get_by_index() return NULL after this >>>>>>>>> net_set_todo(can_dev) >>>>>>>>> raw_release(can_socket) >>>>>>>>> dev = dev_get_by_index(, ro->ifindex); // dev == NULL >>>>>>>>> if (dev) { // receivers in dev_rcv_lists not free because dev is NULL >>>>>>>>> raw_disable_allfilters(, dev, ); >>>>>>>>> dev_put(dev); >>>>>>>>> } >>>>>>>>> ... >>>>>>>>> ro->bound = 0; >>>>>>>>> ... >>>>>>>>> >>>>>>>>> call_netdevice_notifiers(NETDEV_UNREGISTER, ) >>>>>>>>> raw_notify(, NETDEV_UNREGISTER, ) >>>>>>>>> if (ro->bound) // invalid because ro->bound has been set 0 >>>>>>>>> raw_disable_allfilters(, dev, ); // receivers in dev_rcv_lists will never be freed >>>>>>>>> >>>>>>>>> Add a net_device pointer member in struct raw_sock to record bound can_dev, >>>>>>>>> and use rtnl_lock to serialize raw_socket members between raw_bind(), raw_release(), >>>>>>>>> raw_setsockopt() and raw_notify(). Use ro->dev to decide whether to free receivers in >>>>>>>>> dev_rcv_lists. >>>>>>>>> >>>>>>>>> Fixes: 8d0caedb7596 ("can: bcm/raw/isotp: use per module netdevice notifier") >>>>>>>>> Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com> >>>>>>>>> Reviewed-by: Oliver Hartkopp <socketcan@hartkopp.net> >>>>>>>>> Acked-by: Oliver Hartkopp <socketcan@hartkopp.net> >>>>>>>> >>>>>>>> Added to linux-can/testing. >>>>>>>> >>>>>>> >>>>>>> This patch causes three syzbot LOCKDEP reports so far. >>>>>> >>>>>> Hello Eric, >>>>>> >>>>>> Is there reproducer? I want to understand the specific root cause. >>>>>> >>>>> >>>>> No repro yet, but simply look at other functions in net/can/raw.c >>>>> >>>>> You must always take locks in the same order. >>>>> >>>>> raw_bind(), raw_setsockopt() use: >>>>> >>>>> rtnl_lock(); >>>>> lock_sock(sk); >>>>> >>>>> Therefore, raw_release() must _also_ use the same order, or risk deadlock. >>>>> >>>>> Please build a LOCKDEP enabled kernel, and run your tests ? >>>> >>>> I know now. This needs raw_bind() and raw_setsockopt() concurrent with raw_release(). >>>> And there is not the scenario in my current testcase. I did not get it. I will try to >>>> reproduce it and add the testcase. >>>> >>>> Thank you for your patient explanation. >>> >>> Another syzbot report is firing because of your patch >>> >>> Apparently we store in ro->dev a pointer to a netdev without holding a >>> refcount on it. >>> . I've sent a patch addressing this issue by holding the dev refcount as long as it is needed. https://lore.kernel.org/linux-can/20230804112811.42259-1-socketcan@hartkopp.net/T/#u Another review is appreciated. Many thanks, Oliver >> Hello Eric, >> >> Is there a syzbot link or reproducer can be provided? >> > > Not yet, but really we should not cache a dev pointer without making > sure it does not disappear later. > > Caching the ifindex is fine though. > > BUG: KASAN: use-after-free in read_pnet include/net/net_namespace.h:383 [inline] > BUG: KASAN: use-after-free in dev_net include/linux/netdevice.h:2590 [inline] > BUG: KASAN: use-after-free in raw_release+0x960/0x9b0 net/can/raw.c:395 > Read of size 8 at addr ffff88802b9b8670 by task syz-executor.1/7705 > > CPU: 0 PID: 7705 Comm: syz-executor.1 Not tainted > 6.5.0-rc3-syzkaller-00176-g13d2618b48f1 #0 > Hardware name: Google Google Compute Engine/Google Compute Engine, > BIOS Google 07/12/2023 > Call Trace: > <TASK> > __dump_stack lib/dump_stack.c:88 [inline] > dump_stack_lvl+0xd9/0x1b0 lib/dump_stack.c:106 > print_address_description mm/kasan/report.c:364 [inline] > print_report+0xc4/0x620 mm/kasan/report.c:475 > kasan_report+0xda/0x110 mm/kasan/report.c:588 > read_pnet include/net/net_namespace.h:383 [inline] > dev_net include/linux/netdevice.h:2590 [inline] > raw_release+0x960/0x9b0 net/can/raw.c:395 > __sock_release+0xcd/0x290 net/socket.c:654 > sock_close+0x1c/0x20 net/socket.c:1386 > __fput+0x3fd/0xac0 fs/file_table.c:384 > task_work_run+0x14d/0x240 kernel/task_work.c:179 > resume_user_mode_work include/linux/resume_user_mode.h:49 [inline] > exit_to_user_mode_loop kernel/entry/common.c:171 [inline] > exit_to_user_mode_prepare+0x210/0x240 kernel/entry/common.c:204 > __syscall_exit_to_user_mode_work kernel/entry/common.c:286 [inline] > syscall_exit_to_user_mode+0x1d/0x50 kernel/entry/common.c:297 > do_syscall_64+0x44/0xb0 arch/x86/entry/common.c:86 > entry_SYSCALL_64_after_hwframe+0x63/0xcd > RIP: 0033:0x7fb7e927b9da > Code: 48 3d 00 f0 ff ff 77 48 c3 0f 1f 80 00 00 00 00 48 83 ec 18 89 > 7c 24 0c e8 03 7f 02 00 8b 7c 24 0c 89 c2 b8 03 00 00 00 0f 05 <48> 3d > 00 f0 ff ff 77 36 89 d7 89 44 24 0c e8 63 7f 02 00 8b 44 24 > RSP: 002b:00007ffd15e54c90 EFLAGS: 00000293 ORIG_RAX: 0000000000000003 > RAX: 0000000000000000 RBX: 000000000000000a RCX: 00007fb7e927b9da > RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000009 > RBP: 00007fb7e939d980 R08: 0000001b2fb20000 R09: 000000000000040e > R10: 000000008190df57 R11: 0000000000000293 R12: 000000000011244a > R13: ffffffffffffffff R14: 00007fb7e8e00000 R15: 0000000000112109 > </TASK> > > The buggy address belongs to the physical page: > page:ffffea0000ae6e00 refcount:0 mapcount:0 mapping:0000000000000000 > index:0x0 pfn:0x2b9b8 > flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff) > page_type: 0xffffffff() > raw: 00fff00000000000 ffffea0001e32208 ffffea0000ae7c08 0000000000000000 > raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 > page dumped because: kasan: bad access detected > page_owner tracks the page as freed > page last allocated via order 3, migratetype Unmovable, gfp_mask > 0x446dc0(GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_COMP|__GFP_ZERO), > pid 5067, tgid 5067 (syz-executor.1), ts 153775196806, free_ts > 1123753306319 > set_page_owner include/linux/page_owner.h:31 [inline] > post_alloc_hook+0x2d2/0x350 mm/page_alloc.c:1570 > prep_new_page mm/page_alloc.c:1577 [inline] > get_page_from_freelist+0x10a9/0x31e0 mm/page_alloc.c:3221 > __alloc_pages+0x1d0/0x4a0 mm/page_alloc.c:4477 > __alloc_pages_node include/linux/gfp.h:237 [inline] > alloc_pages_node include/linux/gfp.h:260 [inline] > __kmalloc_large_node+0x87/0x1c0 mm/slab_common.c:1126 > __do_kmalloc_node mm/slab_common.c:973 [inline] > __kmalloc_node.cold+0x5/0xdd mm/slab_common.c:992 > kmalloc_node include/linux/slab.h:602 [inline] > kvmalloc_node+0x6f/0x1a0 mm/util.c:604 > kvmalloc include/linux/slab.h:720 [inline] > kvzalloc include/linux/slab.h:728 [inline] > alloc_netdev_mqs+0x9b/0x1240 net/core/dev.c:10594 > rtnl_create_link+0xc9c/0xfd0 net/core/rtnetlink.c:3336 > rtnl_newlink_create net/core/rtnetlink.c:3462 [inline] > __rtnl_newlink+0x1075/0x18c0 net/core/rtnetlink.c:3689 > rtnl_newlink+0x67/0xa0 net/core/rtnetlink.c:3702 > rtnetlink_rcv_msg+0x439/0xd30 net/core/rtnetlink.c:6428 > netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2549 > netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] > netlink_unicast+0x539/0x800 net/netlink/af_netlink.c:1365 > netlink_sendmsg+0x93c/0xe30 net/netlink/af_netlink.c:1914 > sock_sendmsg_nosec net/socket.c:725 [inline] > sock_sendmsg+0xd9/0x180 net/socket.c:748 > __sys_sendto+0x255/0x340 net/socket.c:2134 > page last free stack trace: > reset_page_owner include/linux/page_owner.h:24 [inline] > free_pages_prepare mm/page_alloc.c:1161 [inline] > free_unref_page_prepare+0x508/0xb90 mm/page_alloc.c:2348 > free_unref_page+0x33/0x3b0 mm/page_alloc.c:2443 > kvfree+0x47/0x50 mm/util.c:650 > device_release+0xa1/0x240 drivers/base/core.c:2484 > kobject_cleanup lib/kobject.c:682 [inline] > kobject_release lib/kobject.c:713 [inline] > kref_put include/linux/kref.h:65 [inline] > kobject_put+0x1f7/0x5b0 lib/kobject.c:730 > netdev_run_todo+0x7dd/0x11d0 net/core/dev.c:10366 > rtnl_unlock net/core/rtnetlink.c:151 [inline] > rtnetlink_rcv_msg+0x446/0xd30 net/core/rtnetlink.c:6429 > netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2549 > netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] > netlink_unicast+0x539/0x800 net/netlink/af_netlink.c:1365 > netlink_sendmsg+0x93c/0xe30 net/netlink/af_netlink.c:1914 > sock_sendmsg_nosec net/socket.c:725 [inline] > sock_sendmsg+0xd9/0x180 net/socket.c:748 > ____sys_sendmsg+0x6ac/0x940 net/socket.c:2494 > ___sys_sendmsg+0x135/0x1d0 net/socket.c:2548 > __sys_sendmsg+0x117/0x1e0 net/socket.c:2577 > do_syscall_x64 arch/x86/entry/common.c:50 [inline] > do_syscall_64+0x38/0xb0 arch/x86/entry/common.c:80 > entry_SYSCALL_64_after_hwframe+0x63/0xcd > > Memory state around the buggy address: > ffff88802b9b8500: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ffff88802b9b8580: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >> ffff88802b9b8600: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ^ > ffff88802b9b8680: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ffff88802b9b8700: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff > ==================================================================
diff --git a/net/can/raw.c b/net/can/raw.c index 15c79b079184..2302e4882967 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -84,6 +84,7 @@ struct raw_sock { struct sock sk; int bound; int ifindex; + struct net_device *dev; struct list_head notifier; int loopback; int recv_own_msgs; @@ -277,7 +278,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg, if (!net_eq(dev_net(dev), sock_net(sk))) return; - if (ro->ifindex != dev->ifindex) + if (ro->dev != dev) return; switch (msg) { @@ -292,6 +293,7 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg, ro->ifindex = 0; ro->bound = 0; + ro->dev = NULL; ro->count = 0; release_sock(sk); @@ -337,6 +339,7 @@ static int raw_init(struct sock *sk) ro->bound = 0; ro->ifindex = 0; + ro->dev = NULL; /* set default filter to single entry dfilter */ ro->dfilter.can_id = 0; @@ -385,19 +388,13 @@ static int raw_release(struct socket *sock) lock_sock(sk); + rtnl_lock(); /* remove current filters & unregister */ if (ro->bound) { - if (ro->ifindex) { - struct net_device *dev; - - dev = dev_get_by_index(sock_net(sk), ro->ifindex); - if (dev) { - raw_disable_allfilters(dev_net(dev), dev, sk); - dev_put(dev); - } - } else { + if (ro->dev) + raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk); + else raw_disable_allfilters(sock_net(sk), NULL, sk); - } } if (ro->count > 1) @@ -405,8 +402,10 @@ static int raw_release(struct socket *sock) ro->ifindex = 0; ro->bound = 0; + ro->dev = NULL; ro->count = 0; free_percpu(ro->uniq); + rtnl_unlock(); sock_orphan(sk); sock->sk = NULL; @@ -422,6 +421,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); + struct net_device *dev = NULL; int ifindex; int err = 0; int notify_enetdown = 0; @@ -431,14 +431,13 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (addr->can_family != AF_CAN) return -EINVAL; + rtnl_lock(); lock_sock(sk); if (ro->bound && addr->can_ifindex == ro->ifindex) goto out; if (addr->can_ifindex) { - struct net_device *dev; - dev = dev_get_by_index(sock_net(sk), addr->can_ifindex); if (!dev) { err = -ENODEV; @@ -467,26 +466,20 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (!err) { if (ro->bound) { /* unregister old filters */ - if (ro->ifindex) { - struct net_device *dev; - - dev = dev_get_by_index(sock_net(sk), - ro->ifindex); - if (dev) { - raw_disable_allfilters(dev_net(dev), - dev, sk); - dev_put(dev); - } - } else { + if (ro->dev) + raw_disable_allfilters(dev_net(ro->dev), + ro->dev, sk); + else raw_disable_allfilters(sock_net(sk), NULL, sk); - } } ro->ifindex = ifindex; ro->bound = 1; + ro->dev = dev; } out: release_sock(sk); + rtnl_unlock(); if (notify_enetdown) { sk->sk_err = ENETDOWN; @@ -553,9 +546,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) { - dev = dev_get_by_index(sock_net(sk), ro->ifindex); - if (!dev) { + dev = ro->dev; + if (ro->bound && dev) { + if (dev->reg_state != NETREG_REGISTERED) { if (count > 1) kfree(filter); err = -ENODEV; @@ -596,7 +589,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, ro->count = count; out_fil: - dev_put(dev); release_sock(sk); rtnl_unlock(); @@ -614,9 +606,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) { - dev = dev_get_by_index(sock_net(sk), ro->ifindex); - if (!dev) { + dev = ro->dev; + if (ro->bound && dev) { + if (dev->reg_state != NETREG_REGISTERED) { err = -ENODEV; goto out_err; } @@ -640,7 +632,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, ro->err_mask = err_mask; out_err: - dev_put(dev); release_sock(sk); rtnl_unlock();