diff mbox series

[1/2] usb: gadget: u_ether: Do not make UDC parent of the net device

Message ID 20221104131031.850850-2-s.hauer@pengutronix.de (mailing list archive)
State Accepted
Commit 321b59870f850a10dbb211ecd2bd87b41497ea6f
Headers show
Series use-after-free issues in configfs | expand

Commit Message

Sascha Hauer Nov. 4, 2022, 1:10 p.m. UTC
The UDC is not a suitable parent of the net device as the UDC can
change or vanish during the lifecycle of the ethernet gadget. This
can be illustrated with the following:

mkdir -p /sys/kernel/config/usb_gadget/mygadget
cd /sys/kernel/config/usb_gadget/mygadget
mkdir -p configs/c.1/strings/0x409
echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration
mkdir -p functions/ecm.usb0
ln -s functions/ecm.usb0 configs/c.1/
echo "dummy_udc.0" > UDC
rmmod dummy_hcd

The 'rmmod' removes the UDC from the just created gadget, leaving
the still existing net device with a no longer existing parent.

Accessing the ethernet device with commands like:

ip --details link show usb0

will result in a KASAN splat:

==================================================================
BUG: KASAN: use-after-free in if_nlmsg_size+0x3e8/0x528
Read of size 4 at addr c5c84754 by task ip/357

CPU: 3 PID: 357 Comm: ip Not tainted 6.1.0-rc3-00013-gd14953726b24-dirty #324
Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
 unwind_backtrace from show_stack+0x10/0x14
 show_stack from dump_stack_lvl+0x58/0x70
 dump_stack_lvl from print_report+0x134/0x4d4
 print_report from kasan_report+0x78/0x10c
 kasan_report from if_nlmsg_size+0x3e8/0x528
 if_nlmsg_size from rtnl_getlink+0x2b4/0x4d0
 rtnl_getlink from rtnetlink_rcv_msg+0x1f4/0x674
 rtnetlink_rcv_msg from netlink_rcv_skb+0xb4/0x1f8
 netlink_rcv_skb from netlink_unicast+0x294/0x478
 netlink_unicast from netlink_sendmsg+0x328/0x640
 netlink_sendmsg from ____sys_sendmsg+0x2a4/0x3b4
 ____sys_sendmsg from ___sys_sendmsg+0xc8/0x12c
 ___sys_sendmsg from sys_sendmsg+0xa0/0x120
 sys_sendmsg from ret_fast_syscall+0x0/0x1c

Solve this by not setting the parent of the ethernet device.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
---
 drivers/usb/gadget/function/u_ether.c | 4 ----
 1 file changed, 4 deletions(-)

Comments

Paul Cercueil Feb. 1, 2023, 1:32 p.m. UTC | #1
Hi Sascha, Greg,

I have a breakage in 6.2-rc* that I eventually bisected to this commit,
on a Ingenic SoC (using the jz4740 musb driver) with ECM or RNDIS
configured through gadgetfs.

When plugging the board to my PC, the USB network interface is
recognized, but 'ip link' sees it as 'NO-CARRIER'. With this commit
reverted on v6.2-rc5, everything works fine.

Cheers,
-Paul

Le vendredi 04 novembre 2022 à 14:10 +0100, Sascha Hauer a écrit :
> The UDC is not a suitable parent of the net device as the UDC can
> change or vanish during the lifecycle of the ethernet gadget. This
> can be illustrated with the following:
> 
> mkdir -p /sys/kernel/config/usb_gadget/mygadget
> cd /sys/kernel/config/usb_gadget/mygadget
> mkdir -p configs/c.1/strings/0x409
> echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration
> mkdir -p functions/ecm.usb0
> ln -s functions/ecm.usb0 configs/c.1/
> echo "dummy_udc.0" > UDC
> rmmod dummy_hcd
> 
> The 'rmmod' removes the UDC from the just created gadget, leaving
> the still existing net device with a no longer existing parent.
> 
> Accessing the ethernet device with commands like:
> 
> ip --details link show usb0
> 
> will result in a KASAN splat:
> 
> ==================================================================
> BUG: KASAN: use-after-free in if_nlmsg_size+0x3e8/0x528
> Read of size 4 at addr c5c84754 by task ip/357
> 
> CPU: 3 PID: 357 Comm: ip Not tainted 6.1.0-rc3-00013-gd14953726b24-
> dirty #324
> Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
>  unwind_backtrace from show_stack+0x10/0x14
>  show_stack from dump_stack_lvl+0x58/0x70
>  dump_stack_lvl from print_report+0x134/0x4d4
>  print_report from kasan_report+0x78/0x10c
>  kasan_report from if_nlmsg_size+0x3e8/0x528
>  if_nlmsg_size from rtnl_getlink+0x2b4/0x4d0
>  rtnl_getlink from rtnetlink_rcv_msg+0x1f4/0x674
>  rtnetlink_rcv_msg from netlink_rcv_skb+0xb4/0x1f8
>  netlink_rcv_skb from netlink_unicast+0x294/0x478
>  netlink_unicast from netlink_sendmsg+0x328/0x640
>  netlink_sendmsg from ____sys_sendmsg+0x2a4/0x3b4
>  ____sys_sendmsg from ___sys_sendmsg+0xc8/0x12c
>  ___sys_sendmsg from sys_sendmsg+0xa0/0x120
>  sys_sendmsg from ret_fast_syscall+0x0/0x1c
> 
> Solve this by not setting the parent of the ethernet device.
> 
> Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
> ---
>  drivers/usb/gadget/function/u_ether.c | 4 ----
>  1 file changed, 4 deletions(-)
> 
> diff --git a/drivers/usb/gadget/function/u_ether.c
> b/drivers/usb/gadget/function/u_ether.c
> index e06022873df16..8f12f3f8f6eeb 100644
> --- a/drivers/usb/gadget/function/u_ether.c
> +++ b/drivers/usb/gadget/function/u_ether.c
> @@ -798,7 +798,6 @@ struct eth_dev *gether_setup_name(struct
> usb_gadget *g,
>         net->max_mtu = GETHER_MAX_MTU_SIZE;
>  
>         dev->gadget = g;
> -       SET_NETDEV_DEV(net, &g->dev);
>         SET_NETDEV_DEVTYPE(net, &gadget_type);
>  
>         status = register_netdev(net);
> @@ -873,8 +872,6 @@ int gether_register_netdev(struct net_device
> *net)
>         struct usb_gadget *g;
>         int status;
>  
> -       if (!net->dev.parent)
> -               return -EINVAL;
>         dev = netdev_priv(net);
>         g = dev->gadget;
>  
> @@ -905,7 +902,6 @@ void gether_set_gadget(struct net_device *net,
> struct usb_gadget *g)
>  
>         dev = netdev_priv(net);
>         dev->gadget = g;
> -       SET_NETDEV_DEV(net, &g->dev);
>  }
>  EXPORT_SYMBOL_GPL(gether_set_gadget);
>
Thorsten Leemhuis Feb. 3, 2023, 12:46 p.m. UTC | #2
[TLDR: I'm adding this report to the list of tracked Linux kernel
regressions; the text you find below is based on a few templates
paragraphs you might have encountered already in similar form.
See link in footer if these mails annoy you.]

On 01.02.23 14:32, Paul Cercueil wrote:
> Hi Sascha, Greg,
> 
> I have a breakage in 6.2-rc* that I eventually bisected to this commit,
> on a Ingenic SoC (using the jz4740 musb driver) with ECM or RNDIS
> configured through gadgetfs.
> 
> When plugging the board to my PC, the USB network interface is
> recognized, but 'ip link' sees it as 'NO-CARRIER'. With this commit
> reverted on v6.2-rc5, everything works fine.

[CCing the regression list, as it should be in the loop for regressions:
https://docs.kernel.org/admin-guide/reporting-regressions.html]

Thanks for the report. To be sure the issue doesn't fall through the
cracks unnoticed, I'm adding it to regzbot, the Linux kernel regression
tracking bot:

#regzbot ^introduced 321b59870f850
#regzbot title usb: gadget: USB network interface is recognized, but 'ip
link' sees it as 'NO-CARRIER'
#regzbot ignore-activity

This isn't a regression? This issue or a fix for it are already
discussed somewhere else? It was fixed already? You want to clarify when
the regression started to happen? Or point out I got the title or
something else totally wrong? Then just reply and tell me -- ideally
while also telling regzbot about it, as explained by the page listed in
the footer of this mail.

Developers: When fixing the issue, remember to add 'Link:' tags pointing
to the report (the parent of this mail). See page linked in footer for
details.

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
That page also explains what to do if mails like this annoy you.	

> Le vendredi 04 novembre 2022 à 14:10 +0100, Sascha Hauer a écrit :
>> The UDC is not a suitable parent of the net device as the UDC can
>> change or vanish during the lifecycle of the ethernet gadget. This
>> can be illustrated with the following:
>>
>> mkdir -p /sys/kernel/config/usb_gadget/mygadget
>> cd /sys/kernel/config/usb_gadget/mygadget
>> mkdir -p configs/c.1/strings/0x409
>> echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration
>> mkdir -p functions/ecm.usb0
>> ln -s functions/ecm.usb0 configs/c.1/
>> echo "dummy_udc.0" > UDC
>> rmmod dummy_hcd
>>
>> The 'rmmod' removes the UDC from the just created gadget, leaving
>> the still existing net device with a no longer existing parent.
>>
>> Accessing the ethernet device with commands like:
>>
>> ip --details link show usb0
>>
>> will result in a KASAN splat:
>>
>> ==================================================================
>> BUG: KASAN: use-after-free in if_nlmsg_size+0x3e8/0x528
>> Read of size 4 at addr c5c84754 by task ip/357
>>
>> CPU: 3 PID: 357 Comm: ip Not tainted 6.1.0-rc3-00013-gd14953726b24-
>> dirty #324
>> Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
>>  unwind_backtrace from show_stack+0x10/0x14
>>  show_stack from dump_stack_lvl+0x58/0x70
>>  dump_stack_lvl from print_report+0x134/0x4d4
>>  print_report from kasan_report+0x78/0x10c
>>  kasan_report from if_nlmsg_size+0x3e8/0x528
>>  if_nlmsg_size from rtnl_getlink+0x2b4/0x4d0
>>  rtnl_getlink from rtnetlink_rcv_msg+0x1f4/0x674
>>  rtnetlink_rcv_msg from netlink_rcv_skb+0xb4/0x1f8
>>  netlink_rcv_skb from netlink_unicast+0x294/0x478
>>  netlink_unicast from netlink_sendmsg+0x328/0x640
>>  netlink_sendmsg from ____sys_sendmsg+0x2a4/0x3b4
>>  ____sys_sendmsg from ___sys_sendmsg+0xc8/0x12c
>>  ___sys_sendmsg from sys_sendmsg+0xa0/0x120
>>  sys_sendmsg from ret_fast_syscall+0x0/0x1c
>>
>> Solve this by not setting the parent of the ethernet device.
>>
>> Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
>> ---
>>  drivers/usb/gadget/function/u_ether.c | 4 ----
>>  1 file changed, 4 deletions(-)
>>
>> diff --git a/drivers/usb/gadget/function/u_ether.c
>> b/drivers/usb/gadget/function/u_ether.c
>> index e06022873df16..8f12f3f8f6eeb 100644
>> --- a/drivers/usb/gadget/function/u_ether.c
>> +++ b/drivers/usb/gadget/function/u_ether.c
>> @@ -798,7 +798,6 @@ struct eth_dev *gether_setup_name(struct
>> usb_gadget *g,
>>         net->max_mtu = GETHER_MAX_MTU_SIZE;
>>  
>>         dev->gadget = g;
>> -       SET_NETDEV_DEV(net, &g->dev);
>>         SET_NETDEV_DEVTYPE(net, &gadget_type);
>>  
>>         status = register_netdev(net);
>> @@ -873,8 +872,6 @@ int gether_register_netdev(struct net_device
>> *net)
>>         struct usb_gadget *g;
>>         int status;
>>  
>> -       if (!net->dev.parent)
>> -               return -EINVAL;
>>         dev = netdev_priv(net);
>>         g = dev->gadget;
>>  
>> @@ -905,7 +902,6 @@ void gether_set_gadget(struct net_device *net,
>> struct usb_gadget *g)
>>  
>>         dev = netdev_priv(net);
>>         dev->gadget = g;
>> -       SET_NETDEV_DEV(net, &g->dev);
>>  }
>>  EXPORT_SYMBOL_GPL(gether_set_gadget);
>>  
>
Greg KH Feb. 8, 2023, 12:06 p.m. UTC | #3
On Wed, Feb 01, 2023 at 01:32:51PM +0000, Paul Cercueil wrote:
> Hi Sascha, Greg,
> 
> I have a breakage in 6.2-rc* that I eventually bisected to this commit,
> on a Ingenic SoC (using the jz4740 musb driver) with ECM or RNDIS
> configured through gadgetfs.
> 
> When plugging the board to my PC, the USB network interface is
> recognized, but 'ip link' sees it as 'NO-CARRIER'. With this commit
> reverted on v6.2-rc5, everything works fine.

Ick, that's not good.  Can you send a revert for this?  Sascha, any
ideas?

thanks,

greg k-h
Paul Cercueil Feb. 8, 2023, 1:45 p.m. UTC | #4
Hi Greg,

Le mercredi 08 février 2023 à 13:06 +0100, Greg Kroah-Hartman a écrit :
> On Wed, Feb 01, 2023 at 01:32:51PM +0000, Paul Cercueil wrote:
> > Hi Sascha, Greg,
> > 
> > I have a breakage in 6.2-rc* that I eventually bisected to this
> > commit,
> > on a Ingenic SoC (using the jz4740 musb driver) with ECM or RNDIS
> > configured through gadgetfs.
> > 
> > When plugging the board to my PC, the USB network interface is
> > recognized, but 'ip link' sees it as 'NO-CARRIER'. With this commit
> > reverted on v6.2-rc5, everything works fine.
> 
> Ick, that's not good.  Can you send a revert for this?  Sascha, any
> ideas?

Yes, I was hoping Sascha would have a better idea, but I'll send a
revert tomorrow.

-Paul
Sascha Hauer Feb. 9, 2023, 10:18 a.m. UTC | #5
Hi Paul,

On Wed, Feb 01, 2023 at 01:32:51PM +0000, Paul Cercueil wrote:
> Hi Sascha, Greg,
> 
> I have a breakage in 6.2-rc* that I eventually bisected to this commit,
> on a Ingenic SoC (using the jz4740 musb driver) with ECM or RNDIS
> configured through gadgetfs.
> 
> When plugging the board to my PC, the USB network interface is
> recognized, but 'ip link' sees it as 'NO-CARRIER'. With this commit
> reverted on v6.2-rc5, everything works fine.

I don't have this hardware available. I just tried with a i.MX hardware
and it works as expected. I have no idea where the jz4740 musb could
behave differently.

Here's exactly what I did:

mkdir -p /sys/kernel/config/usb_gadget/mygadget
cd /sys/kernel/config/usb_gadget/mygadget
mkdir -p configs/c.1/strings/0x409
echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration
mkdir -p functions/ecm.usb0
ln -s functions/ecm.usb0 configs/c.1/
echo "ci_hdrc.0" > UDC

Did you do something differently apart from the "ci_hdrc.0" of course?

BTW when you say 'NO-CARRIER' is it on the PC side, board side, or
both?

It would be great if we could sort this out, but if not I am fine with
reverting this patch. I guess this topic will come back to my desk
sooner or later then

Sascha
Paul Cercueil Feb. 9, 2023, 10:37 a.m. UTC | #6
Hi Sascha,

Le jeudi 09 février 2023 à 11:18 +0100, Sascha Hauer a écrit :
> Hi Paul,
> 
> On Wed, Feb 01, 2023 at 01:32:51PM +0000, Paul Cercueil wrote:
> > Hi Sascha, Greg,
> > 
> > I have a breakage in 6.2-rc* that I eventually bisected to this
> > commit,
> > on a Ingenic SoC (using the jz4740 musb driver) with ECM or RNDIS
> > configured through gadgetfs.
> > 
> > When plugging the board to my PC, the USB network interface is
> > recognized, but 'ip link' sees it as 'NO-CARRIER'. With this commit
> > reverted on v6.2-rc5, everything works fine.
> 
> I don't have this hardware available. I just tried with a i.MX
> hardware
> and it works as expected. I have no idea where the jz4740 musb could
> behave differently.
> 
> Here's exactly what I did:
> 
> mkdir -p /sys/kernel/config/usb_gadget/mygadget
> cd /sys/kernel/config/usb_gadget/mygadget
> mkdir -p configs/c.1/strings/0x409
> echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration
> mkdir -p functions/ecm.usb0
> ln -s functions/ecm.usb0 configs/c.1/
> echo "ci_hdrc.0" > UDC
> 
> Did you do something differently apart from the "ci_hdrc.0" of
> course?

Nothing very different, no.

I do:

cd /sys/kernel/config/usb_gadget
mkdir mtp \
  mtp/strings/0x409 \
  mtp/configs/c.1 \
  mtp/configs/c.1/strings/0x409 \
  mtp/functions/ffs.mtp \
  mtp/functions/ecm.net \
  mtp/functions/rndis.net

echo 0x80 > mtp/configs/c.1/bmAttributes
echo 500 > mtp/configs/c.1/MaxPower

echo 0x049f > mtp/idVendor
echo 0x505a > mtp/idProduct
echo cdc > mtp/configs/c.1/strings/0x409/configuration
ln -s mtp/functions/ecm.net mtp/configs/c.1/ecm.net

echo ci_hdrc.0 > mtp/UDC

> BTW when you say 'NO-CARRIER' is it on the PC side, board side, or
> both?

PC side. I don't know what it says on the board side, I can't
telnet/SSH.

> It would be great if we could sort this out, but if not I am fine
> with
> reverting this patch. I guess this topic will come back to my desk
> sooner or later then

Considering that the clock is ticking, let's revert it for now; that
will give me some time to debug the issue, and then we can work on a
revised patch.

Cheers,
-Paul
Sascha Hauer Feb. 9, 2023, 11:41 a.m. UTC | #7
On Thu, Feb 09, 2023 at 10:37:05AM +0000, Paul Cercueil wrote:
> Hi Sascha,
> 
> Le jeudi 09 février 2023 à 11:18 +0100, Sascha Hauer a écrit :
> > Hi Paul,
> > 
> > On Wed, Feb 01, 2023 at 01:32:51PM +0000, Paul Cercueil wrote:
> > > Hi Sascha, Greg,
> > > 
> > > I have a breakage in 6.2-rc* that I eventually bisected to this
> > > commit,
> > > on a Ingenic SoC (using the jz4740 musb driver) with ECM or RNDIS
> > > configured through gadgetfs.
> > > 
> > > When plugging the board to my PC, the USB network interface is
> > > recognized, but 'ip link' sees it as 'NO-CARRIER'. With this commit
> > > reverted on v6.2-rc5, everything works fine.
> > 
> > I don't have this hardware available. I just tried with a i.MX
> > hardware
> > and it works as expected. I have no idea where the jz4740 musb could
> > behave differently.
> > 
> > Here's exactly what I did:
> > 
> > mkdir -p /sys/kernel/config/usb_gadget/mygadget
> > cd /sys/kernel/config/usb_gadget/mygadget
> > mkdir -p configs/c.1/strings/0x409
> > echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration
> > mkdir -p functions/ecm.usb0
> > ln -s functions/ecm.usb0 configs/c.1/
> > echo "ci_hdrc.0" > UDC
> > 
> > Did you do something differently apart from the "ci_hdrc.0" of
> > course?
> 
> Nothing very different, no.
> 
> I do:
> 
> cd /sys/kernel/config/usb_gadget
> mkdir mtp \
>   mtp/strings/0x409 \
>   mtp/configs/c.1 \
>   mtp/configs/c.1/strings/0x409 \
>   mtp/functions/ffs.mtp \
>   mtp/functions/ecm.net \
>   mtp/functions/rndis.net
> 
> echo 0x80 > mtp/configs/c.1/bmAttributes
> echo 500 > mtp/configs/c.1/MaxPower
> 
> echo 0x049f > mtp/idVendor
> echo 0x505a > mtp/idProduct
> echo cdc > mtp/configs/c.1/strings/0x409/configuration
> ln -s mtp/functions/ecm.net mtp/configs/c.1/ecm.net
> 
> echo ci_hdrc.0 > mtp/UDC
> 
> > BTW when you say 'NO-CARRIER' is it on the PC side, board side, or
> > both?
> 
> PC side. I don't know what it says on the board side, I can't
> telnet/SSH.

I just checked on the host side: With or without my patch I get
NO-CARRIER on the host. I have to do a 'ip link set usb0 up' on
the device side, with that I get a <BROADCAST,MULTICAST,UP,LOWER_UP>
on the host side.

Could it be that my patch breaks something on the device side that
prevents the device from bringing the link up?

Sascha
Alan Stern Feb. 9, 2023, 3:05 p.m. UTC | #8
On Thu, Feb 09, 2023 at 12:41:03PM +0100, Sascha Hauer wrote:
> I just checked on the host side: With or without my patch I get
> NO-CARRIER on the host. I have to do a 'ip link set usb0 up' on
> the device side, with that I get a <BROADCAST,MULTICAST,UP,LOWER_UP>
> on the host side.
> 
> Could it be that my patch breaks something on the device side that
> prevents the device from bringing the link up?

Sascha:

When you first posted your original patch, I wondered if it was really 
the right thing to do.  Making the net device not be a child of the UDC 
device means you can (in theory) have strange behavior such as the 
kernel suspending the USB device controller while expecting the network 
interface to keep on working.

Is there a different way of solving the original problem?

Alan Stern
Sascha Hauer Feb. 10, 2023, 2:49 p.m. UTC | #9
On Thu, Feb 09, 2023 at 10:05:35AM -0500, Alan Stern wrote:
> On Thu, Feb 09, 2023 at 12:41:03PM +0100, Sascha Hauer wrote:
> > I just checked on the host side: With or without my patch I get
> > NO-CARRIER on the host. I have to do a 'ip link set usb0 up' on
> > the device side, with that I get a <BROADCAST,MULTICAST,UP,LOWER_UP>
> > on the host side.
> > 
> > Could it be that my patch breaks something on the device side that
> > prevents the device from bringing the link up?
> 
> Sascha:
> 
> When you first posted your original patch, I wondered if it was really 
> the right thing to do.  Making the net device not be a child of the UDC 
> device means you can (in theory) have strange behavior such as the 
> kernel suspending the USB device controller while expecting the network 
> interface to keep on working.
> 
> Is there a different way of solving the original problem?

I don't know which. One thing would be to couple the lifetime of the
ethernet device to the lifetime of the UDC, but the result would look
different to userspace, so wouldn't be ideal either.

Note the original reason doing this change was that we saw backtraces
when doing a 'reboot -f', the 'rmmod dummy_hcd' was just an easy
reproducer for the problem.

One other possibility might be to take a reference to the UDC while
it is in use so that the module can't be rmmoded. Not sure if that fixes
my original problem though.

Sascha
Alan Stern Feb. 10, 2023, 3:45 p.m. UTC | #10
On Fri, Feb 10, 2023 at 03:49:41PM +0100, Sascha Hauer wrote:
> On Thu, Feb 09, 2023 at 10:05:35AM -0500, Alan Stern wrote:
> > Sascha:
> > 
> > When you first posted your original patch, I wondered if it was really 
> > the right thing to do.  Making the net device not be a child of the UDC 
> > device means you can (in theory) have strange behavior such as the 
> > kernel suspending the USB device controller while expecting the network 
> > interface to keep on working.
> > 
> > Is there a different way of solving the original problem?
> 
> I don't know which. One thing would be to couple the lifetime of the
> ethernet device to the lifetime of the UDC, but the result would look
> different to userspace, so wouldn't be ideal either.
> 
> Note the original reason doing this change was that we saw backtraces
> when doing a 'reboot -f', the 'rmmod dummy_hcd' was just an easy
> reproducer for the problem.
> 
> One other possibility might be to take a reference to the UDC while
> it is in use so that the module can't be rmmoded. Not sure if that fixes
> my original problem though.

Not being familiar with the networking code, I don't really understand 
the original problem.  Does the use-after-free error occur when you try 
to dereference a dev->parent pointer in the ethernet device?

If that's so, then taking a reference (i.e. get_device()) on the parent 
device should fix the problem.

If not, maybe you can give a more detailed guide as to what's going 
wrong.

Alan Stern
Sascha Hauer Feb. 10, 2023, 6:46 p.m. UTC | #11
On Fri, Feb 10, 2023 at 10:45:54AM -0500, Alan Stern wrote:
> On Fri, Feb 10, 2023 at 03:49:41PM +0100, Sascha Hauer wrote:
> > On Thu, Feb 09, 2023 at 10:05:35AM -0500, Alan Stern wrote:
> > > Sascha:
> > > 
> > > When you first posted your original patch, I wondered if it was really 
> > > the right thing to do.  Making the net device not be a child of the UDC 
> > > device means you can (in theory) have strange behavior such as the 
> > > kernel suspending the USB device controller while expecting the network 
> > > interface to keep on working.
> > > 
> > > Is there a different way of solving the original problem?
> > 
> > I don't know which. One thing would be to couple the lifetime of the
> > ethernet device to the lifetime of the UDC, but the result would look
> > different to userspace, so wouldn't be ideal either.
> > 
> > Note the original reason doing this change was that we saw backtraces
> > when doing a 'reboot -f', the 'rmmod dummy_hcd' was just an easy
> > reproducer for the problem.
> > 
> > One other possibility might be to take a reference to the UDC while
> > it is in use so that the module can't be rmmoded. Not sure if that fixes
> > my original problem though.
> 
> Not being familiar with the networking code, I don't really understand 
> the original problem.  Does the use-after-free error occur when you try 
> to dereference a dev->parent pointer in the ethernet device?
> 
> If that's so, then taking a reference (i.e. get_device()) on the parent 
> device should fix the problem.
> 
> If not, maybe you can give a more detailed guide as to what's going 
> wrong.

I don't remember the details anymore. I'll do some more investigation
next week.

Sascha
Thorsten Leemhuis Feb. 16, 2023, 11:07 a.m. UTC | #12
[TLDR: This mail in primarily relevant for Linux regression tracking. A
change or fix related to the regression discussed in this thread was
posted or applied, but it did not use a Link: tag to point to the
report, as Linus and the documentation call for. Things happen, no
worries -- but now the regression tracking bot needs to be told manually
about the fix. See link in footer if these mails annoy you.]

On 03.02.23 13:46, Linux kernel regression tracking (#adding) wrote:
> On 01.02.23 14:32, Paul Cercueil wrote:
>> Hi Sascha, Greg,
>>
>> I have a breakage in 6.2-rc* that I eventually bisected to this commit,
>> on a Ingenic SoC (using the jz4740 musb driver) with ECM or RNDIS
>> configured through gadgetfs.
>>
>> When plugging the board to my PC, the USB network interface is
>> recognized, but 'ip link' sees it as 'NO-CARRIER'. With this commit
>> reverted on v6.2-rc5, everything works fine.
> 
> [CCing the regression list, as it should be in the loop for regressions:
> https://docs.kernel.org/admin-guide/reporting-regressions.html]
> 
> Thanks for the report. To be sure the issue doesn't fall through the
> cracks unnoticed, I'm adding it to regzbot, the Linux kernel regression
> tracking bot:
> 
> #regzbot ^introduced 321b59870f850
> #regzbot title usb: gadget: USB network interface is recognized, but 'ip
> link' sees it as 'NO-CARRIER'
> #regzbot ignore-activity

#regzbot fix: bb07bd68fa0983e3915
#regzbot ignore-activity

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
That page also explains what to do if mails like this annoy you.

#regzbot ignore-activity
Ondřej Jirman Sept. 4, 2023, 1:14 p.m. UTC | #13
Hi,

On Fri, Nov 04, 2022 at 02:10:30PM +0100, Sascha Hauer wrote:
> ing-List: linux-kernel@vger.kernel.org
> 
> The UDC is not a suitable parent of the net device as the UDC can
> change or vanish during the lifecycle of the ethernet gadget. This
> can be illustrated with the following:
> 
> mkdir -p /sys/kernel/config/usb_gadget/mygadget
> cd /sys/kernel/config/usb_gadget/mygadget
> mkdir -p configs/c.1/strings/0x409
> echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration
> mkdir -p functions/ecm.usb0
> ln -s functions/ecm.usb0 configs/c.1/
> echo "dummy_udc.0" > UDC
> rmmod dummy_hcd
> 
> The 'rmmod' removes the UDC from the just created gadget, leaving
> the still existing net device with a no longer existing parent.

I have an even simpler reproducer on Pinephone Pro/RK3399 SoC. All it takes to
trigger the use after free and kernel panic in my case is to plug in a USB dock
to make DWC3 DRD switch to host mode and then unplug it to make it switch back to
peripheral mode.

This triggers a call to dwc3_gadget_exit and then to dwc3_gadget_init later on

https://elixir.bootlin.com/linux/latest/source/drivers/usb/dwc3/gadget.c#L4546

Then symlink to the net device becames broken in sysfs:

  https://megous.com/dl/tmp/3d8061f1749a7b2b.png

And after this happens, there's a kernel panic when removing the rndis gadget
configuration from configfs: https://paste.mozilla.org/Z5DFP9BV
(and possibly other issues, but the kernel panic is the most noticable :))

Applaying this patch makes the issue go away. So there definitely seems to
be some device lifetime issue somewhere in there.

kind regards,
	o.

> Accessing the ethernet device with commands like:
> 
> ip --details link show usb0
> 
> will result in a KASAN splat:
> 
> ==================================================================
> BUG: KASAN: use-after-free in if_nlmsg_size+0x3e8/0x528
> Read of size 4 at addr c5c84754 by task ip/357
> 
> CPU: 3 PID: 357 Comm: ip Not tainted 6.1.0-rc3-00013-gd14953726b24-dirty #324
> Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
>  unwind_backtrace from show_stack+0x10/0x14
>  show_stack from dump_stack_lvl+0x58/0x70
>  dump_stack_lvl from print_report+0x134/0x4d4
>  print_report from kasan_report+0x78/0x10c
>  kasan_report from if_nlmsg_size+0x3e8/0x528
>  if_nlmsg_size from rtnl_getlink+0x2b4/0x4d0
>  rtnl_getlink from rtnetlink_rcv_msg+0x1f4/0x674
>  rtnetlink_rcv_msg from netlink_rcv_skb+0xb4/0x1f8
>  netlink_rcv_skb from netlink_unicast+0x294/0x478
>  netlink_unicast from netlink_sendmsg+0x328/0x640
>  netlink_sendmsg from ____sys_sendmsg+0x2a4/0x3b4
>  ____sys_sendmsg from ___sys_sendmsg+0xc8/0x12c
>  ___sys_sendmsg from sys_sendmsg+0xa0/0x120
>  sys_sendmsg from ret_fast_syscall+0x0/0x1c
> 
> Solve this by not setting the parent of the ethernet device.
> 
> Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
> ---
>  drivers/usb/gadget/function/u_ether.c | 4 ----
>  1 file changed, 4 deletions(-)
> 
> diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
> index e06022873df16..8f12f3f8f6eeb 100644
> --- a/drivers/usb/gadget/function/u_ether.c
> +++ b/drivers/usb/gadget/function/u_ether.c
> @@ -798,7 +798,6 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
>  	net->max_mtu = GETHER_MAX_MTU_SIZE;
>  
>  	dev->gadget = g;
> -	SET_NETDEV_DEV(net, &g->dev);
>  	SET_NETDEV_DEVTYPE(net, &gadget_type);
>  
>  	status = register_netdev(net);
> @@ -873,8 +872,6 @@ int gether_register_netdev(struct net_device *net)
>  	struct usb_gadget *g;
>  	int status;
>  
> -	if (!net->dev.parent)
> -		return -EINVAL;
>  	dev = netdev_priv(net);
>  	g = dev->gadget;
>  
> @@ -905,7 +902,6 @@ void gether_set_gadget(struct net_device *net, struct usb_gadget *g)
>  
>  	dev = netdev_priv(net);
>  	dev->gadget = g;
> -	SET_NETDEV_DEV(net, &g->dev);
>  }
>  EXPORT_SYMBOL_GPL(gether_set_gadget);
>  
> -- 
> 2.30.2
>
Ondřej Jirman Sept. 5, 2023, 12:12 a.m. UTC | #14
On Mon, Sep 04, 2023 at 03:14:30PM +0200, megi xff wrote:
> 
> Hi,
> 
> On Fri, Nov 04, 2022 at 02:10:30PM +0100, Sascha Hauer wrote:
> > ing-List: linux-kernel@vger.kernel.org
> > 
> > The UDC is not a suitable parent of the net device as the UDC can
> > change or vanish during the lifecycle of the ethernet gadget. This
> > can be illustrated with the following:
> > 
> > mkdir -p /sys/kernel/config/usb_gadget/mygadget
> > cd /sys/kernel/config/usb_gadget/mygadget
> > mkdir -p configs/c.1/strings/0x409
> > echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration
> > mkdir -p functions/ecm.usb0
> > ln -s functions/ecm.usb0 configs/c.1/
> > echo "dummy_udc.0" > UDC
> > rmmod dummy_hcd
> > 
> > The 'rmmod' removes the UDC from the just created gadget, leaving
> > the still existing net device with a no longer existing parent.
> 
> I have an even simpler reproducer on Pinephone Pro/RK3399 SoC. All it takes to
> trigger the use after free and kernel panic in my case is to plug in a USB dock
> to make DWC3 DRD switch to host mode and then unplug it to make it switch back to
> peripheral mode.
> 
> This triggers a call to dwc3_gadget_exit and then to dwc3_gadget_init later on
> 
> https://elixir.bootlin.com/linux/latest/source/drivers/usb/dwc3/gadget.c#L4546
> 
> Then symlink to the net device becames broken in sysfs:
> 
>   https://megous.com/dl/tmp/3d8061f1749a7b2b.png
> 
> And after this happens, there's a kernel panic when removing the rndis gadget
> configuration from configfs: https://paste.mozilla.org/Z5DFP9BV
> (and possibly other issues, but the kernel panic is the most noticable :))
> 
> Applaying this patch makes the issue go away. So there definitely seems to
> be some device lifetime issue somewhere in there.

Looks like this patch just masks an underlying issue. Eg. with DWC3 as UDC,
rndis_bind/rndis_unbind is called multiple times (each time USB role switches
from host to peripheral). This is because DWC3 re-creates the gadget device
each time. During every call rndis_bind gets a pointer to a newly created
gadget, but gether_set_gadget() is only called:

  if (!rndis_opts->bound) {
  	...
  }

which only happens on the first call to rndis_bind.

On any further calls to rndis_bind(), gether_set_gadget is not called, because
rndis_opts->bound is already set and thus netdev private data keep pointing to
some old, deleted gadget device.

https://elixir.bootlin.com/linux/latest/source/drivers/usb/gadget/function/f_rndis.c#L704
https://elixir.bootlin.com/linux/latest/source/drivers/usb/gadget/function/u_ether.c#L890

It looks like this whole code was not really written with assumption that
gadget devices can be deleted and re-added like this.

Not sure where the bug really is... if f_* drivers should better handle gadget changes,
or DWC3 should not re-create the gadget like it does on mode changes, or
elsewhere.

kind regards,
	o.

> kind regards,
> 	o.
> 
> > Accessing the ethernet device with commands like:
> > 
> > ip --details link show usb0
> > 
> > will result in a KASAN splat:
> > 
> > ==================================================================
> > BUG: KASAN: use-after-free in if_nlmsg_size+0x3e8/0x528
> > Read of size 4 at addr c5c84754 by task ip/357
> > 
> > CPU: 3 PID: 357 Comm: ip Not tainted 6.1.0-rc3-00013-gd14953726b24-dirty #324
> > Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
> >  unwind_backtrace from show_stack+0x10/0x14
> >  show_stack from dump_stack_lvl+0x58/0x70
> >  dump_stack_lvl from print_report+0x134/0x4d4
> >  print_report from kasan_report+0x78/0x10c
> >  kasan_report from if_nlmsg_size+0x3e8/0x528
> >  if_nlmsg_size from rtnl_getlink+0x2b4/0x4d0
> >  rtnl_getlink from rtnetlink_rcv_msg+0x1f4/0x674
> >  rtnetlink_rcv_msg from netlink_rcv_skb+0xb4/0x1f8
> >  netlink_rcv_skb from netlink_unicast+0x294/0x478
> >  netlink_unicast from netlink_sendmsg+0x328/0x640
> >  netlink_sendmsg from ____sys_sendmsg+0x2a4/0x3b4
> >  ____sys_sendmsg from ___sys_sendmsg+0xc8/0x12c
> >  ___sys_sendmsg from sys_sendmsg+0xa0/0x120
> >  sys_sendmsg from ret_fast_syscall+0x0/0x1c
> > 
> > Solve this by not setting the parent of the ethernet device.
> > 
> > Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
> > ---
> >  drivers/usb/gadget/function/u_ether.c | 4 ----
> >  1 file changed, 4 deletions(-)
> > 
> > diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
> > index e06022873df16..8f12f3f8f6eeb 100644
> > --- a/drivers/usb/gadget/function/u_ether.c
> > +++ b/drivers/usb/gadget/function/u_ether.c
> > @@ -798,7 +798,6 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
> >  	net->max_mtu = GETHER_MAX_MTU_SIZE;
> >  
> >  	dev->gadget = g;
> > -	SET_NETDEV_DEV(net, &g->dev);
> >  	SET_NETDEV_DEVTYPE(net, &gadget_type);
> >  
> >  	status = register_netdev(net);
> > @@ -873,8 +872,6 @@ int gether_register_netdev(struct net_device *net)
> >  	struct usb_gadget *g;
> >  	int status;
> >  
> > -	if (!net->dev.parent)
> > -		return -EINVAL;
> >  	dev = netdev_priv(net);
> >  	g = dev->gadget;
> >  
> > @@ -905,7 +902,6 @@ void gether_set_gadget(struct net_device *net, struct usb_gadget *g)
> >  
> >  	dev = netdev_priv(net);
> >  	dev->gadget = g;
> > -	SET_NETDEV_DEV(net, &g->dev);
> >  }
> >  EXPORT_SYMBOL_GPL(gether_set_gadget);
> >  
> > -- 
> > 2.30.2
> >
diff mbox series

Patch

diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index e06022873df16..8f12f3f8f6eeb 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -798,7 +798,6 @@  struct eth_dev *gether_setup_name(struct usb_gadget *g,
 	net->max_mtu = GETHER_MAX_MTU_SIZE;
 
 	dev->gadget = g;
-	SET_NETDEV_DEV(net, &g->dev);
 	SET_NETDEV_DEVTYPE(net, &gadget_type);
 
 	status = register_netdev(net);
@@ -873,8 +872,6 @@  int gether_register_netdev(struct net_device *net)
 	struct usb_gadget *g;
 	int status;
 
-	if (!net->dev.parent)
-		return -EINVAL;
 	dev = netdev_priv(net);
 	g = dev->gadget;
 
@@ -905,7 +902,6 @@  void gether_set_gadget(struct net_device *net, struct usb_gadget *g)
 
 	dev = netdev_priv(net);
 	dev->gadget = g;
-	SET_NETDEV_DEV(net, &g->dev);
 }
 EXPORT_SYMBOL_GPL(gether_set_gadget);