diff mbox series

usb: gadget: u_serial: Fix the issue that gs_start_io crashed due to accessing null pointer

Message ID TYUPR06MB621737D16F68B5ABD6CF5772D2272@TYUPR06MB6217.apcprd06.prod.outlook.com (mailing list archive)
State New
Headers show
Series usb: gadget: u_serial: Fix the issue that gs_start_io crashed due to accessing null pointer | expand

Commit Message

胡连勤 Nov. 18, 2024, 3:12 a.m. UTC
From: Lianqin Hu <hulianqin@vivo.com>

Considering that in some extreme cases,
when u_serial driver is accessed by multiple threads,
Thread A is executing the open operation and calling the gs_open,
Thread B is executing the disconnect operation and calling the
gserial_disconnect function,The port->port_usb pointer will be set to NULL.

E.g.
    Thread A                                 Thread B

    gs_open()                                gadget_unbind_driver()

    gs_start_io()                            composite_disconnect()

    gs_start_rx()                            gserial_disconnect()
    ...                                      ...
    spin_unlock(&port->port_lock)
    status = usb_ep_queue()                  spin_lock(&port->port_lock)
    spin_lock(&port->port_lock)              port->port_usb = NULL
    gs_free_requests(port->port_usb->in)     spin_unlock(&port->port_lock)
    Crash

This causes thread A to access a null pointer (port->port_usb is null)
when calling the gs_free_requests function, causing a crash.

To solve this problem, add the read_busy flag, before setting port_usb
to null in gserial_disconnect, add the read_busy flag judgment.

Unable to handle kernel NULL pointer dereference at
virtual address 00000000000000e8
pc : gs_start_io+0x164/0x25c
lr : gs_start_io+0x238/0x25c
sp : ffffffc08b75ba00
x29: ffffffc08b75ba00 x28: ffffffed8ba01000 x27: 0000000000020902
x26: dead000000000100 x25: ffffff899f43a400 x24: ffffff8862325400
x23: ffffff88623256a4 x22: ffffff8862325690 x21: ffffff88623255ec
x20: ffffff88623255d8 x19: ffffff885e19d700 x18: ffffffed8c45ae40
x17: 00000000d48d30ad x16: 00000000d48d30ad x15: 0010000000000001
x14: ffffffed8c50fcc0 x13: 0000000040000000 x12: 0000000000000001
x11: 0000000080200012 x10: 0000000080200012 x9 : ffffff88623255d8
x8 : 0000000000000000 x7 : 0000000000000000 x6 : 000000000000003f
x5 : ffffffed8ae0b9a4 x4 : fffffffe267d0ea0 x3 : 0000000080200012
x2 : ffffff899f43a400 x1 : 0000000080200013 x0 : ffffff899f43b100
Call trace:
 gs_start_io+0x164/0x25c
 gs_open+0x108/0x13c
 tty_open+0x314/0x638
 chrdev_open+0x1b8/0x258
 do_dentry_open+0x2c4/0x700
 vfs_open+0x2c/0x3c
 path_openat+0xa64/0xc60
 do_filp_open+0xb8/0x164
 do_sys_openat2+0x84/0xf0
 __arm64_sys_openat+0x70/0x9c
 invoke_syscall+0x58/0x114
 el0_svc_common+0x80/0xe0
 do_el0_svc+0x1c/0x28
 el0_svc+0x38/0x68
 el0t_64_sync_handler+0x68/0xbc
 el0t_64_sync+0x1a8/0x1ac
Code: f2fbd5ba eb14013f 540004a1 f940e708 (f9407513)
---[ end trace 0000000000000000 ]---

Signed-off-by: Lianqin Hu <hulianqin@vivo.com>
---
 drivers/usb/gadget/function/u_serial.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

Comments

Prashanth K Nov. 19, 2024, 11:53 a.m. UTC | #1
On 18-11-24 08:42 am, 胡连勤 wrote:
> From: Lianqin Hu <hulianqin@vivo.com>
> 
> Considering that in some extreme cases,
> when u_serial driver is accessed by multiple threads,
> Thread A is executing the open operation and calling the gs_open,
> Thread B is executing the disconnect operation and calling the
> gserial_disconnect function,The port->port_usb pointer will be set to NULL.
> 
[...]
> ---
>  drivers/usb/gadget/function/u_serial.c | 25 +++++++++++++++----------
>  1 file changed, 15 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
> index 0a8c05b2746b..9ab2dbed60a8 100644
> --- a/drivers/usb/gadget/function/u_serial.c
> +++ b/drivers/usb/gadget/function/u_serial.c
> @@ -124,6 +124,7 @@ struct gs_port {
>  	struct kfifo		port_write_buf;
>  	wait_queue_head_t	drain_wait;	/* wait while writes drain */
>  	bool                    write_busy;
> +	bool                    read_busy;
>  	wait_queue_head_t	close_wait;
>  	bool			suspended;	/* port suspended */
>  	bool			start_delayed;	/* delay start when suspended */
> @@ -331,9 +332,11 @@ __acquires(&port->port_lock)
>  		/* drop lock while we call out; the controller driver
>  		 * may need to call us back (e.g. for disconnect)
>  		 */
> +		port->read_busy = true;
>  		spin_unlock(&port->port_lock);
>  		status = usb_ep_queue(out, req, GFP_ATOMIC);
>  		spin_lock(&port->port_lock);
> +		port->read_busy = false;
>  
>  		if (status) {
>  			pr_debug("%s: %s %s err %d\n",
> @@ -1412,19 +1415,21 @@ void gserial_disconnect(struct gserial *gser)
>  	/* tell the TTY glue not to do I/O here any more */
>  	spin_lock(&port->port_lock);
>  
> -	gs_console_disconnect(port);
> +	if (!port->read_busy) {
start_tx/rx rely on port->port_usb for queuing the requests, and if its
not null during disconnect, tx/rx would keep on queuing requests to UDC
even after disconnect (which is not ideal). Here in your case, after
read_busy is set, start_rx would queue something outside of spinlock,
meanwhile disconnect happens but port_usb is still valid (because
read_busy is set) and start_rx would break early. But start_tx could
continue queuing into disconnected UDC (if 'started' is non-zero, which
could happen due to timing). Can't you try something like this,

--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -579,9 +579,12 @@ static int gs_start_io(struct gs_port *port)
                 * we didn't in gs_start_tx() */
                tty_wakeup(port->port.tty);
        } else {
-               gs_free_requests(ep, head, &port->read_allocated);
-               gs_free_requests(port->port_usb->in, &port->write_pool,
-                       &port->write_allocated);
+               /* Free reqs only if we are still connected */
+               if (port->port_usb) {
+                       gs_free_requests(ep, head, &port->read_allocated);
+                       gs_free_requests(port->port_usb->in,
&port->write_pool,
+                               &port->write_allocated);
+               }
                status = -EIO;
        }

This will skip freeing reqs (and your crash) if port_usb is null and
freeing would be taken care by disconnect callback.


> +		gs_console_disconnect(port);
>  
> -	/* REVISIT as above: how best to track this? */
> -	port->port_line_coding = gser->port_line_coding;
> +		/* REVISIT as above: how best to track this? */
> +		port->port_line_coding = gser->port_line_coding;
>  
> -	port->port_usb = NULL;
> -	gser->ioport = NULL;
> -	if (port->port.count > 0) {
> -		wake_up_interruptible(&port->drain_wait);
> -		if (port->port.tty)
> -			tty_hangup(port->port.tty);
> +		port->port_usb = NULL;
> +		gser->ioport = NULL;
> +		if (port->port.count > 0) {
> +			wake_up_interruptible(&port->drain_wait);
> +			if (port->port.tty)
> +				tty_hangup(port->port.tty);
> +		}
> +		port->suspended = false;
>  	}
> -	port->suspended = false;
>  	spin_unlock(&port->port_lock);
>  	spin_unlock_irqrestore(&serial_port_lock, flags);
>  
Regards,
Prashanth K
胡连勤 Nov. 19, 2024, 12:33 p.m. UTC | #2
Hello Prashanth:

> > Considering that in some extreme cases, when u_serial driver is
> > accessed by multiple threads, Thread A is executing the open operation
> > and calling the gs_open, Thread B is executing the disconnect
> > operation and calling the gserial_disconnect function,The
> > port->port_usb pointer will be set to NULL.
> >
> [...]
> > ---
> >  drivers/usb/gadget/function/u_serial.c | 25 +++++++++++++++----------
> >  1 file changed, 15 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/usb/gadget/function/u_serial.c
> > b/drivers/usb/gadget/function/u_serial.c
> > index 0a8c05b2746b..9ab2dbed60a8 100644
> > --- a/drivers/usb/gadget/function/u_serial.c
> > +++ b/drivers/usb/gadget/function/u_serial.c
> > @@ -124,6 +124,7 @@ struct gs_port {
> >  	struct kfifo		port_write_buf;
> >  	wait_queue_head_t	drain_wait;	/* wait while writes drain */
> >  	bool                    write_busy;
> > +	bool                    read_busy;
> >  	wait_queue_head_t	close_wait;
> >  	bool			suspended;	/* port suspended */
> >  	bool			start_delayed;	/* delay start when
> suspended */
> > @@ -331,9 +332,11 @@ __acquires(&port->port_lock)
> >  		/* drop lock while we call out; the controller driver
> >  		 * may need to call us back (e.g. for disconnect)
> >  		 */
> > +		port->read_busy = true;
> >  		spin_unlock(&port->port_lock);
> >  		status = usb_ep_queue(out, req, GFP_ATOMIC);
> >  		spin_lock(&port->port_lock);
> > +		port->read_busy = false;
> >
> >  		if (status) {
> >  			pr_debug("%s: %s %s err %d\n",
> > @@ -1412,19 +1415,21 @@ void gserial_disconnect(struct gserial *gser)
> >  	/* tell the TTY glue not to do I/O here any more */
> >  	spin_lock(&port->port_lock);
> >
> > -	gs_console_disconnect(port);
> > +	if (!port->read_busy) {
> start_tx/rx rely on port->port_usb for queuing the requests, and if its not
> null during disconnect, tx/rx would keep on queuing requests to UDC even
> after disconnect (which is not ideal). Here in your case, after read_busy is set,
> start_rx would queue something outside of spinlock, meanwhile disconnect
> happens but port_usb is still valid (because read_busy is set) and start_rx
> would break early. But start_tx could continue queuing into disconnected
> UDC (if 'started' is non-zero, which could happen due to timing). Can't you try
> something like this,
> 
> --- a/drivers/usb/gadget/function/u_serial.c
> +++ b/drivers/usb/gadget/function/u_serial.c
> @@ -579,9 +579,12 @@ static int gs_start_io(struct gs_port *port)
>                  * we didn't in gs_start_tx() */
>                 tty_wakeup(port->port.tty);
>         } else {
> -               gs_free_requests(ep, head, &port->read_allocated);
> -               gs_free_requests(port->port_usb->in, &port->write_pool,
> -                       &port->write_allocated);
> +               /* Free reqs only if we are still connected */
> +               if (port->port_usb) {
> +                       gs_free_requests(ep, head, &port->read_allocated);
> +                       gs_free_requests(port->port_usb->in,
> &port->write_pool,
> +                               &port->write_allocated);
> +               }
>                 status = -EIO;
>         }
> 
> This will skip freeing reqs (and your crash) if port_usb is null and freeing
> would be taken care by disconnect callback.
> 
> 
First of all, the patch you gave can solve the problem we are currently facing.

When we first encountered this problem, we also thought about adding a null check operation to deal with it,
but we saw that the entry of this function (gs_start_io) had a null check operation for port->port_usb, so I gave up
the idea of ​​null check during free_req (maybe I made a simple problem complicated),
and thought about optimizing it from the software logic, so that the port->port_usb pointer is always valid before gs_start_io is executed.

Thanks
Prashanth K Nov. 19, 2024, 12:44 p.m. UTC | #3
On 19-11-24 06:03 pm, 胡连勤 wrote:
> Hello Prashanth:
> 
>>> Considering that in some extreme cases, when u_serial driver is
>>> accessed by multiple threads, Thread A is executing the open operation
>>> and calling the gs_open, Thread B is executing the disconnect
>>> operation and calling the gserial_disconnect function,The
>>> port->port_usb pointer will be set to NULL.
>>>
>> [...]
>>> ---
>>>  drivers/usb/gadget/function/u_serial.c | 25 +++++++++++++++----------
>>>  1 file changed, 15 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/usb/gadget/function/u_serial.c
>>> b/drivers/usb/gadget/function/u_serial.c
>>> index 0a8c05b2746b..9ab2dbed60a8 100644
>>> --- a/drivers/usb/gadget/function/u_serial.c
>>> +++ b/drivers/usb/gadget/function/u_serial.c
>>> @@ -124,6 +124,7 @@ struct gs_port {
>>>  	struct kfifo		port_write_buf;
>>>  	wait_queue_head_t	drain_wait;	/* wait while writes drain */
>>>  	bool                    write_busy;
>>> +	bool                    read_busy;
>>>  	wait_queue_head_t	close_wait;
>>>  	bool			suspended;	/* port suspended */
>>>  	bool			start_delayed;	/* delay start when
>> suspended */
>>> @@ -331,9 +332,11 @@ __acquires(&port->port_lock)
>>>  		/* drop lock while we call out; the controller driver
>>>  		 * may need to call us back (e.g. for disconnect)
>>>  		 */
>>> +		port->read_busy = true;
>>>  		spin_unlock(&port->port_lock);
>>>  		status = usb_ep_queue(out, req, GFP_ATOMIC);
>>>  		spin_lock(&port->port_lock);
>>> +		port->read_busy = false;
>>>
>>>  		if (status) {
>>>  			pr_debug("%s: %s %s err %d\n",
>>> @@ -1412,19 +1415,21 @@ void gserial_disconnect(struct gserial *gser)
>>>  	/* tell the TTY glue not to do I/O here any more */
>>>  	spin_lock(&port->port_lock);
>>>
>>> -	gs_console_disconnect(port);
>>> +	if (!port->read_busy) {
>> start_tx/rx rely on port->port_usb for queuing the requests, and if its not
>> null during disconnect, tx/rx would keep on queuing requests to UDC even
>> after disconnect (which is not ideal). Here in your case, after read_busy is set,
>> start_rx would queue something outside of spinlock, meanwhile disconnect
>> happens but port_usb is still valid (because read_busy is set) and start_rx
>> would break early. But start_tx could continue queuing into disconnected
>> UDC (if 'started' is non-zero, which could happen due to timing). Can't you try
>> something like this,
>>
>> --- a/drivers/usb/gadget/function/u_serial.c
>> +++ b/drivers/usb/gadget/function/u_serial.c
>> @@ -579,9 +579,12 @@ static int gs_start_io(struct gs_port *port)
>>                  * we didn't in gs_start_tx() */
>>                 tty_wakeup(port->port.tty);
>>         } else {
>> -               gs_free_requests(ep, head, &port->read_allocated);
>> -               gs_free_requests(port->port_usb->in, &port->write_pool,
>> -                       &port->write_allocated);
>> +               /* Free reqs only if we are still connected */
>> +               if (port->port_usb) {
>> +                       gs_free_requests(ep, head, &port->read_allocated);
>> +                       gs_free_requests(port->port_usb->in,
>> &port->write_pool,
>> +                               &port->write_allocated);
>> +               }
>>                 status = -EIO;
>>         }
>>
>> This will skip freeing reqs (and your crash) if port_usb is null and freeing
>> would be taken care by disconnect callback.
>>
>>
> First of all, the patch you gave can solve the problem we are currently facing.
> 
> When we first encountered this problem, we also thought about adding a null check operation to deal with it,
> but we saw that the entry of this function (gs_start_io) had a null check operation for port->port_usb, so I gave up
> the idea of ​​null check during free_req (maybe I made a simple problem complicated),
> and thought about optimizing it from the software logic, so that the port->port_usb pointer is always valid before gs_start_io is executed.
> 

If it solves the problem, i guess you can use the null pointer check as
I suggested and send a new patchset, the current one will introduce new
problems. Keep the issue analysis as it is in commit text since its
descriptive enough to understand the problem.

Regards,
Prashanth K
胡连勤 Nov. 19, 2024, 12:50 p.m. UTC | #4
Hello Prashanth:

> >
> >>> Considering that in some extreme cases, when u_serial driver is
> >>> accessed by multiple threads, Thread A is executing the open
> >>> operation and calling the gs_open, Thread B is executing the
> >>> disconnect operation and calling the gserial_disconnect function,The
> >>> port->port_usb pointer will be set to NULL.
> >>>
> >> [...]
> >>> ---
> >>>  drivers/usb/gadget/function/u_serial.c | 25
> >>> +++++++++++++++----------
> >>>  1 file changed, 15 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/drivers/usb/gadget/function/u_serial.c
> >>> b/drivers/usb/gadget/function/u_serial.c
> >>> index 0a8c05b2746b..9ab2dbed60a8 100644
> >>> --- a/drivers/usb/gadget/function/u_serial.c
> >>> +++ b/drivers/usb/gadget/function/u_serial.c
> >>> @@ -124,6 +124,7 @@ struct gs_port {
> >>>  	struct kfifo		port_write_buf;
> >>>  	wait_queue_head_t	drain_wait;	/* wait while writes drain */
> >>>  	bool                    write_busy;
> >>> +	bool                    read_busy;
> >>>  	wait_queue_head_t	close_wait;
> >>>  	bool			suspended;	/* port suspended */
> >>>  	bool			start_delayed;	/* delay start when
> >> suspended */
> >>> @@ -331,9 +332,11 @@ __acquires(&port->port_lock)
> >>>  		/* drop lock while we call out; the controller driver
> >>>  		 * may need to call us back (e.g. for disconnect)
> >>>  		 */
> >>> +		port->read_busy = true;
> >>>  		spin_unlock(&port->port_lock);
> >>>  		status = usb_ep_queue(out, req, GFP_ATOMIC);
> >>>  		spin_lock(&port->port_lock);
> >>> +		port->read_busy = false;
> >>>
> >>>  		if (status) {
> >>>  			pr_debug("%s: %s %s err %d\n",
> >>> @@ -1412,19 +1415,21 @@ void gserial_disconnect(struct gserial *gser)
> >>>  	/* tell the TTY glue not to do I/O here any more */
> >>>  	spin_lock(&port->port_lock);
> >>>
> >>> -	gs_console_disconnect(port);
> >>> +	if (!port->read_busy) {
> >> start_tx/rx rely on port->port_usb for queuing the requests, and if
> >> its not null during disconnect, tx/rx would keep on queuing requests
> >> to UDC even after disconnect (which is not ideal). Here in your case,
> >> after read_busy is set, start_rx would queue something outside of
> >> spinlock, meanwhile disconnect happens but port_usb is still valid
> >> (because read_busy is set) and start_rx would break early. But
> >> start_tx could continue queuing into disconnected UDC (if 'started'
> >> is non-zero, which could happen due to timing). Can't you try
> >> something like this,
> >>
> >> --- a/drivers/usb/gadget/function/u_serial.c
> >> +++ b/drivers/usb/gadget/function/u_serial.c
> >> @@ -579,9 +579,12 @@ static int gs_start_io(struct gs_port *port)
> >>                  * we didn't in gs_start_tx() */
> >>                 tty_wakeup(port->port.tty);
> >>         } else {
> >> -               gs_free_requests(ep, head, &port->read_allocated);
> >> -               gs_free_requests(port->port_usb->in, &port->write_pool,
> >> -                       &port->write_allocated);
> >> +               /* Free reqs only if we are still connected */
> >> +               if (port->port_usb) {
> >> +                       gs_free_requests(ep, head, &port->read_allocated);
> >> +                       gs_free_requests(port->port_usb->in,
> >> &port->write_pool,
> >> +                               &port->write_allocated);
> >> +               }
> >>                 status = -EIO;
> >>         }
> >>
> >> This will skip freeing reqs (and your crash) if port_usb is null and
> >> freeing would be taken care by disconnect callback.
> >>
> >>
> > First of all, the patch you gave can solve the problem we are currently
> facing.
> >
> > When we first encountered this problem, we also thought about adding a
> > null check operation to deal with it, but we saw that the entry of
> > this function (gs_start_io) had a null check operation for
> > port->port_usb, so I gave up the idea of ​​null check during free_req (maybe
> I made a simple problem complicated), and thought about optimizing it from
> the software logic, so that the port->port_usb pointer is always valid before
> gs_start_io is executed.
> >
> 
> If it solves the problem, i guess you can use the null pointer check as I
> suggested and send a new patchset, the current one will introduce new
> problems. Keep the issue analysis as it is in commit text since its descriptive
> enough to understand the problem.

OK, I'll do a stress test and send out a new patch later.

Thanks
diff mbox series

Patch

diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
index 0a8c05b2746b..9ab2dbed60a8 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -124,6 +124,7 @@  struct gs_port {
 	struct kfifo		port_write_buf;
 	wait_queue_head_t	drain_wait;	/* wait while writes drain */
 	bool                    write_busy;
+	bool                    read_busy;
 	wait_queue_head_t	close_wait;
 	bool			suspended;	/* port suspended */
 	bool			start_delayed;	/* delay start when suspended */
@@ -331,9 +332,11 @@  __acquires(&port->port_lock)
 		/* drop lock while we call out; the controller driver
 		 * may need to call us back (e.g. for disconnect)
 		 */
+		port->read_busy = true;
 		spin_unlock(&port->port_lock);
 		status = usb_ep_queue(out, req, GFP_ATOMIC);
 		spin_lock(&port->port_lock);
+		port->read_busy = false;
 
 		if (status) {
 			pr_debug("%s: %s %s err %d\n",
@@ -1412,19 +1415,21 @@  void gserial_disconnect(struct gserial *gser)
 	/* tell the TTY glue not to do I/O here any more */
 	spin_lock(&port->port_lock);
 
-	gs_console_disconnect(port);
+	if (!port->read_busy) {
+		gs_console_disconnect(port);
 
-	/* REVISIT as above: how best to track this? */
-	port->port_line_coding = gser->port_line_coding;
+		/* REVISIT as above: how best to track this? */
+		port->port_line_coding = gser->port_line_coding;
 
-	port->port_usb = NULL;
-	gser->ioport = NULL;
-	if (port->port.count > 0) {
-		wake_up_interruptible(&port->drain_wait);
-		if (port->port.tty)
-			tty_hangup(port->port.tty);
+		port->port_usb = NULL;
+		gser->ioport = NULL;
+		if (port->port.count > 0) {
+			wake_up_interruptible(&port->drain_wait);
+			if (port->port.tty)
+				tty_hangup(port->port.tty);
+		}
+		port->suspended = false;
 	}
-	port->suspended = false;
 	spin_unlock(&port->port_lock);
 	spin_unlock_irqrestore(&serial_port_lock, flags);