diff mbox series

[v3] usb: dwc3: Stop active transfers before halting the controller

Message ID 20200903210954.24504-1-wcheng@codeaurora.org (mailing list archive)
State Superseded
Headers show
Series [v3] usb: dwc3: Stop active transfers before halting the controller | expand

Commit Message

Wesley Cheng Sept. 3, 2020, 9:09 p.m. UTC
In the DWC3 databook, for a device initiated disconnect or bus reset, the
driver is required to send dependxfer commands for any pending transfers.
In addition, before the controller can move to the halted state, the SW
needs to acknowledge any pending events.  If the controller is not halted
properly, there is a chance the controller will continue accessing stale or
freed TRBs and buffers.

Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>

---
Changes in v3:
 - Removed DWC3_EP_ENABLED check from dwc3_gadget_stop_active_transfers()
   as dwc3_stop_active_transfer() has a check already in place.
 - Calling __dwc3_gadget_stop() which ensures that DWC3 interrupt events
   are cleared, and ep0 eps are cleared for the pullup disabled case.  Not
   required to call __dwc3_gadget_start() on pullup enable, as the
   composite driver will execute udc_start() before calling pullup().

Changes in v2:
 - Moved cleanup code to the pullup() API to differentiate between device
   disconnect and hibernation.
 - Added cleanup code to the bus reset case as well.
 - Verified the move to pullup() did not reproduce the problem using the
   same test sequence.

Verified fix by adding a check for ETIMEDOUT during the run stop call.
Shell script writing to the configfs UDC file to trigger disconnect and
connect.  Batch script to have PC execute data transfers over adb (i.e. adb
push).  After a few iterations, we'd run into a scenario where the
controller wasn't halted.  With the following change, no failed halts after
many iterations.
---
 drivers/usb/dwc3/ep0.c    |  2 +-
 drivers/usb/dwc3/gadget.c | 49 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 49 insertions(+), 2 deletions(-)

Comments

Thinh Nguyen Sept. 4, 2020, 12:47 a.m. UTC | #1
Wesley Cheng wrote:
> In the DWC3 databook, for a device initiated disconnect or bus reset, the
> driver is required to send dependxfer commands for any pending transfers.
> In addition, before the controller can move to the halted state, the SW
> needs to acknowledge any pending events.  If the controller is not halted
> properly, there is a chance the controller will continue accessing stale or
> freed TRBs and buffers.
>
> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>
> ---
> Changes in v3:
>  - Removed DWC3_EP_ENABLED check from dwc3_gadget_stop_active_transfers()
>    as dwc3_stop_active_transfer() has a check already in place.
>  - Calling __dwc3_gadget_stop() which ensures that DWC3 interrupt events
>    are cleared, and ep0 eps are cleared for the pullup disabled case.  Not
>    required to call __dwc3_gadget_start() on pullup enable, as the
>    composite driver will execute udc_start() before calling pullup().
>
> Changes in v2:
>  - Moved cleanup code to the pullup() API to differentiate between device
>    disconnect and hibernation.
>  - Added cleanup code to the bus reset case as well.
>  - Verified the move to pullup() did not reproduce the problen using the
>    same test sequence.
>
> Verified fix by adding a check for ETIMEDOUT during the run stop call.
> Shell script writing to the configfs UDC file to trigger disconnect and
> connect.  Batch script to have PC execute data transfers over adb (ie adb
> push)  After a few iterations, we'd run into a scenario where the
> controller wasn't halted.  With the following change, no failed halts after
> many iterations.
> ---
>  drivers/usb/dwc3/ep0.c    |  2 +-
>  drivers/usb/dwc3/gadget.c | 49 ++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 49 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
> index 59f2e8c31bd1..456aa87e8778 100644
> --- a/drivers/usb/dwc3/ep0.c
> +++ b/drivers/usb/dwc3/ep0.c
> @@ -197,7 +197,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
>  	int				ret;
>  
>  	spin_lock_irqsave(&dwc->lock, flags);
> -	if (!dep->endpoint.desc) {
> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
>  		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
>  				dep->name);
>  		ret = -ESHUTDOWN;
> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
> index 3ab6f118c508..73bda7eaa773 100644
> --- a/drivers/usb/dwc3/gadget.c
> +++ b/drivers/usb/dwc3/gadget.c
> @@ -1516,7 +1516,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
>  {
>  	struct dwc3		*dwc = dep->dwc;
>  
> -	if (!dep->endpoint.desc) {
> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
>  		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
>  				dep->name);
>  		return -ESHUTDOWN;
> @@ -1926,6 +1926,21 @@ static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
>  	return 0;
>  }
>  
> +static void dwc3_stop_active_transfers(struct dwc3 *dwc)
> +{
> +	u32 epnum;
> +
> +	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
> +		struct dwc3_ep *dep;
> +
> +		dep = dwc->eps[epnum];
> +		if (!dep)
> +			continue;
> +
> +		dwc3_remove_requests(dwc, dep);
> +	}
> +}
> +
>  static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>  {
>  	u32			reg;
> @@ -1971,6 +1986,8 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>  	return 0;
>  }
>  
> +static void __dwc3_gadget_stop(struct dwc3 *dwc);
> +
>  static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>  {
>  	struct dwc3		*dwc = gadget_to_dwc(g);
> @@ -1994,9 +2011,37 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>  		}
>  	}
>  
> +	/*
> +	 * Synchronize and disable any further event handling while controller
> +	 * is being enabled/disabled.
> +	 */
> +	disable_irq(dwc->irq_gadget);
>  	spin_lock_irqsave(&dwc->lock, flags);
> +
> +	/* Controller is not halted until pending events are acknowledged */
> +	if (!is_on) {
> +		u32 count;
> +
> +		/*
> +		 * The databook explicitly mentions for a device-initiated
> +		 * disconnect sequence, the SW needs to ensure that it ends any
> +		 * active transfers.
> +		 */
> +		dwc3_stop_active_transfers(dwc);
> +		__dwc3_gadget_stop(dwc);
> +
> +		count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
> +		count &= DWC3_GEVNTCOUNT_MASK;
> +		if (count > 0) {
> +			dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), count);
> +			dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
> +						dwc->ev_buf->length;
> +		}
> +	}
> +
>  	ret = dwc3_gadget_run_stop(dwc, is_on, false);
>  	spin_unlock_irqrestore(&dwc->lock, flags);
> +	enable_irq(dwc->irq_gadget);
>  
>  	return ret;
>  }
> @@ -3100,6 +3145,8 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
>  	}
>  
>  	dwc3_reset_gadget(dwc);
> +	/* Stop any active/pending transfers when receiving bus reset */
> +	dwc3_stop_active_transfers(dwc);
>  
>  	reg = dwc3_readl(dwc->regs, DWC3_DCTL);
>  	reg &= ~DWC3_DCTL_TSTCTRL_MASK;

Looks good to me.

Reviewed-by: Thinh Nguyen <thinhn@synopsys.com>

Thanks,
Thinh
Felipe Balbi Sept. 7, 2020, 6:20 a.m. UTC | #2
Hi,

Wesley Cheng <wcheng@codeaurora.org> writes:
> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
> index 59f2e8c31bd1..456aa87e8778 100644
> --- a/drivers/usb/dwc3/ep0.c
> +++ b/drivers/usb/dwc3/ep0.c
> @@ -197,7 +197,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
>  	int				ret;
>  
>  	spin_lock_irqsave(&dwc->lock, flags);
> -	if (!dep->endpoint.desc) {
> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {

this looks odd. If we don't have pullups connected, we shouldn't have a
descriptor; likewise, if we don't have a descriptor, we haven't been
enumerated, therefore we shouldn't have pullups connected.

What am I missing here?

> @@ -1926,6 +1926,21 @@ static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
>  	return 0;
>  }
>  
> +static void dwc3_stop_active_transfers(struct dwc3 *dwc)
> +{
> +	u32 epnum;
> +
> +	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {

dwc3 knows the number of endpoints available in the HW. Use dwc->num_eps
instead.

> @@ -1971,6 +1986,8 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>  	return 0;
>  }
>  
> +static void __dwc3_gadget_stop(struct dwc3 *dwc);
> +
>  static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>  {
>  	struct dwc3		*dwc = gadget_to_dwc(g);
> @@ -1994,9 +2011,37 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>  		}
>  	}
>  
> +	/*
> +	 * Synchronize and disable any further event handling while controller
> +	 * is being enabled/disabled.
> +	 */
> +	disable_irq(dwc->irq_gadget);

why isn't dwc3_gadget_disable_irq() enough?

>  	spin_lock_irqsave(&dwc->lock, flags);

spin_lock_irqsave() will disable interrupts, why disable_irq() above?

> +	/* Controller is not halted until pending events are acknowledged */
> +	if (!is_on) {
> +		u32 count;
> +
> +		/*
> +		 * The databook explicitly mentions for a device-initiated
> +		 * disconnect sequence, the SW needs to ensure that it ends any
> +		 * active transfers.
> +		 */

make this a little better by mentioning the version and section of the
databook you're reading. That makes it easier for future
reference. Also, use an actual quote from the databook, along the lines
of:

		/*
                 * Synopsys DesignWare Cores USB3 Databook Revision
                 * X.YYa states in section W.Z that "device-initiated
                 * disconnect ...."
                 */

> +		dwc3_stop_active_transfers(dwc);
> +		__dwc3_gadget_stop(dwc);
> +
> +		count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
> +		count &= DWC3_GEVNTCOUNT_MASK;
> +		if (count > 0) {
> +			dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), count);
> +			dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
> +						dwc->ev_buf->length;
> +		}

don't duplicate code. Add a patch before this extracting this into
helper and use it for both irq handler and gadget pullup.

> +	}
> +
>  	ret = dwc3_gadget_run_stop(dwc, is_on, false);
>  	spin_unlock_irqrestore(&dwc->lock, flags);
> +	enable_irq(dwc->irq_gadget);
>  
>  	return ret;
>  }
> @@ -3100,6 +3145,8 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
>  	}
>  
>  	dwc3_reset_gadget(dwc);
> +	/* Stop any active/pending transfers when receiving bus reset */

unnecessary comment. We're calling a function named "stop active
transfers" from within the "bus reset handler".
Wesley Cheng Sept. 8, 2020, 9:42 p.m. UTC | #3
On 9/6/2020 11:20 PM, Felipe Balbi wrote:
> 
> Hi,
> 
> Wesley Cheng <wcheng@codeaurora.org> writes:
>> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
>> index 59f2e8c31bd1..456aa87e8778 100644
>> --- a/drivers/usb/dwc3/ep0.c
>> +++ b/drivers/usb/dwc3/ep0.c
>> @@ -197,7 +197,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
>>  	int				ret;
>>  
>>  	spin_lock_irqsave(&dwc->lock, flags);
>> -	if (!dep->endpoint.desc) {
>> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
> 
> this looks odd. If we don't have pullups connected, we shouldn't have a
> descriptor, likewise if we don't have a a description, we haven't been
> enumerated, therefore we shouldn't have pullups connected.
> 
> What am I missing here?
> 

Hi Felipe,

When we
echo "" > /sys/kernel/config/usb_gadget/g1/UDC

This triggers the usb_gadget_disconnect() routine to execute.

int usb_gadget_disconnect(struct usb_gadget *gadget)
{
...
	ret = gadget->ops->pullup(gadget, 0);
	if (!ret) {
		gadget->connected = 0;
		gadget->udc->driver->disconnect(gadget);
	}

So it is possible that we've already disabled the pullup before running
the disable() callbacks in the function drivers.  The disable()
callbacks usually are the ones responsible for calling usb_ep_disable(),
where we clear the desc field.  This means there is a brief period where
the pullups_connected = 0, but we still have valid ep desc, as it has
not been disabled yet.

Also, for function drivers like mass storage, the fsg_disable() routine
defers the actual usb_ep_disable() call to the fsg_thread, so it's not
always guaranteed that the disconnect() execution results in
usb_ep_disable() occurring synchronously.

>> @@ -1926,6 +1926,21 @@ static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
>>  	return 0;
>>  }
>>  
>> +static void dwc3_stop_active_transfers(struct dwc3 *dwc)
>> +{
>> +	u32 epnum;
>> +
>> +	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
> 
> dwc3 knows the number of endpoints available in the HW. Use dwc->num_eps
> instead.
> 

Sure, will do.

>> @@ -1971,6 +1986,8 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>>  	return 0;
>>  }
>>  
>> +static void __dwc3_gadget_stop(struct dwc3 *dwc);
>> +
>>  static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>>  {
>>  	struct dwc3		*dwc = gadget_to_dwc(g);
>> @@ -1994,9 +2011,37 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>>  		}
>>  	}
>>  
>> +	/*
>> +	 * Synchronize and disable any further event handling while controller
>> +	 * is being enabled/disabled.
>> +	 */
>> +	disable_irq(dwc->irq_gadget);
> 
> why isn't dwc3_gadget_disable_irq() enough?
> 
>>  	spin_lock_irqsave(&dwc->lock, flags);
> 
> spin_lock_irqsave() will disable interrupts, why disable_irq() above?
> 

In the discussion I had with Thinh, the concern was that with the newly
added code to override the lpos here, if the interrupt routine
(dwc3_check_event_buf()) runs, then it will reference the lpos for
copying the event buffer contents to the event cache, and potentially
process events.  There is no locking in place, so it could be possible
to have both run in parallel.

Hence, if there was already a pending IRQ triggered,
dwc3_gadget_disable_irq() won't ensure the IRQ is handled.  We can do
something like:
if (!is_on)
	dwc3_gadget_disable_irq()
synchronize_irq()
spin_lock_irqsave()
if(!is_on) {
...

But the logic to only apply this on the pullup removal case is a little
messy.  Also, from my understanding, the spin_lock_irqsave() will only
disable the local CPU IRQs, but not the interrupt line on the GIC, which
means other CPUs can handle it, unless we explicitly set the IRQ
affinity to CPUX.

>> +	/* Controller is not halted until pending events are acknowledged */
>> +	if (!is_on) {
>> +		u32 count;
>> +
>> +		/*
>> +		 * The databook explicitly mentions for a device-initiated
>> +		 * disconnect sequence, the SW needs to ensure that it ends any
>> +		 * active transfers.
>> +		 */
> 
> make this a little better by mentioning the version and section of the
> databook you're reading. That makes it easier for future
> reference. Also, use an actual quote from the databook, along the lines
> of:
> 
> 		/*
>                  * Synopsys DesignWare Cores USB3 Databook Revision
>                  * X.YYa states in section W.Z that "device-initiated
>                  * disconnect ...."
>                  */
> 

Got it.

>> +		dwc3_stop_active_transfers(dwc);
>> +		__dwc3_gadget_stop(dwc);
>> +
>> +		count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
>> +		count &= DWC3_GEVNTCOUNT_MASK;
>> +		if (count > 0) {
>> +			dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), count);
>> +			dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
>> +						dwc->ev_buf->length;
>> +		}
> 
> don't duplicate code. Add a patch before this extracting this into
> helper and use it for both irq handler and gadget pullup.
> 

We actually removed this call in the IRQ handler, because if we ensure that
the IRQ routine has fully completed and won't trigger anymore, then this
sequence will handle clearing of the event count.

>> +	}
>> +
>>  	ret = dwc3_gadget_run_stop(dwc, is_on, false);
>>  	spin_unlock_irqrestore(&dwc->lock, flags);
>> +	enable_irq(dwc->irq_gadget);
>>  
>>  	return ret;
>>  }
>> @@ -3100,6 +3145,8 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
>>  	}
>>  
>>  	dwc3_reset_gadget(dwc);
>> +	/* Stop any active/pending transfers when receiving bus reset */
> 
> unnecessary comment. We're calling a function named "stop active
> transfers" from within the "bus reset handler".
> 

I can remove this.

Thanks
Wesley
Felipe Balbi Sept. 24, 2020, 7:39 a.m. UTC | #4
Hi,

Wesley Cheng <wcheng@codeaurora.org> writes:
> On 9/6/2020 11:20 PM, Felipe Balbi wrote:
>> 
>> Hi,
>> 
>> Wesley Cheng <wcheng@codeaurora.org> writes:
>>> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
>>> index 59f2e8c31bd1..456aa87e8778 100644
>>> --- a/drivers/usb/dwc3/ep0.c
>>> +++ b/drivers/usb/dwc3/ep0.c
>>> @@ -197,7 +197,7 @@ int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
>>>  	int				ret;
>>>  
>>>  	spin_lock_irqsave(&dwc->lock, flags);
>>> -	if (!dep->endpoint.desc) {
>>> +	if (!dep->endpoint.desc || !dwc->pullups_connected) {
>> 
>> this looks odd. If we don't have pullups connected, we shouldn't have a
>> descriptor, likewise if we don't have a a description, we haven't been
>> enumerated, therefore we shouldn't have pullups connected.
>> 
>> What am I missing here?
>> 
>
> Hi Felipe,
>
> When we
> echo "" > /sys/kernel/config/usb_gadget/g1/UDC
>
> This triggers the usb_gadget_disconnect() routine to execute.
>
> int usb_gadget_disconnect(struct usb_gadget *gadget)
> {
> ...
> 	ret = gadget->ops->pullup(gadget, 0);
> 	if (!ret) {
> 		gadget->connected = 0;
> 		gadget->udc->driver->disconnect(gadget);
> 	}
>
> So it is possible that we've already disabled the pullup before running
> the disable() callbacks in the function drivers.  The disable()

we used to have usage counts for those, are they gone? I think they're
still there.

> callbacks usually are the ones responsible for calling usb_ep_disable(),
> where we clear the desc field.  This means there is a brief period where
> the pullups_connected = 0, but we still have valid ep desc, as it has
> not been disabled yet.

this is a valid point, though

> Also, for function drivers like mass storage, the fsg_disable() routine
> defers the actual usb_ep_disable() call to the fsg_thread, so its not
> always ensured that the disconnect() execution would result in the
> usb_ep_disable() to occur synchronously.

also a good point.

>>> @@ -1926,6 +1926,21 @@ static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
>>>  	return 0;
>>>  }
>>>  
>>> +static void dwc3_stop_active_transfers(struct dwc3 *dwc)
>>> +{
>>> +	u32 epnum;
>>> +
>>> +	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
>> 
>> dwc3 knows the number of endpoints available in the HW. Use dwc->num_eps
>> instead.
>> 
>
> Sure, will do.
>
>>> @@ -1971,6 +1986,8 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
>>>  	return 0;
>>>  }
>>>  
>>> +static void __dwc3_gadget_stop(struct dwc3 *dwc);
>>> +
>>>  static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>>>  {
>>>  	struct dwc3		*dwc = gadget_to_dwc(g);
>>> @@ -1994,9 +2011,37 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
>>>  		}
>>>  	}
>>>  
>>> +	/*
>>> +	 * Synchronize and disable any further event handling while controller
>>> +	 * is being enabled/disabled.
>>> +	 */
>>> +	disable_irq(dwc->irq_gadget);
>> 
>> why isn't dwc3_gadget_disable_irq() enough?
>> 
>>>  	spin_lock_irqsave(&dwc->lock, flags);
>> 
>> spin_lock_irqsave() will disable interrupts, why disable_irq() above?
>> 
>
> In the discussion I had with Thinh, the concern was that with the newly
> added code to override the lpos here, if the interrupt routine
> (dwc3_check_event_buf()) runs, then it will reference the lpos for

that's running in hardirq context. All interrupts are disabled while
that runs, there's no risk of race, right?

> copying the event buffer contents to the event cache, and potentially
> process events.  There is no locking in place, so it could be possible
> to have both run in parallel.

Is this academic or have you actually found a situation where this
could, indeed, happen? The spin_lock_irqsave() should be enough to
synchronize dwc3_gadget_pullup() and the interrupt handler.

> Hence, the reason if there was already a pending IRQ triggered, the
> dwc3_gadget_disable_irq() won't ensure the IRQ is handled.  We can do
> something like:
> if (!is_on)
> 	dwc3_gadget_disable_irq()
> synchronize_irq()
> spin_lock_irqsave()
> if(!is_on) {
> ...
>
> But the logic to only apply this on the pullup removal case is a little
> messy.  Also, from my understanding, the spin_lock_irqsave() will only
> disable the local CPU IRQs, but not the interrupt line on the GIC, which
> means other CPUs can handle it, unless we explicitly set the IRQ
> affinity to CPUX.

Yeah, the way I understand this can't really happen. But I'm open to
being educated. Maybe Alan can explain if this is really a possibility?

>>> +		dwc3_stop_active_transfers(dwc);
>>> +		__dwc3_gadget_stop(dwc);
>>> +
>>> +		count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
>>> +		count &= DWC3_GEVNTCOUNT_MASK;
>>> +		if (count > 0) {
>>> +			dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), count);
>>> +			dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
>>> +						dwc->ev_buf->length;
>>> +		}
>> 
>> don't duplicate code. Add a patch before this extracting this into
>> helper and use it for both irq handler and gadget pullup.
>> 
>
> We actually removed this call in the IRQ handler, as if we ensure that
> the IRQ routine has fully complete and won't trigger anymore, then this
> sequence will handle clearing of the event count.

oh, makes sense :-)
Alan Stern Sept. 24, 2020, 3:50 p.m. UTC | #5
On Thu, Sep 24, 2020 at 10:39:24AM +0300, Felipe Balbi wrote:
> >>> +	/*
> >>> +	 * Synchronize and disable any further event handling while controller
> >>> +	 * is being enabled/disabled.
> >>> +	 */
> >>> +	disable_irq(dwc->irq_gadget);
> >> 
> >> why isn't dwc3_gadget_disable_irq() enough?
> >> 
> >>>  	spin_lock_irqsave(&dwc->lock, flags);
> >> 
> >> spin_lock_irqsave() will disable interrupts, why disable_irq() above?
> >> 
> >
> > In the discussion I had with Thinh, the concern was that with the newly
> > added code to override the lpos here, if the interrupt routine
> > (dwc3_check_event_buf()) runs, then it will reference the lpos for
> 
> that's running in hardirq context. All interrupts are disabled while
> that runs, there's no risk of race, right?
> 
> > copying the event buffer contents to the event cache, and potentially
> > process events.  There is no locking in place, so it could be possible
> > to have both run in parallel.
> 
> Is this academic or have you actually found a situation where this
> could, indeed, happen? The spin_lock_irqsave() should be enough to
> synchronize dwc3_gadget_pullup() and the interrupt handler.
> 
> > Hence, the reason if there was already a pending IRQ triggered, the
> > dwc3_gadget_disable_irq() won't ensure the IRQ is handled.  We can do
> > something like:
> > if (!is_on)
> > 	dwc3_gadget_disable_irq()
> > synchronize_irq()
> > spin_lock_irqsave()
> > if(!is_on) {
> > ...
> >
> > But the logic to only apply this on the pullup removal case is a little
> > messy.  Also, from my understanding, the spin_lock_irqsave() will only
> > disable the local CPU IRQs, but not the interrupt line on the GIC, which
> > means other CPUs can handle it, unless we explicitly set the IRQ
> > affinity to CPUX.
> 
> Yeah, the way I understand this can't really happen. But I'm open to
> being educated. Maybe Alan can explain if this is really possibility?

It depends on the details of the hardware, but yes, it is possible in
general for an interrupt handler to run after you have turned off the
device's interrupt-request line.  For example:

	CPU A				CPU B
	---------------------------	----------------------
	Gets an IRQ from the device
	Calls handler routine		spin_lock_irq
	  spin_lock_irq			Turns off the IRQ line
	  ...spins...			spin_unlock_irq
	  Rest of handler runs
	  spin_unlock_irq

That's why we have synchronize_irq().  The usual pattern is something
like this:

	spin_lock_irq(&priv->lock);
	priv->disconnected = true;
	my_disable_irq(priv);
	spin_unlock_irq(&priv->lock);
	synchronize_irq(priv->irq);

And of course this has to be done in a context that can sleep.

Does this answer your question?

Alan Stern
Felipe Balbi Sept. 25, 2020, 6:06 a.m. UTC | #6
Hi,

Alan Stern <stern@rowland.harvard.edu> writes:
>> > Hence, the reason if there was already a pending IRQ triggered, the
>> > dwc3_gadget_disable_irq() won't ensure the IRQ is handled.  We can do
>> > something like:
>> > if (!is_on)
>> > 	dwc3_gadget_disable_irq()
>> > synchronize_irq()
>> > spin_lock_irqsave()
>> > if(!is_on) {
>> > ...
>> >
>> > But the logic to only apply this on the pullup removal case is a little
>> > messy.  Also, from my understanding, the spin_lock_irqsave() will only
>> > disable the local CPU IRQs, but not the interrupt line on the GIC, which
>> > means other CPUs can handle it, unless we explicitly set the IRQ
>> > affinity to CPUX.
>> 
>> Yeah, the way I understand this can't really happen. But I'm open to
>> being educated. Maybe Alan can explain if this is really possibility?
>
> It depends on the details of the hardware, but yes, it is possible in
> general for an interrupt handler to run after you have turned off the
> device's interrupt-request line.  For example:
>
> 	CPU A				CPU B
> 	---------------------------	----------------------
> 	Gets an IRQ from the device
> 	Calls handler routine		spin_lock_irq
> 	  spin_lock_irq			Turns off the IRQ line
> 	  ...spins...			spin_unlock_irq
> 	  Rest of handler runs
> 	  spin_unlock_irq
>
> That's why we have synchronize_irq().  The usual pattern is something
> like this:
>
> 	spin_lock_irq(&priv->lock);
> 	priv->disconnected = true;
> 	my_disable_irq(priv);
> 	spin_unlock_irq(&priv->lock);
> 	synchronize_irq(priv->irq);
>
> And of course this has to be done in a context that can sleep.
>
> Does this answer your question?

It does, thank you Alan. It seems like we don't need a call to
disable_irq(), only synchronize_irq() is enough, however it should be
called with spinlocks released, not held.

Thanks
Wesley Cheng Sept. 25, 2020, 7:33 p.m. UTC | #7
On 9/24/2020 11:06 PM, Felipe Balbi wrote:
> 
> Hi,
> 
> Alan Stern <stern@rowland.harvard.edu> writes:
>>>> Hence, the reason if there was already a pending IRQ triggered, the
>>>> dwc3_gadget_disable_irq() won't ensure the IRQ is handled.  We can do
>>>> something like:
>>>> if (!is_on)
>>>> 	dwc3_gadget_disable_irq()
>>>> synchronize_irq()
>>>> spin_lock_irqsave()
>>>> if(!is_on) {
>>>> ...
>>>>
>>>> But the logic to only apply this on the pullup removal case is a little
>>>> messy.  Also, from my understanding, the spin_lock_irqsave() will only
>>>> disable the local CPU IRQs, but not the interrupt line on the GIC, which
>>>> means other CPUs can handle it, unless we explicitly set the IRQ
>>>> affinity to CPUX.
>>>
>>> Yeah, the way I understand this can't really happen. But I'm open to
>>> being educated. Maybe Alan can explain if this is really possibility?
>>

Hi Felipe/Alan,

Thanks for the detailed explanations and inputs.  Useful information to
have!

>> It depends on the details of the hardware, but yes, it is possible in
>> general for an interrupt handler to run after you have turned off the
>> device's interrupt-request line.  For example:
>>
>> 	CPU A				CPU B
>> 	---------------------------	----------------------
>> 	Gets an IRQ from the device
>> 	Calls handler routine		spin_lock_irq
>> 	  spin_lock_irq			Turns off the IRQ line
>> 	  ...spins...			spin_unlock_irq
>> 	  Rest of handler runs
>> 	  spin_unlock_irq
>>
>> That's why we have synchronize_irq().  The usual pattern is something
>> like this:
>>
>> 	spin_lock_irq(&priv->lock);
>> 	priv->disconnected = true;
>> 	my_disable_irq(priv);
>> 	spin_unlock_irq(&priv->lock);
>> 	synchronize_irq(priv->irq);
>>
>> And of course this has to be done in a context that can sleep.
>>
>> Does this answer your question?
> 
> It does, thank you Alan. It seems like we don't need a call to
> disable_irq(), only synchronize_irq() is enough, however it should be
> called with spinlocks released, not held.
> 

I mean...I'm not against using the synchronize_irq() +
dwc3_gadget_disable_irq() route, since that will address the concern as
well.  It was just that with the disable/enable IRQ route, I didn't need to
explicitly check the is_on flag again, since I didn't need to worry
about overwriting the DEVTEN reg (for the pullup enable case).  Will
include this in the next version.

Thanks
Wesley Cheng

> Thanks
>
diff mbox series

Patch

diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 59f2e8c31bd1..456aa87e8778 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -197,7 +197,7 @@  int dwc3_gadget_ep0_queue(struct usb_ep *ep, struct usb_request *request,
 	int				ret;
 
 	spin_lock_irqsave(&dwc->lock, flags);
-	if (!dep->endpoint.desc) {
+	if (!dep->endpoint.desc || !dwc->pullups_connected) {
 		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
 				dep->name);
 		ret = -ESHUTDOWN;
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 3ab6f118c508..73bda7eaa773 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1516,7 +1516,7 @@  static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
 {
 	struct dwc3		*dwc = dep->dwc;
 
-	if (!dep->endpoint.desc) {
+	if (!dep->endpoint.desc || !dwc->pullups_connected) {
 		dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
 				dep->name);
 		return -ESHUTDOWN;
@@ -1926,6 +1926,21 @@  static int dwc3_gadget_set_selfpowered(struct usb_gadget *g,
 	return 0;
 }
 
+static void dwc3_stop_active_transfers(struct dwc3 *dwc)
+{
+	u32 epnum;
+
+	for (epnum = 2; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
+		struct dwc3_ep *dep;
+
+		dep = dwc->eps[epnum];
+		if (!dep)
+			continue;
+
+		dwc3_remove_requests(dwc, dep);
+	}
+}
+
 static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
 {
 	u32			reg;
@@ -1971,6 +1986,8 @@  static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on, int suspend)
 	return 0;
 }
 
+static void __dwc3_gadget_stop(struct dwc3 *dwc);
+
 static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
 {
 	struct dwc3		*dwc = gadget_to_dwc(g);
@@ -1994,9 +2011,37 @@  static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
 		}
 	}
 
+	/*
+	 * Synchronize and disable any further event handling while controller
+	 * is being enabled/disabled.
+	 */
+	disable_irq(dwc->irq_gadget);
 	spin_lock_irqsave(&dwc->lock, flags);
+
+	/* Controller is not halted until pending events are acknowledged */
+	if (!is_on) {
+		u32 count;
+
+		/*
+		 * The databook explicitly mentions for a device-initiated
+		 * disconnect sequence, the SW needs to ensure that it ends any
+		 * active transfers.
+		 */
+		dwc3_stop_active_transfers(dwc);
+		__dwc3_gadget_stop(dwc);
+
+		count = dwc3_readl(dwc->regs, DWC3_GEVNTCOUNT(0));
+		count &= DWC3_GEVNTCOUNT_MASK;
+		if (count > 0) {
+			dwc3_writel(dwc->regs, DWC3_GEVNTCOUNT(0), count);
+			dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
+						dwc->ev_buf->length;
+		}
+	}
+
 	ret = dwc3_gadget_run_stop(dwc, is_on, false);
 	spin_unlock_irqrestore(&dwc->lock, flags);
+	enable_irq(dwc->irq_gadget);
 
 	return ret;
 }
@@ -3100,6 +3145,8 @@  static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
 	}
 
 	dwc3_reset_gadget(dwc);
+	/* Stop any active/pending transfers when receiving bus reset */
+	dwc3_stop_active_transfers(dwc);
 
 	reg = dwc3_readl(dwc->regs, DWC3_DCTL);
 	reg &= ~DWC3_DCTL_TSTCTRL_MASK;