diff mbox series

[v14,3/6] usb: dwc3: Resize TX FIFOs to meet EP bursting requirements

Message ID 1625908395-5498-4-git-send-email-wcheng@codeaurora.org (mailing list archive)
State Not Applicable, archived
Headers show
Series Re-introduce TX FIFO resize for larger EP bursting | expand

Commit Message

Wesley Cheng July 10, 2021, 9:13 a.m. UTC
Some devices have USB compositions which may require multiple endpoints
that support EP bursting.  HW defined TX FIFO sizes may not always be
sufficient for these compositions.  By utilizing flexible TX FIFO
allocation, this allows for endpoints to request the required FIFO depth to
achieve higher bandwidth.  With some higher bMaxBurst configurations, using
a larger TX FIFO size results in better TX throughput.

By introducing the check_config() callback, the resizing logic can fetch
the maximum number of endpoints used in the USB composition (can contain
multiple configurations), which helps ensure that the resizing logic can
fulfill the configuration(s), or return an error to the gadget layer
otherwise during bind time.

Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
---
 drivers/usb/dwc3/core.c   |  15 +++
 drivers/usb/dwc3/core.h   |  16 ++++
 drivers/usb/dwc3/ep0.c    |   2 +
 drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 265 insertions(+)

Comments

Thinh Nguyen July 12, 2021, 11:38 p.m. UTC | #1
Hi,

Wesley Cheng wrote:
> Some devices have USB compositions which may require multiple endpoints
> that support EP bursting.  HW defined TX FIFO sizes may not always be
> sufficient for these compositions.  By utilizing flexible TX FIFO
> allocation, this allows for endpoints to request the required FIFO depth to
> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
> a larger TX FIFO size results in better TX throughput.
> 
> By introducing the check_config() callback, the resizing logic can fetch
> the maximum number of endpoints used in the USB composition (can contain
> multiple configurations), which helps ensure that the resizing logic can
> fulfill the configuration(s), or return an error to the gadget layer
> otherwise during bind time.
> 
> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
> ---
>  drivers/usb/dwc3/core.c   |  15 +++
>  drivers/usb/dwc3/core.h   |  16 ++++
>  drivers/usb/dwc3/ep0.c    |   2 +
>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 265 insertions(+)
> 
> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
> index ba74ad7..b194aecd 100644
> --- a/drivers/usb/dwc3/core.c
> +++ b/drivers/usb/dwc3/core.c
> @@ -1267,6 +1267,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>  	u8			rx_max_burst_prd;
>  	u8			tx_thr_num_pkt_prd;
>  	u8			tx_max_burst_prd;
> +	u8			tx_fifo_resize_max_num;
>  	const char		*usb_psy_name;
>  	int			ret;
>  
> @@ -1282,6 +1283,13 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>  	 */
>  	hird_threshold = 12;
>  
> +	/*
> +	 * default to a TXFIFO size large enough to fit 6 max packets.  This
> +	 * allows for systems with larger bus latencies to have some headroom
> +	 * for endpoints that have a large bMaxBurst value.
> +	 */
> +	tx_fifo_resize_max_num = 6;
> +
>  	dwc->maximum_speed = usb_get_maximum_speed(dev);
>  	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
>  	dwc->dr_mode = usb_get_dr_mode(dev);
> @@ -1325,6 +1333,11 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>  				&tx_thr_num_pkt_prd);
>  	device_property_read_u8(dev, "snps,tx-max-burst-prd",
>  				&tx_max_burst_prd);
> +	dwc->do_fifo_resize = device_property_read_bool(dev,
> +							"tx-fifo-resize");
> +	if (dwc->do_fifo_resize)
> +		device_property_read_u8(dev, "tx-fifo-max-num",
> +					&tx_fifo_resize_max_num);

Why is this check here? The dwc->tx_fifo_resize_max_num should store
whatever property the user sets. Whether the driver wants to use this
property should depend on "dwc->do_fifo_resize". Also, why don't we have
the "snps," prefix to be consistent with the other properties?

Can we enforce a single property? If the designer wants to enable
this feature, he/she can provide the tx-fifo-max-num. This would
simplify the driver a bit. Since this is to optimize for performance,
the user should know/want/test the specific value they want to set
for their setup, rather than hoping that the default setting does not
break their setup. So we can remove the "do_fifo_resize" property and
just check whether tx_fifo_resize_max_num is set.

>  
>  	dwc->disable_scramble_quirk = device_property_read_bool(dev,
>  				"snps,disable_scramble_quirk");
> @@ -1390,6 +1403,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>  	dwc->tx_max_burst_prd = tx_max_burst_prd;
>  
>  	dwc->imod_interval = 0;
> +
> +	dwc->tx_fifo_resize_max_num = tx_fifo_resize_max_num;

See comment above

>  }
>  
>  /* check whether the core supports IMOD */
> diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
> index dccdf13..735e9be 100644
> --- a/drivers/usb/dwc3/core.h
> +++ b/drivers/usb/dwc3/core.h
> @@ -1023,6 +1023,7 @@ struct dwc3_scratchpad_array {
>   * @rx_max_burst_prd: max periodic ESS receive burst size
>   * @tx_thr_num_pkt_prd: periodic ESS transmit packet count
>   * @tx_max_burst_prd: max periodic ESS transmit burst size
> + * @tx_fifo_resize_max_num: max number of fifos allocated during txfifo resize
>   * @hsphy_interface: "utmi" or "ulpi"
>   * @connected: true when we're connected to a host, false otherwise
>   * @delayed_status: true when gadget driver asks for delayed status
> @@ -1037,6 +1038,7 @@ struct dwc3_scratchpad_array {
>   *	1	- utmi_l1_suspend_n
>   * @is_fpga: true when we are using the FPGA board
>   * @pending_events: true when we have pending IRQs to be handled
> + * @do_fifo_resize: true when txfifo resizing is enabled for dwc3 endpoints
>   * @pullups_connected: true when Run/Stop bit is set
>   * @setup_packet_pending: true when there's a Setup Packet in FIFO. Workaround
>   * @three_stage_setup: set if we perform a three phase setup
> @@ -1079,6 +1081,11 @@ struct dwc3_scratchpad_array {
>   * @dis_split_quirk: set to disable split boundary.
>   * @imod_interval: set the interrupt moderation interval in 250ns
>   *			increments or 0 to disable.
> + * @max_cfg_eps: current max number of IN eps used across all USB configs.
> + * @last_fifo_depth: last fifo depth used to determine next fifo ram start
> + *		     address.
> + * @num_ep_resized: carries the current number endpoints which have had its tx
> + *		    fifo resized.
>   */
>  struct dwc3 {
>  	struct work_struct	drd_work;
> @@ -1233,6 +1240,7 @@ struct dwc3 {
>  	u8			rx_max_burst_prd;
>  	u8			tx_thr_num_pkt_prd;
>  	u8			tx_max_burst_prd;
> +	u8			tx_fifo_resize_max_num;
>  
>  	const char		*hsphy_interface;
>  
> @@ -1246,6 +1254,7 @@ struct dwc3 {
>  	unsigned		is_utmi_l1_suspend:1;
>  	unsigned		is_fpga:1;
>  	unsigned		pending_events:1;
> +	unsigned		do_fifo_resize:1;
>  	unsigned		pullups_connected:1;
>  	unsigned		setup_packet_pending:1;
>  	unsigned		three_stage_setup:1;
> @@ -1281,6 +1290,10 @@ struct dwc3 {
>  	unsigned		dis_split_quirk:1;
>  
>  	u16			imod_interval;
> +
> +	int			max_cfg_eps;
> +	int			last_fifo_depth;
> +	int			num_ep_resized;
>  };
>  
>  #define INCRX_BURST_MODE 0
> @@ -1512,6 +1525,7 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
>  		struct dwc3_gadget_ep_cmd_params *params);
>  int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned int cmd,
>  		u32 param);
> +void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc);
>  #else
>  static inline int dwc3_gadget_init(struct dwc3 *dwc)
>  { return 0; }
> @@ -1531,6 +1545,8 @@ static inline int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
>  static inline int dwc3_send_gadget_generic_command(struct dwc3 *dwc,
>  		int cmd, u32 param)
>  { return 0; }
> +static inline void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc)
> +{ }
>  #endif
>  
>  #if IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE)
> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
> index 3cd2942..d28d085 100644
> --- a/drivers/usb/dwc3/ep0.c
> +++ b/drivers/usb/dwc3/ep0.c
> @@ -619,6 +619,8 @@ static int dwc3_ep0_set_config(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
>  		return -EINVAL;
>  
>  	case USB_STATE_ADDRESS:
> +		dwc3_gadget_clear_tx_fifos(dwc);
> +
>  		ret = dwc3_ep0_delegate_req(dwc, ctrl);
>  		/* if the cfg matches and the cfg is non zero */
>  		if (cfg && (!ret || (ret == USB_GADGET_DELAYED_STATUS))) {
> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
> index af6d7f1..e56f1a6 100644
> --- a/drivers/usb/dwc3/gadget.c
> +++ b/drivers/usb/dwc3/gadget.c
> @@ -632,6 +632,187 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,
>  		bool interrupt);
>  
>  /**
> + * dwc3_gadget_calc_tx_fifo_size - calculates the txfifo size value
> + * @dwc: pointer to the DWC3 context
> + * @nfifos: number of fifos to calculate for
> + *
> + * Calculates the size value based on the equation below:
> + *
> + * DWC3 revision 280A and prior:
> + * fifo_size = mult * (max_packet / mdwidth) + 1;
> + *
> + * DWC3 revision 290A and onwards:
> + * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
> + *
> + * The max packet size is set to 1024, as the txfifo requirements mainly apply
> + * to super speed USB use cases.  However, it is safe to overestimate the fifo
> + * allocations for other scenarios, i.e. high speed USB.
> + */
> +static int dwc3_gadget_calc_tx_fifo_size(struct dwc3 *dwc, int mult)
> +{
> +	int max_packet = 1024;
> +	int fifo_size;
> +	int mdwidth;
> +
> +	mdwidth = dwc3_mdwidth(dwc);
> +
> +	/* MDWIDTH is represented in bits, we need it in bytes */
> +	mdwidth >>= 3;
> +
> +	if (DWC3_VER_IS_PRIOR(DWC3, 290A))
> +		fifo_size = mult * (max_packet / mdwidth) + 1;
> +	else
> +		fifo_size = mult * ((max_packet + mdwidth) / mdwidth) + 1;
> +	return fifo_size;
> +}
> +
> +/**
> + * dwc3_gadget_clear_tx_fifo_size - Clears txfifo allocation
> + * @dwc: pointer to the DWC3 context
> + *
> + * Iterates through all the endpoint registers and clears the previous txfifo
> + * allocations.
> + */
> +void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc)
> +{
> +	struct dwc3_ep *dep;
> +	int fifo_depth;
> +	int size;
> +	int num;
> +
> +	if (!dwc->do_fifo_resize)
> +		return;
> +
> +	/* Read ep0IN related TXFIFO size */
> +	dep = dwc->eps[1];
> +	size = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(0));
> +	if (DWC3_IP_IS(DWC3))
> +		fifo_depth = DWC3_GTXFIFOSIZ_TXFDEP(size);
> +	else
> +		fifo_depth = DWC31_GTXFIFOSIZ_TXFDEP(size);
> +
> +	dwc->last_fifo_depth = fifo_depth;
> +	/* Clear existing TXFIFO for all IN eps except ep0 */
> +	for (num = 3; num < min_t(int, dwc->num_eps, DWC3_ENDPOINTS_NUM);
> +	     num += 2) {
> +		dep = dwc->eps[num];
> +		/* Don't change TXFRAMNUM on usb31 version */
> +		size = DWC3_IP_IS(DWC3) ? 0 :
> +			dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1)) &
> +				   DWC31_GTXFIFOSIZ_TXFRAMNUM;
> +
> +		dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1), size);
> +	}
> +	dwc->num_ep_resized = 0;
> +}
> +
> +/*
> + * dwc3_gadget_resize_tx_fifos - reallocate fifo spaces for current use-case
> + * @dwc: pointer to our context structure
> + *
> + * This function will a best effort FIFO allocation in order
> + * to improve FIFO usage and throughput, while still allowing
> + * us to enable as many endpoints as possible.
> + *
> + * Keep in mind that this operation will be highly dependent
> + * on the configured size for RAM1 - which contains TxFifo -,
> + * the amount of endpoints enabled on coreConsultant tool, and
> + * the width of the Master Bus.
> + *
> + * In general, FIFO depths are represented with the following equation:
> + *
> + * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
> + *
> + * In conjunction with dwc3_gadget_check_config(), this resizing logic will
> + * ensure that all endpoints will have enough internal memory for one max
> + * packet per endpoint.
> + */
> +static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep)
> +{
> +	struct dwc3 *dwc = dep->dwc;
> +	int fifo_0_start;
> +	int ram1_depth;
> +	int fifo_size;
> +	int min_depth;
> +	int num_in_ep;
> +	int remaining;
> +	int num_fifos = 1;
> +	int fifo;
> +	int tmp;
> +
> +	if (!dwc->do_fifo_resize)
> +		return 0;
> +
> +	/* resize IN endpoints except ep0 */
> +	if (!usb_endpoint_dir_in(dep->endpoint.desc) || dep->number <= 1)
> +		return 0;

> +
> +	ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7);
> +
> +	if ((dep->endpoint.maxburst > 1 &&
> +	     usb_endpoint_xfer_bulk(dep->endpoint.desc)) ||
> +	    usb_endpoint_xfer_isoc(dep->endpoint.desc))
> +		num_fifos = 3;
> +
> +	if (dep->endpoint.maxburst > 6 &&
> +	    usb_endpoint_xfer_bulk(dep->endpoint.desc) && DWC3_IP_IS(DWC31))
> +		num_fifos = dwc->tx_fifo_resize_max_num;

Why only bulk? Isoc should be at least equal to bulk, if not more.
Also, make this applicable to DWC_usb32.

> +
> +	/* FIFO size for a single buffer */
> +	fifo = dwc3_gadget_calc_tx_fifo_size(dwc, 1);
> +
> +	/* Calculate the number of remaining EPs w/o any FIFO */
> +	num_in_ep = dwc->max_cfg_eps;
> +	num_in_ep -= dwc->num_ep_resized;

Does this calculation account for multiple alt-setting interfaces?

> +
> +	/* Reserve at least one FIFO for the number of IN EPs */
> +	min_depth = num_in_ep * (fifo + 1);
> +	remaining = ram1_depth - min_depth - dwc->last_fifo_depth;
> +	remaining = max_t(int, 0, remaining);

Add blank line

> +	/*
> +	 * We've already reserved 1 FIFO per EP, so check what we can fit in
> +	 * addition to it.  If there is not enough remaining space, allocate
> +	 * all the remaining space to the EP.
> +	 */
> +	fifo_size = (num_fifos - 1) * fifo;
> +	if (remaining < fifo_size)
> +		fifo_size = remaining;
> +
> +	fifo_size += fifo;
> +	/* Last increment according to the TX FIFO size equation */
> +	fifo_size++;
> +
> +	/* Check if TXFIFOs start at non-zero addr */
> +	tmp = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(0));
> +	fifo_0_start = DWC3_GTXFIFOSIZ_TXFSTADDR(tmp);
> +
> +	fifo_size |= (fifo_0_start + (dwc->last_fifo_depth << 16));
> +	if (DWC3_IP_IS(DWC3))
> +		dwc->last_fifo_depth += DWC3_GTXFIFOSIZ_TXFDEP(fifo_size);
> +	else
> +		dwc->last_fifo_depth += DWC31_GTXFIFOSIZ_TXFDEP(fifo_size);
> +
> +	/* Check fifo size allocation doesn't exceed available RAM size. */
> +	if (dwc->last_fifo_depth >= ram1_depth) {
> +		dev_err(dwc->dev, "Fifosize(%d) > RAM size(%d) %s depth:%d\n",
> +			dwc->last_fifo_depth, ram1_depth,
> +			dep->endpoint.name, fifo_size);
> +		if (DWC3_IP_IS(DWC3))
> +			fifo_size = DWC3_GTXFIFOSIZ_TXFDEP(fifo_size);
> +		else
> +			fifo_size = DWC31_GTXFIFOSIZ_TXFDEP(fifo_size);
> +
> +		dwc->last_fifo_depth -= fifo_size;
> +		return -ENOMEM;
> +	}
> +
> +	dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(dep->number >> 1), fifo_size);
> +	dwc->num_ep_resized++;
> +
> +	return 0;
> +}
> +
> +/**
>   * __dwc3_gadget_ep_enable - initializes a hw endpoint
>   * @dep: endpoint to be initialized
>   * @action: one of INIT, MODIFY or RESTORE
> @@ -648,6 +829,10 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, unsigned int action)
>  	int			ret;
>  
>  	if (!(dep->flags & DWC3_EP_ENABLED)) {
> +		ret = dwc3_gadget_resize_tx_fifos(dep);
> +		if (ret)
> +			return ret;
> +
>  		ret = dwc3_gadget_start_config(dep);
>  		if (ret)
>  			return ret;
> @@ -2498,6 +2683,7 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
>  
>  	spin_lock_irqsave(&dwc->lock, flags);
>  	dwc->gadget_driver	= NULL;
> +	dwc->max_cfg_eps = 0;
>  	spin_unlock_irqrestore(&dwc->lock, flags);
>  
>  	free_irq(dwc->irq_gadget, dwc->ev_buf);
> @@ -2585,6 +2771,51 @@ static int dwc3_gadget_vbus_draw(struct usb_gadget *g, unsigned int mA)
>  	return ret;
>  }
>  
> +/**
> + * dwc3_gadget_check_config - ensure dwc3 can support the USB configuration
> + * @g: pointer to the USB gadget
> + *
> + * Used to record the maximum number of endpoints being used in a USB composite
> + * device. (across all configurations)  This is to be used in the calculation
> + * of the TXFIFO sizes when resizing internal memory for individual endpoints.
> + * It will help ensured that the resizing logic reserves enough space for at
> + * least one max packet.
> + */
> +static int dwc3_gadget_check_config(struct usb_gadget *g)
> +{
> +	struct dwc3 *dwc = gadget_to_dwc(g);
> +	struct usb_ep *ep;
> +	int fifo_size = 0;
> +	int ram1_depth;
> +	int ep_num = 0;
> +
> +	if (!dwc->do_fifo_resize)
> +		return 0;
> +
> +	list_for_each_entry(ep, &g->ep_list, ep_list) {
> +		/* Only interested in the IN endpoints */
> +		if (ep->claimed && (ep->address & USB_DIR_IN))
> +			ep_num++;
> +	}
> +
> +	if (ep_num <= dwc->max_cfg_eps)
> +		return 0;
> +
> +	/* Update the max number of eps in the composition */
> +	dwc->max_cfg_eps = ep_num;
> +
> +	fifo_size = dwc3_gadget_calc_tx_fifo_size(dwc, dwc->max_cfg_eps);
> +	/* Based on the equation, increment by one for every ep */
> +	fifo_size += dwc->max_cfg_eps;
> +
> +	/* Check if we can fit a single fifo per endpoint */
> +	ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7);
> +	if (fifo_size > ram1_depth)
> +		return -ENOMEM;
> +
> +	return 0;
> +}
> +
>  static const struct usb_gadget_ops dwc3_gadget_ops = {
>  	.get_frame		= dwc3_gadget_get_frame,
>  	.wakeup			= dwc3_gadget_wakeup,
> @@ -2596,6 +2827,7 @@ static const struct usb_gadget_ops dwc3_gadget_ops = {
>  	.udc_set_ssp_rate	= dwc3_gadget_set_ssp_rate,
>  	.get_config_params	= dwc3_gadget_config_params,
>  	.vbus_draw		= dwc3_gadget_vbus_draw,
> +	.check_config		= dwc3_gadget_check_config,
>  };
>  
>  /* -------------------------------------------------------------------------- */
> 

Btw, I'm a little busy at the moment and can't review all the
calculations in the patch in detail. I'll comment on what I can. Thanks
for the work and the patches.

BR,
Thinh
Felipe Balbi July 13, 2021, 8:39 a.m. UTC | #2
Hi,

Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
> Wesley Cheng wrote:
>> Some devices have USB compositions which may require multiple endpoints
>> that support EP bursting.  HW defined TX FIFO sizes may not always be
>> sufficient for these compositions.  By utilizing flexible TX FIFO
>> allocation, this allows for endpoints to request the required FIFO depth to
>> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
>> a larger TX FIFO size results in better TX throughput.
>> 
>> By introducing the check_config() callback, the resizing logic can fetch
>> the maximum number of endpoints used in the USB composition (can contain
>> multiple configurations), which helps ensure that the resizing logic can
>> fulfill the configuration(s), or return an error to the gadget layer
>> otherwise during bind time.
>> 
>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>> ---
>>  drivers/usb/dwc3/core.c   |  15 +++
>>  drivers/usb/dwc3/core.h   |  16 ++++
>>  drivers/usb/dwc3/ep0.c    |   2 +
>>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
>>  4 files changed, 265 insertions(+)
>> 
>> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
>> index ba74ad7..b194aecd 100644
>> --- a/drivers/usb/dwc3/core.c
>> +++ b/drivers/usb/dwc3/core.c
>> @@ -1267,6 +1267,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>  	u8			rx_max_burst_prd;
>>  	u8			tx_thr_num_pkt_prd;
>>  	u8			tx_max_burst_prd;
>> +	u8			tx_fifo_resize_max_num;
>>  	const char		*usb_psy_name;
>>  	int			ret;
>>  
>> @@ -1282,6 +1283,13 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>  	 */
>>  	hird_threshold = 12;
>>  
>> +	/*
>> +	 * default to a TXFIFO size large enough to fit 6 max packets.  This
>> +	 * allows for systems with larger bus latencies to have some headroom
>> +	 * for endpoints that have a large bMaxBurst value.
>> +	 */
>> +	tx_fifo_resize_max_num = 6;
>> +
>>  	dwc->maximum_speed = usb_get_maximum_speed(dev);
>>  	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
>>  	dwc->dr_mode = usb_get_dr_mode(dev);
>> @@ -1325,6 +1333,11 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>  				&tx_thr_num_pkt_prd);
>>  	device_property_read_u8(dev, "snps,tx-max-burst-prd",
>>  				&tx_max_burst_prd);
>> +	dwc->do_fifo_resize = device_property_read_bool(dev,
>> +							"tx-fifo-resize");
>> +	if (dwc->do_fifo_resize)
>> +		device_property_read_u8(dev, "tx-fifo-max-num",
>> +					&tx_fifo_resize_max_num);
>
> Why is this check here? The dwc->tx_fifo_resize_max_num should store
> whatever property the user sets. Whether the driver wants to use this

Ack!

> property should depend on "dwc->do_fifo_resize". Also why don't we have
> "snps," prefix to be consistent with the other properties?

Ack!

> Can we enforce to a single property? If the designer wants to enable
> this feature, he/she can to provide the tx-fifo-max-num. This would
> simplify the driver a bit. Since this is to optimize for performance,
> the user should know/want/test the specific value if they want to set
> for their setup and not hoping that the default setting not break their
> setup. So we can remove the "do_fifo_resize" property and just check
> whether tx_fifo_resize_max_num is set.

Ack!

All very valid points :-)

>> diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
>> index dccdf13..735e9be 100644
>> --- a/drivers/usb/dwc3/core.h
>> +++ b/drivers/usb/dwc3/core.h
>> @@ -1023,6 +1023,7 @@ struct dwc3_scratchpad_array {
>>   * @rx_max_burst_prd: max periodic ESS receive burst size
>>   * @tx_thr_num_pkt_prd: periodic ESS transmit packet count
>>   * @tx_max_burst_prd: max periodic ESS transmit burst size
>> + * @tx_fifo_resize_max_num: max number of fifos allocated during txfifo resize
>>   * @hsphy_interface: "utmi" or "ulpi"
>>   * @connected: true when we're connected to a host, false otherwise
>>   * @delayed_status: true when gadget driver asks for delayed status
>> @@ -1037,6 +1038,7 @@ struct dwc3_scratchpad_array {
>>   *	1	- utmi_l1_suspend_n
>>   * @is_fpga: true when we are using the FPGA board
>>   * @pending_events: true when we have pending IRQs to be handled
>> + * @do_fifo_resize: true when txfifo resizing is enabled for dwc3 endpoints
>>   * @pullups_connected: true when Run/Stop bit is set
>>   * @setup_packet_pending: true when there's a Setup Packet in FIFO. Workaround
>>   * @three_stage_setup: set if we perform a three phase setup
>> @@ -1079,6 +1081,11 @@ struct dwc3_scratchpad_array {
>>   * @dis_split_quirk: set to disable split boundary.
>>   * @imod_interval: set the interrupt moderation interval in 250ns
>>   *			increments or 0 to disable.
>> + * @max_cfg_eps: current max number of IN eps used across all USB configs.
>> + * @last_fifo_depth: last fifo depth used to determine next fifo ram start
>> + *		     address.
>> + * @num_ep_resized: carries the current number endpoints which have had its tx
>> + *		    fifo resized.
>>   */
>>  struct dwc3 {
>>  	struct work_struct	drd_work;
>> @@ -1233,6 +1240,7 @@ struct dwc3 {
>>  	u8			rx_max_burst_prd;
>>  	u8			tx_thr_num_pkt_prd;
>>  	u8			tx_max_burst_prd;
>> +	u8			tx_fifo_resize_max_num;
>>  
>>  	const char		*hsphy_interface;
>>  
>> @@ -1246,6 +1254,7 @@ struct dwc3 {
>>  	unsigned		is_utmi_l1_suspend:1;
>>  	unsigned		is_fpga:1;
>>  	unsigned		pending_events:1;
>> +	unsigned		do_fifo_resize:1;
>>  	unsigned		pullups_connected:1;
>>  	unsigned		setup_packet_pending:1;
>>  	unsigned		three_stage_setup:1;
>> @@ -1281,6 +1290,10 @@ struct dwc3 {
>>  	unsigned		dis_split_quirk:1;
>>  
>>  	u16			imod_interval;
>> +
>> +	int			max_cfg_eps;
>> +	int			last_fifo_depth;
>> +	int			num_ep_resized;
>>  };
>>  
>>  #define INCRX_BURST_MODE 0
>> @@ -1512,6 +1525,7 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
>>  		struct dwc3_gadget_ep_cmd_params *params);
>>  int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned int cmd,
>>  		u32 param);
>> +void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc);
>>  #else
>>  static inline int dwc3_gadget_init(struct dwc3 *dwc)
>>  { return 0; }
>> @@ -1531,6 +1545,8 @@ static inline int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
>>  static inline int dwc3_send_gadget_generic_command(struct dwc3 *dwc,
>>  		int cmd, u32 param)
>>  { return 0; }
>> +static inline void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc)
>> +{ }
>>  #endif
>>  
>>  #if IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE)
>> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
>> index 3cd2942..d28d085 100644
>> --- a/drivers/usb/dwc3/ep0.c
>> +++ b/drivers/usb/dwc3/ep0.c
>> @@ -619,6 +619,8 @@ static int dwc3_ep0_set_config(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
>>  		return -EINVAL;
>>  
>>  	case USB_STATE_ADDRESS:
>> +		dwc3_gadget_clear_tx_fifos(dwc);
>> +
>>  		ret = dwc3_ep0_delegate_req(dwc, ctrl);
>>  		/* if the cfg matches and the cfg is non zero */
>>  		if (cfg && (!ret || (ret == USB_GADGET_DELAYED_STATUS))) {
>> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
>> index af6d7f1..e56f1a6 100644
>> --- a/drivers/usb/dwc3/gadget.c
>> +++ b/drivers/usb/dwc3/gadget.c
>> @@ -632,6 +632,187 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,
>>  		bool interrupt);
>>  
>>  /**
>> + * dwc3_gadget_calc_tx_fifo_size - calculates the txfifo size value
>> + * @dwc: pointer to the DWC3 context
>> + * @nfifos: number of fifos to calculate for
>> + *
>> + * Calculates the size value based on the equation below:
>> + *
>> + * DWC3 revision 280A and prior:
>> + * fifo_size = mult * (max_packet / mdwidth) + 1;
>> + *
>> + * DWC3 revision 290A and onwards:
>> + * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
>> + *
>> + * The max packet size is set to 1024, as the txfifo requirements mainly apply
>> + * to super speed USB use cases.  However, it is safe to overestimate the fifo
>> + * allocations for other scenarios, i.e. high speed USB.
>> + */
>> +static int dwc3_gadget_calc_tx_fifo_size(struct dwc3 *dwc, int mult)
>> +{
>> +	int max_packet = 1024;
>> +	int fifo_size;
>> +	int mdwidth;
>> +
>> +	mdwidth = dwc3_mdwidth(dwc);
>> +
>> +	/* MDWIDTH is represented in bits, we need it in bytes */
>> +	mdwidth >>= 3;
>> +
>> +	if (DWC3_VER_IS_PRIOR(DWC3, 290A))
>> +		fifo_size = mult * (max_packet / mdwidth) + 1;
>> +	else
>> +		fifo_size = mult * ((max_packet + mdwidth) / mdwidth) + 1;
>> +	return fifo_size;
>> +}
>> +
>> +/**
>> + * dwc3_gadget_clear_tx_fifo_size - Clears txfifo allocation
>> + * @dwc: pointer to the DWC3 context
>> + *
>> + * Iterates through all the endpoint registers and clears the previous txfifo
>> + * allocations.
>> + */
>> +void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc)
>> +{
>> +	struct dwc3_ep *dep;
>> +	int fifo_depth;
>> +	int size;
>> +	int num;
>> +
>> +	if (!dwc->do_fifo_resize)
>> +		return;
>> +
>> +	/* Read ep0IN related TXFIFO size */
>> +	dep = dwc->eps[1];
>> +	size = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(0));
>> +	if (DWC3_IP_IS(DWC3))
>> +		fifo_depth = DWC3_GTXFIFOSIZ_TXFDEP(size);
>> +	else
>> +		fifo_depth = DWC31_GTXFIFOSIZ_TXFDEP(size);
>> +
>> +	dwc->last_fifo_depth = fifo_depth;
>> +	/* Clear existing TXFIFO for all IN eps except ep0 */
>> +	for (num = 3; num < min_t(int, dwc->num_eps, DWC3_ENDPOINTS_NUM);
>> +	     num += 2) {
>> +		dep = dwc->eps[num];
>> +		/* Don't change TXFRAMNUM on usb31 version */
>> +		size = DWC3_IP_IS(DWC3) ? 0 :
>> +			dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1)) &
>> +				   DWC31_GTXFIFOSIZ_TXFRAMNUM;
>> +
>> +		dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1), size);
>> +	}
>> +	dwc->num_ep_resized = 0;
>> +}
>> +
>> +/*
>> + * dwc3_gadget_resize_tx_fifos - reallocate fifo spaces for current use-case
>> + * @dwc: pointer to our context structure
>> + *
>> + * This function will a best effort FIFO allocation in order
>> + * to improve FIFO usage and throughput, while still allowing
>> + * us to enable as many endpoints as possible.
>> + *
>> + * Keep in mind that this operation will be highly dependent
>> + * on the configured size for RAM1 - which contains TxFifo -,
>> + * the amount of endpoints enabled on coreConsultant tool, and
>> + * the width of the Master Bus.
>> + *
>> + * In general, FIFO depths are represented with the following equation:
>> + *
>> + * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
>> + *
>> + * In conjunction with dwc3_gadget_check_config(), this resizing logic will
>> + * ensure that all endpoints will have enough internal memory for one max
>> + * packet per endpoint.
>> + */
>> +static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep)
>> +{
>> +	struct dwc3 *dwc = dep->dwc;
>> +	int fifo_0_start;
>> +	int ram1_depth;
>> +	int fifo_size;
>> +	int min_depth;
>> +	int num_in_ep;
>> +	int remaining;
>> +	int num_fifos = 1;
>> +	int fifo;
>> +	int tmp;
>> +
>> +	if (!dwc->do_fifo_resize)
>> +		return 0;
>> +
>> +	/* resize IN endpoints except ep0 */
>> +	if (!usb_endpoint_dir_in(dep->endpoint.desc) || dep->number <= 1)
>> +		return 0;
>
>> +
>> +	ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7);
>> +
>> +	if ((dep->endpoint.maxburst > 1 &&
>> +	     usb_endpoint_xfer_bulk(dep->endpoint.desc)) ||
>> +	    usb_endpoint_xfer_isoc(dep->endpoint.desc))
>> +		num_fifos = 3;
>> +
>> +	if (dep->endpoint.maxburst > 6 &&
>> +	    usb_endpoint_xfer_bulk(dep->endpoint.desc) && DWC3_IP_IS(DWC31))
>> +		num_fifos = dwc->tx_fifo_resize_max_num;
>
> Why only bulk? Isoc should be at least equal or more than bulk.
> Also, make this applicable to DWC_usb32 also.

This should be applicable to all DWC3 versions, no? DWC_usb3, DWC_usb31 and DWC_usb32.

>> @@ -648,6 +829,10 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, unsigned int action)
>>  	int			ret;
>>  
>>  	if (!(dep->flags & DWC3_EP_ENABLED)) {
>> +		ret = dwc3_gadget_resize_tx_fifos(dep);
>> +		if (ret)
>> +			return ret;
>> +
>>  		ret = dwc3_gadget_start_config(dep);
>>  		if (ret)
>>  			return ret;
>> @@ -2498,6 +2683,7 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
>>  
>>  	spin_lock_irqsave(&dwc->lock, flags);
>>  	dwc->gadget_driver	= NULL;
>> +	dwc->max_cfg_eps = 0;
>>  	spin_unlock_irqrestore(&dwc->lock, flags);
>>  
>>  	free_irq(dwc->irq_gadget, dwc->ev_buf);
>> @@ -2585,6 +2771,51 @@ static int dwc3_gadget_vbus_draw(struct usb_gadget *g, unsigned int mA)
>>  	return ret;
>>  }
>>  
>> +/**
>> + * dwc3_gadget_check_config - ensure dwc3 can support the USB configuration
>> + * @g: pointer to the USB gadget
>> + *
>> + * Used to record the maximum number of endpoints being used in a USB composite
>> + * device. (across all configurations)  This is to be used in the calculation
>> + * of the TXFIFO sizes when resizing internal memory for individual endpoints.
>> + * It will help ensured that the resizing logic reserves enough space for at
>> + * least one max packet.
>> + */
>> +static int dwc3_gadget_check_config(struct usb_gadget *g)

Might be better to pass the actual config being used. And, instead of
check_config, perhaps validate_config is a little more fitting.
Thinh Nguyen July 14, 2021, 2:58 a.m. UTC | #3
Felipe Balbi wrote:
> 
> Hi,
> 
> Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
>> Wesley Cheng wrote:
>>> Some devices have USB compositions which may require multiple endpoints
>>> that support EP bursting.  HW defined TX FIFO sizes may not always be
>>> sufficient for these compositions.  By utilizing flexible TX FIFO
>>> allocation, this allows for endpoints to request the required FIFO depth to
>>> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
>>> a larger TX FIFO size results in better TX throughput.
>>>
>>> By introducing the check_config() callback, the resizing logic can fetch
>>> the maximum number of endpoints used in the USB composition (can contain
>>> multiple configurations), which helps ensure that the resizing logic can
>>> fulfill the configuration(s), or return an error to the gadget layer
>>> otherwise during bind time.
>>>
>>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>>> ---
>>>  drivers/usb/dwc3/core.c   |  15 +++
>>>  drivers/usb/dwc3/core.h   |  16 ++++
>>>  drivers/usb/dwc3/ep0.c    |   2 +
>>>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
>>>  4 files changed, 265 insertions(+)
>>>

<snip>

>>> +/*
>>> + * dwc3_gadget_resize_tx_fifos - reallocate fifo spaces for current use-case
>>> + * @dwc: pointer to our context structure
>>> + *
>>> + * This function will do a best effort FIFO allocation in order
>>> + * to improve FIFO usage and throughput, while still allowing
>>> + * us to enable as many endpoints as possible.
>>> + *
>>> + * Keep in mind that this operation will be highly dependent
>>> + * on the configured size for RAM1 - which contains TxFifo -,
>>> + * the amount of endpoints enabled on coreConsultant tool, and
>>> + * the width of the Master Bus.
>>> + *
>>> + * In general, FIFO depths are represented with the following equation:
>>> + *
>>> + * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
>>> + *
>>> + * In conjunction with dwc3_gadget_check_config(), this resizing logic will
>>> + * ensure that all endpoints will have enough internal memory for one max
>>> + * packet per endpoint.
>>> + */
>>> +static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep)
>>> +{
>>> +	struct dwc3 *dwc = dep->dwc;
>>> +	int fifo_0_start;
>>> +	int ram1_depth;
>>> +	int fifo_size;
>>> +	int min_depth;
>>> +	int num_in_ep;
>>> +	int remaining;
>>> +	int num_fifos = 1;
>>> +	int fifo;
>>> +	int tmp;
>>> +
>>> +	if (!dwc->do_fifo_resize)
>>> +		return 0;
>>> +
>>> +	/* resize IN endpoints except ep0 */
>>> +	if (!usb_endpoint_dir_in(dep->endpoint.desc) || dep->number <= 1)
>>> +		return 0;
>>
>>> +
>>> +	ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7);
>>> +
>>> +	if ((dep->endpoint.maxburst > 1 &&
>>> +	     usb_endpoint_xfer_bulk(dep->endpoint.desc)) ||
>>> +	    usb_endpoint_xfer_isoc(dep->endpoint.desc))
>>> +		num_fifos = 3;
>>> +
>>> +	if (dep->endpoint.maxburst > 6 &&
>>> +	    usb_endpoint_xfer_bulk(dep->endpoint.desc) && DWC3_IP_IS(DWC31))
>>> +		num_fifos = dwc->tx_fifo_resize_max_num;
>>
>> Why only bulk? Isoc should be at least equal or more than bulk.
>> Also, make this applicable to DWC_usb32 also.
> 
> this should be applicable to all DWC3 versions, no? dwc3, 31 and 32.
> 

Yes.

BR,
Thinh
Thinh Nguyen July 14, 2021, 2:59 a.m. UTC | #4
Thinh Nguyen wrote:
> Hi,
> 
> Wesley Cheng wrote:
>> Some devices have USB compositions which may require multiple endpoints
>> that support EP bursting.  HW defined TX FIFO sizes may not always be
>> sufficient for these compositions.  By utilizing flexible TX FIFO
>> allocation, this allows for endpoints to request the required FIFO depth to
>> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
>> a larger TX FIFO size results in better TX throughput.
>>
>> By introducing the check_config() callback, the resizing logic can fetch
>> the maximum number of endpoints used in the USB composition (can contain
>> multiple configurations), which helps ensure that the resizing logic can
>> fulfill the configuration(s), or return an error to the gadget layer
>> otherwise during bind time.
>>
>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>> ---
>>  drivers/usb/dwc3/core.c   |  15 +++
>>  drivers/usb/dwc3/core.h   |  16 ++++
>>  drivers/usb/dwc3/ep0.c    |   2 +
>>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
>>  4 files changed, 265 insertions(+)
>>
>> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
>> index ba74ad7..b194aecd 100644
>> --- a/drivers/usb/dwc3/core.c
>> +++ b/drivers/usb/dwc3/core.c
>> @@ -1267,6 +1267,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>  	u8			rx_max_burst_prd;
>>  	u8			tx_thr_num_pkt_prd;
>>  	u8			tx_max_burst_prd;
>> +	u8			tx_fifo_resize_max_num;
>>  	const char		*usb_psy_name;
>>  	int			ret;
>>  
>> @@ -1282,6 +1283,13 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>  	 */
>>  	hird_threshold = 12;
>>  
>> +	/*
>> +	 * default to a TXFIFO size large enough to fit 6 max packets.  This
>> +	 * allows for systems with larger bus latencies to have some headroom
>> +	 * for endpoints that have a large bMaxBurst value.
>> +	 */
>> +	tx_fifo_resize_max_num = 6;
>> +
>>  	dwc->maximum_speed = usb_get_maximum_speed(dev);
>>  	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
>>  	dwc->dr_mode = usb_get_dr_mode(dev);
>> @@ -1325,6 +1333,11 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>  				&tx_thr_num_pkt_prd);
>>  	device_property_read_u8(dev, "snps,tx-max-burst-prd",
>>  				&tx_max_burst_prd);
>> +	dwc->do_fifo_resize = device_property_read_bool(dev,
>> +							"tx-fifo-resize");
>> +	if (dwc->do_fifo_resize)
>> +		device_property_read_u8(dev, "tx-fifo-max-num",
>> +					&tx_fifo_resize_max_num);
> 
> Why is this check here? The dwc->tx_fifo_resize_max_num should store
> whatever property the user sets. Whether the driver wants to use this
> property should depend on "dwc->do_fifo_resize". Also why don't we have
> "snps," prefix to be consistent with the other properties?
> 
> Can we enforce a single property? If the designer wants to enable
> this feature, he/she can provide the tx-fifo-max-num. This would
> simplify the driver a bit. Since this is to optimize for performance,
> the user should know/want/test the specific value they want to set
> for their setup, rather than hoping that the default setting does not
> break their setup. So we can remove the "do_fifo_resize" property and
> just check whether tx_fifo_resize_max_num is set.
> 
>>  
>>  	dwc->disable_scramble_quirk = device_property_read_bool(dev,
>>  				"snps,disable_scramble_quirk");
>> @@ -1390,6 +1403,8 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>  	dwc->tx_max_burst_prd = tx_max_burst_prd;
>>  
>>  	dwc->imod_interval = 0;
>> +
>> +	dwc->tx_fifo_resize_max_num = tx_fifo_resize_max_num;
> 
> See comment above
> 
>>  }
>>  
>>  /* check whether the core supports IMOD */
>> diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
>> index dccdf13..735e9be 100644
>> --- a/drivers/usb/dwc3/core.h
>> +++ b/drivers/usb/dwc3/core.h
>> @@ -1023,6 +1023,7 @@ struct dwc3_scratchpad_array {
>>   * @rx_max_burst_prd: max periodic ESS receive burst size
>>   * @tx_thr_num_pkt_prd: periodic ESS transmit packet count
>>   * @tx_max_burst_prd: max periodic ESS transmit burst size
>> + * @tx_fifo_resize_max_num: max number of fifos allocated during txfifo resize
>>   * @hsphy_interface: "utmi" or "ulpi"
>>   * @connected: true when we're connected to a host, false otherwise
>>   * @delayed_status: true when gadget driver asks for delayed status
>> @@ -1037,6 +1038,7 @@ struct dwc3_scratchpad_array {
>>   *	1	- utmi_l1_suspend_n
>>   * @is_fpga: true when we are using the FPGA board
>>   * @pending_events: true when we have pending IRQs to be handled
>> + * @do_fifo_resize: true when txfifo resizing is enabled for dwc3 endpoints
>>   * @pullups_connected: true when Run/Stop bit is set
>>   * @setup_packet_pending: true when there's a Setup Packet in FIFO. Workaround
>>   * @three_stage_setup: set if we perform a three phase setup
>> @@ -1079,6 +1081,11 @@ struct dwc3_scratchpad_array {
>>   * @dis_split_quirk: set to disable split boundary.
>>   * @imod_interval: set the interrupt moderation interval in 250ns
>>   *			increments or 0 to disable.
>> + * @max_cfg_eps: current max number of IN eps used across all USB configs.
>> + * @last_fifo_depth: last fifo depth used to determine next fifo ram start
>> + *		     address.
>> + * @num_ep_resized: carries the current number endpoints which have had its tx
>> + *		    fifo resized.
>>   */
>>  struct dwc3 {
>>  	struct work_struct	drd_work;
>> @@ -1233,6 +1240,7 @@ struct dwc3 {
>>  	u8			rx_max_burst_prd;
>>  	u8			tx_thr_num_pkt_prd;
>>  	u8			tx_max_burst_prd;
>> +	u8			tx_fifo_resize_max_num;
>>  
>>  	const char		*hsphy_interface;
>>  
>> @@ -1246,6 +1254,7 @@ struct dwc3 {
>>  	unsigned		is_utmi_l1_suspend:1;
>>  	unsigned		is_fpga:1;
>>  	unsigned		pending_events:1;
>> +	unsigned		do_fifo_resize:1;
>>  	unsigned		pullups_connected:1;
>>  	unsigned		setup_packet_pending:1;
>>  	unsigned		three_stage_setup:1;
>> @@ -1281,6 +1290,10 @@ struct dwc3 {
>>  	unsigned		dis_split_quirk:1;
>>  
>>  	u16			imod_interval;
>> +
>> +	int			max_cfg_eps;
>> +	int			last_fifo_depth;
>> +	int			num_ep_resized;
>>  };
>>  
>>  #define INCRX_BURST_MODE 0
>> @@ -1512,6 +1525,7 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
>>  		struct dwc3_gadget_ep_cmd_params *params);
>>  int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned int cmd,
>>  		u32 param);
>> +void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc);
>>  #else
>>  static inline int dwc3_gadget_init(struct dwc3 *dwc)
>>  { return 0; }
>> @@ -1531,6 +1545,8 @@ static inline int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
>>  static inline int dwc3_send_gadget_generic_command(struct dwc3 *dwc,
>>  		int cmd, u32 param)
>>  { return 0; }
>> +static inline void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc)
>> +{ }
>>  #endif
>>  
>>  #if IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE)
>> diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
>> index 3cd2942..d28d085 100644
>> --- a/drivers/usb/dwc3/ep0.c
>> +++ b/drivers/usb/dwc3/ep0.c
>> @@ -619,6 +619,8 @@ static int dwc3_ep0_set_config(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
>>  		return -EINVAL;
>>  
>>  	case USB_STATE_ADDRESS:
>> +		dwc3_gadget_clear_tx_fifos(dwc);
>> +
>>  		ret = dwc3_ep0_delegate_req(dwc, ctrl);
>>  		/* if the cfg matches and the cfg is non zero */
>>  		if (cfg && (!ret || (ret == USB_GADGET_DELAYED_STATUS))) {
>> diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
>> index af6d7f1..e56f1a6 100644
>> --- a/drivers/usb/dwc3/gadget.c
>> +++ b/drivers/usb/dwc3/gadget.c
>> @@ -632,6 +632,187 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,
>>  		bool interrupt);
>>  
>>  /**
>> + * dwc3_gadget_calc_tx_fifo_size - calculates the txfifo size value
>> + * @dwc: pointer to the DWC3 context
>> + * @nfifos: number of fifos to calculate for
>> + *
>> + * Calculates the size value based on the equation below:
>> + *
>> + * DWC3 revision 280A and prior:
>> + * fifo_size = mult * (max_packet / mdwidth) + 1;
>> + *
>> + * DWC3 revision 290A and onwards:
>> + * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
>> + *
>> + * The max packet size is set to 1024, as the txfifo requirements mainly apply
>> + * to super speed USB use cases.  However, it is safe to overestimate the fifo
>> + * allocations for other scenarios, i.e. high speed USB.
>> + */
>> +static int dwc3_gadget_calc_tx_fifo_size(struct dwc3 *dwc, int mult)

"mult" should be "nfifos" just as documented at the start of the function.

>> +{
>> +	int max_packet = 1024;
>> +	int fifo_size;
>> +	int mdwidth;
>> +
>> +	mdwidth = dwc3_mdwidth(dwc);

dwc3_mdwidth() returns u32 type, can we keep it consistent and use
unsigned types where it makes sense? (same for dwc->max_cfg_eps,
last_fifo_depth, and num_ep_resized)

Also, let's have this function return type u32 too. Usually int implies
it may return an error code.

>> +
>> +	/* MDWIDTH is represented in bits, we need it in bytes */
>> +	mdwidth >>= 3;

It's unlikely, but let's check if mdwidth is not 0.

>> +
>> +	if (DWC3_VER_IS_PRIOR(DWC3, 290A))
>> +		fifo_size = mult * (max_packet / mdwidth) + 1;
>> +	else
>> +		fifo_size = mult * ((max_packet + mdwidth) / mdwidth) + 1;

Please add blank line here.

>> +	return fifo_size;
>> +}
>> +
>> +/**
>> + * dwc3_gadget_clear_tx_fifos - Clears txfifo allocation
>> + * @dwc: pointer to the DWC3 context
>> + *
>> + * Iterates through all the endpoint registers and clears the previous txfifo
>> + * allocations.
>> + */
>> +void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc)
>> +{
>> +	struct dwc3_ep *dep;
>> +	int fifo_depth;
>> +	int size;
>> +	int num;
>> +
>> +	if (!dwc->do_fifo_resize)
>> +		return;
>> +
>> +	/* Read ep0IN related TXFIFO size */
>> +	dep = dwc->eps[1];
>> +	size = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(0));
>> +	if (DWC3_IP_IS(DWC3))
>> +		fifo_depth = DWC3_GTXFIFOSIZ_TXFDEP(size);
>> +	else
>> +		fifo_depth = DWC31_GTXFIFOSIZ_TXFDEP(size);
>> +
>> +	dwc->last_fifo_depth = fifo_depth;
>> +	/* Clear existing TXFIFO for all IN eps except ep0 */
>> +	for (num = 3; num < min_t(int, dwc->num_eps, DWC3_ENDPOINTS_NUM);
>> +	     num += 2) {
>> +		dep = dwc->eps[num];
>> +		/* Don't change TXFRAMNUM on usb31 version */
>> +		size = DWC3_IP_IS(DWC3) ? 0 :
>> +			dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1)) &
>> +				   DWC31_GTXFIFOSIZ_TXFRAMNUM;
>> +
>> +		dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1), size);
>> +	}
>> +	dwc->num_ep_resized = 0;
>> +}
>> +
>> +/*
>> + * dwc3_gadget_resize_tx_fifos - reallocate fifo spaces for current use-case
>> + * @dwc: pointer to our context structure
>> + *
>> + * This function will do a best effort FIFO allocation in order
>> + * to improve FIFO usage and throughput, while still allowing
>> + * us to enable as many endpoints as possible.
>> + *
>> + * Keep in mind that this operation will be highly dependent
>> + * on the configured size for RAM1 - which contains TxFifo -,
>> + * the amount of endpoints enabled on coreConsultant tool, and
>> + * the width of the Master Bus.
>> + *
>> + * In general, FIFO depths are represented with the following equation:
>> + *
>> + * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
>> + *
>> + * In conjunction with dwc3_gadget_check_config(), this resizing logic will
>> + * ensure that all endpoints will have enough internal memory for one max
>> + * packet per endpoint.
>> + */
>> +static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep)
>> +{
>> +	struct dwc3 *dwc = dep->dwc;
>> +	int fifo_0_start;
>> +	int ram1_depth;
>> +	int fifo_size;
>> +	int min_depth;
>> +	int num_in_ep;
>> +	int remaining;
>> +	int num_fifos = 1;
>> +	int fifo;
>> +	int tmp;
>> +
>> +	if (!dwc->do_fifo_resize)
>> +		return 0;
>> +
>> +	/* resize IN endpoints except ep0 */
>> +	if (!usb_endpoint_dir_in(dep->endpoint.desc) || dep->number <= 1)
>> +		return 0;
> 
>> +
>> +	ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7);
>> +
>> +	if ((dep->endpoint.maxburst > 1 &&
>> +	     usb_endpoint_xfer_bulk(dep->endpoint.desc)) ||
>> +	    usb_endpoint_xfer_isoc(dep->endpoint.desc))
>> +		num_fifos = 3;
>> +
>> +	if (dep->endpoint.maxburst > 6 &&
>> +	    usb_endpoint_xfer_bulk(dep->endpoint.desc) && DWC3_IP_IS(DWC31))
>> +		num_fifos = dwc->tx_fifo_resize_max_num;
> 
> Why only bulk? Isoc should be at least equal or more than bulk.
> Also, make this applicable to DWC_usb32 also.
> 
>> +
>> +	/* FIFO size for a single buffer */
>> +	fifo = dwc3_gadget_calc_tx_fifo_size(dwc, 1);
>> +
>> +	/* Calculate the number of remaining EPs w/o any FIFO */
>> +	num_in_ep = dwc->max_cfg_eps;
>> +	num_in_ep -= dwc->num_ep_resized;
> 
> Does this calculation account for multiple alt-setting interfaces?
> 
>> +
>> +	/* Reserve at least one FIFO for the number of IN EPs */
>> +	min_depth = num_in_ep * (fifo + 1);
>> +	remaining = ram1_depth - min_depth - dwc->last_fifo_depth;
>> +	remaining = max_t(int, 0, remaining);
> 
> Add blank line
> 
>> +	/*
>> +	 * We've already reserved 1 FIFO per EP, so check what we can fit in
>> +	 * addition to it.  If there is not enough remaining space, allocate
>> +	 * all the remaining space to the EP.
>> +	 */
>> +	fifo_size = (num_fifos - 1) * fifo;
>> +	if (remaining < fifo_size)
>> +		fifo_size = remaining;
>> +
>> +	fifo_size += fifo;
>> +	/* Last increment according to the TX FIFO size equation */
>> +	fifo_size++;
>> +
>> +	/* Check if TXFIFOs start at non-zero addr */
>> +	tmp = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(0));
>> +	fifo_0_start = DWC3_GTXFIFOSIZ_TXFSTADDR(tmp);
>> +
>> +	fifo_size |= (fifo_0_start + (dwc->last_fifo_depth << 16));
>> +	if (DWC3_IP_IS(DWC3))
>> +		dwc->last_fifo_depth += DWC3_GTXFIFOSIZ_TXFDEP(fifo_size);
>> +	else
>> +		dwc->last_fifo_depth += DWC31_GTXFIFOSIZ_TXFDEP(fifo_size);
>> +
>> +	/* Check fifo size allocation doesn't exceed available RAM size. */
>> +	if (dwc->last_fifo_depth >= ram1_depth) {
>> +		dev_err(dwc->dev, "Fifosize(%d) > RAM size(%d) %s depth:%d\n",
>> +			dwc->last_fifo_depth, ram1_depth,
>> +			dep->endpoint.name, fifo_size);
>> +		if (DWC3_IP_IS(DWC3))
>> +			fifo_size = DWC3_GTXFIFOSIZ_TXFDEP(fifo_size);
>> +		else
>> +			fifo_size = DWC31_GTXFIFOSIZ_TXFDEP(fifo_size);
>> +
>> +		dwc->last_fifo_depth -= fifo_size;
>> +		return -ENOMEM;
>> +	}
>> +
>> +	dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(dep->number >> 1), fifo_size);
>> +	dwc->num_ep_resized++;

I see this num_ep_resized gets incremented, but I don't see where it
gets decremented. Probably should be done in ep disable. This doesn't
seem to account for multiple alt-settings.

>> +
>> +	return 0;
>> +}
>> +
>> +/**
>>   * __dwc3_gadget_ep_enable - initializes a hw endpoint
>>   * @dep: endpoint to be initialized
>>   * @action: one of INIT, MODIFY or RESTORE
>> @@ -648,6 +829,10 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, unsigned int action)
>>  	int			ret;
>>  
>>  	if (!(dep->flags & DWC3_EP_ENABLED)) {
>> +		ret = dwc3_gadget_resize_tx_fifos(dep);
>> +		if (ret)
>> +			return ret;
>> +
>>  		ret = dwc3_gadget_start_config(dep);
>>  		if (ret)
>>  			return ret;
>> @@ -2498,6 +2683,7 @@ static int dwc3_gadget_stop(struct usb_gadget *g)
>>  
>>  	spin_lock_irqsave(&dwc->lock, flags);
>>  	dwc->gadget_driver	= NULL;
>> +	dwc->max_cfg_eps = 0;
>>  	spin_unlock_irqrestore(&dwc->lock, flags);
>>  
>>  	free_irq(dwc->irq_gadget, dwc->ev_buf);
>> @@ -2585,6 +2771,51 @@ static int dwc3_gadget_vbus_draw(struct usb_gadget *g, unsigned int mA)
>>  	return ret;
>>  }
>>  
>> +/**
>> + * dwc3_gadget_check_config - ensure dwc3 can support the USB configuration
>> + * @g: pointer to the USB gadget
>> + *
>> + * Used to record the maximum number of endpoints being used in a USB composite
>> + * device. (across all configurations)  This is to be used in the calculation
>> + * of the TXFIFO sizes when resizing internal memory for individual endpoints.
>> + * It will help ensure that the resizing logic reserves enough space for at
>> + * least one max packet.
>> + */
>> +static int dwc3_gadget_check_config(struct usb_gadget *g)
>> +{
>> +	struct dwc3 *dwc = gadget_to_dwc(g);
>> +	struct usb_ep *ep;
>> +	int fifo_size = 0;
>> +	int ram1_depth;
>> +	int ep_num = 0;
>> +
>> +	if (!dwc->do_fifo_resize)
>> +		return 0;
>> +
>> +	list_for_each_entry(ep, &g->ep_list, ep_list) {
>> +		/* Only interested in the IN endpoints */
>> +		if (ep->claimed && (ep->address & USB_DIR_IN))
>> +			ep_num++;
>> +	}
>> +
>> +	if (ep_num <= dwc->max_cfg_eps)
>> +		return 0;
>> +
>> +	/* Update the max number of eps in the composition */
>> +	dwc->max_cfg_eps = ep_num;
>> +
>> +	fifo_size = dwc3_gadget_calc_tx_fifo_size(dwc, dwc->max_cfg_eps);
>> +	/* Based on the equation, increment by one for every ep */
>> +	fifo_size += dwc->max_cfg_eps;
>> +
>> +	/* Check if we can fit a single fifo per endpoint */
>> +	ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7);
>> +	if (fifo_size > ram1_depth)
>> +		return -ENOMEM;

This seems fragile... Can we fall back to the default settings if the tx
fifo resize doesn't work? I think the function will fail to bind if this
fails, right?

>> +
>> +	return 0;
>> +}
>> +
>>  static const struct usb_gadget_ops dwc3_gadget_ops = {
>>  	.get_frame		= dwc3_gadget_get_frame,
>>  	.wakeup			= dwc3_gadget_wakeup,
>> @@ -2596,6 +2827,7 @@ static const struct usb_gadget_ops dwc3_gadget_ops = {
>>  	.udc_set_ssp_rate	= dwc3_gadget_set_ssp_rate,
>>  	.get_config_params	= dwc3_gadget_config_params,
>>  	.vbus_draw		= dwc3_gadget_vbus_draw,
>> +	.check_config		= dwc3_gadget_check_config,
>>  };
>>  
>>  /* -------------------------------------------------------------------------- */
>>

BR,
Thinh
Thinh Nguyen July 14, 2021, 3:10 a.m. UTC | #5
Felipe Balbi wrote:
> 
> Hi,
> 
> Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
>> Wesley Cheng wrote:
>>> Some devices have USB compositions which may require multiple endpoints
>>> that support EP bursting.  HW defined TX FIFO sizes may not always be
>>> sufficient for these compositions.  By utilizing flexible TX FIFO
>>> allocation, this allows for endpoints to request the required FIFO depth to
>>> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
>>> a larger TX FIFO size results in better TX throughput.
>>>
>>> By introducing the check_config() callback, the resizing logic can fetch
>>> the maximum number of endpoints used in the USB composition (can contain
>>> multiple configurations), which helps ensure that the resizing logic can
>>> fulfill the configuration(s), or return an error to the gadget layer
>>> otherwise during bind time.
>>>
>>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>>> ---
>>>  drivers/usb/dwc3/core.c   |  15 +++
>>>  drivers/usb/dwc3/core.h   |  16 ++++
>>>  drivers/usb/dwc3/ep0.c    |   2 +
>>>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
>>>  4 files changed, 265 insertions(+)
>>>
>>> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
>>> index ba74ad7..b194aecd 100644
>>> --- a/drivers/usb/dwc3/core.c
>>> +++ b/drivers/usb/dwc3/core.c
>>> @@ -1267,6 +1267,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>  	u8			rx_max_burst_prd;
>>>  	u8			tx_thr_num_pkt_prd;
>>>  	u8			tx_max_burst_prd;
>>> +	u8			tx_fifo_resize_max_num;
>>>  	const char		*usb_psy_name;
>>>  	int			ret;
>>>  
>>> @@ -1282,6 +1283,13 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>  	 */
>>>  	hird_threshold = 12;
>>>  
>>> +	/*
>>> +	 * default to a TXFIFO size large enough to fit 6 max packets.  This
>>> +	 * allows for systems with larger bus latencies to have some headroom
>>> +	 * for endpoints that have a large bMaxBurst value.
>>> +	 */
>>> +	tx_fifo_resize_max_num = 6;
>>> +
>>>  	dwc->maximum_speed = usb_get_maximum_speed(dev);
>>>  	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
>>>  	dwc->dr_mode = usb_get_dr_mode(dev);
>>> @@ -1325,6 +1333,11 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>  				&tx_thr_num_pkt_prd);
>>>  	device_property_read_u8(dev, "snps,tx-max-burst-prd",
>>>  				&tx_max_burst_prd);
>>> +	dwc->do_fifo_resize = device_property_read_bool(dev,
>>> +							"tx-fifo-resize");
>>> +	if (dwc->do_fifo_resize)
>>> +		device_property_read_u8(dev, "tx-fifo-max-num",
>>> +					&tx_fifo_resize_max_num);
>>
>> Why is this check here? The dwc->tx_fifo_resize_max_num should store
>> whatever property the user sets. Whether the driver wants to use this
> 
> Ack!
> 
>> property should depend on "dwc->do_fifo_resize". Also why don't we have
>> "snps," prefix to be consistent with the other properties?
> 
> Ack!
> 
>> Can we enforce a single property? If the designer wants to enable
>> this feature, he/she can provide the tx-fifo-max-num. This would
>> simplify the driver a bit. Since this is to optimize for performance,
>> the user should know/want/test the specific value they want to set
>> for their setup, rather than hoping that the default setting does not
>> break their setup. So we can remove the "do_fifo_resize" property and
>> just check whether tx_fifo_resize_max_num is set.
> 
> Ack!
> 
> All very valid points :-)
> 

Looks like this series already landed in Greg's testing branch. Not sure
how we usually handle this to address some of our concerns. Add fix
patches on top of Greg's testing branch?

BR,
Thinh
Felipe Balbi July 14, 2021, 6:40 a.m. UTC | #6
Hi,

Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
>> Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
>>> Wesley Cheng wrote:
>>>> Some devices have USB compositions which may require multiple endpoints
>>>> that support EP bursting.  HW defined TX FIFO sizes may not always be
>>>> sufficient for these compositions.  By utilizing flexible TX FIFO
>>>> allocation, this allows for endpoints to request the required FIFO depth to
>>>> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
>>>> a larger TX FIFO size results in better TX throughput.
>>>>
>>>> By introducing the check_config() callback, the resizing logic can fetch
>>>> the maximum number of endpoints used in the USB composition (can contain
>>>> multiple configurations), which helps ensure that the resizing logic can
>>>> fulfill the configuration(s), or return an error to the gadget layer
>>>> otherwise during bind time.
>>>>
>>>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>>>> ---
>>>>  drivers/usb/dwc3/core.c   |  15 +++
>>>>  drivers/usb/dwc3/core.h   |  16 ++++
>>>>  drivers/usb/dwc3/ep0.c    |   2 +
>>>>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
>>>>  4 files changed, 265 insertions(+)
>>>>
>>>> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
>>>> index ba74ad7..b194aecd 100644
>>>> --- a/drivers/usb/dwc3/core.c
>>>> +++ b/drivers/usb/dwc3/core.c
>>>> @@ -1267,6 +1267,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>>  	u8			rx_max_burst_prd;
>>>>  	u8			tx_thr_num_pkt_prd;
>>>>  	u8			tx_max_burst_prd;
>>>> +	u8			tx_fifo_resize_max_num;
>>>>  	const char		*usb_psy_name;
>>>>  	int			ret;
>>>>  
>>>> @@ -1282,6 +1283,13 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>>  	 */
>>>>  	hird_threshold = 12;
>>>>  
>>>> +	/*
>>>> +	 * default to a TXFIFO size large enough to fit 6 max packets.  This
>>>> +	 * allows for systems with larger bus latencies to have some headroom
>>>> +	 * for endpoints that have a large bMaxBurst value.
>>>> +	 */
>>>> +	tx_fifo_resize_max_num = 6;
>>>> +
>>>>  	dwc->maximum_speed = usb_get_maximum_speed(dev);
>>>>  	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
>>>>  	dwc->dr_mode = usb_get_dr_mode(dev);
>>>> @@ -1325,6 +1333,11 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>>  				&tx_thr_num_pkt_prd);
>>>>  	device_property_read_u8(dev, "snps,tx-max-burst-prd",
>>>>  				&tx_max_burst_prd);
>>>> +	dwc->do_fifo_resize = device_property_read_bool(dev,
>>>> +							"tx-fifo-resize");
>>>> +	if (dwc->do_fifo_resize)
>>>> +		device_property_read_u8(dev, "tx-fifo-max-num",
>>>> +					&tx_fifo_resize_max_num);
>>>
>>> Why is this check here? The dwc->tx_fifo_resize_max_num should store
>>> whatever property the user sets. Whether the driver wants to use this
>> 
>> Ack!
>> 
>>> property should depend on "dwc->do_fifo_resize". Also why don't we have
>>> "snps," prefix to be consistent with the other properties?
>> 
>> Ack!
>> 
>>> Can we enforce a single property? If the designer wants to enable
>>> this feature, he/she can provide the tx-fifo-max-num. This would
>>> simplify the driver a bit. Since this is to optimize for performance,
>>> the user should know/want/test the specific value they want to set
>>> for their setup, rather than hoping that the default setting does not
>>> break their setup. So we can remove the "do_fifo_resize" property and
>>> just check whether tx_fifo_resize_max_num is set.
>> 
>> Ack!
>> 
>> All very valid points :-)
>> 
>
> Looks like this series already landed in Greg's testing branch. Not sure
> how we usually handle this to address some of our concerns. Add fix
> patches on top of Greg's testing branch?

yup, no choice anymore :-(
Greg KH July 14, 2021, 6:51 a.m. UTC | #7
On Wed, Jul 14, 2021 at 03:10:25AM +0000, Thinh Nguyen wrote:
> Felipe Balbi wrote:
> > 
> > Hi,
> > 
> > Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
> >> Wesley Cheng wrote:
> >>> Some devices have USB compositions which may require multiple endpoints
> >>> that support EP bursting.  HW defined TX FIFO sizes may not always be
> >>> sufficient for these compositions.  By utilizing flexible TX FIFO
> >>> allocation, this allows for endpoints to request the required FIFO depth to
> >>> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
> >>> a larger TX FIFO size results in better TX throughput.
> >>>
> >>> By introducing the check_config() callback, the resizing logic can fetch
> >>> the maximum number of endpoints used in the USB composition (can contain
> >>> multiple configurations), which helps ensure that the resizing logic can
> >>> fulfill the configuration(s), or return an error to the gadget layer
> >>> otherwise during bind time.
> >>>
> >>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
> >>> ---
> >>>  drivers/usb/dwc3/core.c   |  15 +++
> >>>  drivers/usb/dwc3/core.h   |  16 ++++
> >>>  drivers/usb/dwc3/ep0.c    |   2 +
> >>>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
> >>>  4 files changed, 265 insertions(+)
> >>>
> >>> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
> >>> index ba74ad7..b194aecd 100644
> >>> --- a/drivers/usb/dwc3/core.c
> >>> +++ b/drivers/usb/dwc3/core.c
> >>> @@ -1267,6 +1267,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
> >>>  	u8			rx_max_burst_prd;
> >>>  	u8			tx_thr_num_pkt_prd;
> >>>  	u8			tx_max_burst_prd;
> >>> +	u8			tx_fifo_resize_max_num;
> >>>  	const char		*usb_psy_name;
> >>>  	int			ret;
> >>>  
> >>> @@ -1282,6 +1283,13 @@ static void dwc3_get_properties(struct dwc3 *dwc)
> >>>  	 */
> >>>  	hird_threshold = 12;
> >>>  
> >>> +	/*
> >>> +	 * default to a TXFIFO size large enough to fit 6 max packets.  This
> >>> +	 * allows for systems with larger bus latencies to have some headroom
> >>> +	 * for endpoints that have a large bMaxBurst value.
> >>> +	 */
> >>> +	tx_fifo_resize_max_num = 6;
> >>> +
> >>>  	dwc->maximum_speed = usb_get_maximum_speed(dev);
> >>>  	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
> >>>  	dwc->dr_mode = usb_get_dr_mode(dev);
> >>> @@ -1325,6 +1333,11 @@ static void dwc3_get_properties(struct dwc3 *dwc)
> >>>  				&tx_thr_num_pkt_prd);
> >>>  	device_property_read_u8(dev, "snps,tx-max-burst-prd",
> >>>  				&tx_max_burst_prd);
> >>> +	dwc->do_fifo_resize = device_property_read_bool(dev,
> >>> +							"tx-fifo-resize");
> >>> +	if (dwc->do_fifo_resize)
> >>> +		device_property_read_u8(dev, "tx-fifo-max-num",
> >>> +					&tx_fifo_resize_max_num);
> >>
> >> Why is this check here? The dwc->tx_fifo_resize_max_num should store
> >> whatever property the user sets. Whether the driver wants to use this
> > 
> > Ack!
> > 
> >> property should depend on "dwc->do_fifo_resize". Also why don't we have
> >> "snps," prefix to be consistent with the other properties?
> > 
> > Ack!
> > 
> >> Can we enforce a single property? If the designer wants to enable
> >> this feature, he/she can provide the tx-fifo-max-num. This would
> >> simplify the driver a bit. Since this is to optimize for performance,
> >> the user should know/want/test the specific value they want to set
> >> for their setup, rather than hoping that the default setting does not
> >> break their setup. So we can remove the "do_fifo_resize" property and
> >> just check whether tx_fifo_resize_max_num is set.
> > 
> > Ack!
> > 
> > All very valid points :-)
> > 
> 
> Looks like this series already landed in Greg's testing branch. Not sure
> how we usually handle this to address some of our concerns. Add fix
> patches on top of Greg's testing branch?

Yes please send patches on top of this series.

thanks,

greg k-h
Greg KH July 14, 2021, 6:52 a.m. UTC | #8
On Wed, Jul 14, 2021 at 09:40:13AM +0300, Felipe Balbi wrote:
> 
> Hi,
> 
> Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
> >> Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
> >>> Wesley Cheng wrote:
> >>>> Some devices have USB compositions which may require multiple endpoints
> >>>> that support EP bursting.  HW defined TX FIFO sizes may not always be
> >>>> sufficient for these compositions.  By utilizing flexible TX FIFO
> >>>> allocation, this allows for endpoints to request the required FIFO depth to
> >>>> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
> >>>> a larger TX FIFO size results in better TX throughput.
> >>>>
> >>>> By introducing the check_config() callback, the resizing logic can fetch
> >>>> the maximum number of endpoints used in the USB composition (can contain
> >>>> multiple configurations), which helps ensure that the resizing logic can
> >>>> fulfill the configuration(s), or return an error to the gadget layer
> >>>> otherwise during bind time.
> >>>>
> >>>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
> >>>> ---
> >>>>  drivers/usb/dwc3/core.c   |  15 +++
> >>>>  drivers/usb/dwc3/core.h   |  16 ++++
> >>>>  drivers/usb/dwc3/ep0.c    |   2 +
> >>>>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
> >>>>  4 files changed, 265 insertions(+)
> >>>>
> >>>> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
> >>>> index ba74ad7..b194aecd 100644
> >>>> --- a/drivers/usb/dwc3/core.c
> >>>> +++ b/drivers/usb/dwc3/core.c
> >>>> @@ -1267,6 +1267,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
> >>>>  	u8			rx_max_burst_prd;
> >>>>  	u8			tx_thr_num_pkt_prd;
> >>>>  	u8			tx_max_burst_prd;
> >>>> +	u8			tx_fifo_resize_max_num;
> >>>>  	const char		*usb_psy_name;
> >>>>  	int			ret;
> >>>>  
> >>>> @@ -1282,6 +1283,13 @@ static void dwc3_get_properties(struct dwc3 *dwc)
> >>>>  	 */
> >>>>  	hird_threshold = 12;
> >>>>  
> >>>> +	/*
> >>>> +	 * default to a TXFIFO size large enough to fit 6 max packets.  This
> >>>> +	 * allows for systems with larger bus latencies to have some headroom
> >>>> +	 * for endpoints that have a large bMaxBurst value.
> >>>> +	 */
> >>>> +	tx_fifo_resize_max_num = 6;
> >>>> +
> >>>>  	dwc->maximum_speed = usb_get_maximum_speed(dev);
> >>>>  	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
> >>>>  	dwc->dr_mode = usb_get_dr_mode(dev);
> >>>> @@ -1325,6 +1333,11 @@ static void dwc3_get_properties(struct dwc3 *dwc)
> >>>>  				&tx_thr_num_pkt_prd);
> >>>>  	device_property_read_u8(dev, "snps,tx-max-burst-prd",
> >>>>  				&tx_max_burst_prd);
> >>>> +	dwc->do_fifo_resize = device_property_read_bool(dev,
> >>>> +							"tx-fifo-resize");
> >>>> +	if (dwc->do_fifo_resize)
> >>>> +		device_property_read_u8(dev, "tx-fifo-max-num",
> >>>> +					&tx_fifo_resize_max_num);
> >>>
> >>> Why is this check here? The dwc->tx_fifo_resize_max_num should store
> >>> whatever property the user sets. Whether the driver wants to use this
> >> 
> >> Ack!
> >> 
> >>> property should depend on "dwc->do_fifo_resize". Also why don't we have
> >>> "snps," prefix to be consistent with the other properties?
> >> 
> >> Ack!
> >> 
> >>> Can we enforce to a single property? If the designer wants to enable
> >>> this feature, he/she can to provide the tx-fifo-max-num. This would
> >>> simplify the driver a bit. Since this is to optimize for performance,
> >>> the user should know/want/test the specific value if they want to set
> >>> for their setup and not hoping that the default setting not break their
> >>> setup. So we can remove the "do_fifo_resize" property and just check
> >>> whether tx_fifo_resize_max_num is set.
> >> 
> >> Ack!
> >> 
> >> All very valid points :-)
> >> 
> >
> > Looks like this series already landed in Greg's testing branch. Not sure
> > how we usually handle this to address some of our concerns. Add fix
> > patches on top of Greg's testing branch?
> 
> yup, no choice anymore :-(

I took these as they seemed correct.  If they need to be reverted,
that's fine I can do that.  But it looks like Wesley can just make some
simple changes on top of them to resolve the remaining issues, right?

thanks,

greg k-h
Wesley Cheng July 14, 2021, 7:30 a.m. UTC | #9
On 7/13/2021 11:40 PM, Felipe Balbi wrote:
> 
> Hi,
> 
> Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
>>> Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
>>>> Wesley Cheng wrote:
>>>>> Some devices have USB compositions which may require multiple endpoints
>>>>> that support EP bursting.  HW defined TX FIFO sizes may not always be
>>>>> sufficient for these compositions.  By utilizing flexible TX FIFO
>>>>> allocation, this allows for endpoints to request the required FIFO depth to
>>>>> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
>>>>> a larger TX FIFO size results in better TX throughput.
>>>>>
>>>>> By introducing the check_config() callback, the resizing logic can fetch
>>>>> the maximum number of endpoints used in the USB composition (can contain
>>>>> multiple configurations), which helps ensure that the resizing logic can
>>>>> fulfill the configuration(s), or return an error to the gadget layer
>>>>> otherwise during bind time.
>>>>>
>>>>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>>>>> ---
>>>>>  drivers/usb/dwc3/core.c   |  15 +++
>>>>>  drivers/usb/dwc3/core.h   |  16 ++++
>>>>>  drivers/usb/dwc3/ep0.c    |   2 +
>>>>>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
>>>>>  4 files changed, 265 insertions(+)
>>>>>
>>>>> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
>>>>> index ba74ad7..b194aecd 100644
>>>>> --- a/drivers/usb/dwc3/core.c
>>>>> +++ b/drivers/usb/dwc3/core.c
>>>>> @@ -1267,6 +1267,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>>>  	u8			rx_max_burst_prd;
>>>>>  	u8			tx_thr_num_pkt_prd;
>>>>>  	u8			tx_max_burst_prd;
>>>>> +	u8			tx_fifo_resize_max_num;
>>>>>  	const char		*usb_psy_name;
>>>>>  	int			ret;
>>>>>  
>>>>> @@ -1282,6 +1283,13 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>>>  	 */
>>>>>  	hird_threshold = 12;
>>>>>  
>>>>> +	/*
>>>>> +	 * default to a TXFIFO size large enough to fit 6 max packets.  This
>>>>> +	 * allows for systems with larger bus latencies to have some headroom
>>>>> +	 * for endpoints that have a large bMaxBurst value.
>>>>> +	 */
>>>>> +	tx_fifo_resize_max_num = 6;
>>>>> +
>>>>>  	dwc->maximum_speed = usb_get_maximum_speed(dev);
>>>>>  	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
>>>>>  	dwc->dr_mode = usb_get_dr_mode(dev);
>>>>> @@ -1325,6 +1333,11 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>>>  				&tx_thr_num_pkt_prd);
>>>>>  	device_property_read_u8(dev, "snps,tx-max-burst-prd",
>>>>>  				&tx_max_burst_prd);
>>>>> +	dwc->do_fifo_resize = device_property_read_bool(dev,
>>>>> +							"tx-fifo-resize");
>>>>> +	if (dwc->do_fifo_resize)
>>>>> +		device_property_read_u8(dev, "tx-fifo-max-num",
>>>>> +					&tx_fifo_resize_max_num);
>>>>
>>>> Why is this check here? The dwc->tx_fifo_resize_max_num should store
>>>> whatever property the user sets. Whether the driver wants to use this
>>>
>>> Ack!
>>>
>>>> property should depend on "dwc->do_fifo_resize". Also why don't we have
>>>> "snps," prefix to be consistent with the other properties?
>>>
>>> Ack!
>>>
>>>> Can we enforce to a single property? If the designer wants to enable
>>>> this feature, he/she can to provide the tx-fifo-max-num. This would
>>>> simplify the driver a bit. Since this is to optimize for performance,
>>>> the user should know/want/test the specific value if they want to set
>>>> for their setup and not hoping that the default setting not break their
>>>> setup. So we can remove the "do_fifo_resize" property and just check
>>>> whether tx_fifo_resize_max_num is set.
>>>
>>> Ack!
>>>
>>> All very valid points :-)
>>>

Hi Thinh/Felipe,

>>
>> Looks like this series already landed in Greg's testing branch. Not sure
>> how we usually handle this to address some of our concerns. Add fix
>> patches on top of Greg's testing branch?
> 
> yup, no choice anymore :-(
> 

Let me review your feedback, which had some good points.  We can add a
change addressing everything on top of what is merged on Greg's branch.
 Thanks for all the input!

Thanks
Wesley Cheng
Greg KH July 20, 2021, 11:41 a.m. UTC | #10
On Wed, Jul 14, 2021 at 12:30:07AM -0700, Wesley Cheng wrote:
> 
> 
> On 7/13/2021 11:40 PM, Felipe Balbi wrote:
> > 
> > Hi,
> > 
> > Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
> >>> Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
> >>>> Wesley Cheng wrote:
> >>>>> Some devices have USB compositions which may require multiple endpoints
> >>>>> that support EP bursting.  HW defined TX FIFO sizes may not always be
> >>>>> sufficient for these compositions.  By utilizing flexible TX FIFO
> >>>>> allocation, this allows for endpoints to request the required FIFO depth to
> >>>>> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
> >>>>> a larger TX FIFO size results in better TX throughput.
> >>>>>
> >>>>> By introducing the check_config() callback, the resizing logic can fetch
> >>>>> the maximum number of endpoints used in the USB composition (can contain
> >>>>> multiple configurations), which helps ensure that the resizing logic can
> >>>>> fulfill the configuration(s), or return an error to the gadget layer
> >>>>> otherwise during bind time.
> >>>>>
> >>>>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
> >>>>> ---
> >>>>>  drivers/usb/dwc3/core.c   |  15 +++
> >>>>>  drivers/usb/dwc3/core.h   |  16 ++++
> >>>>>  drivers/usb/dwc3/ep0.c    |   2 +
> >>>>>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
> >>>>>  4 files changed, 265 insertions(+)
> >>>>>
> >>>>> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
> >>>>> index ba74ad7..b194aecd 100644
> >>>>> --- a/drivers/usb/dwc3/core.c
> >>>>> +++ b/drivers/usb/dwc3/core.c
> >>>>> @@ -1267,6 +1267,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
> >>>>>  	u8			rx_max_burst_prd;
> >>>>>  	u8			tx_thr_num_pkt_prd;
> >>>>>  	u8			tx_max_burst_prd;
> >>>>> +	u8			tx_fifo_resize_max_num;
> >>>>>  	const char		*usb_psy_name;
> >>>>>  	int			ret;
> >>>>>  
> >>>>> @@ -1282,6 +1283,13 @@ static void dwc3_get_properties(struct dwc3 *dwc)
> >>>>>  	 */
> >>>>>  	hird_threshold = 12;
> >>>>>  
> >>>>> +	/*
> >>>>> +	 * default to a TXFIFO size large enough to fit 6 max packets.  This
> >>>>> +	 * allows for systems with larger bus latencies to have some headroom
> >>>>> +	 * for endpoints that have a large bMaxBurst value.
> >>>>> +	 */
> >>>>> +	tx_fifo_resize_max_num = 6;
> >>>>> +
> >>>>>  	dwc->maximum_speed = usb_get_maximum_speed(dev);
> >>>>>  	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
> >>>>>  	dwc->dr_mode = usb_get_dr_mode(dev);
> >>>>> @@ -1325,6 +1333,11 @@ static void dwc3_get_properties(struct dwc3 *dwc)
> >>>>>  				&tx_thr_num_pkt_prd);
> >>>>>  	device_property_read_u8(dev, "snps,tx-max-burst-prd",
> >>>>>  				&tx_max_burst_prd);
> >>>>> +	dwc->do_fifo_resize = device_property_read_bool(dev,
> >>>>> +							"tx-fifo-resize");
> >>>>> +	if (dwc->do_fifo_resize)
> >>>>> +		device_property_read_u8(dev, "tx-fifo-max-num",
> >>>>> +					&tx_fifo_resize_max_num);
> >>>>
> >>>> Why is this check here? The dwc->tx_fifo_resize_max_num should store
> >>>> whatever property the user sets. Whether the driver wants to use this
> >>>
> >>> Ack!
> >>>
> >>>> property should depend on "dwc->do_fifo_resize". Also why don't we have
> >>>> "snps," prefix to be consistent with the other properties?
> >>>
> >>> Ack!
> >>>
> >>>> Can we enforce to a single property? If the designer wants to enable
> >>>> this feature, he/she can to provide the tx-fifo-max-num. This would
> >>>> simplify the driver a bit. Since this is to optimize for performance,
> >>>> the user should know/want/test the specific value if they want to set
> >>>> for their setup and not hoping that the default setting not break their
> >>>> setup. So we can remove the "do_fifo_resize" property and just check
> >>>> whether tx_fifo_resize_max_num is set.
> >>>
> >>> Ack!
> >>>
> >>> All very valid points :-)
> >>>
> 
> Hi Thinh/Felipe,
> 
> >>
> >> Looks like this series already landed in Greg's testing branch. Not sure
> >> how we usually handle this to address some of our concerns. Add fix
> >> patches on top of Greg's testing branch?
> > 
> > yup, no choice anymore :-(
> > 
> 
> Let me review your feedback, which had some good points.  We can add a
> change addressing everything on top of what is merged on Greg's branch.

Any hint as to when these fixups will be sent?

thanks,

greg k-h
Wesley Cheng July 21, 2021, 6:23 a.m. UTC | #11
On 7/20/2021 4:41 AM, gregkh@linuxfoundation.org wrote:
> On Wed, Jul 14, 2021 at 12:30:07AM -0700, Wesley Cheng wrote:
>>
>>
>> On 7/13/2021 11:40 PM, Felipe Balbi wrote:
>>>
>>> Hi,
>>>
>>> Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
>>>>> Thinh Nguyen <Thinh.Nguyen@synopsys.com> writes:
>>>>>> Wesley Cheng wrote:
>>>>>>> Some devices have USB compositions which may require multiple endpoints
>>>>>>> that support EP bursting.  HW defined TX FIFO sizes may not always be
>>>>>>> sufficient for these compositions.  By utilizing flexible TX FIFO
>>>>>>> allocation, this allows for endpoints to request the required FIFO depth to
>>>>>>> achieve higher bandwidth.  With some higher bMaxBurst configurations, using
>>>>>>> a larger TX FIFO size results in better TX throughput.
>>>>>>>
>>>>>>> By introducing the check_config() callback, the resizing logic can fetch
>>>>>>> the maximum number of endpoints used in the USB composition (can contain
>>>>>>> multiple configurations), which helps ensure that the resizing logic can
>>>>>>> fulfill the configuration(s), or return an error to the gadget layer
>>>>>>> otherwise during bind time.
>>>>>>>
>>>>>>> Signed-off-by: Wesley Cheng <wcheng@codeaurora.org>
>>>>>>> ---
>>>>>>>  drivers/usb/dwc3/core.c   |  15 +++
>>>>>>>  drivers/usb/dwc3/core.h   |  16 ++++
>>>>>>>  drivers/usb/dwc3/ep0.c    |   2 +
>>>>>>>  drivers/usb/dwc3/gadget.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>  4 files changed, 265 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
>>>>>>> index ba74ad7..b194aecd 100644
>>>>>>> --- a/drivers/usb/dwc3/core.c
>>>>>>> +++ b/drivers/usb/dwc3/core.c
>>>>>>> @@ -1267,6 +1267,7 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>>>>>  	u8			rx_max_burst_prd;
>>>>>>>  	u8			tx_thr_num_pkt_prd;
>>>>>>>  	u8			tx_max_burst_prd;
>>>>>>> +	u8			tx_fifo_resize_max_num;
>>>>>>>  	const char		*usb_psy_name;
>>>>>>>  	int			ret;
>>>>>>>  
>>>>>>> @@ -1282,6 +1283,13 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>>>>>  	 */
>>>>>>>  	hird_threshold = 12;
>>>>>>>  
>>>>>>> +	/*
>>>>>>> +	 * default to a TXFIFO size large enough to fit 6 max packets.  This
>>>>>>> +	 * allows for systems with larger bus latencies to have some headroom
>>>>>>> +	 * for endpoints that have a large bMaxBurst value.
>>>>>>> +	 */
>>>>>>> +	tx_fifo_resize_max_num = 6;
>>>>>>> +
>>>>>>>  	dwc->maximum_speed = usb_get_maximum_speed(dev);
>>>>>>>  	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
>>>>>>>  	dwc->dr_mode = usb_get_dr_mode(dev);
>>>>>>> @@ -1325,6 +1333,11 @@ static void dwc3_get_properties(struct dwc3 *dwc)
>>>>>>>  				&tx_thr_num_pkt_prd);
>>>>>>>  	device_property_read_u8(dev, "snps,tx-max-burst-prd",
>>>>>>>  				&tx_max_burst_prd);
>>>>>>> +	dwc->do_fifo_resize = device_property_read_bool(dev,
>>>>>>> +							"tx-fifo-resize");
>>>>>>> +	if (dwc->do_fifo_resize)
>>>>>>> +		device_property_read_u8(dev, "tx-fifo-max-num",
>>>>>>> +					&tx_fifo_resize_max_num);
>>>>>>
>>>>>> Why is this check here? The dwc->tx_fifo_resize_max_num should store
>>>>>> whatever property the user sets. Whether the driver wants to use this
>>>>>
>>>>> Ack!
>>>>>
>>>>>> property should depend on "dwc->do_fifo_resize". Also why don't we have
>>>>>> "snps," prefix to be consistent with the other properties?
>>>>>
>>>>> Ack!
>>>>>
>>>>>> Can we enforce to a single property? If the designer wants to enable
>>>>>> this feature, he/she can to provide the tx-fifo-max-num. This would
>>>>>> simplify the driver a bit. Since this is to optimize for performance,
>>>>>> the user should know/want/test the specific value if they want to set
>>>>>> for their setup and not hoping that the default setting not break their
>>>>>> setup. So we can remove the "do_fifo_resize" property and just check
>>>>>> whether tx_fifo_resize_max_num is set.
>>>>>
>>>>> Ack!
>>>>>
>>>>> All very valid points :-)
>>>>>
>>
>> Hi Thinh/Felipe,
>>
>>>>
>>>> Looks like this series already landed in Greg's testing branch. Not sure
>>>> how we usually handle this to address some of our concerns. Add fix
>>>> patches on top of Greg's testing branch?
>>>
>>> yup, no choice anymore :-(
>>>
>>
>> Let me review your feedback, which had some good points.  We can add a
>> change addressing everything on top of what is merged on Greg's branch.

Hi Greg,

> 
> Any hint as to when these fixups will be sent?
> 

Will get something by mid-week next week.  Sorry have been occupied with
tasks on my end.

Thanks
Wesley Cheng
diff mbox series

Patch

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index ba74ad7..b194aecd 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1267,6 +1267,7 @@  static void dwc3_get_properties(struct dwc3 *dwc)
 	u8			rx_max_burst_prd;
 	u8			tx_thr_num_pkt_prd;
 	u8			tx_max_burst_prd;
+	u8			tx_fifo_resize_max_num;
 	const char		*usb_psy_name;
 	int			ret;
 
@@ -1282,6 +1283,13 @@  static void dwc3_get_properties(struct dwc3 *dwc)
 	 */
 	hird_threshold = 12;
 
+	/*
+	 * default to a TXFIFO size large enough to fit 6 max packets.  This
+	 * allows for systems with larger bus latencies to have some headroom
+	 * for endpoints that have a large bMaxBurst value.
+	 */
+	tx_fifo_resize_max_num = 6;
+
 	dwc->maximum_speed = usb_get_maximum_speed(dev);
 	dwc->max_ssp_rate = usb_get_maximum_ssp_rate(dev);
 	dwc->dr_mode = usb_get_dr_mode(dev);
@@ -1325,6 +1333,11 @@  static void dwc3_get_properties(struct dwc3 *dwc)
 				&tx_thr_num_pkt_prd);
 	device_property_read_u8(dev, "snps,tx-max-burst-prd",
 				&tx_max_burst_prd);
+	dwc->do_fifo_resize = device_property_read_bool(dev,
+							"tx-fifo-resize");
+	if (dwc->do_fifo_resize)
+		device_property_read_u8(dev, "tx-fifo-max-num",
+					&tx_fifo_resize_max_num);
 
 	dwc->disable_scramble_quirk = device_property_read_bool(dev,
 				"snps,disable_scramble_quirk");
@@ -1390,6 +1403,8 @@  static void dwc3_get_properties(struct dwc3 *dwc)
 	dwc->tx_max_burst_prd = tx_max_burst_prd;
 
 	dwc->imod_interval = 0;
+
+	dwc->tx_fifo_resize_max_num = tx_fifo_resize_max_num;
 }
 
 /* check whether the core supports IMOD */
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index dccdf13..735e9be 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -1023,6 +1023,7 @@  struct dwc3_scratchpad_array {
  * @rx_max_burst_prd: max periodic ESS receive burst size
  * @tx_thr_num_pkt_prd: periodic ESS transmit packet count
  * @tx_max_burst_prd: max periodic ESS transmit burst size
+ * @tx_fifo_resize_max_num: max number of fifos allocated during txfifo resize
  * @hsphy_interface: "utmi" or "ulpi"
  * @connected: true when we're connected to a host, false otherwise
  * @delayed_status: true when gadget driver asks for delayed status
@@ -1037,6 +1038,7 @@  struct dwc3_scratchpad_array {
  *	1	- utmi_l1_suspend_n
  * @is_fpga: true when we are using the FPGA board
  * @pending_events: true when we have pending IRQs to be handled
+ * @do_fifo_resize: true when txfifo resizing is enabled for dwc3 endpoints
  * @pullups_connected: true when Run/Stop bit is set
  * @setup_packet_pending: true when there's a Setup Packet in FIFO. Workaround
  * @three_stage_setup: set if we perform a three phase setup
@@ -1079,6 +1081,11 @@  struct dwc3_scratchpad_array {
  * @dis_split_quirk: set to disable split boundary.
  * @imod_interval: set the interrupt moderation interval in 250ns
  *			increments or 0 to disable.
+ * @max_cfg_eps: current max number of IN eps used across all USB configs.
+ * @last_fifo_depth: last fifo depth used to determine next fifo ram start
+ *		     address.
+ * @num_ep_resized: carries the current number of endpoints which have had
+ *		    their tx fifo resized.
  */
 struct dwc3 {
 	struct work_struct	drd_work;
@@ -1233,6 +1240,7 @@  struct dwc3 {
 	u8			rx_max_burst_prd;
 	u8			tx_thr_num_pkt_prd;
 	u8			tx_max_burst_prd;
+	u8			tx_fifo_resize_max_num;
 
 	const char		*hsphy_interface;
 
@@ -1246,6 +1254,7 @@  struct dwc3 {
 	unsigned		is_utmi_l1_suspend:1;
 	unsigned		is_fpga:1;
 	unsigned		pending_events:1;
+	unsigned		do_fifo_resize:1;
 	unsigned		pullups_connected:1;
 	unsigned		setup_packet_pending:1;
 	unsigned		three_stage_setup:1;
@@ -1281,6 +1290,10 @@  struct dwc3 {
 	unsigned		dis_split_quirk:1;
 
 	u16			imod_interval;
+
+	int			max_cfg_eps;
+	int			last_fifo_depth;
+	int			num_ep_resized;
 };
 
 #define INCRX_BURST_MODE 0
@@ -1512,6 +1525,7 @@  int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
 		struct dwc3_gadget_ep_cmd_params *params);
 int dwc3_send_gadget_generic_command(struct dwc3 *dwc, unsigned int cmd,
 		u32 param);
+void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc);
 #else
 static inline int dwc3_gadget_init(struct dwc3 *dwc)
 { return 0; }
@@ -1531,6 +1545,8 @@  static inline int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd,
 static inline int dwc3_send_gadget_generic_command(struct dwc3 *dwc,
 		int cmd, u32 param)
 { return 0; }
+static inline void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc)
+{ }
 #endif
 
 #if IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE)
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 3cd2942..d28d085 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -619,6 +619,8 @@  static int dwc3_ep0_set_config(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
 		return -EINVAL;
 
 	case USB_STATE_ADDRESS:
+		dwc3_gadget_clear_tx_fifos(dwc);
+
 		ret = dwc3_ep0_delegate_req(dwc, ctrl);
 		/* if the cfg matches and the cfg is non zero */
 		if (cfg && (!ret || (ret == USB_GADGET_DELAYED_STATUS))) {
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index af6d7f1..e56f1a6 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -632,6 +632,187 @@  static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,
 		bool interrupt);
 
 /**
+ * dwc3_gadget_calc_tx_fifo_size - calculates the txfifo size value
+ * @dwc: pointer to the DWC3 context
+ * @mult: number of fifos to calculate for
+ *
+ * Calculates and returns the size value based on the equation below:
+ *
+ * DWC3 revision 280A and prior:
+ * fifo_size = mult * (max_packet / mdwidth) + 1;
+ *
+ * DWC3 revision 290A and onwards (inner "+ 1" is left to callers -- confirm):
+ * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
+ *
+ * The max packet size is set to 1024, as the txfifo requirements mainly apply
+ * to super speed USB use cases.  However, it is safe to overestimate the fifo
+ * allocations for other scenarios, i.e. high speed USB.
+ */
+static int dwc3_gadget_calc_tx_fifo_size(struct dwc3 *dwc, int mult)
+{
+	int max_packet = 1024;
+	int fifo_size;
+	int mdwidth;
+
+	mdwidth = dwc3_mdwidth(dwc);
+
+	/* MDWIDTH is represented in bits, we need it in bytes */
+	mdwidth >>= 3;
+
+	if (DWC3_VER_IS_PRIOR(DWC3, 290A))
+		fifo_size = mult * (max_packet / mdwidth) + 1;
+	else
+		fifo_size = mult * ((max_packet + mdwidth) / mdwidth) + 1;
+	return fifo_size;
+}
+
+/**
+ * dwc3_gadget_clear_tx_fifos - Clears txfifo allocation
+ * @dwc: pointer to the DWC3 context
+ *
+ * Clears the previous txfifo allocations of all IN endpoints except ep0, and
+ * restarts the allocation bookkeeping from the ep0 IN fifo depth.
+ */
+void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc)
+{
+	struct dwc3_ep *dep;
+	int fifo_depth;
+	int size;
+	int num;
+
+	if (!dwc->do_fifo_resize)
+		return;
+
+	/* Read ep0IN related TXFIFO size */
+	dep = dwc->eps[1];
+	size = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(0));
+	if (DWC3_IP_IS(DWC3))
+		fifo_depth = DWC3_GTXFIFOSIZ_TXFDEP(size);
+	else
+		fifo_depth = DWC31_GTXFIFOSIZ_TXFDEP(size);
+
+	dwc->last_fifo_depth = fifo_depth;
+	/* Clear existing TXFIFO for all IN eps except ep0 */
+	for (num = 3; num < min_t(int, dwc->num_eps, DWC3_ENDPOINTS_NUM);
+	     num += 2) {
+		dep = dwc->eps[num];
+		/* Don't change TXFRAMNUM on usb31 version */
+		size = DWC3_IP_IS(DWC3) ? 0 :
+			dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1)) &
+				   DWC31_GTXFIFOSIZ_TXFRAMNUM;
+
+		dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1), size);
+	}
+	dwc->num_ep_resized = 0;
+}
+
+/*
+ * dwc3_gadget_resize_tx_fifos - reallocate fifo spaces for current use-case
+ * @dep: endpoint whose TX fifo is to be resized
+ *
+ * This function will do a best effort FIFO allocation in order
+ * to improve FIFO usage and throughput, while still allowing
+ * us to enable as many endpoints as possible.
+ *
+ * Keep in mind that this operation will be highly dependent
+ * on the configured size for RAM1 - which contains TxFifo -,
+ * the number of endpoints enabled on coreConsultant tool, and
+ * the width of the Master Bus.
+ *
+ * In general, FIFO depths are represented with the following equation:
+ *
+ * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
+ *
+ * In conjunction with dwc3_gadget_check_config(), this resizing logic will
+ * ensure that all endpoints will have enough internal memory for one max
+ * packet per endpoint.
+ */
+static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep)
+{
+	struct dwc3 *dwc = dep->dwc;
+	int fifo_0_start;
+	int ram1_depth;
+	int fifo_size;
+	int min_depth;
+	int num_in_ep;
+	int remaining;
+	int num_fifos = 1;
+	int fifo;
+	int tmp;
+
+	if (!dwc->do_fifo_resize)
+		return 0;
+
+	/* resize IN endpoints except ep0 */
+	if (!usb_endpoint_dir_in(dep->endpoint.desc) || dep->number <= 1)
+		return 0;
+
+	ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7);
+
+	if ((dep->endpoint.maxburst > 1 &&
+	     usb_endpoint_xfer_bulk(dep->endpoint.desc)) ||
+	    usb_endpoint_xfer_isoc(dep->endpoint.desc))
+		num_fifos = 3;
+
+	if (dep->endpoint.maxburst > 6 &&
+	    usb_endpoint_xfer_bulk(dep->endpoint.desc) && DWC3_IP_IS(DWC31))
+		num_fifos = dwc->tx_fifo_resize_max_num;
+
+	/* FIFO size for a single buffer */
+	fifo = dwc3_gadget_calc_tx_fifo_size(dwc, 1);
+
+	/* Calculate the number of remaining EPs w/o any FIFO */
+	num_in_ep = dwc->max_cfg_eps;
+	num_in_ep -= dwc->num_ep_resized;
+
+	/* Reserve at least one FIFO for each of the remaining IN EPs */
+	min_depth = num_in_ep * (fifo + 1);
+	remaining = ram1_depth - min_depth - dwc->last_fifo_depth;
+	remaining = max_t(int, 0, remaining);
+	/*
+	 * We've already reserved 1 FIFO per EP, so check what we can fit in
+	 * addition to it.  If there is not enough remaining space, allocate
+	 * all the remaining space to the EP.
+	 */
+	fifo_size = (num_fifos - 1) * fifo;
+	if (remaining < fifo_size)
+		fifo_size = remaining;
+
+	fifo_size += fifo;
+	/* Last increment according to the TX FIFO size equation */
+	fifo_size++;
+
+	/* Check if TXFIFOs start at non-zero addr */
+	tmp = dwc3_readl(dwc->regs, DWC3_GTXFIFOSIZ(0));
+	fifo_0_start = DWC3_GTXFIFOSIZ_TXFSTADDR(tmp);
+
+	fifo_size |= (fifo_0_start + (dwc->last_fifo_depth << 16));
+	if (DWC3_IP_IS(DWC3))
+		dwc->last_fifo_depth += DWC3_GTXFIFOSIZ_TXFDEP(fifo_size);
+	else
+		dwc->last_fifo_depth += DWC31_GTXFIFOSIZ_TXFDEP(fifo_size);
+
+	/* Check fifo size allocation doesn't exceed available RAM size. */
+	if (dwc->last_fifo_depth >= ram1_depth) {
+		dev_err(dwc->dev, "Fifosize(%d) > RAM size(%d) %s depth:%d\n",
+			dwc->last_fifo_depth, ram1_depth,
+			dep->endpoint.name, fifo_size);
+		if (DWC3_IP_IS(DWC3))
+			fifo_size = DWC3_GTXFIFOSIZ_TXFDEP(fifo_size);
+		else
+			fifo_size = DWC31_GTXFIFOSIZ_TXFDEP(fifo_size);
+
+		dwc->last_fifo_depth -= fifo_size;
+		return -ENOMEM;
+	}
+
+	dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(dep->number >> 1), fifo_size);
+	dwc->num_ep_resized++;
+
+	return 0;
+}
+
+/**
  * __dwc3_gadget_ep_enable - initializes a hw endpoint
  * @dep: endpoint to be initialized
  * @action: one of INIT, MODIFY or RESTORE
@@ -648,6 +829,10 @@  static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep, unsigned int action)
 	int			ret;
 
 	if (!(dep->flags & DWC3_EP_ENABLED)) {
+		ret = dwc3_gadget_resize_tx_fifos(dep);
+		if (ret)
+			return ret;
+
 		ret = dwc3_gadget_start_config(dep);
 		if (ret)
 			return ret;
@@ -2498,6 +2683,7 @@  static int dwc3_gadget_stop(struct usb_gadget *g)
 
 	spin_lock_irqsave(&dwc->lock, flags);
 	dwc->gadget_driver	= NULL;
+	dwc->max_cfg_eps = 0;
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	free_irq(dwc->irq_gadget, dwc->ev_buf);
@@ -2585,6 +2771,51 @@  static int dwc3_gadget_vbus_draw(struct usb_gadget *g, unsigned int mA)
 	return ret;
 }
 
+/**
+ * dwc3_gadget_check_config - ensure dwc3 can support the USB configuration
+ * @g: pointer to the USB gadget
+ *
+ * Used to record the maximum number of IN endpoints being used in a USB
+ * composite device (across all configurations).  This is to be used in the
+ * calculation of the TXFIFO sizes when resizing internal memory for individual
+ * endpoints.  It will help ensure that the resizing logic reserves enough
+ * space for at least one max packet.
+ */
+static int dwc3_gadget_check_config(struct usb_gadget *g)
+{
+	struct dwc3 *dwc = gadget_to_dwc(g);
+	struct usb_ep *ep;
+	int fifo_size = 0;
+	int ram1_depth;
+	int ep_num = 0;
+
+	if (!dwc->do_fifo_resize)
+		return 0;
+
+	list_for_each_entry(ep, &g->ep_list, ep_list) {
+		/* Only interested in the IN endpoints */
+		if (ep->claimed && (ep->address & USB_DIR_IN))
+			ep_num++;
+	}
+
+	if (ep_num <= dwc->max_cfg_eps)
+		return 0;
+
+	/* Update the max number of eps in the composition */
+	dwc->max_cfg_eps = ep_num;
+
+	fifo_size = dwc3_gadget_calc_tx_fifo_size(dwc, dwc->max_cfg_eps);
+	/* Based on the equation, increment by one for every ep */
+	fifo_size += dwc->max_cfg_eps;
+
+	/* Check if we can fit a single fifo per endpoint */
+	ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7);
+	if (fifo_size > ram1_depth)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static const struct usb_gadget_ops dwc3_gadget_ops = {
 	.get_frame		= dwc3_gadget_get_frame,
 	.wakeup			= dwc3_gadget_wakeup,
@@ -2596,6 +2827,7 @@  static const struct usb_gadget_ops dwc3_gadget_ops = {
 	.udc_set_ssp_rate	= dwc3_gadget_set_ssp_rate,
 	.get_config_params	= dwc3_gadget_config_params,
 	.vbus_draw		= dwc3_gadget_vbus_draw,
+	.check_config		= dwc3_gadget_check_config,
 };
 
 /* -------------------------------------------------------------------------- */