diff mbox series

[v8,3/3] usb: typec: cros_ec_ucsi: Recover from write timeouts

Message ID 20241128232035.1525978-4-ukaszb@chromium.org (mailing list archive)
State Superseded
Headers show
Series usb: typec: Implement UCSI driver for ChromeOS | expand

Commit Message

Łukasz Bartosik Nov. 28, 2024, 11:20 p.m. UTC
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>

In a suspend-resume edge case, the OPM is timing out in ucsi_resume and
the PPM is getting stuck waiting for a command complete ack. Add a write
timeout recovery task that will get us out of this state.

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Łukasz Bartosik <ukaszb@chromium.org>
---
 drivers/usb/typec/ucsi/cros_ec_ucsi.c | 88 ++++++++++++++++++++++++++-
 1 file changed, 87 insertions(+), 1 deletion(-)

Comments

Heikki Krogerus Dec. 2, 2024, 2:06 p.m. UTC | #1
Hi Łukasz,

On Thu, Nov 28, 2024 at 11:20:35PM +0000, Łukasz Bartosik wrote:
> From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
> 
> In a suspend-resume edge case, the OPM is timing out in ucsi_resume and
> the PPM is getting stuck waiting for a command complete ack. Add a write
> timeout recovery task that will get us out of this state.

Sorry, I did not realise this before, but this is in practice a fix
(or workaround) to the driver that you just introduced in the previous
patch. Please merge it into that.

> Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
> Signed-off-by: Łukasz Bartosik <ukaszb@chromium.org>
> ---
>  drivers/usb/typec/ucsi/cros_ec_ucsi.c | 88 ++++++++++++++++++++++++++-
>  1 file changed, 87 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/usb/typec/ucsi/cros_ec_ucsi.c b/drivers/usb/typec/ucsi/cros_ec_ucsi.c
> index c588d9a57643..6daf61e7e62a 100644
> --- a/drivers/usb/typec/ucsi/cros_ec_ucsi.c
> +++ b/drivers/usb/typec/ucsi/cros_ec_ucsi.c
> @@ -7,6 +7,7 @@
>  
>  #include <linux/container_of.h>
>  #include <linux/dev_printk.h>
> +#include <linux/jiffies.h>
>  #include <linux/mod_devicetable.h>
>  #include <linux/module.h>
>  #include <linux/platform_data/cros_ec_commands.h>
> @@ -29,6 +30,9 @@
>   */
>  #define WRITE_TMO_MS		5000
>  
> +/* Number of times to attempt recovery from a write timeout before giving up. */
> +#define WRITE_TMO_CTR_MAX	5
> +
>  struct cros_ucsi_data {
>  	struct device *dev;
>  	struct ucsi *ucsi;
> @@ -36,6 +40,8 @@ struct cros_ucsi_data {
>  	struct cros_ec_device *ec;
>  	struct notifier_block nb;
>  	struct work_struct work;
> +	struct delayed_work write_tmo;
> +	int tmo_counter;
>  
>  	struct completion complete;
>  	unsigned long flags;
> @@ -99,12 +105,43 @@ static int cros_ucsi_async_control(struct ucsi *ucsi, u64 cmd)
>  	return 0;
>  }
>  
> +static int cros_ucsi_sync_control(struct ucsi *ucsi, u64 cmd)
> +{
> +	struct cros_ucsi_data *udata = ucsi_get_drvdata(ucsi);
> +	int ret;
> +
> +	ret = ucsi_sync_control_common(ucsi, cmd);
> +	if (ret)
> +		goto out;
> +
> +	/* Successful write. Cancel any pending recovery work. */
> +	cancel_delayed_work_sync(&udata->write_tmo);
> +
> +	return 0;
> +out:

This label looks a bit unnecessary to me. How about a switch statement?

	ret = ucsi_sync_control_common(ucsi, cmd);
        switch (ret) {
        case -EBUSY:
        case -ETIMEDOUT:
		cancel_delayed_work_sync(&udata->write_tmo);
		schedule_delayed_work(&udata->write_tmo, msecs_to_jiffies(WRITE_TMO_MS));
                break;
        case 0:
                cancel_delayed_work_sync(&udata->write_tmo);
                break;
        }

        return ret;

> +	/* EC may return -EBUSY if CCI.busy is set. Convert this to a timeout.
> +	 */
> +	if (ret == -EBUSY)
> +		ret = -ETIMEDOUT;
> +
> +	/* Schedule recovery attempt when we timeout or tried to send a command
> +	 * while still busy.
> +	 */
> +	if (ret == -ETIMEDOUT) {
> +		cancel_delayed_work_sync(&udata->write_tmo);
> +		schedule_delayed_work(&udata->write_tmo,
> +				      msecs_to_jiffies(WRITE_TMO_MS));
> +	}
> +
> +	return ret;
> +}
> +
>  struct ucsi_operations cros_ucsi_ops = {
>  	.read_version = cros_ucsi_read_version,
>  	.read_cci = cros_ucsi_read_cci,
>  	.read_message_in = cros_ucsi_read_message_in,
>  	.async_control = cros_ucsi_async_control,
> -	.sync_control = ucsi_sync_control_common,
> +	.sync_control = cros_ucsi_sync_control,
>  };
>  
>  static void cros_ucsi_work(struct work_struct *work)
> @@ -118,6 +155,54 @@ static void cros_ucsi_work(struct work_struct *work)
>  	ucsi_notify_common(udata->ucsi, cci);
>  }
>  
> +static void cros_ucsi_write_timeout(struct work_struct *work)
> +{
> +	struct cros_ucsi_data *udata =
> +		container_of(work, struct cros_ucsi_data, write_tmo.work);
> +	u32 cci;
> +	u64 cmd;
> +
> +	if (cros_ucsi_read(udata->ucsi, UCSI_CCI, &cci, sizeof(cci))) {
> +		dev_err(udata->dev,
> +			"Reading CCI failed; no write timeout recovery possible.");
> +		return;
> +	}
> +
> +	if (cci & UCSI_CCI_BUSY) {
> +		udata->tmo_counter++;
> +
> +		if (udata->tmo_counter <= WRITE_TMO_CTR_MAX)
> +			schedule_delayed_work(&udata->write_tmo,
> +					      msecs_to_jiffies(WRITE_TMO_MS));
> +		else
> +			dev_err(udata->dev,
> +				"PPM unresponsive - too many write timeouts.");
> +
> +		return;
> +	}
> +
> +	/* No longer busy means we can reset our timeout counter. */
> +	udata->tmo_counter = 0;
> +
> +	/* Need to ack previous command which may have timed out. */
> +	if (cci & UCSI_CCI_COMMAND_COMPLETE) {
> +		cmd = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE;
> +		cros_ucsi_async_control(udata->ucsi, &cmd);
> +
> +		/* Check again after a few seconds that the system has
> +		 * recovered to make sure our async write above was successful.
> +		 */
> +		schedule_delayed_work(&udata->write_tmo,
> +				      msecs_to_jiffies(WRITE_TMO_MS));
> +		return;
> +	}
> +
> +	/* We recovered from a previous timeout. Treat this as a recovery from
> +	 * suspend and call resume.
> +	 */
> +	ucsi_resume(udata->ucsi);
> +}
> +
>  static int cros_ucsi_event(struct notifier_block *nb,
>  			   unsigned long host_event, void *_notify)
>  {
> @@ -162,6 +247,7 @@ static int cros_ucsi_probe(struct platform_device *pdev)
>  	platform_set_drvdata(pdev, udata);
>  
>  	INIT_WORK(&udata->work, cros_ucsi_work);
> +	INIT_DELAYED_WORK(&udata->write_tmo, cros_ucsi_write_timeout);
>  	init_completion(&udata->complete);
>  
>  	udata->ucsi = ucsi_create(dev, &cros_ucsi_ops);

thanks,
Łukasz Bartosik Dec. 2, 2024, 3:34 p.m. UTC | #2
On Mon, Dec 2, 2024 at 3:06 PM Heikki Krogerus
<heikki.krogerus@linux.intel.com> wrote:
>
> Hi Łukasz,
>
> On Thu, Nov 28, 2024 at 11:20:35PM +0000, Łukasz Bartosik wrote:
> > From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
> >
> > In a suspend-resume edge case, the OPM is timing out in ucsi_resume and
> > the PPM is getting stuck waiting for a command complete ack. Add a write
> > timeout recovery task that will get us out of this state.
>
> Sorry, I did not realise this before, but this is in practice a fix
> (or workaround) to the driver that you just introduced in the previous
> patch. Please merge it into that.
>

Hi Heikki,

Yes I will merge it. I deliberately sent it as a separate commit for
review so that
it would be easier to see the changes.

> > Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
> > Signed-off-by: Łukasz Bartosik <ukaszb@chromium.org>
> > ---
> >  drivers/usb/typec/ucsi/cros_ec_ucsi.c | 88 ++++++++++++++++++++++++++-
> >  1 file changed, 87 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/usb/typec/ucsi/cros_ec_ucsi.c b/drivers/usb/typec/ucsi/cros_ec_ucsi.c
> > index c588d9a57643..6daf61e7e62a 100644
> > --- a/drivers/usb/typec/ucsi/cros_ec_ucsi.c
> > +++ b/drivers/usb/typec/ucsi/cros_ec_ucsi.c
> > @@ -7,6 +7,7 @@
> >
> >  #include <linux/container_of.h>
> >  #include <linux/dev_printk.h>
> > +#include <linux/jiffies.h>
> >  #include <linux/mod_devicetable.h>
> >  #include <linux/module.h>
> >  #include <linux/platform_data/cros_ec_commands.h>
> > @@ -29,6 +30,9 @@
> >   */
> >  #define WRITE_TMO_MS         5000
> >
> > +/* Number of times to attempt recovery from a write timeout before giving up. */
> > +#define WRITE_TMO_CTR_MAX    5
> > +
> >  struct cros_ucsi_data {
> >       struct device *dev;
> >       struct ucsi *ucsi;
> > @@ -36,6 +40,8 @@ struct cros_ucsi_data {
> >       struct cros_ec_device *ec;
> >       struct notifier_block nb;
> >       struct work_struct work;
> > +     struct delayed_work write_tmo;
> > +     int tmo_counter;
> >
> >       struct completion complete;
> >       unsigned long flags;
> > @@ -99,12 +105,43 @@ static int cros_ucsi_async_control(struct ucsi *ucsi, u64 cmd)
> >       return 0;
> >  }
> >
> > +static int cros_ucsi_sync_control(struct ucsi *ucsi, u64 cmd)
> > +{
> > +     struct cros_ucsi_data *udata = ucsi_get_drvdata(ucsi);
> > +     int ret;
> > +
> > +     ret = ucsi_sync_control_common(ucsi, cmd);
> > +     if (ret)
> > +             goto out;
> > +
> > +     /* Successful write. Cancel any pending recovery work. */
> > +     cancel_delayed_work_sync(&udata->write_tmo);
> > +
> > +     return 0;
> > +out:
>
> This label looks a bit unnecessary to me. How about a switch statement?
>

Good point. I will rework this part of the code to use switch.

Thanks,
Łukasz

>         ret = ucsi_sync_control_common(ucsi, cmd);
>         switch (ret) {
>         case -EBUSY:
>         case -ETIMEDOUT:
>                 cancel_delayed_work_sync(&udata->write_tmo);
>                 schedule_delayed_work(&udata->write_tmo, msecs_to_jiffies(WRITE_TMO_MS));
>                 break;
>         case 0:
>                 cancel_delayed_work_sync(&udata->write_tmo);
>                 break;
>         }
>
>         return ret;
>
> > +     /* EC may return -EBUSY if CCI.busy is set. Convert this to a timeout.
> > +      */
> > +     if (ret == -EBUSY)
> > +             ret = -ETIMEDOUT;
> > +
> > +     /* Schedule recovery attempt when we timeout or tried to send a command
> > +      * while still busy.
> > +      */
> > +     if (ret == -ETIMEDOUT) {
> > +             cancel_delayed_work_sync(&udata->write_tmo);
> > +             schedule_delayed_work(&udata->write_tmo,
> > +                                   msecs_to_jiffies(WRITE_TMO_MS));
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> >  struct ucsi_operations cros_ucsi_ops = {
> >       .read_version = cros_ucsi_read_version,
> >       .read_cci = cros_ucsi_read_cci,
> >       .read_message_in = cros_ucsi_read_message_in,
> >       .async_control = cros_ucsi_async_control,
> > -     .sync_control = ucsi_sync_control_common,
> > +     .sync_control = cros_ucsi_sync_control,
> >  };
> >
> >  static void cros_ucsi_work(struct work_struct *work)
> > @@ -118,6 +155,54 @@ static void cros_ucsi_work(struct work_struct *work)
> >       ucsi_notify_common(udata->ucsi, cci);
> >  }
> >
> > +static void cros_ucsi_write_timeout(struct work_struct *work)
> > +{
> > +     struct cros_ucsi_data *udata =
> > +             container_of(work, struct cros_ucsi_data, write_tmo.work);
> > +     u32 cci;
> > +     u64 cmd;
> > +
> > +     if (cros_ucsi_read(udata->ucsi, UCSI_CCI, &cci, sizeof(cci))) {
> > +             dev_err(udata->dev,
> > +                     "Reading CCI failed; no write timeout recovery possible.");
> > +             return;
> > +     }
> > +
> > +     if (cci & UCSI_CCI_BUSY) {
> > +             udata->tmo_counter++;
> > +
> > +             if (udata->tmo_counter <= WRITE_TMO_CTR_MAX)
> > +                     schedule_delayed_work(&udata->write_tmo,
> > +                                           msecs_to_jiffies(WRITE_TMO_MS));
> > +             else
> > +                     dev_err(udata->dev,
> > +                             "PPM unresponsive - too many write timeouts.");
> > +
> > +             return;
> > +     }
> > +
> > +     /* No longer busy means we can reset our timeout counter. */
> > +     udata->tmo_counter = 0;
> > +
> > +     /* Need to ack previous command which may have timed out. */
> > +     if (cci & UCSI_CCI_COMMAND_COMPLETE) {
> > +             cmd = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE;
> > +             cros_ucsi_async_control(udata->ucsi, &cmd);
> > +
> > +             /* Check again after a few seconds that the system has
> > +              * recovered to make sure our async write above was successful.
> > +              */
> > +             schedule_delayed_work(&udata->write_tmo,
> > +                                   msecs_to_jiffies(WRITE_TMO_MS));
> > +             return;
> > +     }
> > +
> > +     /* We recovered from a previous timeout. Treat this as a recovery from
> > +      * suspend and call resume.
> > +      */
> > +     ucsi_resume(udata->ucsi);
> > +}
> > +
> >  static int cros_ucsi_event(struct notifier_block *nb,
> >                          unsigned long host_event, void *_notify)
> >  {
> > @@ -162,6 +247,7 @@ static int cros_ucsi_probe(struct platform_device *pdev)
> >       platform_set_drvdata(pdev, udata);
> >
> >       INIT_WORK(&udata->work, cros_ucsi_work);
> > +     INIT_DELAYED_WORK(&udata->write_tmo, cros_ucsi_write_timeout);
> >       init_completion(&udata->complete);
> >
> >       udata->ucsi = ucsi_create(dev, &cros_ucsi_ops);
>
> thanks,
>
> --
> heikki
diff mbox series

Patch

diff --git a/drivers/usb/typec/ucsi/cros_ec_ucsi.c b/drivers/usb/typec/ucsi/cros_ec_ucsi.c
index c588d9a57643..6daf61e7e62a 100644
--- a/drivers/usb/typec/ucsi/cros_ec_ucsi.c
+++ b/drivers/usb/typec/ucsi/cros_ec_ucsi.c
@@ -7,6 +7,7 @@ 
 
 #include <linux/container_of.h>
 #include <linux/dev_printk.h>
+#include <linux/jiffies.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_data/cros_ec_commands.h>
@@ -29,6 +30,9 @@ 
  */
 #define WRITE_TMO_MS		5000
 
+/* Number of times to attempt recovery from a write timeout before giving up. */
+#define WRITE_TMO_CTR_MAX	5
+
 struct cros_ucsi_data {
 	struct device *dev;
 	struct ucsi *ucsi;
@@ -36,6 +40,8 @@  struct cros_ucsi_data {
 	struct cros_ec_device *ec;
 	struct notifier_block nb;
 	struct work_struct work;
+	struct delayed_work write_tmo;
+	int tmo_counter;
 
 	struct completion complete;
 	unsigned long flags;
@@ -99,12 +105,43 @@  static int cros_ucsi_async_control(struct ucsi *ucsi, u64 cmd)
 	return 0;
 }
 
+static int cros_ucsi_sync_control(struct ucsi *ucsi, u64 cmd)
+{
+	struct cros_ucsi_data *udata = ucsi_get_drvdata(ucsi);
+	int ret;
+
+	ret = ucsi_sync_control_common(ucsi, cmd);
+	if (ret)
+		goto out;
+
+	/* Successful write. Cancel any pending recovery work. */
+	cancel_delayed_work_sync(&udata->write_tmo);
+
+	return 0;
+out:
+	/* EC may return -EBUSY if CCI.busy is set. Convert this to a timeout.
+	 */
+	if (ret == -EBUSY)
+		ret = -ETIMEDOUT;
+
+	/* Schedule recovery attempt when we timeout or tried to send a command
+	 * while still busy.
+	 */
+	if (ret == -ETIMEDOUT) {
+		cancel_delayed_work_sync(&udata->write_tmo);
+		schedule_delayed_work(&udata->write_tmo,
+				      msecs_to_jiffies(WRITE_TMO_MS));
+	}
+
+	return ret;
+}
+
 struct ucsi_operations cros_ucsi_ops = {
 	.read_version = cros_ucsi_read_version,
 	.read_cci = cros_ucsi_read_cci,
 	.read_message_in = cros_ucsi_read_message_in,
 	.async_control = cros_ucsi_async_control,
-	.sync_control = ucsi_sync_control_common,
+	.sync_control = cros_ucsi_sync_control,
 };
 
 static void cros_ucsi_work(struct work_struct *work)
@@ -118,6 +155,54 @@  static void cros_ucsi_work(struct work_struct *work)
 	ucsi_notify_common(udata->ucsi, cci);
 }
 
+static void cros_ucsi_write_timeout(struct work_struct *work)
+{
+	struct cros_ucsi_data *udata =
+		container_of(work, struct cros_ucsi_data, write_tmo.work);
+	u32 cci;
+	u64 cmd;
+
+	if (cros_ucsi_read(udata->ucsi, UCSI_CCI, &cci, sizeof(cci))) {
+		dev_err(udata->dev,
+			"Reading CCI failed; no write timeout recovery possible.");
+		return;
+	}
+
+	if (cci & UCSI_CCI_BUSY) {
+		udata->tmo_counter++;
+
+		if (udata->tmo_counter <= WRITE_TMO_CTR_MAX)
+			schedule_delayed_work(&udata->write_tmo,
+					      msecs_to_jiffies(WRITE_TMO_MS));
+		else
+			dev_err(udata->dev,
+				"PPM unresponsive - too many write timeouts.");
+
+		return;
+	}
+
+	/* No longer busy means we can reset our timeout counter. */
+	udata->tmo_counter = 0;
+
+	/* Need to ack previous command which may have timed out. */
+	if (cci & UCSI_CCI_COMMAND_COMPLETE) {
+		cmd = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE;
+		cros_ucsi_async_control(udata->ucsi, &cmd);
+
+		/* Check again after a few seconds that the system has
+		 * recovered to make sure our async write above was successful.
+		 */
+		schedule_delayed_work(&udata->write_tmo,
+				      msecs_to_jiffies(WRITE_TMO_MS));
+		return;
+	}
+
+	/* We recovered from a previous timeout. Treat this as a recovery from
+	 * suspend and call resume.
+	 */
+	ucsi_resume(udata->ucsi);
+}
+
 static int cros_ucsi_event(struct notifier_block *nb,
 			   unsigned long host_event, void *_notify)
 {
@@ -162,6 +247,7 @@  static int cros_ucsi_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, udata);
 
 	INIT_WORK(&udata->work, cros_ucsi_work);
+	INIT_DELAYED_WORK(&udata->write_tmo, cros_ucsi_write_timeout);
 	init_completion(&udata->complete);
 
 	udata->ucsi = ucsi_create(dev, &cros_ucsi_ops);