Message ID | 20241128232035.1525978-4-ukaszb@chromium.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | usb: typec: Implement UCSI driver for ChromeOS | expand |
Hi Łukasz, On Thu, Nov 28, 2024 at 11:20:35PM +0000, Łukasz Bartosik wrote: > From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org> > > In a suspend-resume edge case, the OPM is timing out in ucsi_resume and > the PPM is getting stuck waiting for a command complete ack. Add a write > timeout recovery task that will get us out of this state. Sorry, I did not realise this before, but this is in practice a fix (or workaround) to the driver that you just introduced in the previous patch. Please merge it into that. > Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org> > Signed-off-by: Łukasz Bartosik <ukaszb@chromium.org> > --- > drivers/usb/typec/ucsi/cros_ec_ucsi.c | 88 ++++++++++++++++++++++++++- > 1 file changed, 87 insertions(+), 1 deletion(-) > > diff --git a/drivers/usb/typec/ucsi/cros_ec_ucsi.c b/drivers/usb/typec/ucsi/cros_ec_ucsi.c > index c588d9a57643..6daf61e7e62a 100644 > --- a/drivers/usb/typec/ucsi/cros_ec_ucsi.c > +++ b/drivers/usb/typec/ucsi/cros_ec_ucsi.c > @@ -7,6 +7,7 @@ > > #include <linux/container_of.h> > #include <linux/dev_printk.h> > +#include <linux/jiffies.h> > #include <linux/mod_devicetable.h> > #include <linux/module.h> > #include <linux/platform_data/cros_ec_commands.h> > @@ -29,6 +30,9 @@ > */ > #define WRITE_TMO_MS 5000 > > +/* Number of times to attempt recovery from a write timeout before giving up. */ > +#define WRITE_TMO_CTR_MAX 5 > + > struct cros_ucsi_data { > struct device *dev; > struct ucsi *ucsi; > @@ -36,6 +40,8 @@ struct cros_ucsi_data { > struct cros_ec_device *ec; > struct notifier_block nb; > struct work_struct work; > + struct delayed_work write_tmo; > + int tmo_counter; > > struct completion complete; > unsigned long flags; > @@ -99,12 +105,43 @@ static int cros_ucsi_async_control(struct ucsi *ucsi, u64 cmd) > return 0; > } > > +static int cros_ucsi_sync_control(struct ucsi *ucsi, u64 cmd) > +{ > + struct cros_ucsi_data *udata = ucsi_get_drvdata(ucsi); > + int ret; > + > + ret = ucsi_sync_control_common(ucsi, cmd); > + if (ret) > + goto out; > + > + /* Successful write. Cancel any pending recovery work. */ > + cancel_delayed_work_sync(&udata->write_tmo); > + > + return 0; > +out: This label looks a bit unnecessary to me. How about a switch statement? ret = ucsi_sync_control_common(ucsi, cmd); switch (ret) { case -EBUSY: case -ETIMEDOUT: cancel_delayed_work_sync(&udata->write_tmo); schedule_delayed_work(&udata->write_tmo, msecs_to_jiffies(WRITE_TMO_MS)); break; case 0: cancel_delayed_work_sync(&udata->write_tmo); break; } return ret; > + /* EC may return -EBUSY if CCI.busy is set. Convert this to a timeout. > + */ > + if (ret == -EBUSY) > + ret = -ETIMEDOUT; > + > + /* Schedule recovery attempt when we timeout or tried to send a command > + * while still busy. > + */ > + if (ret == -ETIMEDOUT) { > + cancel_delayed_work_sync(&udata->write_tmo); > + schedule_delayed_work(&udata->write_tmo, > + msecs_to_jiffies(WRITE_TMO_MS)); > + } > + > + return ret; > +} > + > struct ucsi_operations cros_ucsi_ops = { > .read_version = cros_ucsi_read_version, > .read_cci = cros_ucsi_read_cci, > .read_message_in = cros_ucsi_read_message_in, > .async_control = cros_ucsi_async_control, > - .sync_control = ucsi_sync_control_common, > + .sync_control = cros_ucsi_sync_control, > }; > > static void cros_ucsi_work(struct work_struct *work) > @@ -118,6 +155,54 @@ static void cros_ucsi_work(struct work_struct *work) > ucsi_notify_common(udata->ucsi, cci); > } > > +static void cros_ucsi_write_timeout(struct work_struct *work) > +{ > + struct cros_ucsi_data *udata = > + container_of(work, struct cros_ucsi_data, write_tmo.work); > + u32 cci; > + u64 cmd; > + > + if (cros_ucsi_read(udata->ucsi, UCSI_CCI, &cci, sizeof(cci))) { > + dev_err(udata->dev, > + "Reading CCI failed; no write timeout recovery possible."); > + return; > + } > + > + if (cci & UCSI_CCI_BUSY) { > + udata->tmo_counter++; > + > + if (udata->tmo_counter <= WRITE_TMO_CTR_MAX) > + schedule_delayed_work(&udata->write_tmo, > + msecs_to_jiffies(WRITE_TMO_MS)); > + else > + dev_err(udata->dev, > + "PPM unresponsive - too many write timeouts."); > + > + return; > + } > + > + /* No longer busy means we can reset our timeout counter. */ > + udata->tmo_counter = 0; > + > + /* Need to ack previous command which may have timed out. */ > + if (cci & UCSI_CCI_COMMAND_COMPLETE) { > + cmd = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; > + cros_ucsi_async_control(udata->ucsi, &cmd); > + > + /* Check again after a few seconds that the system has > + * recovered to make sure our async write above was successful. > + */ > + schedule_delayed_work(&udata->write_tmo, > + msecs_to_jiffies(WRITE_TMO_MS)); > + return; > + } > + > + /* We recovered from a previous timeout. Treat this as a recovery from > + * suspend and call resume. > + */ > + ucsi_resume(udata->ucsi); > +} > + > static int cros_ucsi_event(struct notifier_block *nb, > unsigned long host_event, void *_notify) > { > @@ -162,6 +247,7 @@ static int cros_ucsi_probe(struct platform_device *pdev) > platform_set_drvdata(pdev, udata); > > INIT_WORK(&udata->work, cros_ucsi_work); > + INIT_DELAYED_WORK(&udata->write_tmo, cros_ucsi_write_timeout); > init_completion(&udata->complete); > > udata->ucsi = ucsi_create(dev, &cros_ucsi_ops); thanks,
On Mon, Dec 2, 2024 at 3:06 PM Heikki Krogerus <heikki.krogerus@linux.intel.com> wrote: > > Hi Łukasz, > > On Thu, Nov 28, 2024 at 11:20:35PM +0000, Łukasz Bartosik wrote: > > From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org> > > > > In a suspend-resume edge case, the OPM is timing out in ucsi_resume and > > the PPM is getting stuck waiting for a command complete ack. Add a write > > timeout recovery task that will get us out of this state. > > Sorry, I did not realise this before, but this is in practice a fix > (or workaround) to the driver that you just introduced in the previous > patch. Please merge it into that. > Hi Heikki, Yes I will merge it. I deliberately sent it as a separate commit for review so that it would be easier to see the changes. > > Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org> > > Signed-off-by: Łukasz Bartosik <ukaszb@chromium.org> > > --- > > drivers/usb/typec/ucsi/cros_ec_ucsi.c | 88 ++++++++++++++++++++++++++- > > 1 file changed, 87 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/usb/typec/ucsi/cros_ec_ucsi.c b/drivers/usb/typec/ucsi/cros_ec_ucsi.c > > index c588d9a57643..6daf61e7e62a 100644 > > --- a/drivers/usb/typec/ucsi/cros_ec_ucsi.c > > +++ b/drivers/usb/typec/ucsi/cros_ec_ucsi.c > > @@ -7,6 +7,7 @@ > > > > #include <linux/container_of.h> > > #include <linux/dev_printk.h> > > +#include <linux/jiffies.h> > > #include <linux/mod_devicetable.h> > > #include <linux/module.h> > > #include <linux/platform_data/cros_ec_commands.h> > > @@ -29,6 +30,9 @@ > > */ > > #define WRITE_TMO_MS 5000 > > > > +/* Number of times to attempt recovery from a write timeout before giving up. */ > > +#define WRITE_TMO_CTR_MAX 5 > > + > > struct cros_ucsi_data { > > struct device *dev; > > struct ucsi *ucsi; > > @@ -36,6 +40,8 @@ struct cros_ucsi_data { > > struct cros_ec_device *ec; > > struct notifier_block nb; > > struct work_struct work; > > + struct delayed_work write_tmo; > > + int tmo_counter; > > > > struct completion complete; > > unsigned long flags; > > @@ -99,12 +105,43 @@ static int cros_ucsi_async_control(struct ucsi *ucsi, u64 cmd) > > return 0; > > } > > > > +static int cros_ucsi_sync_control(struct ucsi *ucsi, u64 cmd) > > +{ > > + struct cros_ucsi_data *udata = ucsi_get_drvdata(ucsi); > > + int ret; > > + > > + ret = ucsi_sync_control_common(ucsi, cmd); > > + if (ret) > > + goto out; > > + > > + /* Successful write. Cancel any pending recovery work. */ > > + cancel_delayed_work_sync(&udata->write_tmo); > > + > > + return 0; > > +out: > > This label looks a bit unnecessary to me. How about a switch statement? > Good point. I will rework this part of the code to use switch. Thanks, Łukasz > ret = ucsi_sync_control_common(ucsi, cmd); > switch (ret) { > case -EBUSY: > case -ETIMEDOUT: > cancel_delayed_work_sync(&udata->write_tmo); > schedule_delayed_work(&udata->write_tmo, msecs_to_jiffies(WRITE_TMO_MS)); > break; > case 0: > cancel_delayed_work_sync(&udata->write_tmo); > break; > } > > return ret; > > > + /* EC may return -EBUSY if CCI.busy is set. Convert this to a timeout. > > + */ > > + if (ret == -EBUSY) > > + ret = -ETIMEDOUT; > > + > > + /* Schedule recovery attempt when we timeout or tried to send a command > > + * while still busy. > > + */ > > + if (ret == -ETIMEDOUT) { > > + cancel_delayed_work_sync(&udata->write_tmo); > > + schedule_delayed_work(&udata->write_tmo, > > + msecs_to_jiffies(WRITE_TMO_MS)); > > + } > > + > > + return ret; > > +} > > + > > struct ucsi_operations cros_ucsi_ops = { > > .read_version = cros_ucsi_read_version, > > .read_cci = cros_ucsi_read_cci, > > .read_message_in = cros_ucsi_read_message_in, > > .async_control = cros_ucsi_async_control, > > - .sync_control = ucsi_sync_control_common, > > + .sync_control = cros_ucsi_sync_control, > > }; > > > > static void cros_ucsi_work(struct work_struct *work) > > @@ -118,6 +155,54 @@ static void cros_ucsi_work(struct work_struct *work) > > ucsi_notify_common(udata->ucsi, cci); > > } > > > > +static void cros_ucsi_write_timeout(struct work_struct *work) > > +{ > > + struct cros_ucsi_data *udata = > > + container_of(work, struct cros_ucsi_data, write_tmo.work); > > + u32 cci; > > + u64 cmd; > > + > > + if (cros_ucsi_read(udata->ucsi, UCSI_CCI, &cci, sizeof(cci))) { > > + dev_err(udata->dev, > > + "Reading CCI failed; no write timeout recovery possible."); > > + return; > > + } > > + > > + if (cci & UCSI_CCI_BUSY) { > > + udata->tmo_counter++; > > + > > + if (udata->tmo_counter <= WRITE_TMO_CTR_MAX) > > + schedule_delayed_work(&udata->write_tmo, > > + msecs_to_jiffies(WRITE_TMO_MS)); > > + else > > + dev_err(udata->dev, > > + "PPM unresponsive - too many write timeouts."); > > + > > + return; > > + } > > + > > + /* No longer busy means we can reset our timeout counter. */ > > + udata->tmo_counter = 0; > > + > > + /* Need to ack previous command which may have timed out. */ > > + if (cci & UCSI_CCI_COMMAND_COMPLETE) { > > + cmd = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; > > + cros_ucsi_async_control(udata->ucsi, &cmd); > > + > > + /* Check again after a few seconds that the system has > > + * recovered to make sure our async write above was successful. > > + */ > > + schedule_delayed_work(&udata->write_tmo, > > + msecs_to_jiffies(WRITE_TMO_MS)); > > + return; > > + } > > + > > + /* We recovered from a previous timeout. Treat this as a recovery from > > + * suspend and call resume. > > + */ > > + ucsi_resume(udata->ucsi); > > +} > > + > > static int cros_ucsi_event(struct notifier_block *nb, > > unsigned long host_event, void *_notify) > > { > > @@ -162,6 +247,7 @@ static int cros_ucsi_probe(struct platform_device *pdev) > > platform_set_drvdata(pdev, udata); > > > > INIT_WORK(&udata->work, cros_ucsi_work); > > + INIT_DELAYED_WORK(&udata->write_tmo, cros_ucsi_write_timeout); > > init_completion(&udata->complete); > > > > udata->ucsi = ucsi_create(dev, &cros_ucsi_ops); > > thanks, > > -- > heikki
diff --git a/drivers/usb/typec/ucsi/cros_ec_ucsi.c b/drivers/usb/typec/ucsi/cros_ec_ucsi.c index c588d9a57643..6daf61e7e62a 100644 --- a/drivers/usb/typec/ucsi/cros_ec_ucsi.c +++ b/drivers/usb/typec/ucsi/cros_ec_ucsi.c @@ -7,6 +7,7 @@ #include <linux/container_of.h> #include <linux/dev_printk.h> +#include <linux/jiffies.h> #include <linux/mod_devicetable.h> #include <linux/module.h> #include <linux/platform_data/cros_ec_commands.h> @@ -29,6 +30,9 @@ */ #define WRITE_TMO_MS 5000 +/* Number of times to attempt recovery from a write timeout before giving up. */ +#define WRITE_TMO_CTR_MAX 5 + struct cros_ucsi_data { struct device *dev; struct ucsi *ucsi; @@ -36,6 +40,8 @@ struct cros_ucsi_data { struct cros_ec_device *ec; struct notifier_block nb; struct work_struct work; + struct delayed_work write_tmo; + int tmo_counter; struct completion complete; unsigned long flags; @@ -99,12 +105,43 @@ static int cros_ucsi_async_control(struct ucsi *ucsi, u64 cmd) return 0; } +static int cros_ucsi_sync_control(struct ucsi *ucsi, u64 cmd) +{ + struct cros_ucsi_data *udata = ucsi_get_drvdata(ucsi); + int ret; + + ret = ucsi_sync_control_common(ucsi, cmd); + if (ret) + goto out; + + /* Successful write. Cancel any pending recovery work. */ + cancel_delayed_work_sync(&udata->write_tmo); + + return 0; +out: + /* EC may return -EBUSY if CCI.busy is set. Convert this to a timeout. + */ + if (ret == -EBUSY) + ret = -ETIMEDOUT; + + /* Schedule recovery attempt when we timeout or tried to send a command + * while still busy. + */ + if (ret == -ETIMEDOUT) { + cancel_delayed_work_sync(&udata->write_tmo); + schedule_delayed_work(&udata->write_tmo, + msecs_to_jiffies(WRITE_TMO_MS)); + } + + return ret; +} + struct ucsi_operations cros_ucsi_ops = { .read_version = cros_ucsi_read_version, .read_cci = cros_ucsi_read_cci, .read_message_in = cros_ucsi_read_message_in, .async_control = cros_ucsi_async_control, - .sync_control = ucsi_sync_control_common, + .sync_control = cros_ucsi_sync_control, }; static void cros_ucsi_work(struct work_struct *work) @@ -118,6 +155,54 @@ static void cros_ucsi_work(struct work_struct *work) ucsi_notify_common(udata->ucsi, cci); } +static void cros_ucsi_write_timeout(struct work_struct *work) +{ + struct cros_ucsi_data *udata = + container_of(work, struct cros_ucsi_data, write_tmo.work); + u32 cci; + u64 cmd; + + if (cros_ucsi_read(udata->ucsi, UCSI_CCI, &cci, sizeof(cci))) { + dev_err(udata->dev, + "Reading CCI failed; no write timeout recovery possible."); + return; + } + + if (cci & UCSI_CCI_BUSY) { + udata->tmo_counter++; + + if (udata->tmo_counter <= WRITE_TMO_CTR_MAX) + schedule_delayed_work(&udata->write_tmo, + msecs_to_jiffies(WRITE_TMO_MS)); + else + dev_err(udata->dev, + "PPM unresponsive - too many write timeouts."); + + return; + } + + /* No longer busy means we can reset our timeout counter. */ + udata->tmo_counter = 0; + + /* Need to ack previous command which may have timed out. */ + if (cci & UCSI_CCI_COMMAND_COMPLETE) { + cmd = UCSI_ACK_CC_CI | UCSI_ACK_COMMAND_COMPLETE; + cros_ucsi_async_control(udata->ucsi, &cmd); + + /* Check again after a few seconds that the system has + * recovered to make sure our async write above was successful. + */ + schedule_delayed_work(&udata->write_tmo, + msecs_to_jiffies(WRITE_TMO_MS)); + return; + } + + /* We recovered from a previous timeout. Treat this as a recovery from + * suspend and call resume. + */ + ucsi_resume(udata->ucsi); +} + static int cros_ucsi_event(struct notifier_block *nb, unsigned long host_event, void *_notify) { @@ -162,6 +247,7 @@ static int cros_ucsi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, udata); INIT_WORK(&udata->work, cros_ucsi_work); + INIT_DELAYED_WORK(&udata->write_tmo, cros_ucsi_write_timeout); init_completion(&udata->complete); udata->ucsi = ucsi_create(dev, &cros_ucsi_ops);