Message ID | 20190510223437.84368-5-dianders@chromium.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | spi: A better solution for cros_ec_spi reliability | expand |
From: Douglas Anderson <dianders@chromium.org> Date: Fri, May 10, 2019 at 3:35 PM To: Mark Brown, Benson Leung, Enric Balletbo i Serra Cc: <linux-rockchip@lists.infradead.org>, <drinkcat@chromium.org>, Guenter Roeck, <briannorris@chromium.org>, <mka@chromium.org>, Douglas Anderson, <linux-kernel@vger.kernel.org> > This reverts commit 37a186225a0c020516bafad2727fdcdfc039a1e4. > > We have a better solution in the patch ("platform/chrome: cros_ec_spi: > Set ourselves as timing sensitive"). Let's revert the uglier and less > reliable solution. > > Signed-off-by: Douglas Anderson <dianders@chromium.org> Reviewed-by: Guenter Roeck <groeck@chromium.org> > --- > > drivers/platform/chrome/cros_ec_spi.c | 80 ++------------------------- > 1 file changed, 6 insertions(+), 74 deletions(-) > > diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c > index 757a115502ec..70ff1ad09012 100644 > --- a/drivers/platform/chrome/cros_ec_spi.c > +++ b/drivers/platform/chrome/cros_ec_spi.c > @@ -75,27 +75,6 @@ struct cros_ec_spi { > unsigned int end_of_msg_delay; > }; > > -typedef int (*cros_ec_xfer_fn_t) (struct cros_ec_device *ec_dev, > - struct cros_ec_command *ec_msg); > - > -/** > - * struct cros_ec_xfer_work_params - params for our high priority workers > - * > - * @work: The work_struct needed to queue work > - * @fn: The function to use to transfer > - * @ec_dev: ChromeOS EC device > - * @ec_msg: Message to transfer > - * @ret: The return value of the function > - */ > - > -struct cros_ec_xfer_work_params { > - struct work_struct work; > - cros_ec_xfer_fn_t fn; > - struct cros_ec_device *ec_dev; > - struct cros_ec_command *ec_msg; > - int ret; > -}; > - > static void debug_packet(struct device *dev, const char *name, u8 *ptr, > int len) > { > @@ -371,13 +350,13 @@ static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev, > } > > /** > - * do_cros_ec_pkt_xfer_spi - Transfer a packet over SPI and receive the reply > + * cros_ec_pkt_xfer_spi - Transfer a packet over SPI and receive the reply > * > * @ec_dev: ChromeOS EC device > * @ec_msg: Message to transfer > */ > -static int do_cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, > - struct cros_ec_command *ec_msg) > +static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, > + struct cros_ec_command *ec_msg) > { > struct ec_host_response *response; > struct cros_ec_spi *ec_spi = ec_dev->priv; > @@ -514,13 +493,13 @@ static int do_cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, > } > > /** > - * do_cros_ec_cmd_xfer_spi - Transfer a message over SPI and receive the reply > + * cros_ec_cmd_xfer_spi - Transfer a message over SPI and receive the reply > * > * @ec_dev: ChromeOS EC device > * @ec_msg: Message to transfer > */ > -static int do_cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, > - struct cros_ec_command *ec_msg) > +static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, > + struct cros_ec_command *ec_msg) > { > struct cros_ec_spi *ec_spi = ec_dev->priv; > struct spi_transfer trans; > @@ -632,53 +611,6 @@ static int do_cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, > return ret; > } > > -static void cros_ec_xfer_high_pri_work(struct work_struct *work) > -{ > - struct cros_ec_xfer_work_params *params; > - > - params = container_of(work, struct cros_ec_xfer_work_params, work); > - params->ret = params->fn(params->ec_dev, params->ec_msg); > -} > - > -static int cros_ec_xfer_high_pri(struct cros_ec_device *ec_dev, > - struct cros_ec_command *ec_msg, > - cros_ec_xfer_fn_t fn) > -{ > - struct cros_ec_xfer_work_params params; > - > - INIT_WORK_ONSTACK(¶ms.work, cros_ec_xfer_high_pri_work); > - params.ec_dev = ec_dev; > - params.ec_msg = ec_msg; > - params.fn = fn; > - > - /* > - * This looks a bit ridiculous. Why do the work on a > - * different thread if we're just going to block waiting for > - * the thread to finish? The key here is that the thread is > - * running at high priority but the calling context might not > - * be. We need to be at high priority to avoid getting > - * context switched out for too long and the EC giving up on > - * the transfer. > - */ > - queue_work(system_highpri_wq, ¶ms.work); > - flush_work(¶ms.work); > - destroy_work_on_stack(¶ms.work); > - > - return params.ret; > -} > - > -static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, > - struct cros_ec_command *ec_msg) > -{ > - return cros_ec_xfer_high_pri(ec_dev, ec_msg, do_cros_ec_pkt_xfer_spi); > -} > - > -static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, > - struct cros_ec_command *ec_msg) > -{ > - return cros_ec_xfer_high_pri(ec_dev, ec_msg, do_cros_ec_cmd_xfer_spi); > -} > - > static void cros_ec_spi_dt_probe(struct cros_ec_spi *ec_spi, struct device *dev) > { > struct device_node *np = dev->of_node; > -- > 2.21.0.1020.gf2820cf01a-goog >
On Fri, May 10, 2019 at 03:34:37PM -0700, Douglas Anderson wrote: > This reverts commit 37a186225a0c020516bafad2727fdcdfc039a1e4. > > We have a better solution in the patch ("platform/chrome: cros_ec_spi: > Set ourselves as timing sensitive"). Let's revert the uglier and less > reliable solution. It isn't clear to me that it's a bad thing to have this even with the SPI thread at realime priority.
Hi, On Sun, May 12, 2019 at 10:05 AM Mark Brown <broonie@kernel.org> wrote: > > On Fri, May 10, 2019 at 03:34:37PM -0700, Douglas Anderson wrote: > > This reverts commit 37a186225a0c020516bafad2727fdcdfc039a1e4. > > > > We have a better solution in the patch ("platform/chrome: cros_ec_spi: > > Set ourselves as timing sensitive"). Let's revert the uglier and less > > reliable solution. > > It isn't clear to me that it's a bad thing to have this even with the > SPI thread at realime priority. The code that's there right now isn't enough. As per the description in the original patch, it didn't solve all problems but just made things an order of magnitude better. So if I don't do this revert I instead need a patch to bump cros_ec SPI up to realtime to get SPI transfers _truly_ reliable. I actually have a patch coded up to do just that. ...but then Guenter pointed out that I was effectively duplicating the work that the SPI framework could already do for me if I could use the pumping thread at real time priority. My current plan is parameterize things so that cros_ec_spi can request a forced transition to the realtime pump thread without breaking existing users. I'll code that up this morning and send out a v2 soon so you can see what you think of it. :-) NOTE: I actually tracked down one reason why the high priority thread wasn't enough and I needed something like real time. I found that commit a1b89132dc4f ("dm crypt: use WQ_HIGHPRI for the IO and crypt workqueues") was making dm-crypt preempt me. I'll start a separate discussion about that, but in the end it still seems better to use something like a real time priority for cros_ec. -Doug
On Mon, May 13, 2019 at 08:57:12AM -0700, Doug Anderson wrote: > On Sun, May 12, 2019 at 10:05 AM Mark Brown <broonie@kernel.org> wrote: > > It isn't clear to me that it's a bad thing to have this even with the > > SPI thread at realime priority. > The code that's there right now isn't enough. As per the description > in the original patch, it didn't solve all problems but just made > things an order of magnitude better. So if I don't do this revert I I'm not saying the other changes aren't helping, I'm saying that it's not clear that this revert is improving things.
Hi, On Mon, May 13, 2019 at 9:02 AM Mark Brown <broonie@kernel.org> wrote: > > On Mon, May 13, 2019 at 08:57:12AM -0700, Doug Anderson wrote: > > On Sun, May 12, 2019 at 10:05 AM Mark Brown <broonie@kernel.org> wrote: > > > > It isn't clear to me that it's a bad thing to have this even with the > > > SPI thread at realime priority. > > > The code that's there right now isn't enough. As per the description > > in the original patch, it didn't solve all problems but just made > > things an order of magnitude better. So if I don't do this revert I > > I'm not saying the other changes aren't helping, I'm saying that it's > not clear that this revert is improving things. If I add a call to force the pumping to happen on the SPI thread then the commit I'm reverting here is useless though, isn't it? -Doug
On Mon, May 13, 2019 at 09:03:28AM -0700, Doug Anderson wrote: > On Mon, May 13, 2019 at 9:02 AM Mark Brown <broonie@kernel.org> wrote: > > I'm not saying the other changes aren't helping, I'm saying that it's > > not clear that this revert is improving things. > If I add a call to force the pumping to happen on the SPI thread then > the commit I'm reverting here is useless though, isn't it? Well, I'm not convinced that that change is ideal anyway and it does leave you vulnerable to further changes in the SPI core pushing things out to calling context which feels like it isn't going to be helping robustness.
Hi, On Mon, May 13, 2019 at 9:47 AM, Mark Brown <broonie@kernel.org> wrote: > On Mon, May 13, 2019 at 09:03:28AM -0700, Doug Anderson wrote: > > On Mon, May 13, 2019 at 9:02 AM Mark Brown <broonie@kernel.org> wrote: > > > > I'm not saying the other changes aren't helping, I'm saying that it's > > > not clear that this revert is improving things. > > > If I add a call to force the pumping to happen on the SPI thread then > > the commit I'm reverting here is useless though, isn't it? > > Well, I'm not convinced that that change is ideal anyway and it does > leave you vulnerable to further changes in the SPI core pushing things > out to calling context which feels like it isn't going to be helping > robustness. OK. Here's my plan: in v2 I've still included this revert and you can see how things look. If you hate it as much as you think you will then let me know and I'll send a v3 that avoids to forcing and re-adds the realtime thread to cros_ec. One note just so you're aware: For my particular device I'm not nearly as concerned with latency / throughput as I am concerned with transfers not getting interrupted once started. I've added this explicitly in the commit message now, too. :-) -Doug
diff --git a/drivers/platform/chrome/cros_ec_spi.c b/drivers/platform/chrome/cros_ec_spi.c index 757a115502ec..70ff1ad09012 100644 --- a/drivers/platform/chrome/cros_ec_spi.c +++ b/drivers/platform/chrome/cros_ec_spi.c @@ -75,27 +75,6 @@ struct cros_ec_spi { unsigned int end_of_msg_delay; }; -typedef int (*cros_ec_xfer_fn_t) (struct cros_ec_device *ec_dev, - struct cros_ec_command *ec_msg); - -/** - * struct cros_ec_xfer_work_params - params for our high priority workers - * - * @work: The work_struct needed to queue work - * @fn: The function to use to transfer - * @ec_dev: ChromeOS EC device - * @ec_msg: Message to transfer - * @ret: The return value of the function - */ - -struct cros_ec_xfer_work_params { - struct work_struct work; - cros_ec_xfer_fn_t fn; - struct cros_ec_device *ec_dev; - struct cros_ec_command *ec_msg; - int ret; -}; - static void debug_packet(struct device *dev, const char *name, u8 *ptr, int len) { @@ -371,13 +350,13 @@ static int cros_ec_spi_receive_response(struct cros_ec_device *ec_dev, } /** - * do_cros_ec_pkt_xfer_spi - Transfer a packet over SPI and receive the reply + * cros_ec_pkt_xfer_spi - Transfer a packet over SPI and receive the reply * * @ec_dev: ChromeOS EC device * @ec_msg: Message to transfer */ -static int do_cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, - struct cros_ec_command *ec_msg) +static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, + struct cros_ec_command *ec_msg) { struct ec_host_response *response; struct cros_ec_spi *ec_spi = ec_dev->priv; @@ -514,13 +493,13 @@ static int do_cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, } /** - * do_cros_ec_cmd_xfer_spi - Transfer a message over SPI and receive the reply + * cros_ec_cmd_xfer_spi - Transfer a message over SPI and receive the reply * * @ec_dev: ChromeOS EC device * @ec_msg: Message to transfer */ -static int do_cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, - struct cros_ec_command *ec_msg) +static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, + struct cros_ec_command *ec_msg) { struct cros_ec_spi *ec_spi = ec_dev->priv; struct spi_transfer trans; @@ -632,53 +611,6 @@ static int do_cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, return ret; } -static void cros_ec_xfer_high_pri_work(struct work_struct *work) -{ - struct cros_ec_xfer_work_params *params; - - params = container_of(work, struct cros_ec_xfer_work_params, work); - params->ret = params->fn(params->ec_dev, params->ec_msg); -} - -static int cros_ec_xfer_high_pri(struct cros_ec_device *ec_dev, - struct cros_ec_command *ec_msg, - cros_ec_xfer_fn_t fn) -{ - struct cros_ec_xfer_work_params params; - - INIT_WORK_ONSTACK(¶ms.work, cros_ec_xfer_high_pri_work); - params.ec_dev = ec_dev; - params.ec_msg = ec_msg; - params.fn = fn; - - /* - * This looks a bit ridiculous. Why do the work on a - * different thread if we're just going to block waiting for - * the thread to finish? The key here is that the thread is - * running at high priority but the calling context might not - * be. We need to be at high priority to avoid getting - * context switched out for too long and the EC giving up on - * the transfer. - */ - queue_work(system_highpri_wq, ¶ms.work); - flush_work(¶ms.work); - destroy_work_on_stack(¶ms.work); - - return params.ret; -} - -static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev, - struct cros_ec_command *ec_msg) -{ - return cros_ec_xfer_high_pri(ec_dev, ec_msg, do_cros_ec_pkt_xfer_spi); -} - -static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev, - struct cros_ec_command *ec_msg) -{ - return cros_ec_xfer_high_pri(ec_dev, ec_msg, do_cros_ec_cmd_xfer_spi); -} - static void cros_ec_spi_dt_probe(struct cros_ec_spi *ec_spi, struct device *dev) { struct device_node *np = dev->of_node;
This reverts commit 37a186225a0c020516bafad2727fdcdfc039a1e4. We have a better solution in the patch ("platform/chrome: cros_ec_spi: Set ourselves as timing sensitive"). Let's revert the uglier and less reliable solution. Signed-off-by: Douglas Anderson <dianders@chromium.org> --- drivers/platform/chrome/cros_ec_spi.c | 80 ++------------------------- 1 file changed, 6 insertions(+), 74 deletions(-)