Message ID | 20181004090900.32915-5-hverkuil@xs4all.nl (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | cec/adv/omap: fixes and new status flags | expand |
Tomi, Can you review this patch and the next? They should go to 4.20. This patch in particular is a nasty one, hard to reproduce. This patch should also be Cc-ed to stable for 4.15 and up. Tracking down randomly disappearing CEC transmits was no fun :-( Regards, Hans On 10/04/18 11:08, Hans Verkuil wrote: > From: Hans Verkuil <hans.verkuil@cisco.com> > > The TX FIFO has to be cleared if the transmit failed due to e.g. > a NACK condition, otherwise the hardware will keep trying to > transmit the message. > > An attempt was made to do this, but it was done after the call to > cec_transmit_done, which can cause a race condition since the call > to cec_transmit_done can cause a new transmit to be issued, and > then attempting to clear the TX FIFO will actually clear the new > transmit instead of the old transmit and the new transmit simply > never happens. > > By clearing the FIFO before transmit_done is called this race > is fixed. > > Note that there is no reason to clear the FIFO if the transmit > was successful, so the attempt to clear the FIFO in that case > was dropped. > > Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com> > --- > drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 35 ++++++++++++------------- > 1 file changed, 17 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c > index 340383150fb9..dee66a5101b5 100644 > --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c > +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c > @@ -106,6 +106,22 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) > } > } > > +static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) > +{ > + struct hdmi_core_data *core = cec_get_drvdata(adap); > + int retry = HDMI_CORE_CEC_RETRY; > + int temp; > + > + REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); > + while (retry) { > + temp = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); > + if (FLD_GET(temp, 7, 7) == 0) > + break; > + retry--; > + } > + return retry != 0; > +} > + > void hdmi4_cec_irq(struct hdmi_core_data *core) > { > u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0); > @@ -117,36 +133,19 @@ void hdmi4_cec_irq(struct hdmi_core_data *core) > if (stat0 & 0x20) { > cec_transmit_done(core->adap, CEC_TX_STATUS_OK, > 0, 0, 0, 0); > - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); > } else if (stat1 & 0x02) { > u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); > > + hdmi_cec_clear_tx_fifo(core->adap); > cec_transmit_done(core->adap, > CEC_TX_STATUS_NACK | > CEC_TX_STATUS_MAX_RETRIES, > 0, (dbg3 >> 4) & 7, 0, 0); > - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); > } > if (stat0 & 0x02) > hdmi_cec_received_msg(core); > } > > -static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) > -{ > - struct hdmi_core_data *core = cec_get_drvdata(adap); > - int retry = HDMI_CORE_CEC_RETRY; > - int temp; > - > - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); > - while (retry) { > - temp = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); > - if (FLD_GET(temp, 7, 7) == 0) > - break; > - retry--; > - } > - return retry != 0; > -} > - > static bool hdmi_cec_clear_rx_fifo(struct cec_adapter *adap) > { > struct hdmi_core_data *core = cec_get_drvdata(adap); >
Hi Hans, On 04/10/18 12:08, Hans Verkuil wrote: > From: Hans Verkuil <hans.verkuil@cisco.com> > > The TX FIFO has to be cleared if the transmit failed due to e.g. > a NACK condition, otherwise the hardware will keep trying to > transmit the message. > > An attempt was made to do this, but it was done after the call to > cec_transmit_done, which can cause a race condition since the call > to cec_transmit_done can cause a new transmit to be issued, and > then attempting to clear the TX FIFO will actually clear the new > transmit instead of the old transmit and the new transmit simply > never happens. > > By clearing the FIFO before transmit_done is called this race > is fixed. > > Note that there is no reason to clear the FIFO if the transmit > was successful, so the attempt to clear the FIFO in that case > was dropped. > > Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com> > --- > drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 35 ++++++++++++------------- > 1 file changed, 17 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c > index 340383150fb9..dee66a5101b5 100644 > --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c > +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c > @@ -106,6 +106,22 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) > } > } > > +static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) > +{ > + struct hdmi_core_data *core = cec_get_drvdata(adap); > + int retry = HDMI_CORE_CEC_RETRY; > + int temp; > + > + REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); > + while (retry) { > + temp = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); > + if (FLD_GET(temp, 7, 7) == 0) > + break; This is fine, but as you're using the helper macros already, there's REG_GET: REG_GET(core->base, HDMI_CEC_DBG_3, 7, 7) which removes the need for temp. Are you sure this works reliably? Usually when polling a register bit, I like to measure real-world-time in some way to ensure I actually poll for a certain amount of time. And just a matter of opinion, but I would've written: while (retry) { if (!REG_GET(..)) return true; retry--; } return false; > + retry--; > + } > + return retry != 0; > +} > + > void hdmi4_cec_irq(struct hdmi_core_data *core) > { > u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0); > @@ -117,36 +133,19 @@ void hdmi4_cec_irq(struct hdmi_core_data *core) > if (stat0 & 0x20) { > cec_transmit_done(core->adap, CEC_TX_STATUS_OK, > 0, 0, 0, 0); > - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); > } else if (stat1 & 0x02) { > u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); > > + hdmi_cec_clear_tx_fifo(core->adap); Would a dev_err be ok here? Tomi
On 05/10/18 17:13, Hans Verkuil wrote: > Tomi, > > Can you review this patch and the next? They should go to 4.20. > This patch in particular is a nasty one, hard to reproduce. > > This patch should also be Cc-ed to stable for 4.15 and up. Done. There's no dependency from the omapdrm patches to the first patch, is there? I.e. I can apply these via omapdrm? Tomi
On 10/08/2018 02:45 PM, Tomi Valkeinen wrote: > Hi Hans, > > On 04/10/18 12:08, Hans Verkuil wrote: >> From: Hans Verkuil <hans.verkuil@cisco.com> >> >> The TX FIFO has to be cleared if the transmit failed due to e.g. >> a NACK condition, otherwise the hardware will keep trying to >> transmit the message. >> >> An attempt was made to do this, but it was done after the call to >> cec_transmit_done, which can cause a race condition since the call >> to cec_transmit_done can cause a new transmit to be issued, and >> then attempting to clear the TX FIFO will actually clear the new >> transmit instead of the old transmit and the new transmit simply >> never happens. >> >> By clearing the FIFO before transmit_done is called this race >> is fixed. >> >> Note that there is no reason to clear the FIFO if the transmit >> was successful, so the attempt to clear the FIFO in that case >> was dropped. >> >> Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com> >> --- >> drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 35 ++++++++++++------------- >> 1 file changed, 17 insertions(+), 18 deletions(-) >> >> diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c >> index 340383150fb9..dee66a5101b5 100644 >> --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c >> +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c >> @@ -106,6 +106,22 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) >> } >> } >> >> +static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) >> +{ >> + struct hdmi_core_data *core = cec_get_drvdata(adap); >> + int retry = HDMI_CORE_CEC_RETRY; >> + int temp; >> + >> + REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); >> + while (retry) { >> + temp = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); >> + if (FLD_GET(temp, 7, 7) == 0) >> + break; > > This is fine, but as you're using the helper macros already, there's > REG_GET: > > REG_GET(core->base, HDMI_CEC_DBG_3, 7, 7) > > which removes the need for temp. Are you sure this works reliably? > Usually when polling a register bit, I like to measure real-world-time > in some way to ensure I actually poll for a certain amount of time. I'll add a bit of debugging to double-check but as far as I remember this is very fast and adding delays is overkill. FYI: we (Cisco) use this code in our products and we'd have seen it if this would fail. > > And just a matter of opinion, but I would've written: > > while (retry) { > if (!REG_GET(..)) > return true; > retry--; > } > > return false; > >> + retry--; >> + } >> + return retry != 0; >> +} >> + In this patch I just moved up the hdmi_cec_clear_tx_fifo so I can use it in hdmi4_cec_irq. I rather not make any changes to that function. Unless you object I prefer to make a new patch for 4.21 to improve it. >> void hdmi4_cec_irq(struct hdmi_core_data *core) >> { >> u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0); >> @@ -117,36 +133,19 @@ void hdmi4_cec_irq(struct hdmi_core_data *core) >> if (stat0 & 0x20) { >> cec_transmit_done(core->adap, CEC_TX_STATUS_OK, >> 0, 0, 0, 0); >> - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); >> } else if (stat1 & 0x02) { >> u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); >> >> + hdmi_cec_clear_tx_fifo(core->adap); > > Would a dev_err be ok here? Hmm. I would prefer a dev_err_once. Chances are that if this ever fails, it might fail continuously (as in: something is very seriously wrong), and you don't want that in an irq function. Regards, Hans
On 10/08/2018 02:52 PM, Tomi Valkeinen wrote: > > On 05/10/18 17:13, Hans Verkuil wrote: >> Tomi, >> >> Can you review this patch and the next? They should go to 4.20. >> This patch in particular is a nasty one, hard to reproduce. >> >> This patch should also be Cc-ed to stable for 4.15 and up. > > Done. There's no dependency from the omapdrm patches to the first patch, > is there? I.e. I can apply these via omapdrm? Correct. Regards, Hans
On 08/10/18 15:55, Hans Verkuil wrote: > On 10/08/2018 02:45 PM, Tomi Valkeinen wrote: >> Hi Hans, >> >> On 04/10/18 12:08, Hans Verkuil wrote: >>> From: Hans Verkuil <hans.verkuil@cisco.com> >>> >>> The TX FIFO has to be cleared if the transmit failed due to e.g. >>> a NACK condition, otherwise the hardware will keep trying to >>> transmit the message. >>> >>> An attempt was made to do this, but it was done after the call to >>> cec_transmit_done, which can cause a race condition since the call >>> to cec_transmit_done can cause a new transmit to be issued, and >>> then attempting to clear the TX FIFO will actually clear the new >>> transmit instead of the old transmit and the new transmit simply >>> never happens. >>> >>> By clearing the FIFO before transmit_done is called this race >>> is fixed. >>> >>> Note that there is no reason to clear the FIFO if the transmit >>> was successful, so the attempt to clear the FIFO in that case >>> was dropped. >>> >>> Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com> >>> --- >>> drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c | 35 ++++++++++++------------- >>> 1 file changed, 17 insertions(+), 18 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c >>> index 340383150fb9..dee66a5101b5 100644 >>> --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c >>> +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c >>> @@ -106,6 +106,22 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) >>> } >>> } >>> >>> +static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) >>> +{ >>> + struct hdmi_core_data *core = cec_get_drvdata(adap); >>> + int retry = HDMI_CORE_CEC_RETRY; >>> + int temp; >>> + >>> + REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); >>> + while (retry) { >>> + temp = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); >>> + if (FLD_GET(temp, 7, 7) == 0) >>> + break; >> >> This is fine, but as you're using the helper macros already, there's >> REG_GET: >> >> REG_GET(core->base, HDMI_CEC_DBG_3, 7, 7) >> >> which removes the need for temp. Are you sure this works reliably? >> Usually when polling a register bit, I like to measure real-world-time >> in some way to ensure I actually poll for a certain amount of time. > > I'll add a bit of debugging to double-check but as far as I remember this > is very fast and adding delays is overkill. I agree, it's very unlikely that this would not work. Loops like this just make me a bit uneasy =). As we will catch the error via the return value, I think it's not worth adding real-time tracking here, unless problems start to show up. > FYI: we (Cisco) use this code in our products and we'd have seen it if this > would fail. > >> >> And just a matter of opinion, but I would've written: >> >> while (retry) { >> if (!REG_GET(..)) >> return true; >> retry--; >> } >> >> return false; >> >>> + retry--; >>> + } >>> + return retry != 0; >>> +} >>> + > > In this patch I just moved up the hdmi_cec_clear_tx_fifo so I can use it in > hdmi4_cec_irq. I rather not make any changes to that function. > > Unless you object I prefer to make a new patch for 4.21 to improve it. Sounds fine. > >>> void hdmi4_cec_irq(struct hdmi_core_data *core) >>> { >>> u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0); >>> @@ -117,36 +133,19 @@ void hdmi4_cec_irq(struct hdmi_core_data *core) >>> if (stat0 & 0x20) { >>> cec_transmit_done(core->adap, CEC_TX_STATUS_OK, >>> 0, 0, 0, 0); >>> - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); >>> } else if (stat1 & 0x02) { >>> u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); >>> >>> + hdmi_cec_clear_tx_fifo(core->adap); >> >> Would a dev_err be ok here? > > Hmm. I would prefer a dev_err_once. Chances are that if this ever fails, it > might fail continuously (as in: something is very seriously wrong), and you > don't want that in an irq function. That's ok. As long as there's some indication that we're having an issue. Or do you think some other part will break and print errors, and it's easy to pinpoint to the fifo clearing? Well, I don't even know how the fifo clearing could break.... Tomi
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c index 340383150fb9..dee66a5101b5 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_cec.c @@ -106,6 +106,22 @@ static void hdmi_cec_received_msg(struct hdmi_core_data *core) } } +static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) +{ + struct hdmi_core_data *core = cec_get_drvdata(adap); + int retry = HDMI_CORE_CEC_RETRY; + int temp; + + REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); + while (retry) { + temp = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); + if (FLD_GET(temp, 7, 7) == 0) + break; + retry--; + } + return retry != 0; +} + void hdmi4_cec_irq(struct hdmi_core_data *core) { u32 stat0 = hdmi_read_reg(core->base, HDMI_CEC_INT_STATUS_0); @@ -117,36 +133,19 @@ void hdmi4_cec_irq(struct hdmi_core_data *core) if (stat0 & 0x20) { cec_transmit_done(core->adap, CEC_TX_STATUS_OK, 0, 0, 0, 0); - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); } else if (stat1 & 0x02) { u32 dbg3 = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); + hdmi_cec_clear_tx_fifo(core->adap); cec_transmit_done(core->adap, CEC_TX_STATUS_NACK | CEC_TX_STATUS_MAX_RETRIES, 0, (dbg3 >> 4) & 7, 0, 0); - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); } if (stat0 & 0x02) hdmi_cec_received_msg(core); } -static bool hdmi_cec_clear_tx_fifo(struct cec_adapter *adap) -{ - struct hdmi_core_data *core = cec_get_drvdata(adap); - int retry = HDMI_CORE_CEC_RETRY; - int temp; - - REG_FLD_MOD(core->base, HDMI_CEC_DBG_3, 0x1, 7, 7); - while (retry) { - temp = hdmi_read_reg(core->base, HDMI_CEC_DBG_3); - if (FLD_GET(temp, 7, 7) == 0) - break; - retry--; - } - return retry != 0; -} - static bool hdmi_cec_clear_rx_fifo(struct cec_adapter *adap) { struct hdmi_core_data *core = cec_get_drvdata(adap);