diff mbox series

drm/i915/psr: WA for panels stating bad link status after PSR is enabled

Message ID 20241028074642.804895-1-jouni.hogander@intel.com (mailing list archive)
State New
Headers show
Series drm/i915/psr: WA for panels stating bad link status after PSR is enabled | expand

Commit Message

Hogander, Jouni Oct. 28, 2024, 7:46 a.m. UTC
We are currently seeing unexpected link trainings with several different
eDP panels. These are caused by these panels stating bad link status in
their dpcd registers. This can be observed by doing following test:

1. Boot up without Xe module loaded

2. Load Xe module with PSR disabled:
    $ modprobe xe  enable_psr=0

3. Read panel link status register
    $ dpcd_reg read --offset 0x200e --count=1
    0x200e:  00

4. Enable PSR, sleep for 2 seconds and disable PSR again:

    $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
    $ echo "-1" > /sys/kernel/debug/dri/0000:00:02.0/xe_params/enable_psr
    $ echo 0x0 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
    $ sleep 2
    $ cat /sys/kernel/debug/dri/0/i915_edp_psr_status | grep status
    $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
    Source PSR/PanelReplay status: DEEP_SLEEP [0x80310030]

5. Now read panel link status registers again:
    $ dpcd_reg read --offset 0x200e --count=1
    0x200e:  80

Workaround this by not trusting link status registers after PSR is enabled
until first short pulse interrupt is received.

Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
---
 .../drm/i915/display/intel_display_types.h    |  2 +
 drivers/gpu/drm/i915/display/intel_dp.c       |  3 +-
 drivers/gpu/drm/i915/display/intel_psr.c      | 39 +++++++++++++++++++
 drivers/gpu/drm/i915/display/intel_psr.h      |  1 +
 4 files changed, 44 insertions(+), 1 deletion(-)

Comments

Jani Nikula Oct. 28, 2024, 12:24 p.m. UTC | #1
On Mon, 28 Oct 2024, Jouni Högander <jouni.hogander@intel.com> wrote:
> We are currently seeing unexpected link trainings with several different
> eDP panels. These are caused by these panels stating bad link status in
> their dpcd registers. This can be observed by doing following test:
>
> 1. Boot up without Xe module loaded
>
> 2. Load Xe module with PSR disabled:
>     $ modprobe xe  enable_psr=0
>
> 3. Read panel link status register
>     $ dpcd_reg read --offset 0x200e --count=1
>     0x200e:  00
>
> 4. Enable PSR, sleep for 2 seconds and disable PSR again:
>
>     $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
>     $ echo "-1" > /sys/kernel/debug/dri/0000:00:02.0/xe_params/enable_psr
>     $ echo 0x0 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
>     $ sleep 2
>     $ cat /sys/kernel/debug/dri/0/i915_edp_psr_status | grep status
>     $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
>     Source PSR/PanelReplay status: DEEP_SLEEP [0x80310030]
>
> 5. Now read panel link status registers again:
>     $ dpcd_reg read --offset 0x200e --count=1
>     0x200e:  80
>
> Workaround this by not trusting link status registers after PSR is enabled
> until first short pulse interrupt is received.

Which eDP/DPCD version are we talking about?

Could this be related to AUX-less ALPM?

Some nitpicks below, less important.

> Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
> ---
>  .../drm/i915/display/intel_display_types.h    |  2 +
>  drivers/gpu/drm/i915/display/intel_dp.c       |  3 +-
>  drivers/gpu/drm/i915/display/intel_psr.c      | 39 +++++++++++++++++++
>  drivers/gpu/drm/i915/display/intel_psr.h      |  1 +
>  4 files changed, 44 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
> index 2bb1fa64da2f..f0b7d7262961 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_types.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
> @@ -1618,6 +1618,8 @@ struct intel_psr {
>  	u32 dc3co_exit_delay;
>  	struct delayed_work dc3co_work;
>  	u8 entry_setup_frames;
> +
> +	bool link_ok;
>  };
>  
>  struct intel_dp {
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> index b036c6521659..efaaadfb12ea 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -5007,7 +5007,8 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
>  		return true;
>  
>  	/* Retrain if link not ok */
> -	return !intel_dp_link_ok(intel_dp, link_status);
> +	return !(intel_dp_link_ok(intel_dp, link_status) ||
> +		 intel_psr_link_ok(intel_dp));

Nitpick, in general I think "not A and not B" reads better than "not (A
or B)" because saying the parens aloud is kind of clumsy. Reading code
aloud (well, in my head anyway) is one of my main tests for readability.

>  }
>  
>  bool intel_dp_has_connector(struct intel_dp *intel_dp,
> diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
> index 992543f508c1..0cd7388389e0 100644
> --- a/drivers/gpu/drm/i915/display/intel_psr.c
> +++ b/drivers/gpu/drm/i915/display/intel_psr.c
> @@ -2009,6 +2009,15 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp,
>  	intel_dp->psr.enabled = true;
>  	intel_dp->psr.paused = false;
>  
> +	/*
> +	 * Link_ok is sticky and set here on PSR enable. We can assume link
> +	 * training is complete as we never continue to PSR enable with
> +	 * untrained link. Link_ok is kept as set until first short pulse
> +	 * interrupt. This is targeted to workaround panels stating bad link
> +	 * after PSR is enabled.
> +	 */
> +	intel_dp->psr.link_ok = true;
> +
>  	intel_psr_activate(intel_dp);
>  }
>  
> @@ -3458,6 +3467,9 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp)
>  
>  	mutex_lock(&psr->lock);
>  
> +	/* Let's clear possibly set link_ok flag here */

That's kind of a translation of C into English. If you need the comment,
then maybe state the why instead of the what?

> +	psr->link_ok = false;
> +
>  	if (!psr->enabled)
>  		goto exit;
>  
> @@ -3517,6 +3529,33 @@ bool intel_psr_enabled(struct intel_dp *intel_dp)
>  	return ret;
>  }
>  
> +/**
> + * intel_psr_link_ok - return psr->link_ok
> + * @intel_dp: struct intel_dp
> + *
> + * We are seeing unexpected link re-trainings with some panels. This is caused
> + * by panel stating bad link status after PSR is enabled. Code checking link
> + * status can call this to ensure it can ignore bad link status stated by the
> + * panel I.e. if panel is stating bad link and intel_psr_link_ok is stating link
> + * is ok caller should rely on latter.
> + *
> + * Return value of link_ok
> + */
> +bool intel_psr_link_ok(struct intel_dp *intel_dp)
> +{
> +	bool ret;
> +
> +	if ((!CAN_PSR(intel_dp) && !CAN_PANEL_REPLAY(intel_dp)) ||
> +	    !intel_dp_is_edp(intel_dp))
> +		return false;
> +
> +	mutex_lock(&intel_dp->psr.lock);
> +	ret = intel_dp->psr.link_ok;
> +	mutex_unlock(&intel_dp->psr.lock);
> +
> +	return ret;
> +}
> +
>  /**
>   * intel_psr_lock - grab PSR lock
>   * @crtc_state: the crtc state
> diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
> index 5f26f61f82aa..956be263c09e 100644
> --- a/drivers/gpu/drm/i915/display/intel_psr.h
> +++ b/drivers/gpu/drm/i915/display/intel_psr.h
> @@ -59,6 +59,7 @@ void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st
>  void intel_psr_pause(struct intel_dp *intel_dp);
>  void intel_psr_resume(struct intel_dp *intel_dp);
>  bool intel_psr_needs_block_dc_vblank(const struct intel_crtc_state *crtc_state);
> +bool intel_psr_link_ok(struct intel_dp *intel_dp);
>  
>  void intel_psr_lock(const struct intel_crtc_state *crtc_state);
>  void intel_psr_unlock(const struct intel_crtc_state *crtc_state);
Jani Nikula Oct. 28, 2024, 12:33 p.m. UTC | #2
On Mon, 28 Oct 2024, Jani Nikula <jani.nikula@linux.intel.com> wrote:
> On Mon, 28 Oct 2024, Jouni Högander <jouni.hogander@intel.com> wrote:
>> We are currently seeing unexpected link trainings with several different
>> eDP panels. These are caused by these panels stating bad link status in
>> their dpcd registers. This can be observed by doing following test:
>>
>> 1. Boot up without Xe module loaded
>>
>> 2. Load Xe module with PSR disabled:
>>     $ modprobe xe  enable_psr=0
>>
>> 3. Read panel link status register
>>     $ dpcd_reg read --offset 0x200e --count=1
>>     0x200e:  00
>>
>> 4. Enable PSR, sleep for 2 seconds and disable PSR again:
>>
>>     $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
>>     $ echo "-1" > /sys/kernel/debug/dri/0000:00:02.0/xe_params/enable_psr
>>     $ echo 0x0 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
>>     $ sleep 2
>>     $ cat /sys/kernel/debug/dri/0/i915_edp_psr_status | grep status
>>     $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
>>     Source PSR/PanelReplay status: DEEP_SLEEP [0x80310030]
>>
>> 5. Now read panel link status registers again:
>>     $ dpcd_reg read --offset 0x200e --count=1
>>     0x200e:  80
>>
>> Workaround this by not trusting link status registers after PSR is enabled
>> until first short pulse interrupt is received.
>
> Which eDP/DPCD version are we talking about?
>
> Could this be related to AUX-less ALPM?

And what I'm after here, are we satisfied with saying several panels
exhibit unexpected behaviour, so work around them? It just somehow
doesn't smell like the root cause to me...

>
> Some nitpicks below, less important.
>
>> Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
>> ---
>>  .../drm/i915/display/intel_display_types.h    |  2 +
>>  drivers/gpu/drm/i915/display/intel_dp.c       |  3 +-
>>  drivers/gpu/drm/i915/display/intel_psr.c      | 39 +++++++++++++++++++
>>  drivers/gpu/drm/i915/display/intel_psr.h      |  1 +
>>  4 files changed, 44 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
>> index 2bb1fa64da2f..f0b7d7262961 100644
>> --- a/drivers/gpu/drm/i915/display/intel_display_types.h
>> +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
>> @@ -1618,6 +1618,8 @@ struct intel_psr {
>>  	u32 dc3co_exit_delay;
>>  	struct delayed_work dc3co_work;
>>  	u8 entry_setup_frames;
>> +
>> +	bool link_ok;
>>  };
>>  
>>  struct intel_dp {
>> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
>> index b036c6521659..efaaadfb12ea 100644
>> --- a/drivers/gpu/drm/i915/display/intel_dp.c
>> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
>> @@ -5007,7 +5007,8 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
>>  		return true;
>>  
>>  	/* Retrain if link not ok */
>> -	return !intel_dp_link_ok(intel_dp, link_status);
>> +	return !(intel_dp_link_ok(intel_dp, link_status) ||
>> +		 intel_psr_link_ok(intel_dp));
>
> Nitpick, in general I think "not A and not B" reads better than "not (A
> or B)" because saying the parens aloud is kind of clumsy. Reading code
> aloud (well, in my head anyway) is one of my main tests for readability.
>
>>  }
>>  
>>  bool intel_dp_has_connector(struct intel_dp *intel_dp,
>> diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
>> index 992543f508c1..0cd7388389e0 100644
>> --- a/drivers/gpu/drm/i915/display/intel_psr.c
>> +++ b/drivers/gpu/drm/i915/display/intel_psr.c
>> @@ -2009,6 +2009,15 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp,
>>  	intel_dp->psr.enabled = true;
>>  	intel_dp->psr.paused = false;
>>  
>> +	/*
>> +	 * Link_ok is sticky and set here on PSR enable. We can assume link
>> +	 * training is complete as we never continue to PSR enable with
>> +	 * untrained link. Link_ok is kept as set until first short pulse
>> +	 * interrupt. This is targeted to workaround panels stating bad link
>> +	 * after PSR is enabled.
>> +	 */
>> +	intel_dp->psr.link_ok = true;
>> +
>>  	intel_psr_activate(intel_dp);
>>  }
>>  
>> @@ -3458,6 +3467,9 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp)
>>  
>>  	mutex_lock(&psr->lock);
>>  
>> +	/* Let's clear possibly set link_ok flag here */
>
> That's kind of a translation of C into English. If you need the comment,
> then maybe state the why instead of the what?
>
>> +	psr->link_ok = false;
>> +
>>  	if (!psr->enabled)
>>  		goto exit;
>>  
>> @@ -3517,6 +3529,33 @@ bool intel_psr_enabled(struct intel_dp *intel_dp)
>>  	return ret;
>>  }
>>  
>> +/**
>> + * intel_psr_link_ok - return psr->link_ok
>> + * @intel_dp: struct intel_dp
>> + *
>> + * We are seeing unexpected link re-trainings with some panels. This is caused
>> + * by panel stating bad link status after PSR is enabled. Code checking link
>> + * status can call this to ensure it can ignore bad link status stated by the
>> + * panel I.e. if panel is stating bad link and intel_psr_link_ok is stating link
>> + * is ok caller should rely on latter.
>> + *
>> + * Return value of link_ok
>> + */
>> +bool intel_psr_link_ok(struct intel_dp *intel_dp)
>> +{
>> +	bool ret;
>> +
>> +	if ((!CAN_PSR(intel_dp) && !CAN_PANEL_REPLAY(intel_dp)) ||
>> +	    !intel_dp_is_edp(intel_dp))
>> +		return false;
>> +
>> +	mutex_lock(&intel_dp->psr.lock);
>> +	ret = intel_dp->psr.link_ok;
>> +	mutex_unlock(&intel_dp->psr.lock);
>> +
>> +	return ret;
>> +}
>> +
>>  /**
>>   * intel_psr_lock - grab PSR lock
>>   * @crtc_state: the crtc state
>> diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
>> index 5f26f61f82aa..956be263c09e 100644
>> --- a/drivers/gpu/drm/i915/display/intel_psr.h
>> +++ b/drivers/gpu/drm/i915/display/intel_psr.h
>> @@ -59,6 +59,7 @@ void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st
>>  void intel_psr_pause(struct intel_dp *intel_dp);
>>  void intel_psr_resume(struct intel_dp *intel_dp);
>>  bool intel_psr_needs_block_dc_vblank(const struct intel_crtc_state *crtc_state);
>> +bool intel_psr_link_ok(struct intel_dp *intel_dp);
>>  
>>  void intel_psr_lock(const struct intel_crtc_state *crtc_state);
>>  void intel_psr_unlock(const struct intel_crtc_state *crtc_state);
Hogander, Jouni Oct. 28, 2024, 12:47 p.m. UTC | #3
On Mon, 2024-10-28 at 14:24 +0200, Jani Nikula wrote:
> On Mon, 28 Oct 2024, Jouni Högander <jouni.hogander@intel.com> wrote:
> > We are currently seeing unexpected link trainings with several
> > different
> > eDP panels. These are caused by these panels stating bad link
> > status in
> > their dpcd registers. This can be observed by doing following test:
> > 
> > 1. Boot up without Xe module loaded
> > 
> > 2. Load Xe module with PSR disabled:
> >     $ modprobe xe  enable_psr=0
> > 
> > 3. Read panel link status register
> >     $ dpcd_reg read --offset 0x200e --count=1
> >     0x200e:  00
> > 
> > 4. Enable PSR, sleep for 2 seconds and disable PSR again:
> > 
> >     $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
> >     $ echo "-1" >
> > /sys/kernel/debug/dri/0000:00:02.0/xe_params/enable_psr
> >     $ echo 0x0 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
> >     $ sleep 2
> >     $ cat /sys/kernel/debug/dri/0/i915_edp_psr_status | grep status
> >     $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
> >     Source PSR/PanelReplay status: DEEP_SLEEP [0x80310030]
> > 
> > 5. Now read panel link status registers again:
> >     $ dpcd_reg read --offset 0x200e --count=1
> >     0x200e:  80
> > 
> > Workaround this by not trusting link status registers after PSR is
> > enabled
> > until first short pulse interrupt is received.
> 
> Which eDP/DPCD version are we talking about?

This is at least since eDP 1.4.

> 
> Could this be related to AUX-less ALPM?

Panels we are seeing this are not using AUX-less ALPM.

> 
> Some nitpicks below, less important.
> 
> > Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
> > ---
> >  .../drm/i915/display/intel_display_types.h    |  2 +
> >  drivers/gpu/drm/i915/display/intel_dp.c       |  3 +-
> >  drivers/gpu/drm/i915/display/intel_psr.c      | 39
> > +++++++++++++++++++
> >  drivers/gpu/drm/i915/display/intel_psr.h      |  1 +
> >  4 files changed, 44 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h
> > b/drivers/gpu/drm/i915/display/intel_display_types.h
> > index 2bb1fa64da2f..f0b7d7262961 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display_types.h
> > +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
> > @@ -1618,6 +1618,8 @@ struct intel_psr {
> >         u32 dc3co_exit_delay;
> >         struct delayed_work dc3co_work;
> >         u8 entry_setup_frames;
> > +
> > +       bool link_ok;
> >  };
> >  
> >  struct intel_dp {
> > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> > b/drivers/gpu/drm/i915/display/intel_dp.c
> > index b036c6521659..efaaadfb12ea 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > @@ -5007,7 +5007,8 @@ intel_dp_needs_link_retrain(struct intel_dp
> > *intel_dp)
> >                 return true;
> >  
> >         /* Retrain if link not ok */
> > -       return !intel_dp_link_ok(intel_dp, link_status);
> > +       return !(intel_dp_link_ok(intel_dp, link_status) ||
> > +                intel_psr_link_ok(intel_dp));
> 
> Nitpick, in general I think "not A and not B" reads better than "not
> (A
> or B)" because saying the parens aloud is kind of clumsy. Reading
> code
> aloud (well, in my head anyway) is one of my main tests for
> readability.

yes, I agree on this. I was just thinking this too much as want it to
accept intel_dp_link_ok result if it's true. I will change it.
 
> 
> >  }
> >  
> >  bool intel_dp_has_connector(struct intel_dp *intel_dp,
> > diff --git a/drivers/gpu/drm/i915/display/intel_psr.c
> > b/drivers/gpu/drm/i915/display/intel_psr.c
> > index 992543f508c1..0cd7388389e0 100644
> > --- a/drivers/gpu/drm/i915/display/intel_psr.c
> > +++ b/drivers/gpu/drm/i915/display/intel_psr.c
> > @@ -2009,6 +2009,15 @@ static void intel_psr_enable_locked(struct
> > intel_dp *intel_dp,
> >         intel_dp->psr.enabled = true;
> >         intel_dp->psr.paused = false;
> >  
> > +       /*
> > +        * Link_ok is sticky and set here on PSR enable. We can
> > assume link
> > +        * training is complete as we never continue to PSR enable
> > with
> > +        * untrained link. Link_ok is kept as set until first short
> > pulse
> > +        * interrupt. This is targeted to workaround panels stating
> > bad link
> > +        * after PSR is enabled.
> > +        */
> > +       intel_dp->psr.link_ok = true;
> > +
> >         intel_psr_activate(intel_dp);
> >  }
> >  
> > @@ -3458,6 +3467,9 @@ void intel_psr_short_pulse(struct intel_dp
> > *intel_dp)
> >  
> >         mutex_lock(&psr->lock);
> >  
> > +       /* Let's clear possibly set link_ok flag here */
> 
> That's kind of a translation of C into English. If you need the
> comment,
> then maybe state the why instead of the what?

You are right. I didn't succeed stating link_ok might not be set at all
but we are still clearing it here. Anyways useless comment. I will drop
it.

BR,

Jouni Högander

> 
> > +       psr->link_ok = false;
> > +
> >         if (!psr->enabled)
> >                 goto exit;
> >  
> > @@ -3517,6 +3529,33 @@ bool intel_psr_enabled(struct intel_dp
> > *intel_dp)
> >         return ret;
> >  }
> >  
> > +/**
> > + * intel_psr_link_ok - return psr->link_ok
> > + * @intel_dp: struct intel_dp
> > + *
> > + * We are seeing unexpected link re-trainings with some panels.
> > This is caused
> > + * by panel stating bad link status after PSR is enabled. Code
> > checking link
> > + * status can call this to ensure it can ignore bad link status
> > stated by the
> > + * panel I.e. if panel is stating bad link and intel_psr_link_ok
> > is stating link
> > + * is ok caller should rely on latter.
> > + *
> > + * Return value of link_ok
> > + */
> > +bool intel_psr_link_ok(struct intel_dp *intel_dp)
> > +{
> > +       bool ret;
> > +
> > +       if ((!CAN_PSR(intel_dp) && !CAN_PANEL_REPLAY(intel_dp)) ||
> > +           !intel_dp_is_edp(intel_dp))
> > +               return false;
> > +
> > +       mutex_lock(&intel_dp->psr.lock);
> > +       ret = intel_dp->psr.link_ok;
> > +       mutex_unlock(&intel_dp->psr.lock);
> > +
> > +       return ret;
> > +}
> > +
> >  /**
> >   * intel_psr_lock - grab PSR lock
> >   * @crtc_state: the crtc state
> > diff --git a/drivers/gpu/drm/i915/display/intel_psr.h
> > b/drivers/gpu/drm/i915/display/intel_psr.h
> > index 5f26f61f82aa..956be263c09e 100644
> > --- a/drivers/gpu/drm/i915/display/intel_psr.h
> > +++ b/drivers/gpu/drm/i915/display/intel_psr.h
> > @@ -59,6 +59,7 @@ void intel_psr2_program_trans_man_trk_ctl(const
> > struct intel_crtc_state *crtc_st
> >  void intel_psr_pause(struct intel_dp *intel_dp);
> >  void intel_psr_resume(struct intel_dp *intel_dp);
> >  bool intel_psr_needs_block_dc_vblank(const struct intel_crtc_state
> > *crtc_state);
> > +bool intel_psr_link_ok(struct intel_dp *intel_dp);
> >  
> >  void intel_psr_lock(const struct intel_crtc_state *crtc_state);
> >  void intel_psr_unlock(const struct intel_crtc_state *crtc_state);
>
Imre Deak Oct. 28, 2024, 1:41 p.m. UTC | #4
On Mon, Oct 28, 2024 at 09:46:42AM +0200, Jouni Högander wrote:
> We are currently seeing unexpected link trainings with several different
> eDP panels. These are caused by these panels stating bad link status in
> their dpcd registers. This can be observed by doing following test:
> 
> 1. Boot up without Xe module loaded
> 
> 2. Load Xe module with PSR disabled:
>     $ modprobe xe  enable_psr=0
> 
> 3. Read panel link status register
>     $ dpcd_reg read --offset 0x200e --count=1
>     0x200e:  00
> 
> 4. Enable PSR, sleep for 2 seconds and disable PSR again:
> 
>     $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
>     $ echo "-1" > /sys/kernel/debug/dri/0000:00:02.0/xe_params/enable_psr
>     $ echo 0x0 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
>     $ sleep 2
>     $ cat /sys/kernel/debug/dri/0/i915_edp_psr_status | grep status
>     $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
>     Source PSR/PanelReplay status: DEEP_SLEEP [0x80310030]
> 
> 5. Now read panel link status registers again:
>     $ dpcd_reg read --offset 0x200e --count=1
>     0x200e:  80
> 
> Workaround this by not trusting link status registers after PSR is enabled
> until first short pulse interrupt is received.
> 
> Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
> ---
>  .../drm/i915/display/intel_display_types.h    |  2 +
>  drivers/gpu/drm/i915/display/intel_dp.c       |  3 +-
>  drivers/gpu/drm/i915/display/intel_psr.c      | 39 +++++++++++++++++++
>  drivers/gpu/drm/i915/display/intel_psr.h      |  1 +
>  4 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
> index 2bb1fa64da2f..f0b7d7262961 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_types.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
> @@ -1618,6 +1618,8 @@ struct intel_psr {
>  	u32 dc3co_exit_delay;
>  	struct delayed_work dc3co_work;
>  	u8 entry_setup_frames;
> +
> +	bool link_ok;
>  };
>  
>  struct intel_dp {
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> index b036c6521659..efaaadfb12ea 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -5007,7 +5007,8 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
>  		return true;
>  
>  	/* Retrain if link not ok */
> -	return !intel_dp_link_ok(intel_dp, link_status);
> +	return !(intel_dp_link_ok(intel_dp, link_status) ||
> +		 intel_psr_link_ok(intel_dp));
>  }
>  
>  bool intel_dp_has_connector(struct intel_dp *intel_dp,
> diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
> index 992543f508c1..0cd7388389e0 100644
> --- a/drivers/gpu/drm/i915/display/intel_psr.c
> +++ b/drivers/gpu/drm/i915/display/intel_psr.c
> @@ -2009,6 +2009,15 @@ static void intel_psr_enable_locked(struct intel_dp *intel_dp,
>  	intel_dp->psr.enabled = true;
>  	intel_dp->psr.paused = false;
>  
> +	/*
> +	 * Link_ok is sticky and set here on PSR enable. We can assume link
> +	 * training is complete as we never continue to PSR enable with
> +	 * untrained link. Link_ok is kept as set until first short pulse
> +	 * interrupt. This is targeted to workaround panels stating bad link
> +	 * after PSR is enabled.
> +	 */
> +	intel_dp->psr.link_ok = true;
> +
>  	intel_psr_activate(intel_dp);
>  }
>  
> @@ -3458,6 +3467,9 @@ void intel_psr_short_pulse(struct intel_dp *intel_dp)
>  
>  	mutex_lock(&psr->lock);
>  
> +	/* Let's clear possibly set link_ok flag here */
> +	psr->link_ok = false;

Should psr->link_ok also get reset when the output is disabled, so it's
effect is consistent (i.e. doesn't effect the link state from output
enabling/link training until PSR is enabled)?

> +
>  	if (!psr->enabled)
>  		goto exit;
>  
> @@ -3517,6 +3529,33 @@ bool intel_psr_enabled(struct intel_dp *intel_dp)
>  	return ret;
>  }
>  
> +/**
> + * intel_psr_link_ok - return psr->link_ok
> + * @intel_dp: struct intel_dp
> + *
> + * We are seeing unexpected link re-trainings with some panels. This is caused
> + * by panel stating bad link status after PSR is enabled. Code checking link
> + * status can call this to ensure it can ignore bad link status stated by the
> + * panel I.e. if panel is stating bad link and intel_psr_link_ok is stating link
> + * is ok caller should rely on latter.
> + *
> + * Return value of link_ok
> + */
> +bool intel_psr_link_ok(struct intel_dp *intel_dp)
> +{
> +	bool ret;
> +
> +	if ((!CAN_PSR(intel_dp) && !CAN_PANEL_REPLAY(intel_dp)) ||
> +	    !intel_dp_is_edp(intel_dp))
> +		return false;
> +
> +	mutex_lock(&intel_dp->psr.lock);
> +	ret = intel_dp->psr.link_ok;
> +	mutex_unlock(&intel_dp->psr.lock);
> +
> +	return ret;
> +}
> +
>  /**
>   * intel_psr_lock - grab PSR lock
>   * @crtc_state: the crtc state
> diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
> index 5f26f61f82aa..956be263c09e 100644
> --- a/drivers/gpu/drm/i915/display/intel_psr.h
> +++ b/drivers/gpu/drm/i915/display/intel_psr.h
> @@ -59,6 +59,7 @@ void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st
>  void intel_psr_pause(struct intel_dp *intel_dp);
>  void intel_psr_resume(struct intel_dp *intel_dp);
>  bool intel_psr_needs_block_dc_vblank(const struct intel_crtc_state *crtc_state);
> +bool intel_psr_link_ok(struct intel_dp *intel_dp);
>  
>  void intel_psr_lock(const struct intel_crtc_state *crtc_state);
>  void intel_psr_unlock(const struct intel_crtc_state *crtc_state);
> -- 
> 2.34.1
>
Hogander, Jouni Oct. 29, 2024, 7:07 a.m. UTC | #5
On Mon, 2024-10-28 at 15:41 +0200, Imre Deak wrote:
> On Mon, Oct 28, 2024 at 09:46:42AM +0200, Jouni Högander wrote:
> > We are currently seeing unexpected link trainings with several
> > different
> > eDP panels. These are caused by these panels stating bad link
> > status in
> > their dpcd registers. This can be observed by doing following test:
> > 
> > 1. Boot up without Xe module loaded
> > 
> > 2. Load Xe module with PSR disabled:
> >     $ modprobe xe  enable_psr=0
> > 
> > 3. Read panel link status register
> >     $ dpcd_reg read --offset 0x200e --count=1
> >     0x200e:  00
> > 
> > 4. Enable PSR, sleep for 2 seconds and disable PSR again:
> > 
> >     $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
> >     $ echo "-1" >
> > /sys/kernel/debug/dri/0000:00:02.0/xe_params/enable_psr
> >     $ echo 0x0 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
> >     $ sleep 2
> >     $ cat /sys/kernel/debug/dri/0/i915_edp_psr_status | grep status
> >     $ echo 0x1 > /sys/kernel/debug/dri/0/i915_edp_psr_debug
> >     Source PSR/PanelReplay status: DEEP_SLEEP [0x80310030]
> > 
> > 5. Now read panel link status registers again:
> >     $ dpcd_reg read --offset 0x200e --count=1
> >     0x200e:  80
> > 
> > Workaround this by not trusting link status registers after PSR is
> > enabled
> > until first short pulse interrupt is received.
> > 
> > Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
> > ---
> >  .../drm/i915/display/intel_display_types.h    |  2 +
> >  drivers/gpu/drm/i915/display/intel_dp.c       |  3 +-
> >  drivers/gpu/drm/i915/display/intel_psr.c      | 39
> > +++++++++++++++++++
> >  drivers/gpu/drm/i915/display/intel_psr.h      |  1 +
> >  4 files changed, 44 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h
> > b/drivers/gpu/drm/i915/display/intel_display_types.h
> > index 2bb1fa64da2f..f0b7d7262961 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display_types.h
> > +++ b/drivers/gpu/drm/i915/display/intel_display_types.h
> > @@ -1618,6 +1618,8 @@ struct intel_psr {
> >         u32 dc3co_exit_delay;
> >         struct delayed_work dc3co_work;
> >         u8 entry_setup_frames;
> > +
> > +       bool link_ok;
> >  };
> >  
> >  struct intel_dp {
> > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> > b/drivers/gpu/drm/i915/display/intel_dp.c
> > index b036c6521659..efaaadfb12ea 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > @@ -5007,7 +5007,8 @@ intel_dp_needs_link_retrain(struct intel_dp
> > *intel_dp)
> >                 return true;
> >  
> >         /* Retrain if link not ok */
> > -       return !intel_dp_link_ok(intel_dp, link_status);
> > +       return !(intel_dp_link_ok(intel_dp, link_status) ||
> > +                intel_psr_link_ok(intel_dp));
> >  }
> >  
> >  bool intel_dp_has_connector(struct intel_dp *intel_dp,
> > diff --git a/drivers/gpu/drm/i915/display/intel_psr.c
> > b/drivers/gpu/drm/i915/display/intel_psr.c
> > index 992543f508c1..0cd7388389e0 100644
> > --- a/drivers/gpu/drm/i915/display/intel_psr.c
> > +++ b/drivers/gpu/drm/i915/display/intel_psr.c
> > @@ -2009,6 +2009,15 @@ static void intel_psr_enable_locked(struct
> > intel_dp *intel_dp,
> >         intel_dp->psr.enabled = true;
> >         intel_dp->psr.paused = false;
> >  
> > +       /*
> > +        * Link_ok is sticky and set here on PSR enable. We can
> > assume link
> > +        * training is complete as we never continue to PSR enable
> > with
> > +        * untrained link. Link_ok is kept as set until first short
> > pulse
> > +        * interrupt. This is targeted to workaround panels stating
> > bad link
> > +        * after PSR is enabled.
> > +        */
> > +       intel_dp->psr.link_ok = true;
> > +
> >         intel_psr_activate(intel_dp);
> >  }
> >  
> > @@ -3458,6 +3467,9 @@ void intel_psr_short_pulse(struct intel_dp
> > *intel_dp)
> >  
> >         mutex_lock(&psr->lock);
> >  
> > +       /* Let's clear possibly set link_ok flag here */
> > +       psr->link_ok = false;
> 
> Should psr->link_ok also get reset when the output is disabled, so
> it's
> effect is consistent (i.e. doesn't effect the link state from output
> enabling/link training until PSR is enabled)?

Yes, I think this makes sense. I will address this.

BR,

Jouni Högander
> 
> > +
> >         if (!psr->enabled)
> >                 goto exit;
> >  
> > @@ -3517,6 +3529,33 @@ bool intel_psr_enabled(struct intel_dp
> > *intel_dp)
> >         return ret;
> >  }
> >  
> > +/**
> > + * intel_psr_link_ok - return psr->link_ok
> > + * @intel_dp: struct intel_dp
> > + *
> > + * We are seeing unexpected link re-trainings with some panels.
> > This is caused
> > + * by panel stating bad link status after PSR is enabled. Code
> > checking link
> > + * status can call this to ensure it can ignore bad link status
> > stated by the
> > + * panel I.e. if panel is stating bad link and intel_psr_link_ok
> > is stating link
> > + * is ok caller should rely on latter.
> > + *
> > + * Return value of link_ok
> > + */
> > +bool intel_psr_link_ok(struct intel_dp *intel_dp)
> > +{
> > +       bool ret;
> > +
> > +       if ((!CAN_PSR(intel_dp) && !CAN_PANEL_REPLAY(intel_dp)) ||
> > +           !intel_dp_is_edp(intel_dp))
> > +               return false;
> > +
> > +       mutex_lock(&intel_dp->psr.lock);
> > +       ret = intel_dp->psr.link_ok;
> > +       mutex_unlock(&intel_dp->psr.lock);
> > +
> > +       return ret;
> > +}
> > +
> >  /**
> >   * intel_psr_lock - grab PSR lock
> >   * @crtc_state: the crtc state
> > diff --git a/drivers/gpu/drm/i915/display/intel_psr.h
> > b/drivers/gpu/drm/i915/display/intel_psr.h
> > index 5f26f61f82aa..956be263c09e 100644
> > --- a/drivers/gpu/drm/i915/display/intel_psr.h
> > +++ b/drivers/gpu/drm/i915/display/intel_psr.h
> > @@ -59,6 +59,7 @@ void intel_psr2_program_trans_man_trk_ctl(const
> > struct intel_crtc_state *crtc_st
> >  void intel_psr_pause(struct intel_dp *intel_dp);
> >  void intel_psr_resume(struct intel_dp *intel_dp);
> >  bool intel_psr_needs_block_dc_vblank(const struct intel_crtc_state
> > *crtc_state);
> > +bool intel_psr_link_ok(struct intel_dp *intel_dp);
> >  
> >  void intel_psr_lock(const struct intel_crtc_state *crtc_state);
> >  void intel_psr_unlock(const struct intel_crtc_state *crtc_state);
> > -- 
> > 2.34.1
> >
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 2bb1fa64da2f..f0b7d7262961 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1618,6 +1618,8 @@  struct intel_psr {
 	u32 dc3co_exit_delay;
 	struct delayed_work dc3co_work;
 	u8 entry_setup_frames;
+
+	bool link_ok;
 };
 
 struct intel_dp {
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index b036c6521659..efaaadfb12ea 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -5007,7 +5007,8 @@  intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
 		return true;
 
 	/* Retrain if link not ok */
-	return !intel_dp_link_ok(intel_dp, link_status);
+	return !(intel_dp_link_ok(intel_dp, link_status) ||
+		 intel_psr_link_ok(intel_dp));
 }
 
 bool intel_dp_has_connector(struct intel_dp *intel_dp,
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 992543f508c1..0cd7388389e0 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -2009,6 +2009,15 @@  static void intel_psr_enable_locked(struct intel_dp *intel_dp,
 	intel_dp->psr.enabled = true;
 	intel_dp->psr.paused = false;
 
+	/*
+	 * Link_ok is sticky and set here on PSR enable. We can assume link
+	 * training is complete as we never continue to PSR enable with
+	 * untrained link. Link_ok is kept as set until first short pulse
+	 * interrupt. This is targeted to workaround panels stating bad link
+	 * after PSR is enabled.
+	 */
+	intel_dp->psr.link_ok = true;
+
 	intel_psr_activate(intel_dp);
 }
 
@@ -3458,6 +3467,9 @@  void intel_psr_short_pulse(struct intel_dp *intel_dp)
 
 	mutex_lock(&psr->lock);
 
+	/* Let's clear possibly set link_ok flag here */
+	psr->link_ok = false;
+
 	if (!psr->enabled)
 		goto exit;
 
@@ -3517,6 +3529,33 @@  bool intel_psr_enabled(struct intel_dp *intel_dp)
 	return ret;
 }
 
+/**
+ * intel_psr_link_ok - return psr->link_ok
+ * @intel_dp: struct intel_dp
+ *
+ * We are seeing unexpected link re-trainings with some panels. This is caused
+ * by panel stating bad link status after PSR is enabled. Code checking link
+ * status can call this to ensure it can ignore bad link status stated by the
+ * panel I.e. if panel is stating bad link and intel_psr_link_ok is stating link
+ * is ok caller should rely on latter.
+ *
+ * Return value of link_ok
+ */
+bool intel_psr_link_ok(struct intel_dp *intel_dp)
+{
+	bool ret;
+
+	if ((!CAN_PSR(intel_dp) && !CAN_PANEL_REPLAY(intel_dp)) ||
+	    !intel_dp_is_edp(intel_dp))
+		return false;
+
+	mutex_lock(&intel_dp->psr.lock);
+	ret = intel_dp->psr.link_ok;
+	mutex_unlock(&intel_dp->psr.lock);
+
+	return ret;
+}
+
 /**
  * intel_psr_lock - grab PSR lock
  * @crtc_state: the crtc state
diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h
index 5f26f61f82aa..956be263c09e 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.h
+++ b/drivers/gpu/drm/i915/display/intel_psr.h
@@ -59,6 +59,7 @@  void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st
 void intel_psr_pause(struct intel_dp *intel_dp);
 void intel_psr_resume(struct intel_dp *intel_dp);
 bool intel_psr_needs_block_dc_vblank(const struct intel_crtc_state *crtc_state);
+bool intel_psr_link_ok(struct intel_dp *intel_dp);
 
 void intel_psr_lock(const struct intel_crtc_state *crtc_state);
 void intel_psr_unlock(const struct intel_crtc_state *crtc_state);