diff mbox series

drm/i915/dp: Reset link params on connector disconnect

Message ID 20200604002359.17128-1-manasi.d.navare@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/dp: Reset link params on connector disconnect | expand

Commit Message

Navare, Manasi June 4, 2020, 12:23 a.m. UTC
We have noticed that when link training fails, the panel
sends a long pulse indicating a connector disconnect. In this case
we need to reset the link parameters instead of continuing
to use the fallback parameters. Otherwise, this long pulse
from the panel, followed by a modeset request that userspace triggered
before seeing the connector status as disconnected, will
result in a modeset using the lower link rate/lane count values.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1385
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp.c | 28 +++++++++++++++++--------
 1 file changed, 19 insertions(+), 9 deletions(-)

Comments

Ville Syrjala June 4, 2020, 3:25 p.m. UTC | #1
On Wed, Jun 03, 2020 at 05:23:59PM -0700, Manasi Navare wrote:
> We have noticed that when link training fails the panel
> sends a long pulse indicating connector disconnect. In this case
> we need to reset the link parameters instead of continuing
> to use the fallback parameters since else this long pulse
> by the panel followed by a modeset request which was triggered by the userspace
> before getting the connector status as disconnected, will
> result into a modeset now using lower link rate/lane count values.
> 
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1385
> Cc: Jani Nikula <jani.nikula@linux.intel.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_dp.c | 28 +++++++++++++++++--------
>  1 file changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> index 55fda074c0ad..f7af372647dd 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -6111,6 +6111,18 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
>  	intel_dp->edid_quirks = 0;
>  }
>  
> +static void
> +intel_dp_reset_link_params(struct intel_dp *intel_dp)
> +{
> +	/* Initial max link lane count */
> +	intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> +
> +	/* Initial max link rate */
> +	intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> +
> +	intel_dp->reset_link_params = false;
> +}
> +
>  static int
>  intel_dp_detect(struct drm_connector *connector,
>  		struct drm_modeset_acquire_ctx *ctx,
> @@ -6139,6 +6151,11 @@ intel_dp_detect(struct drm_connector *connector,
>  		memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance));
>  		memset(intel_dp->dsc_dpcd, 0, sizeof(intel_dp->dsc_dpcd));
>  
> +		/*Reset the immutable VRR Capable property */
> +		drm_connector_set_vrr_capable_property(connector,
> +						       false);
> +		intel_dp_reset_link_params(intel_dp);
> +

Why would we care what those are when the sink is disconnected?

>  		if (intel_dp->is_mst) {
>  			drm_dbg_kms(&dev_priv->drm,
>  				    "MST device may have disappeared %d vs %d\n",
> @@ -6152,15 +6169,8 @@ intel_dp_detect(struct drm_connector *connector,
>  		goto out;
>  	}
>  
> -	if (intel_dp->reset_link_params) {
> -		/* Initial max link lane count */
> -		intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> -
> -		/* Initial max link rate */
> -		intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> -
> -		intel_dp->reset_link_params = false;
> -	}
> +	if (intel_dp->reset_link_params)
> +		intel_dp_reset_link_params(intel_dp);
>  
>  	intel_dp_print_rates(intel_dp);
>  
> -- 
> 2.19.1
Navare, Manasi June 4, 2020, 6:35 p.m. UTC | #2
On Thu, Jun 04, 2020 at 06:25:43PM +0300, Ville Syrjälä wrote:
> On Wed, Jun 03, 2020 at 05:23:59PM -0700, Manasi Navare wrote:
> > We have noticed that when link training fails the panel
> > sends a long pulse indicating connector disconnect. In this case
> > we need to reset the link parameters instead of continuing
> > to use the fallback parameters since else this long pulse
> > by the panel followed by a modeset request which was triggered by the userspace
> > before getting the connector status as disconnected, will
> > result into a modeset now using lower link rate/lane count values.
> > 
> > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1385
> > Cc: Jani Nikula <jani.nikula@linux.intel.com>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
> > ---
> >  drivers/gpu/drm/i915/display/intel_dp.c | 28 +++++++++++++++++--------
> >  1 file changed, 19 insertions(+), 9 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> > index 55fda074c0ad..f7af372647dd 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > @@ -6111,6 +6111,18 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
> >  	intel_dp->edid_quirks = 0;
> >  }
> >  
> > +static void
> > +intel_dp_reset_link_params(struct intel_dp *intel_dp)
> > +{
> > +	/* Initial max link lane count */
> > +	intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> > +
> > +	/* Initial max link rate */
> > +	intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> > +
> > +	intel_dp->reset_link_params = false;
> > +}
> > +
> >  static int
> >  intel_dp_detect(struct drm_connector *connector,
> >  		struct drm_modeset_acquire_ctx *ctx,
> > @@ -6139,6 +6151,11 @@ intel_dp_detect(struct drm_connector *connector,
> >  		memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance));
> >  		memset(intel_dp->dsc_dpcd, 0, sizeof(intel_dp->dsc_dpcd));
> >  
> > +		/*Reset the immutable VRR Capable property */
> > +		drm_connector_set_vrr_capable_property(connector,
> > +						       false);
> > +		intel_dp_reset_link_params(intel_dp);
> > +
> 
> Why would we care what those are when the sink is disconnected?

We are seeing this when the panel sends a long pulse indicating a
status change to disconnected while a modeset was already triggered by userspace
(in this case IGT), so the modeset continues right after i915_hotplug_work_fn.
We therefore need to reset all the params, which fixes the bug mentioned.

Manasi

> 
> >  		if (intel_dp->is_mst) {
> >  			drm_dbg_kms(&dev_priv->drm,
> >  				    "MST device may have disappeared %d vs %d\n",
> > @@ -6152,15 +6169,8 @@ intel_dp_detect(struct drm_connector *connector,
> >  		goto out;
> >  	}
> >  
> > -	if (intel_dp->reset_link_params) {
> > -		/* Initial max link lane count */
> > -		intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> > -
> > -		/* Initial max link rate */
> > -		intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> > -
> > -		intel_dp->reset_link_params = false;
> > -	}
> > +	if (intel_dp->reset_link_params)
> > +		intel_dp_reset_link_params(intel_dp);
> >  
> >  	intel_dp_print_rates(intel_dp);
> >  
> > -- 
> > 2.19.1
> 
> -- 
> Ville Syrjälä
> Intel
Ville Syrjala June 4, 2020, 6:38 p.m. UTC | #3
On Thu, Jun 04, 2020 at 11:35:30AM -0700, Manasi Navare wrote:
> On Thu, Jun 04, 2020 at 06:25:43PM +0300, Ville Syrjälä wrote:
> > On Wed, Jun 03, 2020 at 05:23:59PM -0700, Manasi Navare wrote:
> > > We have noticed that when link training fails the panel
> > > sends a long pulse indicating connector disconnect. In this case
> > > we need to reset the link parameters instead of continuing
> > > to use the fallback parameters since else this long pulse
> > > by the panel followed by a modeset request which was triggered by the userspace
> > > before getting the connector status as disconnected, will
> > > result into a modeset now using lower link rate/lane count values.
> > > 
> > > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1385
> > > Cc: Jani Nikula <jani.nikula@linux.intel.com>
> > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/display/intel_dp.c | 28 +++++++++++++++++--------
> > >  1 file changed, 19 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> > > index 55fda074c0ad..f7af372647dd 100644
> > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > > @@ -6111,6 +6111,18 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
> > >  	intel_dp->edid_quirks = 0;
> > >  }
> > >  
> > > +static void
> > > +intel_dp_reset_link_params(struct intel_dp *intel_dp)
> > > +{
> > > +	/* Initial max link lane count */
> > > +	intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> > > +
> > > +	/* Initial max link rate */
> > > +	intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> > > +
> > > +	intel_dp->reset_link_params = false;
> > > +}
> > > +
> > >  static int
> > >  intel_dp_detect(struct drm_connector *connector,
> > >  		struct drm_modeset_acquire_ctx *ctx,
> > > @@ -6139,6 +6151,11 @@ intel_dp_detect(struct drm_connector *connector,
> > >  		memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance));
> > >  		memset(intel_dp->dsc_dpcd, 0, sizeof(intel_dp->dsc_dpcd));
> > >  
> > > +		/*Reset the immutable VRR Capable property */
> > > +		drm_connector_set_vrr_capable_property(connector,
> > > +						       false);
> > > +		intel_dp_reset_link_params(intel_dp);
> > > +
> > 
> > Why would we care what those are when the sink is disconnected?
> 
> We are noticing this happen in case the panel send this long pulse indicating
> status change to disconnected, while the modeset was already triggered by userspace
> in this case IGT, so the modeset continues right after i915_hotplug_work_fn
> so we need to reset all params which fixes the bug mentioned.

Why did the link params get out of whack before hpd in the first place?

> 
> Manasi
> 
> > 
> > >  		if (intel_dp->is_mst) {
> > >  			drm_dbg_kms(&dev_priv->drm,
> > >  				    "MST device may have disappeared %d vs %d\n",
> > > @@ -6152,15 +6169,8 @@ intel_dp_detect(struct drm_connector *connector,
> > >  		goto out;
> > >  	}
> > >  
> > > -	if (intel_dp->reset_link_params) {
> > > -		/* Initial max link lane count */
> > > -		intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> > > -
> > > -		/* Initial max link rate */
> > > -		intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> > > -
> > > -		intel_dp->reset_link_params = false;
> > > -	}
> > > +	if (intel_dp->reset_link_params)
> > > +		intel_dp_reset_link_params(intel_dp);
> > >  
> > >  	intel_dp_print_rates(intel_dp);
> > >  
> > > -- 
> > > 2.19.1
> > 
> > -- 
> > Ville Syrjälä
> > Intel
Navare, Manasi June 4, 2020, 6:52 p.m. UTC | #4
On Thu, Jun 04, 2020 at 09:38:19PM +0300, Ville Syrjälä wrote:
> On Thu, Jun 04, 2020 at 11:35:30AM -0700, Manasi Navare wrote:
> > On Thu, Jun 04, 2020 at 06:25:43PM +0300, Ville Syrjälä wrote:
> > > On Wed, Jun 03, 2020 at 05:23:59PM -0700, Manasi Navare wrote:
> > > > We have noticed that when link training fails the panel
> > > > sends a long pulse indicating connector disconnect. In this case
> > > > we need to reset the link parameters instead of continuing
> > > > to use the fallback parameters since else this long pulse
> > > > by the panel followed by a modeset request which was triggered by the userspace
> > > > before getting the connector status as disconnected, will
> > > > result into a modeset now using lower link rate/lane count values.
> > > > 
> > > > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1385
> > > > Cc: Jani Nikula <jani.nikula@linux.intel.com>
> > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/display/intel_dp.c | 28 +++++++++++++++++--------
> > > >  1 file changed, 19 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> > > > index 55fda074c0ad..f7af372647dd 100644
> > > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > > > @@ -6111,6 +6111,18 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
> > > >  	intel_dp->edid_quirks = 0;
> > > >  }
> > > >  
> > > > +static void
> > > > +intel_dp_reset_link_params(struct intel_dp *intel_dp)
> > > > +{
> > > > +	/* Initial max link lane count */
> > > > +	intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> > > > +
> > > > +	/* Initial max link rate */
> > > > +	intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> > > > +
> > > > +	intel_dp->reset_link_params = false;
> > > > +}
> > > > +
> > > >  static int
> > > >  intel_dp_detect(struct drm_connector *connector,
> > > >  		struct drm_modeset_acquire_ctx *ctx,
> > > > @@ -6139,6 +6151,11 @@ intel_dp_detect(struct drm_connector *connector,
> > > >  		memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance));
> > > >  		memset(intel_dp->dsc_dpcd, 0, sizeof(intel_dp->dsc_dpcd));
> > > >  
> > > > +		/*Reset the immutable VRR Capable property */
> > > > +		drm_connector_set_vrr_capable_property(connector,
> > > > +						       false);
> > > > +		intel_dp_reset_link_params(intel_dp);
> > > > +
> > > 
> > > Why would we care what those are when the sink is disconnected?
> > 
> > We are noticing this happen in case the panel send this long pulse indicating
> > status change to disconnected, while the modeset was already triggered by userspace
> > in this case IGT, so the modeset continues right after i915_hotplug_work_fn
> > so we need to reset all params which fixes the bug mentioned.
> 
> Why did the link params get out of whack before hpd in the first place?
>

In most of the failures, we see the link training fail due to AUX timeouts, and then the link params fall back to lower values.
Then we get this hpd. In this case, if we don't reset the params to max values, the previously triggered modeset continues
with the fallback values, but since the connector probe doesn't happen again through IGT, it tries the same mode
with the fallback values and returns an encoder config failure.

So after resetting the params, the modeset happens with the original values and that time link training passes.
This is seen in all kms_atomic_transitions IGT tests.

Manasi
 
> > 
> > Manasi
> > 
> > > 
> > > >  		if (intel_dp->is_mst) {
> > > >  			drm_dbg_kms(&dev_priv->drm,
> > > >  				    "MST device may have disappeared %d vs %d\n",
> > > > @@ -6152,15 +6169,8 @@ intel_dp_detect(struct drm_connector *connector,
> > > >  		goto out;
> > > >  	}
> > > >  
> > > > -	if (intel_dp->reset_link_params) {
> > > > -		/* Initial max link lane count */
> > > > -		intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> > > > -
> > > > -		/* Initial max link rate */
> > > > -		intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> > > > -
> > > > -		intel_dp->reset_link_params = false;
> > > > -	}
> > > > +	if (intel_dp->reset_link_params)
> > > > +		intel_dp_reset_link_params(intel_dp);
> > > >  
> > > >  	intel_dp_print_rates(intel_dp);
> > > >  
> > > > -- 
> > > > 2.19.1
> > > 
> > > -- 
> > > Ville Syrjälä
> > > Intel
> 
> -- 
> Ville Syrjälä
> Intel
Ville Syrjala June 4, 2020, 6:58 p.m. UTC | #5
On Thu, Jun 04, 2020 at 11:52:24AM -0700, Manasi Navare wrote:
> On Thu, Jun 04, 2020 at 09:38:19PM +0300, Ville Syrjälä wrote:
> > On Thu, Jun 04, 2020 at 11:35:30AM -0700, Manasi Navare wrote:
> > > On Thu, Jun 04, 2020 at 06:25:43PM +0300, Ville Syrjälä wrote:
> > > > On Wed, Jun 03, 2020 at 05:23:59PM -0700, Manasi Navare wrote:
> > > > > We have noticed that when link training fails the panel
> > > > > sends a long pulse indicating connector disconnect. In this case
> > > > > we need to reset the link parameters instead of continuing
> > > > > to use the fallback parameters since else this long pulse
> > > > > by the panel followed by a modeset request which was triggered by the userspace
> > > > > before getting the connector status as disconnected, will
> > > > > result into a modeset now using lower link rate/lane count values.
> > > > > 
> > > > > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1385
> > > > > Cc: Jani Nikula <jani.nikula@linux.intel.com>
> > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/display/intel_dp.c | 28 +++++++++++++++++--------
> > > > >  1 file changed, 19 insertions(+), 9 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> > > > > index 55fda074c0ad..f7af372647dd 100644
> > > > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > > > > @@ -6111,6 +6111,18 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
> > > > >  	intel_dp->edid_quirks = 0;
> > > > >  }
> > > > >  
> > > > > +static void
> > > > > +intel_dp_reset_link_params(struct intel_dp *intel_dp)
> > > > > +{
> > > > > +	/* Initial max link lane count */
> > > > > +	intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> > > > > +
> > > > > +	/* Initial max link rate */
> > > > > +	intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> > > > > +
> > > > > +	intel_dp->reset_link_params = false;
> > > > > +}
> > > > > +
> > > > >  static int
> > > > >  intel_dp_detect(struct drm_connector *connector,
> > > > >  		struct drm_modeset_acquire_ctx *ctx,
> > > > > @@ -6139,6 +6151,11 @@ intel_dp_detect(struct drm_connector *connector,
> > > > >  		memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance));
> > > > >  		memset(intel_dp->dsc_dpcd, 0, sizeof(intel_dp->dsc_dpcd));
> > > > >  
> > > > > +		/*Reset the immutable VRR Capable property */
> > > > > +		drm_connector_set_vrr_capable_property(connector,
> > > > > +						       false);
> > > > > +		intel_dp_reset_link_params(intel_dp);
> > > > > +
> > > > 
> > > > Why would we care what those are when the sink is disconnected?
> > > 
> > > We are noticing this happen in case the panel send this long pulse indicating
> > > status change to disconnected, while the modeset was already triggered by userspace
> > > in this case IGT, so the modeset continues right after i915_hotplug_work_fn
> > > so we need to reset all params which fixes the bug mentioned.
> > 
> > Why did the link params get out of whack before hpd in the first place?
> >
> 
> Most of the failures, we see the link training fails due to AUX timeouts and then link params fallback to lower values
> Then we get this hpd, in this case if we dont reset the param to max values, prev triggered modeset continues
> with fallback values but since connector probe doesnt happen again through IGT, it tries the same mode
> with fallback values and return encoder config failure.

If the link training failed then clearly the sink didn't like us anymore
anyway. So feels like resetting these here is just shifting some race
window around a bit, but it could still fail if the sink still doesn't
like us.

Would be good if someone was able to figure out why the sink goes bad in
the first place.

> 
> So after reseting the params, the modeset happens with original values and that time link training passes.
> This is seen in all kms_atomic_transitions IGT tests
> 
> Manasi
>  
> > > 
> > > Manasi
> > > 
> > > > 
> > > > >  		if (intel_dp->is_mst) {
> > > > >  			drm_dbg_kms(&dev_priv->drm,
> > > > >  				    "MST device may have disappeared %d vs %d\n",
> > > > > @@ -6152,15 +6169,8 @@ intel_dp_detect(struct drm_connector *connector,
> > > > >  		goto out;
> > > > >  	}
> > > > >  
> > > > > -	if (intel_dp->reset_link_params) {
> > > > > -		/* Initial max link lane count */
> > > > > -		intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> > > > > -
> > > > > -		/* Initial max link rate */
> > > > > -		intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> > > > > -
> > > > > -		intel_dp->reset_link_params = false;
> > > > > -	}
> > > > > +	if (intel_dp->reset_link_params)
> > > > > +		intel_dp_reset_link_params(intel_dp);
> > > > >  
> > > > >  	intel_dp_print_rates(intel_dp);
> > > > >  
> > > > > -- 
> > > > > 2.19.1
> > > > 
> > > > -- 
> > > > Ville Syrjälä
> > > > Intel
> > 
> > -- 
> > Ville Syrjälä
> > Intel
Ville Syrjala June 4, 2020, 7:01 p.m. UTC | #6
On Thu, Jun 04, 2020 at 09:58:24PM +0300, Ville Syrjälä wrote:
> On Thu, Jun 04, 2020 at 11:52:24AM -0700, Manasi Navare wrote:
> > On Thu, Jun 04, 2020 at 09:38:19PM +0300, Ville Syrjälä wrote:
> > > On Thu, Jun 04, 2020 at 11:35:30AM -0700, Manasi Navare wrote:
> > > > On Thu, Jun 04, 2020 at 06:25:43PM +0300, Ville Syrjälä wrote:
> > > > > On Wed, Jun 03, 2020 at 05:23:59PM -0700, Manasi Navare wrote:
> > > > > > We have noticed that when link training fails the panel
> > > > > > sends a long pulse indicating connector disconnect. In this case
> > > > > > we need to reset the link parameters instead of continuing
> > > > > > to use the fallback parameters since else this long pulse
> > > > > > by the panel followed by a modeset request which was triggered by the userspace
> > > > > > before getting the connector status as disconnected, will
> > > > > > result into a modeset now using lower link rate/lane count values.
> > > > > > 
> > > > > > Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1385
> > > > > > Cc: Jani Nikula <jani.nikula@linux.intel.com>
> > > > > > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > > > > > Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
> > > > > > ---
> > > > > >  drivers/gpu/drm/i915/display/intel_dp.c | 28 +++++++++++++++++--------
> > > > > >  1 file changed, 19 insertions(+), 9 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
> > > > > > index 55fda074c0ad..f7af372647dd 100644
> > > > > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > > > > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > > > > > @@ -6111,6 +6111,18 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
> > > > > >  	intel_dp->edid_quirks = 0;
> > > > > >  }
> > > > > >  
> > > > > > +static void
> > > > > > +intel_dp_reset_link_params(struct intel_dp *intel_dp)
> > > > > > +{
> > > > > > +	/* Initial max link lane count */
> > > > > > +	intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> > > > > > +
> > > > > > +	/* Initial max link rate */
> > > > > > +	intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> > > > > > +
> > > > > > +	intel_dp->reset_link_params = false;
> > > > > > +}
> > > > > > +
> > > > > >  static int
> > > > > >  intel_dp_detect(struct drm_connector *connector,
> > > > > >  		struct drm_modeset_acquire_ctx *ctx,
> > > > > > @@ -6139,6 +6151,11 @@ intel_dp_detect(struct drm_connector *connector,
> > > > > >  		memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance));
> > > > > >  		memset(intel_dp->dsc_dpcd, 0, sizeof(intel_dp->dsc_dpcd));
> > > > > >  
> > > > > > +		/*Reset the immutable VRR Capable property */
> > > > > > +		drm_connector_set_vrr_capable_property(connector,
> > > > > > +						       false);
> > > > > > +		intel_dp_reset_link_params(intel_dp);
> > > > > > +
> > > > > 
> > > > > Why would we care what those are when the sink is disconnected?
> > > > 
> > > > We are noticing this happen in case the panel send this long pulse indicating
> > > > status change to disconnected, while the modeset was already triggered by userspace
> > > > in this case IGT, so the modeset continues right after i915_hotplug_work_fn
> > > > so we need to reset all params which fixes the bug mentioned.
> > > 
> > > Why did the link params get out of whack before hpd in the first place?
> > >
> > 
> > Most of the failures, we see the link training fails due to AUX timeouts and then link params fallback to lower values
> > Then we get this hpd, in this case if we dont reset the param to max values, prev triggered modeset continues
> > with fallback values but since connector probe doesnt happen again through IGT, it tries the same mode
> > with fallback values and return encoder config failure.
> 
> If the link training failed then clearly the sink didn't like us anymore
> anyway. So feels like resetting these here is just shifting some race
> window around a bit, but it could still fail if the sink still doesn't
> like us.
> 
> Would be good if someone was able to figure out why the sink goes bad in
> the first place.

Oh, and don't we now have Imre's "weird hpd happened in the middle of
the test, don't trust the results" thing in igt?

> 
> > 
> > So after reseting the params, the modeset happens with original values and that time link training passes.
> > This is seen in all kms_atomic_transitions IGT tests
> > 
> > Manasi
> >  
> > > > 
> > > > Manasi
> > > > 
> > > > > 
> > > > > >  		if (intel_dp->is_mst) {
> > > > > >  			drm_dbg_kms(&dev_priv->drm,
> > > > > >  				    "MST device may have disappeared %d vs %d\n",
> > > > > > @@ -6152,15 +6169,8 @@ intel_dp_detect(struct drm_connector *connector,
> > > > > >  		goto out;
> > > > > >  	}
> > > > > >  
> > > > > > -	if (intel_dp->reset_link_params) {
> > > > > > -		/* Initial max link lane count */
> > > > > > -		intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
> > > > > > -
> > > > > > -		/* Initial max link rate */
> > > > > > -		intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
> > > > > > -
> > > > > > -		intel_dp->reset_link_params = false;
> > > > > > -	}
> > > > > > +	if (intel_dp->reset_link_params)
> > > > > > +		intel_dp_reset_link_params(intel_dp);
> > > > > >  
> > > > > >  	intel_dp_print_rates(intel_dp);
> > > > > >  
> > > > > > -- 
> > > > > > 2.19.1
> > > > > 
> > > > > -- 
> > > > > Ville Syrjälä
> > > > > Intel
> > > 
> > > -- 
> > > Ville Syrjälä
> > > Intel
> 
> -- 
> Ville Syrjälä
> Intel
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Imre Deak June 4, 2020, 7:08 p.m. UTC | #7
On Thu, Jun 04, 2020 at 10:01:40PM +0300, Ville Syrjälä wrote:
> [...]
> > > Then we get this hpd, in this case if we dont reset the param to max values, prev triggered modeset continues
> > > with fallback values but since connector probe doesnt happen again through IGT, it tries the same mode
> > > with fallback values and return encoder config failure.
> > 
> > If the link training failed then clearly the sink didn't like us anymore
> > anyway. So feels like resetting these here is just shifting some race
> > window around a bit, but it could still fail if the sink still doesn't
> > like us.
> > 
> > Would be good if someone was able to figure out why the sink goes bad in
> > the first place.
> 
> Oh, and don't we now have Imre's "weird hpd happened in the middle of
> the test, don't trust the results" thing in igt?

On an LG and an Iiyama monitor this happens on disconnect and reconnect after
waking from an idle state when modesetting them; not sure if it's the
same case.

--Imre
Navare, Manasi June 4, 2020, 7:20 p.m. UTC | #8
On Thu, Jun 04, 2020 at 10:08:58PM +0300, Imre Deak wrote:
> On Thu, Jun 04, 2020 at 10:01:40PM +0300, Ville Syrjälä wrote:
> > [...]
> > > > Then we get this hpd, in this case if we dont reset the param to max values, prev triggered modeset continues
> > > > with fallback values but since connector probe doesnt happen again through IGT, it tries the same mode
> > > > with fallback values and return encoder config failure.
> > > 
> > > If the link training failed then clearly the sink didn't like us anymore
> > > anyway. So feels like resetting these here is just shifting some race
> > > window around a bit, but it could still fail if the sink still doesn't
> > > like us.
> > > 
> > > Would be good if someone was able to figure out why the sink goes bad in
> > > the first place.
> > 
> > Oh, and don't we now have Imre's "weird hpd happened in the middle of
> > the test, don't trust the results" thing in igt?
> 
> An LG and IIyama monitor this happens on disconnect and reconnect after
> waking from an idle state when modesetting them, not sure if it's the
> same case.

Well, in this case it happens just after a link training failure due to some AUX timeouts;
then it looks like the panel detects that the link was not enabled and sends this HPD,
which results in the connector status changing from connected to disconnected.

But in IGT we don't get any uevent, so we don't reprobe and we continue with the next
igt_display_commit.
So in IGT, in the kms_atomic_transitions plane-all-modeset-transitions subtest,
should we check the connector status every time before back-to-back commit calls?

I think in a real use case, after a link failure the userspace would get a uevent and
respond to it by reprobing the connector, but we don't do that in IGT, so these
random link failures cause issues like the one here.

Manasi

> 
> --Imre
Imre Deak June 4, 2020, 7:23 p.m. UTC | #9
On Thu, Jun 04, 2020 at 10:08:58PM +0300, Imre Deak wrote:
> On Thu, Jun 04, 2020 at 10:01:40PM +0300, Ville Syrjälä wrote:
> > [...]
> > > > Then we get this hpd, in this case if we dont reset the param to max values, prev triggered modeset continues
> > > > with fallback values but since connector probe doesnt happen again through IGT, it tries the same mode
> > > > with fallback values and return encoder config failure.
> > > 
> > > If the link training failed then clearly the sink didn't like us anymore
> > > anyway. So feels like resetting these here is just shifting some race
> > > window around a bit, but it could still fail if the sink still doesn't
> > > like us.
> > > 
> > > Would be good if someone was able to figure out why the sink goes bad in
> > > the first place.
> > 
> > Oh, and don't we now have Imre's "weird hpd happened in the middle of
> > the test, don't trust the results" thing in igt?
> 
> An LG and IIyama monitor this happens on disconnect and reconnect after
> waking from an idle state when modesetting them, not sure if it's the
> same case.

Manasi, could you check whether a modeset on the monitor after it has been
disabled for a while always results in a long HPD pulse a few seconds
after the modeset? If so, does this also happen when you just modeset in
a sequence from one mode to another without letting the monitor idle? The
same monitor should then also be tested with the above sequences on
older platforms to see if it behaves the same on those too.

> 
> --Imre
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Navare, Manasi June 4, 2020, 7:40 p.m. UTC | #10
On Thu, Jun 04, 2020 at 10:23:40PM +0300, Imre Deak wrote:
> On Thu, Jun 04, 2020 at 10:08:58PM +0300, Imre Deak wrote:
> > On Thu, Jun 04, 2020 at 10:01:40PM +0300, Ville Syrjälä wrote:
> > > [...]
> > > > > Then we get this hpd, in this case if we dont reset the param to max values, prev triggered modeset continues
> > > > > with fallback values but since connector probe doesnt happen again through IGT, it tries the same mode
> > > > > with fallback values and return encoder config failure.
> > > > 
> > > > If the link training failed then clearly the sink didn't like us anymore
> > > > anyway. So feels like resetting these here is just shifting some race
> > > > window around a bit, but it could still fail if the sink still doesn't
> > > > like us.
> > > > 
> > > > Would be good if someone was able to figure out why the sink goes bad in
> > > > the first place.
> > > 
> > > Oh, and don't we now have Imre's "weird hpd happened in the middle of
> > > the test, don't trust the results" thing in igt?
> > 
> > An LG and IIyama monitor this happens on disconnect and reconnect after
> > waking from an idle state when modesetting them, not sure if it's the
> > same case.
> 
> Manasi, could you try if a modeset on the monitor after it has been
> disabled for a while always results in a long HPD pulse a few seconds
> after the modeset? If so does this also happen when you just modeset in
> a sequence from one mode to the other not letting the monitor idle? The
> same monitor should be also tested then with the above sequences on
> older platforms if it behaves the same on those too.
>

This test has been passing on older ICL platforms. But on TGL we do
see these AUX E timeouts once in a while, which recover on their own
for the next modeset. Any idea why these spurious AUX timeouts occur,
and how I can root-cause why these timeouts are seen only with AUX E?

Manasi
 
> > 
> > --Imre
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Imre Deak June 4, 2020, 7:49 p.m. UTC | #11
On Thu, Jun 04, 2020 at 12:40:20PM -0700, Manasi Navare wrote:
> On Thu, Jun 04, 2020 at 10:23:40PM +0300, Imre Deak wrote:
> > On Thu, Jun 04, 2020 at 10:08:58PM +0300, Imre Deak wrote:
> > > On Thu, Jun 04, 2020 at 10:01:40PM +0300, Ville Syrjälä wrote:
> > > > [...]
> > > > > > Then we get this hpd, in this case if we dont reset the param to max values, prev triggered modeset continues
> > > > > > with fallback values but since connector probe doesnt happen again through IGT, it tries the same mode
> > > > > > with fallback values and return encoder config failure.
> > > > > 
> > > > > If the link training failed then clearly the sink didn't like us anymore
> > > > > anyway. So feels like resetting these here is just shifting some race
> > > > > window around a bit, but it could still fail if the sink still doesn't
> > > > > like us.
> > > > > 
> > > > > Would be good if someone was able to figure out why the sink goes bad in
> > > > > the first place.
> > > > 
> > > > Oh, and don't we now have Imre's "weird hpd happened in the middle of
> > > > the test, don't trust the results" thing in igt?
> > > 
> > > An LG and IIyama monitor this happens on disconnect and reconnect after
> > > waking from an idle state when modesetting them, not sure if it's the
> > > same case.
> > 
> > Manasi, could you try if a modeset on the monitor after it has been
> > disabled for a while always results in a long HPD pulse a few seconds
> > after the modeset? If so does this also happen when you just modeset in
> > a sequence from one mode to the other not letting the monitor idle? The
> > same monitor should be also tested then with the above sequences on
> > older platforms if it behaves the same on those too.
> >
> 
> This test has been passing on older ICL platforms. But on TGL we do
> see these AUX E timeouts once in a while which recover on their own
> for the next modeset. Any idea why these spurious AUX timeouts and how
> I can possibly rootcause why these timeouts are seen only with AUX E?

If the monitor is in a disconnected state as you described, then AUX
will fail too. So you need to root-cause why the monitor gets
disconnected. One possibility for that is what I described above. You
can't really draw a conclusion from a test passing on ICL and not on
TGL; the timing can be different. You'd need to check whether a
disconnect happens due to a long HPD pulse when using the same monitor
with the sequences I described above, both on TGL and then also on ICL.

> 
> Manasi
>  
> > > 
> > > --Imre
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 55fda074c0ad..f7af372647dd 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -6111,6 +6111,18 @@  intel_dp_unset_edid(struct intel_dp *intel_dp)
 	intel_dp->edid_quirks = 0;
 }
 
+static void
+intel_dp_reset_link_params(struct intel_dp *intel_dp)
+{
+	/* Initial max link lane count */
+	intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
+
+	/* Initial max link rate */
+	intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
+
+	intel_dp->reset_link_params = false;
+}
+
 static int
 intel_dp_detect(struct drm_connector *connector,
 		struct drm_modeset_acquire_ctx *ctx,
@@ -6139,6 +6151,11 @@  intel_dp_detect(struct drm_connector *connector,
 		memset(&intel_dp->compliance, 0, sizeof(intel_dp->compliance));
 		memset(intel_dp->dsc_dpcd, 0, sizeof(intel_dp->dsc_dpcd));
 
+		/*Reset the immutable VRR Capable property */
+		drm_connector_set_vrr_capable_property(connector,
+						       false);
+		intel_dp_reset_link_params(intel_dp);
+
 		if (intel_dp->is_mst) {
 			drm_dbg_kms(&dev_priv->drm,
 				    "MST device may have disappeared %d vs %d\n",
@@ -6152,15 +6169,8 @@  intel_dp_detect(struct drm_connector *connector,
 		goto out;
 	}
 
-	if (intel_dp->reset_link_params) {
-		/* Initial max link lane count */
-		intel_dp->max_link_lane_count = intel_dp_max_common_lane_count(intel_dp);
-
-		/* Initial max link rate */
-		intel_dp->max_link_rate = intel_dp_max_common_rate(intel_dp);
-
-		intel_dp->reset_link_params = false;
-	}
+	if (intel_dp->reset_link_params)
+		intel_dp_reset_link_params(intel_dp);
 
 	intel_dp_print_rates(intel_dp);