diff mbox

drm/i915: Add a parameter to disable SAGV

Message ID 1519688751-47703-1-git-send-email-azhar.shaikh@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Azhar Shaikh Feb. 26, 2018, 11:45 p.m. UTC
SAGV handling is currently broken, which can result in system hangs.
Add a parameter to disable SAGV until the SAGV handling is fixed.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104975
Signed-off-by: Azhar Shaikh <azhar.shaikh@intel.com>
---
 drivers/gpu/drm/i915/i915_params.c | 3 +++
 drivers/gpu/drm/i915/i915_params.h | 3 ++-
 drivers/gpu/drm/i915/intel_pm.c    | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

Comments

Rodrigo Vivi Feb. 27, 2018, 12:38 a.m. UTC | #1
On Mon, Feb 26, 2018 at 03:45:51PM -0800, Azhar Shaikh wrote:
> SAGV handling is currently broken which can result in system hangs.
> Add a parameter to disable SAGV, till the SAGV handling is fixed.

It is not just the handling of the limitations we have with SAGV that is
broken; there is also probably some hidden DBUF config issue that is worse
when SAGV is enabled.

However, the handling being broken is not a good reason for the parameter
itself. The good part of this is the ability to debug display
hard-hang issues with SAGV requirements.

I'm in favor of the parameter. We just need a different justification
here.

> 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104975
> Signed-off-by: Azhar Shaikh <azhar.shaikh@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_params.c | 3 +++
>  drivers/gpu/drm/i915/i915_params.h | 3 ++-
>  drivers/gpu/drm/i915/intel_pm.c    | 2 +-
>  3 files changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
> index 08108ce5be21..6aede52240b0 100644
> --- a/drivers/gpu/drm/i915/i915_params.c
> +++ b/drivers/gpu/drm/i915/i915_params.c
> @@ -167,6 +167,9 @@ struct i915_params i915_modparams __read_mostly = {
>  i915_param_named_unsafe(enable_dp_mst, bool, 0600,
>  	"Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)");
>  
> +i915_param_named_unsafe(disable_sagv, bool, 0600,
> +	"Disable SAGV (default: false)");
> +

I understand that SAGV defaults to enabled and is enabled by the BIOS, so we
actually need to disable it.

However, the name will cause the same old double-negative confusion as disabling "disable power well"...

So I'm in favor of a bool i915.enable_sagv that defaults to enabled,
using i915.enable_sagv=0 when we need to disable it.

>  #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
>  i915_param_named_unsafe(inject_load_failure, uint, 0400,
>  	"Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)");
> diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
> index 430f5f9d0ff4..ff3c7d5dee2d 100644
> --- a/drivers/gpu/drm/i915/i915_params.h
> +++ b/drivers/gpu/drm/i915/i915_params.h
> @@ -69,7 +69,8 @@
>  	param(bool, nuclear_pageflip, false) \
>  	param(bool, enable_dp_mst, true) \
>  	param(bool, enable_dpcd_backlight, false) \
> -	param(bool, enable_gvt, false)
> +	param(bool, enable_gvt, false) \
> +	param(bool, disable_sagv, false)
>  
>  #define MEMBER(T, member, ...) T member;
>  struct i915_params {
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 21dac6ebc202..0b1a6cbf45aa 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3693,7 +3693,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state)
>  	int level, latency;
>  	int sagv_block_time_us;
>  
> -	if (!intel_has_sagv(dev_priv))
> +	if (!intel_has_sagv(dev_priv) || i915_modparams.disable_sagv)
>  		return false;
>  
>  	if (IS_GEN9(dev_priv))
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Jani Nikula Feb. 27, 2018, 2:35 p.m. UTC | #2
On Mon, 26 Feb 2018, Rodrigo Vivi <rodrigo.vivi@intel.com> wrote:
> On Mon, Feb 26, 2018 at 03:45:51PM -0800, Azhar Shaikh wrote:
>> SAGV handling is currently broken which can result in system hangs.
>> Add a parameter to disable SAGV, till the SAGV handling is fixed.
>
> Not just handling of the limitations we have with SAGV is broken but
> also probably some hidden DBUF config issue that is worst when SAGV
> is enabled.
>
> But also the handling broken is not a good reason for the parameter
> itself. But the good part of this is the ability to debug display
> hard hangs issues with SAGV requirements.
>
> I'm in favor of the paramenter. We just need a different justification
> here.

*cringe* at adding a parameter to work around issues.

>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104975
>> Signed-off-by: Azhar Shaikh <azhar.shaikh@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_params.c | 3 +++
>>  drivers/gpu/drm/i915/i915_params.h | 3 ++-
>>  drivers/gpu/drm/i915/intel_pm.c    | 2 +-
>>  3 files changed, 6 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
>> index 08108ce5be21..6aede52240b0 100644
>> --- a/drivers/gpu/drm/i915/i915_params.c
>> +++ b/drivers/gpu/drm/i915/i915_params.c
>> @@ -167,6 +167,9 @@ struct i915_params i915_modparams __read_mostly = {
>>  i915_param_named_unsafe(enable_dp_mst, bool, 0600,
>>  	"Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)");
>>  
>> +i915_param_named_unsafe(disable_sagv, bool, 0600,
>> +	"Disable SAGV (default: false)");
>> +
>
> I understand that SAGV is default to enable and enabled by BIOS so we
> actually need to disable it.
>
> However the name will cause the same old confusion as disable power well disable....
>
> So I'm in favor of bool i915.enable_sagv defaults to enable
> and use i915.enable_sagv=0 when we need to disable it.

Agreed, if you must use a parameter.

BR,
Jani.
Marc Herbert March 1, 2018, 7:58 p.m. UTC | #3
Hi Jani,

> *cringe* at adding a parameter to workaround issues.

I understand that *each* parameter has the potential to *multiply* the total
number of configurations and that the resulting combinatorial explosion is
absolutely not scalable and sustainable from a validation perspective. No
one should expect to get support here when options like this one are set to
a non-default value.

When something breaks on the other hand, transparent _test_ knobs like this
one have proved invaluable countless times to help troubleshoot and isolate
issues. It's at least 10 times more productive to ask a non-expert in some
opposite timezone "please test again after rebooting with this parameter"
compared to "test again after applying this patch, recompiling, etc." -
assuming the latter has any chance of success at all.  I'm speaking from
actual experience as we are routinely experiencing both types of situations.

I hope the "unsafe" part of "i915_param_named_unsafe" provides a permanent
solution to both problems by making a clear distinction between the only one
single true supported configuration on one hand versus test datapoints
on the other hand.  Same for "tainted", sysfs or else.

Marc
Jani Nikula March 2, 2018, 2:23 p.m. UTC | #4
On Thu, 01 Mar 2018, Marc Herbert <Marc.Herbert@intel.com> wrote:
> Hi Jani,
>
>> *cringe* at adding a parameter to workaround issues.
>
> I understand that *each* parameter has the potential to *multiply* the total
> number of configurations and that the resulting combinatorial explosion is
> absolutely not scalable and sustainable from a validation perspective. No
> one should expect to get support here when options like this one are set to
> a non-default value.
>
> When something breaks on the other hand, transparent _test_ knobs like this
> one have proved invaluable countless times to help troubleshoot and isolate
> issues. It's at least 10 times more productive to ask a non-expert in some
> opposite timezone "please test again after rebooting with this parameter"
> compared to "test again after applying this patch, recompiling, etc." -
> assuming the latter has any chance of success at all.  I'm speaking from
> actual experience as we are routinely experiencing both type of situations.

Yes, I do understand, and that's why it's a "cringe", not a "nak".

The flip side is the bug reports that we still get, regardless of warnings
in dmesg and kernel taint, when people try out parameters that they read
about in random forums and expect support. And the lack of bug reports when
people silently work around their issues using module parameters.

> I hope the "unsafe" part of "i915_param_named_unsafe" provides a permanent
> solution to both problems by making a clear distinction between the only one
> single true supported configuration on one hand versus test datapoints
> on the other hand.  Same for "tainted", sysfs or else.

This is what I hoped too when I added support for the "unsafe"
parameters. :) Now I wish we could move this stuff to debugfs and flip
debugfs options as easily as module parameters. I think this is the
primary reason we have so many debugging module parameters: they are
more convenient than debugfs.

BR,
Jani.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 08108ce5be21..6aede52240b0 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -167,6 +167,9 @@  struct i915_params i915_modparams __read_mostly = {
 i915_param_named_unsafe(enable_dp_mst, bool, 0600,
 	"Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)");
 
+i915_param_named_unsafe(disable_sagv, bool, 0600,
+	"Disable SAGV (default: false)");
+
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
 i915_param_named_unsafe(inject_load_failure, uint, 0400,
 	"Force an error after a number of failure check points (0:disabled (default), N:force failure at the Nth failure check point)");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index 430f5f9d0ff4..ff3c7d5dee2d 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -69,7 +69,8 @@ 
 	param(bool, nuclear_pageflip, false) \
 	param(bool, enable_dp_mst, true) \
 	param(bool, enable_dpcd_backlight, false) \
-	param(bool, enable_gvt, false)
+	param(bool, enable_gvt, false) \
+	param(bool, disable_sagv, false)
 
 #define MEMBER(T, member, ...) T member;
 struct i915_params {
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 21dac6ebc202..0b1a6cbf45aa 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3693,7 +3693,7 @@  bool intel_can_enable_sagv(struct drm_atomic_state *state)
 	int level, latency;
 	int sagv_block_time_us;
 
-	if (!intel_has_sagv(dev_priv))
+	if (!intel_has_sagv(dev_priv) || i915_modparams.disable_sagv)
 		return false;
 
 	if (IS_GEN9(dev_priv))