diff mbox

drm/i915: Transform WaInPlaceDecompressionHang to a simple reg write

Message ID 1504732331-6178-1-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com Sept. 6, 2017, 9:12 p.m. UTC
Afaict, GEN9_GAMT_ECO_REG_RW_IA does not live in the context, so writing
it on every context creation is overkill (and wrong).

Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

Comments

oscar.mateo@intel.com Sept. 6, 2017, 9:17 p.m. UTC | #1
Hey Mika,

Regarding this patch: is there a consensus on the most
appropriate place to apply workarounds? My understanding is that
per-context workarounds (WA_SET_BIT, etc...) go in
xxx_init_workarounds, while those that are needed only during 
initialization (I915_WRITE) go in xxx_init_clock_gating. But it doesn't 
look like this general rule is being followed (probably because 
xxx_init_clock_gating is a very misleading name?).

This has probably been discussed before, so it would be good if we could 
document the answer somewhere (maybe it already is?).

Thanks,

Oscar



On 09/06/2017 02:12 PM, Oscar Mateo wrote:
> Afaict, GEN9_GAMT_ECO_REG_RW_IA does not live in the context, so writing
> it on every context creation is overkill (and wrong).
>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> ---
>   drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++----------
>   1 file changed, 15 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 23812ec..9f01a5c 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -985,8 +985,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
>   
>   	/* WaInPlaceDecompressionHang:skl */
>   	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
> -		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
> -			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> +		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> +			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> +			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>   
>   	/* WaDisableLSQCROPERFforOCL:skl */
>   	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
> @@ -1059,8 +1060,9 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
>   
>   	/* WaInPlaceDecompressionHang:bxt */
>   	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
> -		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
> -			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> +		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> +			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> +			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>   
>   	return 0;
>   }
> @@ -1089,8 +1091,9 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine)
>   				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
>   
>   	/* WaInPlaceDecompressionHang:cnl */
> -	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
> -		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> +		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> +		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>   
>   	/* WaPushConstantDereferenceHoldDisable:cnl */
>   	WA_SET_BIT(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
> @@ -1143,8 +1146,9 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine)
>   		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
>   
>   	/* WaInPlaceDecompressionHang:kbl */
> -	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
> -		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> +		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> +		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>   
>   	/* WaDisableLSQCROPERFforOCL:kbl */
>   	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
> @@ -1196,8 +1200,9 @@ static int cfl_init_workarounds(struct intel_engine_cs *engine)
>   		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
>   
>   	/* WaInPlaceDecompressionHang:cfl */
> -	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
> -		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
> +		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
> +		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>   
>   	return 0;
>   }
Chris Wilson Sept. 6, 2017, 9:19 p.m. UTC | #2
Quoting Oscar Mateo (2017-09-06 22:12:11)
> Afaict, GEN9_GAMT_ECO_REG_RW_IA does not live in the context, so writing
> it on every context creation is overkill (and wrong).
> 
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++----------
>  1 file changed, 15 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 23812ec..9f01a5c 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -985,8 +985,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
>  
>         /* WaInPlaceDecompressionHang:skl */
>         if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
> -               WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
> -                          GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

Anything using a precalculated RMW value for a ctx register is indeed
fishy. Whilst you are checking this register, can you check whether the
other users of WA_SET_BIT/WA_CLR_BIT are indeed context bound?
-Chris
oscar.mateo@intel.com Sept. 6, 2017, 9:27 p.m. UTC | #3
On 09/06/2017 02:19 PM, Chris Wilson wrote:
> Quoting Oscar Mateo (2017-09-06 22:12:11)
>> Afaict, GEN9_GAMT_ECO_REG_RW_IA does not live in the context, so writing
>> it on every context creation is overkill (and wrong).
>>
>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++----------
>>   1 file changed, 15 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>> index 23812ec..9f01a5c 100644
>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>> @@ -985,8 +985,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
>>   
>>          /* WaInPlaceDecompressionHang:skl */
>>          if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
>> -               WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>> -                          GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> Anything using a precalculated RMW value for a ctx register is indeed
> fishy. Whilst you are checking this register, can you check whether the
> other users of WA_SET_BIT/WA_CLR_BIT are indeed context bound?
> -Chris

Sure, I'll try to go through all of them (but I'd like to clarify first 
if I should also be moving those I find to xxx_init_clock_gating).

-- Oscar
Chris Wilson Sept. 6, 2017, 9:43 p.m. UTC | #4
Quoting Oscar Mateo (2017-09-06 22:27:47)
> 
> 
> On 09/06/2017 02:19 PM, Chris Wilson wrote:
> > Quoting Oscar Mateo (2017-09-06 22:12:11)
> >> Afaict, GEN9_GAMT_ECO_REG_RW_IA does not live in the context, so writing
> >> it on every context creation is overkill (and wrong).
> >>
> >> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> >> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> >> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++----------
> >>   1 file changed, 15 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> >> index 23812ec..9f01a5c 100644
> >> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> >> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> >> @@ -985,8 +985,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
> >>   
> >>          /* WaInPlaceDecompressionHang:skl */
> >>          if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
> >> -               WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
> >> -                          GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> > Anything using a precalculated RMW value for a ctx register is indeed
> > fishy. Whilst you are checking this register, can you check whether the
> > other users of WA_SET_BIT/WA_CLR_BIT are indeed context bound?
> > -Chris
> 
> Sure, I'll try to go through all of them (but I'd like to clarify first 
> if I should also be moving those I find to xxx_init_clock_gating).

The short answer is probably not; init_clock_gating we expect to be
targeting display w/a. There's not always a clear divide between GT and
display, but we keep on muttering that we should keep them as
cleanly separated as possible so that we know where to look when
different IP blocks are updated. (And yes the name is one of those
things that we keep on waiting for someone else to fix.)
-Chris
oscar.mateo@intel.com Sept. 6, 2017, 9:51 p.m. UTC | #5
On 09/06/2017 02:43 PM, Chris Wilson wrote:
> Quoting Oscar Mateo (2017-09-06 22:27:47)
>>
>> On 09/06/2017 02:19 PM, Chris Wilson wrote:
>>> Quoting Oscar Mateo (2017-09-06 22:12:11)
>>>> Afaict, GEN9_GAMT_ECO_REG_RW_IA does not live in the context, so writing
>>>> it on every context creation is overkill (and wrong).
>>>>
>>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++----------
>>>>    1 file changed, 15 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>> index 23812ec..9f01a5c 100644
>>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>>> @@ -985,8 +985,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
>>>>    
>>>>           /* WaInPlaceDecompressionHang:skl */
>>>>           if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
>>>> -               WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>>>> -                          GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>> Anything using a precalculated RMW value for a ctx register is indeed
>>> fishy. Whilst you are checking this register, can you check whether the
>>> other users of WA_SET_BIT/WA_CLR_BIT are indeed context bound?
>>> -Chris
>> Sure, I'll try to go through all of them (but I'd like to clarify first
>> if I should also be moving those I find to xxx_init_clock_gating).
> The short answer is probably not, init_clock_gating we expect to be
> targetting display w/a. There's not always a clear divide between GT and
> display, but we keep on muttering that we should keep them them as
> cleanly separated as possible so that we know where to look when
> different IP blocks are updated. (And yes the name is one of those
> things that we keep on waiting for someone else to fix.)
> -Chris

It's not only the name, there is even a comment saying non-context, 
non-WABB GT workarounds go here:

/**
  * intel_init_clock_gating_hooks - setup the clock gating hooks
  * @dev_priv: device private
  *
  * Setup the hooks that configure which clocks of a given platform can be
  * gated and also apply various GT and display specific workarounds for these
  * platforms. Note that some GT specific workarounds are applied separately
  * when GPU contexts or batchbuffers start their execution.
  */
Mika Kuoppala Sept. 7, 2017, 9:30 a.m. UTC | #6
Oscar Mateo <oscar.mateo@intel.com> writes:

> Hey Mika,
>
> Regarding this patch: is there a consensus on where is the most 
> appropriate place to apply workarounds? My understanding is that 
> per-context workarounds (WAS_SET_BIT, etc...) go in 
> xxx_init_workarounds, while those that are needed only during 
> initialization (I915_WRITE) go in xxx_init_clock_gating. But it doesn't 
> look like this general rule is being followed (probably because 
> xxx_init_clock_gating is a very misleading name?).
>
> This has probably been discussed before, so it would be good if we could 
> document the answer somewhere (maybe it already is?).
>

Yep, xxx_init_workarounds for per context. init_clock_gating for
globals.

If I recall, init_clock_gating was just a suitable spot wrt
init/reset to inject globals.

Have we reached the pain threshold to move to genx_workarounds.c,
where there are per-context and global entries?

-Mika

> Thanks,
>
> Oscar
>
>
>
> On 09/06/2017 02:12 PM, Oscar Mateo wrote:
>> Afaict, GEN9_GAMT_ECO_REG_RW_IA does not live in the context, so writing
>> it on every context creation is overkill (and wrong).
>>
>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>> ---
>>   drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++----------
>>   1 file changed, 15 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>> index 23812ec..9f01a5c 100644
>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>> @@ -985,8 +985,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
>>   
>>   	/* WaInPlaceDecompressionHang:skl */
>>   	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
>> -		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>> -			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>> +		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>> +			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>> +			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>   
>>   	/* WaDisableLSQCROPERFforOCL:skl */
>>   	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
>> @@ -1059,8 +1060,9 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
>>   
>>   	/* WaInPlaceDecompressionHang:bxt */
>>   	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
>> -		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>> -			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>> +		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>> +			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>> +			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>   
>>   	return 0;
>>   }
>> @@ -1089,8 +1091,9 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine)
>>   				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
>>   
>>   	/* WaInPlaceDecompressionHang:cnl */
>> -	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>> -		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>> +		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>> +		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>   
>>   	/* WaPushConstantDereferenceHoldDisable:cnl */
>>   	WA_SET_BIT(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
>> @@ -1143,8 +1146,9 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine)
>>   		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
>>   
>>   	/* WaInPlaceDecompressionHang:kbl */
>> -	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>> -		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>> +		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>> +		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>   
>>   	/* WaDisableLSQCROPERFforOCL:kbl */
>>   	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
>> @@ -1196,8 +1200,9 @@ static int cfl_init_workarounds(struct intel_engine_cs *engine)
>>   		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
>>   
>>   	/* WaInPlaceDecompressionHang:cfl */
>> -	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>> -		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>> +		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>> +		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>   
>>   	return 0;
>>   }
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Ville Syrjälä Sept. 7, 2017, 11:54 a.m. UTC | #7
On Wed, Sep 06, 2017 at 02:51:10PM -0700, Oscar Mateo wrote:
> 
> 
> On 09/06/2017 02:43 PM, Chris Wilson wrote:
> > Quoting Oscar Mateo (2017-09-06 22:27:47)
> >>
> >> On 09/06/2017 02:19 PM, Chris Wilson wrote:
> >>> Quoting Oscar Mateo (2017-09-06 22:12:11)
> >>>> Afaict, GEN9_GAMT_ECO_REG_RW_IA does not live in the context, so writing
> >>>> it on every context creation is overkill (and wrong).
> >>>>
> >>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> >>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> >>>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> >>>> ---
> >>>>    drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++----------
> >>>>    1 file changed, 15 insertions(+), 10 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> >>>> index 23812ec..9f01a5c 100644
> >>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> >>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> >>>> @@ -985,8 +985,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
> >>>>    
> >>>>           /* WaInPlaceDecompressionHang:skl */
> >>>>           if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
> >>>> -               WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
> >>>> -                          GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> >>> Anything using a precalculated RMW value for a ctx register is indeed
> >>> fishy. Whilst you are checking this register, can you check whether the
> >>> other users of WA_SET_BIT/WA_CLR_BIT are indeed context bound?
> >>> -Chris
> >> Sure, I'll try to go through all of them (but I'd like to clarify first
> >> if I should also be moving those I find to xxx_init_clock_gating).
> > The short answer is probably not, init_clock_gating we expect to be
> > targetting display w/a. There's not always a clear divide between GT and
> > display, but we keep on muttering that we should keep them them as
> > cleanly separated as possible so that we know where to look when
> > different IP blocks are updated. (And yes the name is one of those
> > things that we keep on waiting for someone else to fix.)
> > -Chris
> 
> It's not only the name, there is even a comment saying non-context, 
> non-WABB GT workarounds go here:
> 
> /**
>   * intel_init_clock_gating_hooks - setup the clock gating hooks
>   * @dev_priv: device private
>   *
>   * Setup the hooks that configure which clocks of a given platform can be
>   * gated and also apply various GT and display specific workarounds for 
> these
>   * platforms. Note that some GT specific workarounds are applied separately
>   * when GPU contexts or batchbuffers start their execution.
>   */

IIRC one problem at least used to be that we called .init_clock_gating()
after we'd already touched the GT. So it was probably called too late.
Not sure what the order is these days.

And I have a feeling that some platforms had a few registers that are
clobbered by a GPU reset but aren't saved in the context. So those we'd
need to reconfigure somewhere else because .init_clock_gating() isn't
called on GPU reset (except on pre-g4x where also the display gets
clobbered). I have a feeling these were some L3 related things on IVB
perhaps?
oscar.mateo@intel.com Sept. 7, 2017, 3:50 p.m. UTC | #8
On 09/07/2017 02:30 AM, Mika Kuoppala wrote:
> Oscar Mateo <oscar.mateo@intel.com> writes:
>
>> Hey Mika,
>>
>> Regarding this patch: is there a consensus on where is the most
>> appropriate place to apply workarounds? My understanding is that
>> per-context workarounds (WAS_SET_BIT, etc...) go in
>> xxx_init_workarounds, while those that are needed only during
>> initialization (I915_WRITE) go in xxx_init_clock_gating. But it doesn't
>> look like this general rule is being followed (probably because
>> xxx_init_clock_gating is a very misleading name?).
>>
>> This has probably been discussed before, so it would be good if we could
>> document the answer somewhere (maybe it already is?).
>>
> Yep, xxx_init_workarounds for per context. init_clock_gating for
> globals.
>
> If I recall init_clock_gating was a just suitable spot wrt
> to init/reset to inject globals.
>
> Have we reached the pain treshold and move to genx_workarounds.c
> where there are per context and global entries?
>
> -Mika
>

I don't know about several genX_workarounds.c, but at least one 
intel_workarounds.c where per-context and global entries live in harmony 
(maybe WABB stuff as well?).

-- Oscar

>> Thanks,
>>
>> Oscar
>>
>>
>>
>> On 09/06/2017 02:12 PM, Oscar Mateo wrote:
>>> Afaict, GEN9_GAMT_ECO_REG_RW_IA does not live in the context, so writing
>>> it on every context creation is overkill (and wrong).
>>>
>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++----------
>>>    1 file changed, 15 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> index 23812ec..9f01a5c 100644
>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
>>> @@ -985,8 +985,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
>>>    
>>>    	/* WaInPlaceDecompressionHang:skl */
>>>    	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
>>> -		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>>> -			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>> +		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>>> +			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>>> +			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>>    
>>>    	/* WaDisableLSQCROPERFforOCL:skl */
>>>    	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
>>> @@ -1059,8 +1060,9 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine)
>>>    
>>>    	/* WaInPlaceDecompressionHang:bxt */
>>>    	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
>>> -		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>>> -			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>> +		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>>> +			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>>> +			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>>    
>>>    	return 0;
>>>    }
>>> @@ -1089,8 +1091,9 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine)
>>>    				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
>>>    
>>>    	/* WaInPlaceDecompressionHang:cnl */
>>> -	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>>> -		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>>> +		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>>> +		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>>    
>>>    	/* WaPushConstantDereferenceHoldDisable:cnl */
>>>    	WA_SET_BIT(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
>>> @@ -1143,8 +1146,9 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine)
>>>    		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
>>>    
>>>    	/* WaInPlaceDecompressionHang:kbl */
>>> -	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>>> -		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>>> +		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>>> +		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>>    
>>>    	/* WaDisableLSQCROPERFforOCL:kbl */
>>>    	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
>>> @@ -1196,8 +1200,9 @@ static int cfl_init_workarounds(struct intel_engine_cs *engine)
>>>    		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
>>>    
>>>    	/* WaInPlaceDecompressionHang:cfl */
>>> -	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
>>> -		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>> +	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
>>> +		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
>>> +		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
>>>    
>>>    	return 0;
>>>    }
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter Sept. 8, 2017, 7:15 a.m. UTC | #9
On Thu, Sep 07, 2017 at 02:54:53PM +0300, Ville Syrjälä wrote:
> On Wed, Sep 06, 2017 at 02:51:10PM -0700, Oscar Mateo wrote:
> > 
> > 
> > On 09/06/2017 02:43 PM, Chris Wilson wrote:
> > > Quoting Oscar Mateo (2017-09-06 22:27:47)
> > >>
> > >> On 09/06/2017 02:19 PM, Chris Wilson wrote:
> > >>> Quoting Oscar Mateo (2017-09-06 22:12:11)
> > >>>> Afaict, GEN9_GAMT_ECO_REG_RW_IA does not live in the context, so writing
> > >>>> it on every context creation is overkill (and wrong).
> > >>>>
> > >>>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> > >>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > >>>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> > >>>> ---
> > >>>>    drivers/gpu/drm/i915/intel_engine_cs.c | 25 +++++++++++++++----------
> > >>>>    1 file changed, 15 insertions(+), 10 deletions(-)
> > >>>>
> > >>>> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> > >>>> index 23812ec..9f01a5c 100644
> > >>>> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> > >>>> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> > >>>> @@ -985,8 +985,9 @@ static int skl_init_workarounds(struct intel_engine_cs *engine)
> > >>>>    
> > >>>>           /* WaInPlaceDecompressionHang:skl */
> > >>>>           if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
> > >>>> -               WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
> > >>>> -                          GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
> > >>> Anything using a precalculated RMW value for a ctx register is indeed
> > >>> fishy. Whilst you are checking this register, can you check whether the
> > >>> other users of WA_SET_BIT/WA_CLR_BIT are indeed context bound?
> > >>> -Chris
> > >> Sure, I'll try to go through all of them (but I'd like to clarify first
> > >> if I should also be moving those I find to xxx_init_clock_gating).
> > > The short answer is probably not, init_clock_gating we expect to be
> > > targetting display w/a. There's not always a clear divide between GT and
> > > display, but we keep on muttering that we should keep them them as
> > > cleanly separated as possible so that we know where to look when
> > > different IP blocks are updated. (And yes the name is one of those
> > > things that we keep on waiting for someone else to fix.)
> > > -Chris
> > 
> > It's not only the name, there is even a comment saying non-context, 
> > non-WABB GT workarounds go here:
> > 
> > /**
> >   * intel_init_clock_gating_hooks - setup the clock gating hooks
> >   * @dev_priv: device private
> >   *
> >   * Setup the hooks that configure which clocks of a given platform can be
> >   * gated and also apply various GT and display specific workarounds for 
> > these
> >   * platforms. Note that some GT specific workarounds are applied separately
> >   * when GPU contexts or batchbuffers start their execution.
> >   */
> 
> IIRC one problem at least used to be that we called .init_clock_gating()
> after we'd already touched the GT. So it was probabably called too late.
> Not sure what the order is these days.
> 
> And I have a feeling that some platforms had a few registers that are
> clobbered by a GPU reset but aren't saved in the context. So those we'd
> need to reconfigure somewhere else becase .init_clock_gating() isn't
> called on GPU reset (except on pre-g4x where also the display gets
> clobbered). I have a feeling these were some L3 related things on IVB
> perhaps?

Those should all be put in the engine setup. And yes we got this wrong a
few times, where a gpu reset broke or fixed mesa (depending upon which way
round the w/a vs reset defaults was).

Maybe we need a "how to apply w/a writes and where" doc page somewhere
that we use to whack every wa patch with in review? This comes up for
every platform a few times ...
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 23812ec..9f01a5c 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -985,8 +985,9 @@  static int skl_init_workarounds(struct intel_engine_cs *engine)
 
 	/* WaInPlaceDecompressionHang:skl */
 	if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER))
-		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
-			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
 
 	/* WaDisableLSQCROPERFforOCL:skl */
 	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
@@ -1059,8 +1060,9 @@  static int bxt_init_workarounds(struct intel_engine_cs *engine)
 
 	/* WaInPlaceDecompressionHang:bxt */
 	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
-		WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
-			   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+		I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+			   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
 
 	return 0;
 }
@@ -1089,8 +1091,9 @@  static int cnl_init_workarounds(struct intel_engine_cs *engine)
 				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);
 
 	/* WaInPlaceDecompressionHang:cnl */
-	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
-		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
 
 	/* WaPushConstantDereferenceHoldDisable:cnl */
 	WA_SET_BIT(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
@@ -1143,8 +1146,9 @@  static int kbl_init_workarounds(struct intel_engine_cs *engine)
 		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 
 	/* WaInPlaceDecompressionHang:kbl */
-	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
-		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
 
 	/* WaDisableLSQCROPERFforOCL:kbl */
 	ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4);
@@ -1196,8 +1200,9 @@  static int cfl_init_workarounds(struct intel_engine_cs *engine)
 		GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
 
 	/* WaInPlaceDecompressionHang:cfl */
-	WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA,
-		   GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
+	I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA,
+		   (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) |
+		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS));
 
 	return 0;
 }