diff mbox series

drm/i915/rc6: Disable RPG during workload execution

Message ID 20241022132226.1110383-1-badal.nilawar@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/rc6: Disable RPG during workload execution | expand

Commit Message

Nilawar, Badal Oct. 22, 2024, 1:22 p.m. UTC
Encountering forcewake errors related to render power gating;
therefore, disable it during workload execution.

Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_rc6.c       | 18 +++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_rc6_types.h |  1 +
 2 files changed, 18 insertions(+), 1 deletion(-)

Comments

Andi Shyti Oct. 22, 2024, 1:28 p.m. UTC | #1
Hi Badal,

On Tue, Oct 22, 2024 at 06:52:26PM +0530, Badal Nilawar wrote:
> Encountering forcewake errors related to render power gating;

Can you please expand your explanation here?

> therefore, disable it during workload execution.

... and here.

> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_rc6.c       | 18 +++++++++++++++++-
>  drivers/gpu/drm/i915/gt/intel_rc6_types.h |  1 +
>  2 files changed, 18 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
> index c864d101faf9..459394ab5258 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rc6.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
> @@ -140,6 +140,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
>  					      VDN_MFX_POWERGATE_ENABLE(i));
>  	}
>  
> +	rc6->pg_enable = pg_enable;

this looks borderline racy, it's fine only because this function
is called during resume which normally runs in atomic context.

>  	intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable);
>  }
>  
> @@ -572,8 +573,11 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
>  	intel_guc_rc_disable(gt_to_guc(gt));
>  
>  	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
> -	if (GRAPHICS_VER(i915) >= 9)
> +	if (GRAPHICS_VER(i915) >= 9) {
> +		rc6->pg_enable = 0;
>  		intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0);
> +	}
> +
>  	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0);
>  	intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0);
>  	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
> @@ -687,6 +691,15 @@ void intel_rc6_unpark(struct intel_rc6 *rc6)
>  
>  	/* Restore HW timers for automatic RC6 entry while busy */
>  	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
> +
> +	/*
> +	 * Seeing render forcewake timeouts during active submissions so disable render PG
> +	 * while workloads are under execution.

Can you please improve this sentence? If I never new about the
issue I would be a bit confused.

> +	 * FIXME Remove this change once real cause of render force wake timeout is fixed
> +	 */
> +	if (rc6->pg_enable == GEN9_RENDER_PG_ENABLE)

is this supposed to be "pg_enable == GEN9_RENDER_PG_ENABLE" or
"pg_enable & GEN9_RENDER_PG_ENABLE" ?

Andi
Rodrigo Vivi Oct. 22, 2024, 5:09 p.m. UTC | #2
On Tue, Oct 22, 2024 at 03:28:43PM +0200, Andi Shyti wrote:
> Hi Badal,
> 
> On Tue, Oct 22, 2024 at 06:52:26PM +0530, Badal Nilawar wrote:
> > Encountering forcewake errors related to render power gating;
> 
> Can you please expand your explanation here?

yeap. More explanation please. All platforms? really?

> 
> > therefore, disable it during workload execution.
> 
> ... and here.
> 
> > Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> > Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gt/intel_rc6.c       | 18 +++++++++++++++++-
> >  drivers/gpu/drm/i915/gt/intel_rc6_types.h |  1 +
> >  2 files changed, 18 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
> > index c864d101faf9..459394ab5258 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_rc6.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
> > @@ -140,6 +140,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
> >  					      VDN_MFX_POWERGATE_ENABLE(i));
> >  	}
> >  
> > +	rc6->pg_enable = pg_enable;
> 
> this looks borderline racy, it's fine only because this function
> is called during resume which normally runs in atomic context.
> 
> >  	intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable);
> >  }
> >  
> > @@ -572,8 +573,11 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
> >  	intel_guc_rc_disable(gt_to_guc(gt));
> >  
> >  	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
> > -	if (GRAPHICS_VER(i915) >= 9)
> > +	if (GRAPHICS_VER(i915) >= 9) {
> > +		rc6->pg_enable = 0;
> >  		intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0);
> > +	}
> > +
> >  	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0);
> >  	intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0);
> >  	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
> > @@ -687,6 +691,15 @@ void intel_rc6_unpark(struct intel_rc6 *rc6)
> >  
> >  	/* Restore HW timers for automatic RC6 entry while busy */
> >  	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
> > +
> > +	/*
> > +	 * Seeing render forcewake timeouts during active submissions so disable render PG
> > +	 * while workloads are under execution.
> 
> Can you please improve this sentence? If I never new about the
> issue I would be a bit confused.
> 
> > +	 * FIXME Remove this change once real cause of render force wake timeout is fixed
> > +	 */
> > +	if (rc6->pg_enable == GEN9_RENDER_PG_ENABLE)
> 
> is this supposed to be "pg_enable == GEN9_RENDER_PG_ENABLE" or
> "pg_enable & GEN9_RENDER_PG_ENABLE" ?
> 
> Andi
Nilawar, Badal Oct. 23, 2024, 5:33 a.m. UTC | #3
On 22-10-2024 22:39, Rodrigo Vivi wrote:
> On Tue, Oct 22, 2024 at 03:28:43PM +0200, Andi Shyti wrote:
>> Hi Badal,
>>
>> On Tue, Oct 22, 2024 at 06:52:26PM +0530, Badal Nilawar wrote:
>>> Encountering forcewake errors related to render power gating;
>>
>> Can you please expand your explanation here?
> 
> yeap. More explanation please. All platforms? really?

We are seeing Render forcewake timeouts on ADLP, ADLM, ADLN, TWL, DG1, 
rpl. Issue disappears after disabling RPG. Instead of fully disabling 
RPG I am disabling it during active submissions i.e. during unpark.
For MTL and ARL RPG is already disabled permanently.

Impact of doing this change should be performance improvement so kept 
for all platform otherwise I will add platform check.

This is the issue https://gitlab.freedesktop.org/drm/intel/issues/9413. 
Will add it in commit message.

Regards,
Badal

> 
>>
>>> therefore, disable it during workload execution.
>>
>> ... and here.
>>
>>> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
>>> Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/gt/intel_rc6.c       | 18 +++++++++++++++++-
>>>   drivers/gpu/drm/i915/gt/intel_rc6_types.h |  1 +
>>>   2 files changed, 18 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
>>> index c864d101faf9..459394ab5258 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_rc6.c
>>> +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
>>> @@ -140,6 +140,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
>>>   					      VDN_MFX_POWERGATE_ENABLE(i));
>>>   	}
>>>   
>>> +	rc6->pg_enable = pg_enable;
>>
>> this looks borderline racy, it's fine only because this function
>> is called during resume which normally runs in atomic context.
>>
>>>   	intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable);
>>>   }
>>>   
>>> @@ -572,8 +573,11 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
>>>   	intel_guc_rc_disable(gt_to_guc(gt));
>>>   
>>>   	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
>>> -	if (GRAPHICS_VER(i915) >= 9)
>>> +	if (GRAPHICS_VER(i915) >= 9) {
>>> +		rc6->pg_enable = 0;
>>>   		intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0);
>>> +	}
>>> +
>>>   	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0);
>>>   	intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0);
>>>   	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
>>> @@ -687,6 +691,15 @@ void intel_rc6_unpark(struct intel_rc6 *rc6)
>>>   
>>>   	/* Restore HW timers for automatic RC6 entry while busy */
>>>   	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
>>> +
>>> +	/*
>>> +	 * Seeing render forcewake timeouts during active submissions so disable render PG
>>> +	 * while workloads are under execution.
>>
>> Can you please improve this sentence? If I never new about the
>> issue I would be a bit confused.
>>
>>> +	 * FIXME Remove this change once real cause of render force wake timeout is fixed
>>> +	 */
>>> +	if (rc6->pg_enable == GEN9_RENDER_PG_ENABLE)
>>
>> is this supposed to be "pg_enable == GEN9_RENDER_PG_ENABLE" or
>> "pg_enable & GEN9_RENDER_PG_ENABLE" ?
>>
>> Andi
Rodrigo Vivi Oct. 23, 2024, 2:48 p.m. UTC | #4
On Wed, Oct 23, 2024 at 11:03:57AM +0530, Nilawar, Badal wrote:
> 
> 
> On 22-10-2024 22:39, Rodrigo Vivi wrote:
> > On Tue, Oct 22, 2024 at 03:28:43PM +0200, Andi Shyti wrote:
> > > Hi Badal,
> > > 
> > > On Tue, Oct 22, 2024 at 06:52:26PM +0530, Badal Nilawar wrote:
> > > > Encountering forcewake errors related to render power gating;
> > > 
> > > Can you please expand your explanation here?
> > 
> > yeap. More explanation please. All platforms? really?
> 
> We are seeing Render forcewake timeouts on ADLP, ADLM, ADLN, TWL, DG1, rpl.

Is this a regression? or a new issue?

Is this happening with Xe on these platforms? or i915 only?

> Issue disappears after disabling RPG. Instead of fully disabling RPG I am
> disabling it during active submissions i.e. during unpark.
> For MTL and ARL RPG is already disabled permanently.

uhm. Interesting. Why that is disabled on these platforms? perhaps we should be
doing the same for all GuC enabled platforms?

> 
> Impact of doing this change should be performance improvement so kept for
> all platform otherwise I will add platform check.

it could cause power consumption and battery life regressions. Better to filter
per platform.

> 
> This is the issue https://gitlab.freedesktop.org/drm/intel/issues/9413. Will
> add it in commit message.

Next time please include the relevant links in the commit msg.

Thanks a lot for the info and for working on this,
Rodrigo.

> 
> Regards,
> Badal
> 
> > 
> > > 
> > > > therefore, disable it during workload execution.
> > > 
> > > ... and here.
> > > 
> > > > Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> > > > Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
> > > > ---
> > > >   drivers/gpu/drm/i915/gt/intel_rc6.c       | 18 +++++++++++++++++-
> > > >   drivers/gpu/drm/i915/gt/intel_rc6_types.h |  1 +
> > > >   2 files changed, 18 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
> > > > index c864d101faf9..459394ab5258 100644
> > > > --- a/drivers/gpu/drm/i915/gt/intel_rc6.c
> > > > +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
> > > > @@ -140,6 +140,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
> > > >   					      VDN_MFX_POWERGATE_ENABLE(i));
> > > >   	}
> > > > +	rc6->pg_enable = pg_enable;
> > > 
> > > this looks borderline racy, it's fine only because this function
> > > is called during resume which normally runs in atomic context.
> > > 
> > > >   	intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable);
> > > >   }
> > > > @@ -572,8 +573,11 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
> > > >   	intel_guc_rc_disable(gt_to_guc(gt));
> > > >   	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
> > > > -	if (GRAPHICS_VER(i915) >= 9)
> > > > +	if (GRAPHICS_VER(i915) >= 9) {
> > > > +		rc6->pg_enable = 0;
> > > >   		intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0);
> > > > +	}
> > > > +
> > > >   	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0);
> > > >   	intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0);
> > > >   	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
> > > > @@ -687,6 +691,15 @@ void intel_rc6_unpark(struct intel_rc6 *rc6)
> > > >   	/* Restore HW timers for automatic RC6 entry while busy */
> > > >   	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
> > > > +
> > > > +	/*
> > > > +	 * Seeing render forcewake timeouts during active submissions so disable render PG
> > > > +	 * while workloads are under execution.
> > > 
> > > Can you please improve this sentence? If I never new about the
> > > issue I would be a bit confused.
> > > 
> > > > +	 * FIXME Remove this change once real cause of render force wake timeout is fixed
> > > > +	 */
> > > > +	if (rc6->pg_enable == GEN9_RENDER_PG_ENABLE)
> > > 
> > > is this supposed to be "pg_enable == GEN9_RENDER_PG_ENABLE" or
> > > "pg_enable & GEN9_RENDER_PG_ENABLE" ?
> > > 
> > > Andi
>
Nilawar, Badal Oct. 23, 2024, 4:01 p.m. UTC | #5
On 23-10-2024 20:18, Rodrigo Vivi wrote:
> On Wed, Oct 23, 2024 at 11:03:57AM +0530, Nilawar, Badal wrote:
>>
>>
>> On 22-10-2024 22:39, Rodrigo Vivi wrote:
>>> On Tue, Oct 22, 2024 at 03:28:43PM +0200, Andi Shyti wrote:
>>>> Hi Badal,
>>>>
>>>> On Tue, Oct 22, 2024 at 06:52:26PM +0530, Badal Nilawar wrote:
>>>>> Encountering forcewake errors related to render power gating;
>>>>
>>>> Can you please expand your explanation here?
>>>
>>> yeap. More explanation please. All platforms? really?
>>
>> We are seeing Render forcewake timeouts on ADLP, ADLM, ADLN, TWL, DG1, rpl.
> 
> Is this a regression? or a new issue?

This is old issue, first reported year back.

> 
> Is this happening with Xe on these platforms? or i915 only?

i915 only. This is not reported on Xe kmd.

> 
>> Issue disappears after disabling RPG. Instead of fully disabling RPG I am
>> disabling it during active submissions i.e. during unpark.
>> For MTL and ARL RPG is already disabled permanently.
> 
> uhm. Interesting. Why that is disabled on these platforms? 

 From commit log its temporary wa to avoid fw timeouts.

perhaps we should be
> doing the same for all GuC enabled platforms?

I think so as temporary Wa.

> 
>>
>> Impact of doing this change should be performance improvement so kept for
>> all platform otherwise I will add platform check.
> 
> it could cause power consumption and battery life regressions. Better to filter
> per platform.

Sure, will filter per platform.

> 
>>
>> This is the issue https://gitlab.freedesktop.org/drm/intel/issues/9413. Will
>> add it in commit message.
> 
> Next time please include the relevant links in the commit msg.

Sure.

> 
> Thanks a lot for the info and for working on this,

Thanks,
Badal

> Rodrigo.
> 
>>
>> Regards,
>> Badal
>>
>>>
>>>>
>>>>> therefore, disable it during workload execution.
>>>>
>>>> ... and here.
>>>>
>>>>> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
>>>>> Signed-off-by: Badal Nilawar <badal.nilawar@intel.com>
>>>>> ---
>>>>>    drivers/gpu/drm/i915/gt/intel_rc6.c       | 18 +++++++++++++++++-
>>>>>    drivers/gpu/drm/i915/gt/intel_rc6_types.h |  1 +
>>>>>    2 files changed, 18 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
>>>>> index c864d101faf9..459394ab5258 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/intel_rc6.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
>>>>> @@ -140,6 +140,7 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
>>>>>    					      VDN_MFX_POWERGATE_ENABLE(i));
>>>>>    	}
>>>>> +	rc6->pg_enable = pg_enable;
>>>>
>>>> this looks borderline racy, it's fine only because this function
>>>> is called during resume which normally runs in atomic context.
>>>>
>>>>>    	intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable);
>>>>>    }
>>>>> @@ -572,8 +573,11 @@ static void __intel_rc6_disable(struct intel_rc6 *rc6)
>>>>>    	intel_guc_rc_disable(gt_to_guc(gt));
>>>>>    	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
>>>>> -	if (GRAPHICS_VER(i915) >= 9)
>>>>> +	if (GRAPHICS_VER(i915) >= 9) {
>>>>> +		rc6->pg_enable = 0;
>>>>>    		intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0);
>>>>> +	}
>>>>> +
>>>>>    	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0);
>>>>>    	intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0);
>>>>>    	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
>>>>> @@ -687,6 +691,15 @@ void intel_rc6_unpark(struct intel_rc6 *rc6)
>>>>>    	/* Restore HW timers for automatic RC6 entry while busy */
>>>>>    	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
>>>>> +
>>>>> +	/*
>>>>> +	 * Seeing render forcewake timeouts during active submissions so disable render PG
>>>>> +	 * while workloads are under execution.
>>>>
>>>> Can you please improve this sentence? If I never new about the
>>>> issue I would be a bit confused.
>>>>
>>>>> +	 * FIXME Remove this change once real cause of render force wake timeout is fixed
>>>>> +	 */
>>>>> +	if (rc6->pg_enable == GEN9_RENDER_PG_ENABLE)
>>>>
>>>> is this supposed to be "pg_enable == GEN9_RENDER_PG_ENABLE" or
>>>> "pg_enable & GEN9_RENDER_PG_ENABLE" ?
>>>>
>>>> Andi
>>
Nilawar, Badal Oct. 23, 2024, 4:11 p.m. UTC | #6
On 23-10-2024 21:31, Nilawar, Badal wrote:
> 
> 
> On 23-10-2024 20:18, Rodrigo Vivi wrote:
>> On Wed, Oct 23, 2024 at 11:03:57AM +0530, Nilawar, Badal wrote:
>>>
>>>
>>> On 22-10-2024 22:39, Rodrigo Vivi wrote:
>>>> On Tue, Oct 22, 2024 at 03:28:43PM +0200, Andi Shyti wrote:
>>>>> Hi Badal,
>>>>>
>>>>> On Tue, Oct 22, 2024 at 06:52:26PM +0530, Badal Nilawar wrote:
>>>>>> Encountering forcewake errors related to render power gating;
>>>>>
>>>>> Can you please expand your explanation here?
>>>>
>>>> yeap. More explanation please. All platforms? really?
>>>
>>> We are seeing Render forcewake timeouts on ADLP, ADLM, ADLN, TWL, 
>>> DG1, rpl.
>>
>> Is this a regression? or a new issue?
> 
> This is old issue, first reported year back.
> 
>>
>> Is this happening with Xe on these platforms? or i915 only?
> 
> i915 only. This is not reported on Xe kmd.
> 
>>
>>> Issue disappears after disabling RPG. Instead of fully disabling RPG 
>>> I am
>>> disabling it during active submissions i.e. during unpark.
>>> For MTL and ARL RPG is already disabled permanently.
>>
>> uhm. Interesting. Why that is disabled on these platforms? 
> 
>  From commit log its temporary wa to avoid fw timeouts.
> 
> perhaps we should be
>> doing the same for all GuC enabled platforms?
> 
> I think so as temporary Wa.

Correction, DG1 we are not seeing this. I think we can go with platform 
check.

Regards,
Badal
Gupta, Anshuman Oct. 24, 2024, 2:28 p.m. UTC | #7
> -----Original Message-----
> From: Nilawar, Badal <badal.nilawar@intel.com>
> Sent: Wednesday, October 23, 2024 9:42 PM
> To: Vivi, Rodrigo <rodrigo.vivi@intel.com>
> Cc: Andi Shyti <andi.shyti@linux.intel.com>; intel-gfx@lists.freedesktop.org;
> Gupta, Anshuman <anshuman.gupta@intel.com>;
> chris.p.wilson@linux.intel.com
> Subject: Re: [PATCH] drm/i915/rc6: Disable RPG during workload execution
> 
> 
> 
> On 23-10-2024 21:31, Nilawar, Badal wrote:
> >
> >
> > On 23-10-2024 20:18, Rodrigo Vivi wrote:
> >> On Wed, Oct 23, 2024 at 11:03:57AM +0530, Nilawar, Badal wrote:
> >>>
> >>>
> >>> On 22-10-2024 22:39, Rodrigo Vivi wrote:
> >>>> On Tue, Oct 22, 2024 at 03:28:43PM +0200, Andi Shyti wrote:
> >>>>> Hi Badal,
> >>>>>
> >>>>> On Tue, Oct 22, 2024 at 06:52:26PM +0530, Badal Nilawar wrote:
> >>>>>> Encountering forcewake errors related to render power gating;
> >>>>>
> >>>>> Can you please expand your explanation here?
> >>>>
> >>>> yeap. More explanation please. All platforms? really?
> >>>
> >>> We are seeing Render forcewake timeouts on ADLP, ADLM, ADLN, TWL,
> >>> DG1, rpl.
> >>
> >> Is this a regression? or a new issue?
> >
> > This is old issue, first reported year back.
> >
> >>
> >> Is this happening with Xe on these platforms? or i915 only?
> >
> > i915 only. This is not reported on Xe kmd.
> >
> >>
> >>> Issue disappears after disabling RPG. Instead of fully disabling RPG
> >>> I am disabling it during active submissions i.e. during unpark.
> >>> For MTL and ARL RPG is already disabled permanently.
IMO this patch should be extended for MTL and ARL as well.
Don't disable the RPG completely, only disable it during workload submission.
That should save power on both MTL and ARL platforms, and right thing to do.
And patch should add the Fixes tag accordingly to the commit which disables the RPG on MTL.
@Vivi, Rodrigo what is your opinion on above ? it seems both MTL and ADL issues are same signature.
MTL issue got disappear as RPG is disabled globally but that will burn power.
If window does not have this issue then it is always difficult to get proper SV support.  
But OS like window may not catch these kind of issues as they don't reload the their graphics driver like
Linux reload module multiple times during selftest execution. Even chrome-os does not do that.
Not a real world use case.
Thanks,
Anshuman.
> >>
> >> uhm. Interesting. Why that is disabled on these platforms?
> >
> >  From commit log its temporary wa to avoid fw timeouts.
> >
> > perhaps we should be
> >> doing the same for all GuC enabled platforms?
> >
> > I think so as temporary Wa.
> 
> Correction, DG1 we are not seeing this. I think we can go with platform check.
> 
> Regards,
> Badal
Rodrigo Vivi Oct. 24, 2024, 5:34 p.m. UTC | #8
On Thu, Oct 24, 2024 at 10:28:38AM -0400, Gupta, Anshuman wrote:
> 
> 
> > -----Original Message-----
> > From: Nilawar, Badal <badal.nilawar@intel.com>
> > Sent: Wednesday, October 23, 2024 9:42 PM
> > To: Vivi, Rodrigo <rodrigo.vivi@intel.com>
> > Cc: Andi Shyti <andi.shyti@linux.intel.com>; intel-gfx@lists.freedesktop.org;
> > Gupta, Anshuman <anshuman.gupta@intel.com>;
> > chris.p.wilson@linux.intel.com
> > Subject: Re: [PATCH] drm/i915/rc6: Disable RPG during workload execution
> > 
> > 
> > 
> > On 23-10-2024 21:31, Nilawar, Badal wrote:
> > >
> > >
> > > On 23-10-2024 20:18, Rodrigo Vivi wrote:
> > >> On Wed, Oct 23, 2024 at 11:03:57AM +0530, Nilawar, Badal wrote:
> > >>>
> > >>>
> > >>> On 22-10-2024 22:39, Rodrigo Vivi wrote:
> > >>>> On Tue, Oct 22, 2024 at 03:28:43PM +0200, Andi Shyti wrote:
> > >>>>> Hi Badal,
> > >>>>>
> > >>>>> On Tue, Oct 22, 2024 at 06:52:26PM +0530, Badal Nilawar wrote:
> > >>>>>> Encountering forcewake errors related to render power gating;
> > >>>>>
> > >>>>> Can you please expand your explanation here?
> > >>>>
> > >>>> yeap. More explanation please. All platforms? really?
> > >>>
> > >>> We are seeing Render forcewake timeouts on ADLP, ADLM, ADLN, TWL,
> > >>> DG1, rpl.
> > >>
> > >> Is this a regression? or a new issue?
> > >
> > > This is old issue, first reported year back.
> > >
> > >>
> > >> Is this happening with Xe on these platforms? or i915 only?
> > >
> > > i915 only. This is not reported on Xe kmd.
> > >
> > >>
> > >>> Issue disappears after disabling RPG. Instead of fully disabling RPG
> > >>> I am disabling it during active submissions i.e. during unpark.
> > >>> For MTL and ARL RPG is already disabled permanently.
> IMO this patch should be extended for MTL and ARL as well.
> Don't disable the RPG completely, only disable it during workload submission.
> That should save power on both MTL and ARL platforms, and right thing to do.
> And patch should add the Fixes tag accordingly to the commit which disables the RPG on MTL.
> @Vivi, Rodrigo what is your opinion on above ? it seems both MTL and ADL issues are same signature.
> MTL issue got disappear as RPG is disabled globally but that will burn power.
> If window does not have this issue then it is always difficult to get proper SV support.  
> But OS like window may not catch these kind of issues as they don't reload the their graphics driver like
> Linux reload module multiple times during selftest execution. Even chrome-os does not do that.
> Not a real world use case.

So we should change the tests without impacting the real world use case.

Let's try to grab the forcewake_all when using these self-tests, except the rc6 one.

> Thanks,
> Anshuman.
> > >>
> > >> uhm. Interesting. Why that is disabled on these platforms?
> > >
> > >  From commit log its temporary wa to avoid fw timeouts.
> > >
> > > perhaps we should be
> > >> doing the same for all GuC enabled platforms?
> > >
> > > I think so as temporary Wa.
> > 
> > Correction, DG1 we are not seeing this. I think we can go with platform check.
> > 
> > Regards,
> > Badal
>
Nilawar, Badal Oct. 24, 2024, 7:06 p.m. UTC | #9
On 24-10-2024 23:04, Rodrigo Vivi wrote:
> On Thu, Oct 24, 2024 at 10:28:38AM -0400, Gupta, Anshuman wrote:
>>
>>
>>> -----Original Message-----
>>> From: Nilawar, Badal <badal.nilawar@intel.com>
>>> Sent: Wednesday, October 23, 2024 9:42 PM
>>> To: Vivi, Rodrigo <rodrigo.vivi@intel.com>
>>> Cc: Andi Shyti <andi.shyti@linux.intel.com>; intel-gfx@lists.freedesktop.org;
>>> Gupta, Anshuman <anshuman.gupta@intel.com>;
>>> chris.p.wilson@linux.intel.com
>>> Subject: Re: [PATCH] drm/i915/rc6: Disable RPG during workload execution
>>>
>>>
>>>
>>> On 23-10-2024 21:31, Nilawar, Badal wrote:
>>>>
>>>>
>>>> On 23-10-2024 20:18, Rodrigo Vivi wrote:
>>>>> On Wed, Oct 23, 2024 at 11:03:57AM +0530, Nilawar, Badal wrote:
>>>>>>
>>>>>>
>>>>>> On 22-10-2024 22:39, Rodrigo Vivi wrote:
>>>>>>> On Tue, Oct 22, 2024 at 03:28:43PM +0200, Andi Shyti wrote:
>>>>>>>> Hi Badal,
>>>>>>>>
>>>>>>>> On Tue, Oct 22, 2024 at 06:52:26PM +0530, Badal Nilawar wrote:
>>>>>>>>> Encountering forcewake errors related to render power gating;
>>>>>>>>
>>>>>>>> Can you please expand your explanation here?
>>>>>>>
>>>>>>> yeap. More explanation please. All platforms? really?
>>>>>>
>>>>>> We are seeing Render forcewake timeouts on ADLP, ADLM, ADLN, TWL,
>>>>>> DG1, rpl.
>>>>>
>>>>> Is this a regression? or a new issue?
>>>>
>>>> This is old issue, first reported year back.
>>>>
>>>>>
>>>>> Is this happening with Xe on these platforms? or i915 only?
>>>>
>>>> i915 only. This is not reported on Xe kmd.
>>>>
>>>>>
>>>>>> Issue disappears after disabling RPG. Instead of fully disabling RPG
>>>>>> I am disabling it during active submissions i.e. during unpark.
>>>>>> For MTL and ARL RPG is already disabled permanently.
>> IMO this patch should be extended for MTL and ARL as well.
>> Don't disable the RPG completely, only disable it during workload submission.
>> That should save power on both MTL and ARL platforms, and right thing to do.
>> And patch should add the Fixes tag accordingly to the commit which disables the RPG on MTL.
>> @Vivi, Rodrigo what is your opinion on above ? it seems both MTL and ADL issues are same signature.
>> MTL issue got disappear as RPG is disabled globally but that will burn power.
>> If window does not have this issue then it is always difficult to get proper SV support.
>> But OS like window may not catch these kind of issues as they don't reload the their graphics driver like
>> Linux reload module multiple times during selftest execution. Even chrome-os does not do that.
>> Not a real world use case.
> 
> So we should change the tests without impacting the real world use case.
> 
> Let's try to grab the forcewake_all when using these self-tests, except the rc6 one.

Sure, I will try forcewake_all on affected tests first.

Thanks,
Badal

> 
>> Thanks,
>> Anshuman.
>>>>>
>>>>> uhm. Interesting. Why that is disabled on these platforms?
>>>>
>>>>   From commit log its temporary wa to avoid fw timeouts.
>>>>
>>>> perhaps we should be
>>>>> doing the same for all GuC enabled platforms?
>>>>
>>>> I think so as temporary Wa.
>>>
>>> Correction, DG1 we are not seeing this. I think we can go with platform check.
>>>
>>> Regards,
>>> Badal
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c
index c864d101faf9..459394ab5258 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6.c
+++ b/drivers/gpu/drm/i915/gt/intel_rc6.c
@@ -140,6 +140,7 @@  static void gen11_rc6_enable(struct intel_rc6 *rc6)
 					      VDN_MFX_POWERGATE_ENABLE(i));
 	}
 
+	rc6->pg_enable = pg_enable;
 	intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, pg_enable);
 }
 
@@ -572,8 +573,11 @@  static void __intel_rc6_disable(struct intel_rc6 *rc6)
 	intel_guc_rc_disable(gt_to_guc(gt));
 
 	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-	if (GRAPHICS_VER(i915) >= 9)
+	if (GRAPHICS_VER(i915) >= 9) {
+		rc6->pg_enable = 0;
 		intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, 0);
+	}
+
 	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, 0);
 	intel_uncore_write_fw(uncore, GEN6_RC_STATE, 0);
 	intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
@@ -687,6 +691,15 @@  void intel_rc6_unpark(struct intel_rc6 *rc6)
 
 	/* Restore HW timers for automatic RC6 entry while busy */
 	intel_uncore_write_fw(uncore, GEN6_RC_CONTROL, rc6->ctl_enable);
+
+	/*
+	 * Seeing render forcewake timeouts during active submissions so disable render PG
+	 * while workloads are under execution.
+	 * FIXME Remove this change once real cause of render force wake timeout is fixed
+	 */
+	if (rc6->pg_enable == GEN9_RENDER_PG_ENABLE)
+		intel_uncore_write_fw(uncore, GEN9_PG_ENABLE,
+				      rc6->pg_enable & ~GEN9_RENDER_PG_ENABLE);
 }
 
 void intel_rc6_park(struct intel_rc6 *rc6)
@@ -715,6 +728,9 @@  void intel_rc6_park(struct intel_rc6 *rc6)
 	else
 		target = 0x4; /* normal rc6 */
 	intel_uncore_write_fw(uncore, GEN6_RC_STATE, target << RC_SW_TARGET_STATE_SHIFT);
+
+	if (rc6->pg_enable == GEN9_RENDER_PG_ENABLE)
+		intel_uncore_write_fw(uncore, GEN9_PG_ENABLE, rc6->pg_enable);
 }
 
 void intel_rc6_disable(struct intel_rc6 *rc6)
diff --git a/drivers/gpu/drm/i915/gt/intel_rc6_types.h b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
index cd4587098162..58e8da74777c 100644
--- a/drivers/gpu/drm/i915/gt/intel_rc6_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_rc6_types.h
@@ -30,6 +30,7 @@  struct intel_rc6 {
 
 	u32 ctl_enable;
 	u32 bios_rc_state;
+	u32 pg_enable;
 
 	struct drm_i915_gem_object *pctx;