diff mbox

drm/i915: add cherryview specfic forcewake in execlists_elsp_write

Message ID 1410270256-26413-1-git-send-email-deepak.s@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

deepak.s@linux.intel.com Sept. 9, 2014, 1:44 p.m. UTC
From: Deepak S <deepak.s@linux.intel.com>

In chv, we have two power wells, Render & Media. We need to use the
corresponding forcewake count. If we don't follow this, we get the
error "*ERROR*: Timed out waiting for forcewake old ack to clear" due to
multiple entries into __vlv_force_wake_get.

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

Comments

Ville Syrjälä Sept. 8, 2014, 2:02 p.m. UTC | #1
On Tue, Sep 09, 2014 at 07:14:16PM +0530, deepak.s@linux.intel.com wrote:
> From: Deepak S <deepak.s@linux.intel.com>
> 
> In chv, we have two power wells Render & Media. We need to use
> corresponsing forcewake count. If we dont follow this we are getting
> error "*ERROR*: Timed out waiting for forcewake old ack to clear" due to
> multiple entry into __vlv_force_wake_get.
> 
> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++----
>  1 file changed, 25 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index bd1b28d..bafd38b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -300,8 +300,18 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
>  	 * Instead, we do the runtime_pm_get/put when creating/destroying requests.
>  	 */
>  	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> -	if (dev_priv->uncore.forcewake_count++ == 0)
> -		dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL);
> +	if (IS_CHERRYVIEW(dev_priv->dev)) {
> +		if (dev_priv->uncore.fw_rendercount++ == 0)
> +			dev_priv->uncore.funcs.force_wake_get(dev_priv,
> +							      FORCEWAKE_RENDER);
> +		if (dev_priv->uncore.fw_mediacount++ == 0)
> +			dev_priv->uncore.funcs.force_wake_get(dev_priv,
> +							      FORCEWAKE_MEDIA);

This will wake both wells. Is that needed or should we just pick one
based on the ring?

> +	} else {
> +		if (dev_priv->uncore.forcewake_count++ == 0)
> +			dev_priv->uncore.funcs.force_wake_get(dev_priv,
> +							      FORCEWAKE_ALL);
> +	}
>  	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
>  
>  	I915_WRITE(RING_ELSP(ring), desc[1]);
> @@ -315,8 +325,19 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
>  
>  	/* Release Force Wakeup (see the big comment above). */
>  	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> -	if (--dev_priv->uncore.forcewake_count == 0)
> -		dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL);
> +	if (IS_CHERRYVIEW(dev_priv->dev)) {
> +		if (--dev_priv->uncore.fw_rendercount == 0)
> +			dev_priv->uncore.funcs.force_wake_put(dev_priv,
> +							      FORCEWAKE_RENDER);
> +		if (--dev_priv->uncore.fw_mediacount == 0)
> +			dev_priv->uncore.funcs.force_wake_put(dev_priv,
> +							      FORCEWAKE_MEDIA);
> +	} else {
> +		if (--dev_priv->uncore.forcewake_count == 0)
> +			dev_priv->uncore.funcs.force_wake_put(dev_priv,
> +							      FORCEWAKE_ALL);
> +	}
> +
>  	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
>  }
>  
> -- 
> 1.9.1
Ville Syrjälä Sept. 8, 2014, 2:14 p.m. UTC | #2
On Mon, Sep 08, 2014 at 05:02:43PM +0300, Ville Syrjälä wrote:
> On Tue, Sep 09, 2014 at 07:14:16PM +0530, deepak.s@linux.intel.com wrote:
> > From: Deepak S <deepak.s@linux.intel.com>
> > 
> > In chv, we have two power wells Render & Media. We need to use
> > corresponsing forcewake count. If we dont follow this we are getting
> > error "*ERROR*: Timed out waiting for forcewake old ack to clear" due to
> > multiple entry into __vlv_force_wake_get.
> > 
> > Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++----
> >  1 file changed, 25 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > index bd1b28d..bafd38b 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -300,8 +300,18 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
> >  	 * Instead, we do the runtime_pm_get/put when creating/destroying requests.
> >  	 */
> >  	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> > -	if (dev_priv->uncore.forcewake_count++ == 0)
> > -		dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL);
> > +	if (IS_CHERRYVIEW(dev_priv->dev)) {
> > +		if (dev_priv->uncore.fw_rendercount++ == 0)
> > +			dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > +							      FORCEWAKE_RENDER);
> > +		if (dev_priv->uncore.fw_mediacount++ == 0)
> > +			dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > +							      FORCEWAKE_MEDIA);
> 
> This will wake both wells. Is that needed or should we just pick one
> based on the ring?

Also, unlike what the comment says, runtime_pm_get() can't sleep since someone
must already be holding a reference, otherwise we surely can't go
writing any registers. So in theory we should be able to call
gen6_gt_force_wake_get() here, but maybe that would trigger a
might_sleep() warning. The current force wake code duplication (esp.
outside intel_uncore.c) is rather unfortunate and I'd like to see it
killed off. Maybe we just need to pull the rpm get/put outside
gen6_gt_force_wake_get()? I never really liked hiding it there anyway.

> 
> > +	} else {
> > +		if (dev_priv->uncore.forcewake_count++ == 0)
> > +			dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > +							      FORCEWAKE_ALL);
> > +	}
> >  	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
> >  
> >  	I915_WRITE(RING_ELSP(ring), desc[1]);
> > @@ -315,8 +325,19 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
> >  
> >  	/* Release Force Wakeup (see the big comment above). */
> >  	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> > -	if (--dev_priv->uncore.forcewake_count == 0)
> > -		dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL);
> > +	if (IS_CHERRYVIEW(dev_priv->dev)) {
> > +		if (--dev_priv->uncore.fw_rendercount == 0)
> > +			dev_priv->uncore.funcs.force_wake_put(dev_priv,
> > +							      FORCEWAKE_RENDER);
> > +		if (--dev_priv->uncore.fw_mediacount == 0)
> > +			dev_priv->uncore.funcs.force_wake_put(dev_priv,
> > +							      FORCEWAKE_MEDIA);
> > +	} else {
> > +		if (--dev_priv->uncore.forcewake_count == 0)
> > +			dev_priv->uncore.funcs.force_wake_put(dev_priv,
> > +							      FORCEWAKE_ALL);
> > +	}
> > +
> >  	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
> >  }
> >  
> > -- 
> > 1.9.1
> 
> -- 
> Ville Syrjälä
> Intel OTC
Daniel Vetter Sept. 8, 2014, 2:40 p.m. UTC | #3
On Mon, Sep 08, 2014 at 05:14:23PM +0300, Ville Syrjälä wrote:
> On Mon, Sep 08, 2014 at 05:02:43PM +0300, Ville Syrjälä wrote:
> > On Tue, Sep 09, 2014 at 07:14:16PM +0530, deepak.s@linux.intel.com wrote:
> > > From: Deepak S <deepak.s@linux.intel.com>
> > > 
> > > In chv, we have two power wells Render & Media. We need to use
> > > corresponsing forcewake count. If we dont follow this we are getting
> > > error "*ERROR*: Timed out waiting for forcewake old ack to clear" due to
> > > multiple entry into __vlv_force_wake_get.
> > > 
> > > Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++----
> > >  1 file changed, 25 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> > > index bd1b28d..bafd38b 100644
> > > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > > @@ -300,8 +300,18 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
> > >  	 * Instead, we do the runtime_pm_get/put when creating/destroying requests.
> > >  	 */
> > >  	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> > > -	if (dev_priv->uncore.forcewake_count++ == 0)
> > > -		dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL);
> > > +	if (IS_CHERRYVIEW(dev_priv->dev)) {
> > > +		if (dev_priv->uncore.fw_rendercount++ == 0)
> > > +			dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > > +							      FORCEWAKE_RENDER);
> > > +		if (dev_priv->uncore.fw_mediacount++ == 0)
> > > +			dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > > +							      FORCEWAKE_MEDIA);
> > 
> > This will wake both wells. Is that needed or should we just pick one
> > based on the ring?
> 
> Also unlike the comment says runtime_pm_get() can't sleep since someone
> must already be holding a reference, othwewise we surely can't go
> writing any registers. So in theory we should be able to call
> gen6_gt_force_wake_get() here, but maybe that would trigger a
> might_sleep() warning. the current force wake code duplication (esp.
> outside intel_uncore.c) is rather unfortunate and I'd like to see it
> killed off. Maybe we just need to pull the rpm get/put outside
> gen6_gt_force_wake_get()? I never really liked hiding it there anyway.

Yeah this is just broken design. And if you look at the other wheel to
track outstanding gpu work (requests) then it's not needed at all.

But I'm not sure what's the priority of the "rework execlists to use
requests" task is and when (if ever that will happen). Jesse is the
arbiter for this stuff anyway, so adding him.
-Daniel
deepak.s@intel.com Sept. 9, 2014, 4:15 p.m. UTC | #4
On Monday 08 September 2014 08:10 PM, Daniel Vetter wrote:
> On Mon, Sep 08, 2014 at 05:14:23PM +0300, Ville Syrjälä wrote:
>> On Mon, Sep 08, 2014 at 05:02:43PM +0300, Ville Syrjälä wrote:
>>> On Tue, Sep 09, 2014 at 07:14:16PM +0530, deepak.s@linux.intel.com wrote:
>>>> From: Deepak S <deepak.s@linux.intel.com>
>>>>
>>>> In chv, we have two power wells Render & Media. We need to use
>>>> corresponsing forcewake count. If we dont follow this we are getting
>>>> error "*ERROR*: Timed out waiting for forcewake old ack to clear" due to
>>>> multiple entry into __vlv_force_wake_get.
>>>>
>>>> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/intel_lrc.c | 29 +++++++++++++++++++++++++----
>>>>   1 file changed, 25 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>>> index bd1b28d..bafd38b 100644
>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>> @@ -300,8 +300,18 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
>>>>   	 * Instead, we do the runtime_pm_get/put when creating/destroying requests.
>>>>   	 */
>>>>   	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
>>>> -	if (dev_priv->uncore.forcewake_count++ == 0)
>>>> -		dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL);
>>>> +	if (IS_CHERRYVIEW(dev_priv->dev)) {
>>>> +		if (dev_priv->uncore.fw_rendercount++ == 0)
>>>> +			dev_priv->uncore.funcs.force_wake_get(dev_priv,
>>>> +							      FORCEWAKE_RENDER);
>>>> +		if (dev_priv->uncore.fw_mediacount++ == 0)
>>>> +			dev_priv->uncore.funcs.force_wake_get(dev_priv,
>>>> +							      FORCEWAKE_MEDIA);
>>> This will wake both wells. Is that needed or should we just pick one
>>> based on the ring?
>> Also unlike the comment says runtime_pm_get() can't sleep since someone
>> must already be holding a reference, othwewise we surely can't go
>> writing any registers. So in theory we should be able to call
>> gen6_gt_force_wake_get() here, but maybe that would trigger a
>> might_sleep() warning. the current force wake code duplication (esp.
>> outside intel_uncore.c) is rather unfortunate and I'd like to see it
>> killed off. Maybe we just need to pull the rpm get/put outside
>> gen6_gt_force_wake_get()? I never really liked hiding it there anyway.
> Yeah this is just broken design. And if you look at the other wheel to
> track outstanding gpu work (requests) then it's not needed at all.
>
> But I'm not sure what's the priority of the "rework execlists to use
> requests" task is and when (if ever that will happen). Jesse is the
> arbiter for this stuff anyway, so adding him.
> -Daniel

Hmm, agreed — do we have a reworked execlist? The reason why I added this: on chv, when I enable execlists, the incorrect forcewake count
causes multiple entries into forcewake_get, resulting in "*ERROR*: Timed out waiting for forcewake old ack to clear" and a hang.
Jesse Barnes Sept. 9, 2014, 9:25 p.m. UTC | #5
On Tue, 09 Sep 2014 21:45:08 +0530
Deepak S <deepak.s@intel.com> wrote:

> 
> On Monday 08 September 2014 08:10 PM, Daniel Vetter wrote:
> > On Mon, Sep 08, 2014 at 05:14:23PM +0300, Ville Syrjälä wrote:
> >> On Mon, Sep 08, 2014 at 05:02:43PM +0300, Ville Syrjälä wrote:
> >>> On Tue, Sep 09, 2014 at 07:14:16PM +0530,
> >>> deepak.s@linux.intel.com wrote:
> >>>> From: Deepak S <deepak.s@linux.intel.com>
> >>>>
> >>>> In chv, we have two power wells Render & Media. We need to use
> >>>> corresponsing forcewake count. If we dont follow this we are
> >>>> getting error "*ERROR*: Timed out waiting for forcewake old ack
> >>>> to clear" due to multiple entry into __vlv_force_wake_get.
> >>>>
> >>>> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> >>>> ---
> >>>>   drivers/gpu/drm/i915/intel_lrc.c | 29
> >>>> +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+),
> >>>> 4 deletions(-)
> >>>>
> >>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
> >>>> b/drivers/gpu/drm/i915/intel_lrc.c index bd1b28d..bafd38b 100644
> >>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
> >>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> >>>> @@ -300,8 +300,18 @@ static void execlists_elsp_write(struct
> >>>> intel_engine_cs *ring,
> >>>>   	 * Instead, we do the runtime_pm_get/put when
> >>>> creating/destroying requests. */
> >>>>   	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> >>>> -	if (dev_priv->uncore.forcewake_count++ == 0)
> >>>> -		dev_priv->uncore.funcs.force_wake_get(dev_priv,
> >>>> FORCEWAKE_ALL);
> >>>> +	if (IS_CHERRYVIEW(dev_priv->dev)) {
> >>>> +		if (dev_priv->uncore.fw_rendercount++ == 0)
> >>>> +
> >>>> dev_priv->uncore.funcs.force_wake_get(dev_priv,
> >>>> +
> >>>> FORCEWAKE_RENDER);
> >>>> +		if (dev_priv->uncore.fw_mediacount++ == 0)
> >>>> +
> >>>> dev_priv->uncore.funcs.force_wake_get(dev_priv,
> >>>> +
> >>>> FORCEWAKE_MEDIA);
> >>> This will wake both wells. Is that needed or should we just pick
> >>> one based on the ring?
> >> Also unlike the comment says runtime_pm_get() can't sleep since
> >> someone must already be holding a reference, othwewise we surely
> >> can't go writing any registers. So in theory we should be able to
> >> call gen6_gt_force_wake_get() here, but maybe that would trigger a
> >> might_sleep() warning. the current force wake code duplication
> >> (esp. outside intel_uncore.c) is rather unfortunate and I'd like
> >> to see it killed off. Maybe we just need to pull the rpm get/put
> >> outside gen6_gt_force_wake_get()? I never really liked hiding it
> >> there anyway.
> > Yeah this is just broken design. And if you look at the other wheel
> > to track outstanding gpu work (requests) then it's not needed at
> > all.
> >
> > But I'm not sure what's the priority of the "rework execlists to use
> > requests" task is and when (if ever that will happen). Jesse is the
> > arbiter for this stuff anyway, so adding him.
> > -Daniel
> 
> hmm , agreed do we have a reworked execlist? The reason why added
> this, on chv when i enable execlist, due to incorrect forcewake count
> is causing multiple entries to forcewake_get resulting in "*ERROR*:
> Timed out waiting for forcewake old ack to clear" "and Hang.

I'm hoping we can get execlists reworked on top of the request/seqno
stuff shortly after it lands, but I don't think that's a reason to
block this fix, since Chris is still busy fixing up the request
changes.

Jesse
Daniel Vetter Sept. 10, 2014, 7:16 a.m. UTC | #6
On Tue, Sep 09, 2014 at 02:25:50PM -0700, Jesse Barnes wrote:
> On Tue, 09 Sep 2014 21:45:08 +0530
> Deepak S <deepak.s@intel.com> wrote:
> 
> > 
> > On Monday 08 September 2014 08:10 PM, Daniel Vetter wrote:
> > > On Mon, Sep 08, 2014 at 05:14:23PM +0300, Ville Syrjälä wrote:
> > >> On Mon, Sep 08, 2014 at 05:02:43PM +0300, Ville Syrjälä wrote:
> > >>> On Tue, Sep 09, 2014 at 07:14:16PM +0530,
> > >>> deepak.s@linux.intel.com wrote:
> > >>>> From: Deepak S <deepak.s@linux.intel.com>
> > >>>>
> > >>>> In chv, we have two power wells Render & Media. We need to use
> > >>>> corresponsing forcewake count. If we dont follow this we are
> > >>>> getting error "*ERROR*: Timed out waiting for forcewake old ack
> > >>>> to clear" due to multiple entry into __vlv_force_wake_get.
> > >>>>
> > >>>> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> > >>>> ---
> > >>>>   drivers/gpu/drm/i915/intel_lrc.c | 29
> > >>>> +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+),
> > >>>> 4 deletions(-)
> > >>>>
> > >>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
> > >>>> b/drivers/gpu/drm/i915/intel_lrc.c index bd1b28d..bafd38b 100644
> > >>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
> > >>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > >>>> @@ -300,8 +300,18 @@ static void execlists_elsp_write(struct
> > >>>> intel_engine_cs *ring,
> > >>>>   	 * Instead, we do the runtime_pm_get/put when
> > >>>> creating/destroying requests. */
> > >>>>   	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> > >>>> -	if (dev_priv->uncore.forcewake_count++ == 0)
> > >>>> -		dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > >>>> FORCEWAKE_ALL);
> > >>>> +	if (IS_CHERRYVIEW(dev_priv->dev)) {
> > >>>> +		if (dev_priv->uncore.fw_rendercount++ == 0)
> > >>>> +
> > >>>> dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > >>>> +
> > >>>> FORCEWAKE_RENDER);
> > >>>> +		if (dev_priv->uncore.fw_mediacount++ == 0)
> > >>>> +
> > >>>> dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > >>>> +
> > >>>> FORCEWAKE_MEDIA);
> > >>> This will wake both wells. Is that needed or should we just pick
> > >>> one based on the ring?
> > >> Also unlike the comment says runtime_pm_get() can't sleep since
> > >> someone must already be holding a reference, othwewise we surely
> > >> can't go writing any registers. So in theory we should be able to
> > >> call gen6_gt_force_wake_get() here, but maybe that would trigger a
> > >> might_sleep() warning. the current force wake code duplication
> > >> (esp. outside intel_uncore.c) is rather unfortunate and I'd like
> > >> to see it killed off. Maybe we just need to pull the rpm get/put
> > >> outside gen6_gt_force_wake_get()? I never really liked hiding it
> > >> there anyway.
> > > Yeah this is just broken design. And if you look at the other wheel
> > > to track outstanding gpu work (requests) then it's not needed at
> > > all.
> > >
> > > But I'm not sure what's the priority of the "rework execlists to use
> > > requests" task is and when (if ever that will happen). Jesse is the
> > > arbiter for this stuff anyway, so adding him.
> > > -Daniel
> > 
> > hmm , agreed do we have a reworked execlist? The reason why added
> > this, on chv when i enable execlist, due to incorrect forcewake count
> > is causing multiple entries to forcewake_get resulting in "*ERROR*:
> > Timed out waiting for forcewake old ack to clear" "and Hang.
> 
> I'm hoping we can get execlists reworked on top of the request/seqno
> stuff shortly after it lands, but I don't think that's a reason to
> block this fix, since Chris is still busy fixing up the request
> changes.

Queued for -next, thanks for the patch.
-Daniel
Daniel Vetter Sept. 10, 2014, 3:47 p.m. UTC | #7
On Wed, Sep 10, 2014 at 09:16:45AM +0200, Daniel Vetter wrote:
> On Tue, Sep 09, 2014 at 02:25:50PM -0700, Jesse Barnes wrote:
> > On Tue, 09 Sep 2014 21:45:08 +0530
> > Deepak S <deepak.s@intel.com> wrote:
> > 
> > > 
> > > On Monday 08 September 2014 08:10 PM, Daniel Vetter wrote:
> > > > On Mon, Sep 08, 2014 at 05:14:23PM +0300, Ville Syrjälä wrote:
> > > >> On Mon, Sep 08, 2014 at 05:02:43PM +0300, Ville Syrjälä wrote:
> > > >>> On Tue, Sep 09, 2014 at 07:14:16PM +0530,
> > > >>> deepak.s@linux.intel.com wrote:
> > > >>>> From: Deepak S <deepak.s@linux.intel.com>
> > > >>>>
> > > >>>> In chv, we have two power wells Render & Media. We need to use
> > > >>>> corresponsing forcewake count. If we dont follow this we are
> > > >>>> getting error "*ERROR*: Timed out waiting for forcewake old ack
> > > >>>> to clear" due to multiple entry into __vlv_force_wake_get.
> > > >>>>
> > > >>>> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> > > >>>> ---
> > > >>>>   drivers/gpu/drm/i915/intel_lrc.c | 29
> > > >>>> +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+),
> > > >>>> 4 deletions(-)
> > > >>>>
> > > >>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
> > > >>>> b/drivers/gpu/drm/i915/intel_lrc.c index bd1b28d..bafd38b 100644
> > > >>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
> > > >>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > > >>>> @@ -300,8 +300,18 @@ static void execlists_elsp_write(struct
> > > >>>> intel_engine_cs *ring,
> > > >>>>   	 * Instead, we do the runtime_pm_get/put when
> > > >>>> creating/destroying requests. */
> > > >>>>   	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
> > > >>>> -	if (dev_priv->uncore.forcewake_count++ == 0)
> > > >>>> -		dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > > >>>> FORCEWAKE_ALL);
> > > >>>> +	if (IS_CHERRYVIEW(dev_priv->dev)) {
> > > >>>> +		if (dev_priv->uncore.fw_rendercount++ == 0)
> > > >>>> +
> > > >>>> dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > > >>>> +
> > > >>>> FORCEWAKE_RENDER);
> > > >>>> +		if (dev_priv->uncore.fw_mediacount++ == 0)
> > > >>>> +
> > > >>>> dev_priv->uncore.funcs.force_wake_get(dev_priv,
> > > >>>> +
> > > >>>> FORCEWAKE_MEDIA);
> > > >>> This will wake both wells. Is that needed or should we just pick
> > > >>> one based on the ring?
> > > >> Also unlike the comment says runtime_pm_get() can't sleep since
> > > >> someone must already be holding a reference, othwewise we surely
> > > >> can't go writing any registers. So in theory we should be able to
> > > >> call gen6_gt_force_wake_get() here, but maybe that would trigger a
> > > >> might_sleep() warning. the current force wake code duplication
> > > >> (esp. outside intel_uncore.c) is rather unfortunate and I'd like
> > > >> to see it killed off. Maybe we just need to pull the rpm get/put
> > > >> outside gen6_gt_force_wake_get()? I never really liked hiding it
> > > >> there anyway.
> > > > Yeah this is just broken design. And if you look at the other wheel
> > > > to track outstanding gpu work (requests) then it's not needed at
> > > > all.
> > > >
> > > > But I'm not sure what's the priority of the "rework execlists to use
> > > > requests" task is and when (if ever that will happen). Jesse is the
> > > > arbiter for this stuff anyway, so adding him.
> > > > -Daniel
> > > 
> > > hmm , agreed do we have a reworked execlist? The reason why added
> > > this, on chv when i enable execlist, due to incorrect forcewake count
> > > is causing multiple entries to forcewake_get resulting in "*ERROR*:
> > > Timed out waiting for forcewake old ack to clear" "and Hang.
> > 
> > I'm hoping we can get execlists reworked on top of the request/seqno
> > stuff shortly after it lands, but I don't think that's a reason to
> > block this fix, since Chris is still busy fixing up the request
> > changes.
> 
> Queued for -next, thanks for the patch.

Aside if someone gets bored and wants to apply some polish: And __ variant
of force_wake_get/put which only asserts that the device isn't runtime
suspended instead of doing the runtime_pm_get/put would be cute - we have
one other user in the hsw/bdw rpm code already.

The current force_wake_get/put functions would then just wrap those raw
versions.
-Daniel
Chris Wilson Sept. 10, 2014, 3:51 p.m. UTC | #8
On Wed, Sep 10, 2014 at 05:47:39PM +0200, Daniel Vetter wrote:
> Aside if someone gets bored and wants to apply some polish: And __ variant
> of force_wake_get/put which only asserts that the device isn't runtime
> suspended instead of doing the runtime_pm_get/put would be cute - we have
> one other user in the hsw/bdw rpm code already.
> 
> The current force_wake_get/put functions would then just wrap those raw
> versions.

I fail to see the reason why they take it at all. Conceptually,
gen6_gt_force_wake_get() is just as lowlevel as the register access they
wrap. The only one that breaks the rule is i915_debugfs.c and that seems
easy enough to wrap with its own intel_runtime_pm_get/_put.

And once you've finish with that you can then sort out the mess that is
the vlv variants. And now skl continues in the cut'n'paste'n'paste vein.
-Chris
deepak.s@intel.com Sept. 10, 2014, 4:38 p.m. UTC | #9
Hmm Ok Daniel. Let me try and clean up the forcewake. :)

Chris, most of the debugfs is already covered with runtime_get/put. If anything is missed we can add that

Thanks
Deepak

-----Original Message-----
From: Chris Wilson [mailto:chris@chris-wilson.co.uk] 
Sent: Wednesday, September 10, 2014 9:21 PM
To: Daniel Vetter
Cc: Jesse Barnes; S, Deepak; intel-gfx@lists.freedesktop.org
Subject: Re: [Intel-gfx] [PATCH] drm/i915: add cherryview specfic forcewake in execlists_elsp_write

On Wed, Sep 10, 2014 at 05:47:39PM +0200, Daniel Vetter wrote:
> Aside if someone gets bored and wants to apply some polish: And __ 
> variant of force_wake_get/put which only asserts that the device isn't 
> runtime suspended instead of doing the runtime_pm_get/put would be 
> cute - we have one other user in the hsw/bdw rpm code already.
> 
> The current force_wake_get/put functions would then just wrap those 
> raw versions.

I fail to see the reason why they take it at all. Conceptually,
gen6_gt_force_wake_get() is just as lowlevel as the register access they wrap. The only one that breaks the rule is i915_debugfs.c and that seems easy enough to wrap with its own intel_runtime_pm_get/_put.

And once you've finish with that you can then sort out the mess that is the vlv variants. And now skl continues in the cut'n'paste'n'paste vein.
-Chris

--
Chris Wilson, Intel Open Source Technology Centre
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index bd1b28d..bafd38b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -300,8 +300,18 @@  static void execlists_elsp_write(struct intel_engine_cs *ring,
 	 * Instead, we do the runtime_pm_get/put when creating/destroying requests.
 	 */
 	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
-	if (dev_priv->uncore.forcewake_count++ == 0)
-		dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL);
+	if (IS_CHERRYVIEW(dev_priv->dev)) {
+		if (dev_priv->uncore.fw_rendercount++ == 0)
+			dev_priv->uncore.funcs.force_wake_get(dev_priv,
+							      FORCEWAKE_RENDER);
+		if (dev_priv->uncore.fw_mediacount++ == 0)
+			dev_priv->uncore.funcs.force_wake_get(dev_priv,
+							      FORCEWAKE_MEDIA);
+	} else {
+		if (dev_priv->uncore.forcewake_count++ == 0)
+			dev_priv->uncore.funcs.force_wake_get(dev_priv,
+							      FORCEWAKE_ALL);
+	}
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 
 	I915_WRITE(RING_ELSP(ring), desc[1]);
@@ -315,8 +325,19 @@  static void execlists_elsp_write(struct intel_engine_cs *ring,
 
 	/* Release Force Wakeup (see the big comment above). */
 	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
-	if (--dev_priv->uncore.forcewake_count == 0)
-		dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL);
+	if (IS_CHERRYVIEW(dev_priv->dev)) {
+		if (--dev_priv->uncore.fw_rendercount == 0)
+			dev_priv->uncore.funcs.force_wake_put(dev_priv,
+							      FORCEWAKE_RENDER);
+		if (--dev_priv->uncore.fw_mediacount == 0)
+			dev_priv->uncore.funcs.force_wake_put(dev_priv,
+							      FORCEWAKE_MEDIA);
+	} else {
+		if (--dev_priv->uncore.forcewake_count == 0)
+			dev_priv->uncore.funcs.force_wake_put(dev_priv,
+							      FORCEWAKE_ALL);
+	}
+
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
 }