diff mbox

[09/12] drm/i915: wait for a vblank instead of 50ms when enabling FBC

Message ID 1447444424-17168-10-git-send-email-paulo.r.zanoni@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zanoni, Paulo R Nov. 13, 2015, 7:53 p.m. UTC
Instead of waiting for 50ms, just wait until the next vblank, since
it's the minimum requirement.

This moves PC7 residency on my specific BDW machine running Cinnamon
from 60-70% to 84-89%. Without FBC, I get 20-25%. I'm using a
3200x1800 eDP panel. Notice: this was the case when the patch was
originally proposed, the order of the FBC patches changed since then,
so the actual numbers might be slightly different now.

v2:
  - Rebase after changing the patch order.
  - Update the commit message.

Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  2 +-
 drivers/gpu/drm/i915/intel_fbc.c | 12 +++---------
 2 files changed, 4 insertions(+), 10 deletions(-)

Comments

Chris Wilson Nov. 13, 2015, 9:03 p.m. UTC | #1
On Fri, Nov 13, 2015 at 05:53:41PM -0200, Paulo Zanoni wrote:
> Instead of waiting for 50ms, just wait until the next vblank, since
> it's the minimum requirement.
> 
> This moves PC7 residency on my specific BDW machine running Cinnamon
> from 60-70% to 84-89%. Without FBC, I get 20-25%. I'm using a
> 3200x1800 eDP panel. Notice: this was the case when the patch was
> originally proposed, the order of the FBC patches changed since then,
> so the actual numbers might be slightly different now.
> 
> v2:
>   - Rebase after changing the patch order.
>   - Update the commit message.
> 
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  2 +-
>  drivers/gpu/drm/i915/intel_fbc.c | 12 +++---------
>  2 files changed, 4 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 9418bd5..ea08714 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -919,9 +919,9 @@ struct i915_fbc {
>  
>  	struct intel_fbc_work {
>  		bool scheduled;
> +		u32 scheduled_vblank;
>  		struct work_struct work;
>  		struct drm_framebuffer *fb;
> -		unsigned long enable_jiffies;
>  	} work;
>  
>  	const char *no_fbc_reason;
> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
> index aa82075..72de8a1 100644
> --- a/drivers/gpu/drm/i915/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/intel_fbc.c
> @@ -391,7 +391,6 @@ static void intel_fbc_work_fn(struct work_struct *__work)
>  		container_of(__work, struct drm_i915_private, fbc.work.work);
>  	struct intel_fbc_work *work = &dev_priv->fbc.work;
>  	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> -	unsigned long delay_jiffies = msecs_to_jiffies(50);
>  
>  retry:
>  	/* Delay the actual enabling to let pageflipping cease and the
> @@ -400,14 +399,9 @@ retry:
>  	 * vblank to pass after disabling the FBC before we attempt
>  	 * to modify the control registers.
>  	 *
> -	 * A more complicated solution would involve tracking vblanks
> -	 * following the termination of the page-flipping sequence
> -	 * and indeed performing the enable as a co-routine and not
> -	 * waiting synchronously upon the vblank.
> -	 *
>  	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
>  	 */
> -	wait_remaining_ms_from_jiffies(work->enable_jiffies, delay_jiffies);
> +	intel_wait_for_vblank(dev_priv->dev, crtc->pipe);
>  
>  	mutex_lock(&dev_priv->fbc.lock);
>  
> @@ -416,7 +410,7 @@ retry:
>  		goto out;
>  
>  	/* Were we delayed again while this function was sleeping? */
> -	if (time_after(work->enable_jiffies + delay_jiffies, jiffies)) {
> +	if (drm_crtc_vblank_get(&crtc->base) == work->scheduled_vblank) {
>  		mutex_unlock(&dev_priv->fbc.lock);
>  		goto retry;
>  	}
> @@ -449,7 +443,7 @@ static void intel_fbc_schedule_activation(struct intel_crtc *crtc)
>  	 * jiffy count. */
>  	work->fb = crtc->base.primary->fb;
>  	work->scheduled = true;
> -	work->enable_jiffies = jiffies;
> +	work->scheduled_vblank = drm_crtc_vblank_count(&crtc->base);

Isn't the frame counter only incrementing whilst the vblank IRQ is
enabled? Ville?
-Chris
Zanoni, Paulo R Nov. 13, 2015, 9:17 p.m. UTC | #2
Em Sex, 2015-11-13 às 21:03 +0000, Chris Wilson escreveu:
> On Fri, Nov 13, 2015 at 05:53:41PM -0200, Paulo Zanoni wrote:

> > Instead of waiting for 50ms, just wait until the next vblank, since

> > it's the minimum requirement.

> > 

> > This moves PC7 residency on my specific BDW machine running

> > Cinnamon

> > from 60-70% to 84-89%. Without FBC, I get 20-25%. I'm using a

> > 3200x1800 eDP panel. Notice: this was the case when the patch was

> > originally proposed, the order of the FBC patches changed since

> > then,

> > so the actual numbers might be slightly different now.

> > 

> > v2:

> >   - Rebase after changing the patch order.

> >   - Update the commit message.

> > 

> > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

> > ---

> >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-

> >  drivers/gpu/drm/i915/intel_fbc.c | 12 +++---------

> >  2 files changed, 4 insertions(+), 10 deletions(-)

> > 

> > diff --git a/drivers/gpu/drm/i915/i915_drv.h

> > b/drivers/gpu/drm/i915/i915_drv.h

> > index 9418bd5..ea08714 100644

> > --- a/drivers/gpu/drm/i915/i915_drv.h

> > +++ b/drivers/gpu/drm/i915/i915_drv.h

> > @@ -919,9 +919,9 @@ struct i915_fbc {

> >  

> >  	struct intel_fbc_work {

> >  		bool scheduled;

> > +		u32 scheduled_vblank;

> >  		struct work_struct work;

> >  		struct drm_framebuffer *fb;

> > -		unsigned long enable_jiffies;

> >  	} work;

> >  

> >  	const char *no_fbc_reason;

> > diff --git a/drivers/gpu/drm/i915/intel_fbc.c

> > b/drivers/gpu/drm/i915/intel_fbc.c

> > index aa82075..72de8a1 100644

> > --- a/drivers/gpu/drm/i915/intel_fbc.c

> > +++ b/drivers/gpu/drm/i915/intel_fbc.c

> > @@ -391,7 +391,6 @@ static void intel_fbc_work_fn(struct

> > work_struct *__work)

> >  		container_of(__work, struct drm_i915_private,

> > fbc.work.work);

> >  	struct intel_fbc_work *work = &dev_priv->fbc.work;

> >  	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > -	unsigned long delay_jiffies = msecs_to_jiffies(50);

> >  

> >  retry:

> >  	/* Delay the actual enabling to let pageflipping cease and

> > the

> > @@ -400,14 +399,9 @@ retry:

> >  	 * vblank to pass after disabling the FBC before we

> > attempt

> >  	 * to modify the control registers.

> >  	 *

> > -	 * A more complicated solution would involve tracking

> > vblanks

> > -	 * following the termination of the page-flipping sequence

> > -	 * and indeed performing the enable as a co-routine and

> > not

> > -	 * waiting synchronously upon the vblank.

> > -	 *

> >  	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb

> >  	 */

> > -	wait_remaining_ms_from_jiffies(work->enable_jiffies,

> > delay_jiffies);

> > +	intel_wait_for_vblank(dev_priv->dev, crtc->pipe);

> >  

> >  	mutex_lock(&dev_priv->fbc.lock);

> >  

> > @@ -416,7 +410,7 @@ retry:

> >  		goto out;

> >  

> >  	/* Were we delayed again while this function was sleeping?

> > */

> > -	if (time_after(work->enable_jiffies + delay_jiffies,

> > jiffies)) {

> > +	if (drm_crtc_vblank_get(&crtc->base) == work-

> > >scheduled_vblank) {

> >  		mutex_unlock(&dev_priv->fbc.lock);

> >  		goto retry;

> >  	}

> > @@ -449,7 +443,7 @@ static void

> > intel_fbc_schedule_activation(struct intel_crtc *crtc)

> >  	 * jiffy count. */

> >  	work->fb = crtc->base.primary->fb;

> >  	work->scheduled = true;

> > -	work->enable_jiffies = jiffies;

> > +	work->scheduled_vblank = drm_crtc_vblank_count(&crtc-

> > >base);

> 

> Isn't the frame counter only incrementing whilst the vblank IRQ is

> enabled? Ville?


At the work function we call intel_wait_for_vblank(), which calls
drm_wait_one_vblank(), which calls drm_vblank_get().

> -Chris

>
Ville Syrjala Nov. 13, 2015, 9:20 p.m. UTC | #3
On Fri, Nov 13, 2015 at 09:03:43PM +0000, Chris Wilson wrote:
> On Fri, Nov 13, 2015 at 05:53:41PM -0200, Paulo Zanoni wrote:
> > Instead of waiting for 50ms, just wait until the next vblank, since
> > it's the minimum requirement.
> > 
> > This moves PC7 residency on my specific BDW machine running Cinnamon
> > from 60-70% to 84-89%. Without FBC, I get 20-25%. I'm using a
> > 3200x1800 eDP panel. Notice: this was the case when the patch was
> > originally proposed, the order of the FBC patches changed since then,
> > so the actual numbers might be slightly different now.
> > 
> > v2:
> >   - Rebase after changing the patch order.
> >   - Update the commit message.
> > 
> > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-
> >  drivers/gpu/drm/i915/intel_fbc.c | 12 +++---------
> >  2 files changed, 4 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 9418bd5..ea08714 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -919,9 +919,9 @@ struct i915_fbc {
> >  
> >  	struct intel_fbc_work {
> >  		bool scheduled;
> > +		u32 scheduled_vblank;
> >  		struct work_struct work;
> >  		struct drm_framebuffer *fb;
> > -		unsigned long enable_jiffies;
> >  	} work;
> >  
> >  	const char *no_fbc_reason;
> > diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
> > index aa82075..72de8a1 100644
> > --- a/drivers/gpu/drm/i915/intel_fbc.c
> > +++ b/drivers/gpu/drm/i915/intel_fbc.c
> > @@ -391,7 +391,6 @@ static void intel_fbc_work_fn(struct work_struct *__work)
> >  		container_of(__work, struct drm_i915_private, fbc.work.work);
> >  	struct intel_fbc_work *work = &dev_priv->fbc.work;
> >  	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > -	unsigned long delay_jiffies = msecs_to_jiffies(50);
> >  
> >  retry:
> >  	/* Delay the actual enabling to let pageflipping cease and the
> > @@ -400,14 +399,9 @@ retry:
> >  	 * vblank to pass after disabling the FBC before we attempt
> >  	 * to modify the control registers.
> >  	 *
> > -	 * A more complicated solution would involve tracking vblanks
> > -	 * following the termination of the page-flipping sequence
> > -	 * and indeed performing the enable as a co-routine and not
> > -	 * waiting synchronously upon the vblank.
> > -	 *
> >  	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
> >  	 */
> > -	wait_remaining_ms_from_jiffies(work->enable_jiffies, delay_jiffies);
> > +	intel_wait_for_vblank(dev_priv->dev, crtc->pipe);
> >  
> >  	mutex_lock(&dev_priv->fbc.lock);
> >  
> > @@ -416,7 +410,7 @@ retry:
> >  		goto out;
> >  
> >  	/* Were we delayed again while this function was sleeping? */
> > -	if (time_after(work->enable_jiffies + delay_jiffies, jiffies)) {
> > +	if (drm_crtc_vblank_get(&crtc->base) == work->scheduled_vblank) {
> >  		mutex_unlock(&dev_priv->fbc.lock);
> >  		goto retry;
> >  	}
> > @@ -449,7 +443,7 @@ static void intel_fbc_schedule_activation(struct intel_crtc *crtc)
> >  	 * jiffy count. */
> >  	work->fb = crtc->base.primary->fb;
> >  	work->scheduled = true;
> > -	work->enable_jiffies = jiffies;
> > +	work->scheduled_vblank = drm_crtc_vblank_count(&crtc->base);
> 
> Isn't the frame counter only incrementing whilst the vblank IRQ is
> enabled? Ville?

I see a "+ if (drm_crtc_vblank_get(" earlier.

That said, drm_crtc_vblank_count() is racy. The reader may race with
the vblank irq handler after the start of vblank has already been
passed, thus sampling a stale value, and then actually not waiting
until the next vblank.

It's more of a problem on gen2/3 which didn't have the "start of
vblank" interrupt and instead rely on the "frame start" interrupt.
There's at least one (well, almost) scanline between the two events.

I've been meaning to add another function to the vblank code that
could be called between the drm_vblank_get() and drm_crtc_vblank_count()
to make sure the sampled count is really up to date. I'd use that on
gen2 at least since it lacks the hw counter. For the other platforms,
it ought to be easier to just use the hw counter everywhere since
that increments atomically with the start of vblank and doesn't suffer
from this race.
Chris Wilson Nov. 13, 2015, 9:23 p.m. UTC | #4
On Fri, Nov 13, 2015 at 09:17:04PM +0000, Zanoni, Paulo R wrote:
> Em Sex, 2015-11-13 às 21:03 +0000, Chris Wilson escreveu:
> > On Fri, Nov 13, 2015 at 05:53:41PM -0200, Paulo Zanoni wrote:
> > > Instead of waiting for 50ms, just wait until the next vblank, since
> > > it's the minimum requirement.
> > > 
> > > This moves PC7 residency on my specific BDW machine running
> > > Cinnamon
> > > from 60-70% to 84-89%. Without FBC, I get 20-25%. I'm using a
> > > 3200x1800 eDP panel. Notice: this was the case when the patch was
> > > originally proposed, the order of the FBC patches changed since
> > > then,
> > > so the actual numbers might be slightly different now.
> > > 
> > > v2:
> > >   - Rebase after changing the patch order.
> > >   - Update the commit message.
> > > 
> > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-
> > >  drivers/gpu/drm/i915/intel_fbc.c | 12 +++---------
> > >  2 files changed, 4 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > b/drivers/gpu/drm/i915/i915_drv.h
> > > index 9418bd5..ea08714 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -919,9 +919,9 @@ struct i915_fbc {
> > >  
> > >  	struct intel_fbc_work {
> > >  		bool scheduled;
> > > +		u32 scheduled_vblank;
> > >  		struct work_struct work;
> > >  		struct drm_framebuffer *fb;
> > > -		unsigned long enable_jiffies;
> > >  	} work;
> > >  
> > >  	const char *no_fbc_reason;
> > > diff --git a/drivers/gpu/drm/i915/intel_fbc.c
> > > b/drivers/gpu/drm/i915/intel_fbc.c
> > > index aa82075..72de8a1 100644
> > > --- a/drivers/gpu/drm/i915/intel_fbc.c
> > > +++ b/drivers/gpu/drm/i915/intel_fbc.c
> > > @@ -391,7 +391,6 @@ static void intel_fbc_work_fn(struct
> > > work_struct *__work)
> > >  		container_of(__work, struct drm_i915_private,
> > > fbc.work.work);
> > >  	struct intel_fbc_work *work = &dev_priv->fbc.work;
> > >  	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > -	unsigned long delay_jiffies = msecs_to_jiffies(50);
> > >  
> > >  retry:
> > >  	/* Delay the actual enabling to let pageflipping cease and
> > > the
> > > @@ -400,14 +399,9 @@ retry:
> > >  	 * vblank to pass after disabling the FBC before we
> > > attempt
> > >  	 * to modify the control registers.
> > >  	 *
> > > -	 * A more complicated solution would involve tracking
> > > vblanks
> > > -	 * following the termination of the page-flipping sequence
> > > -	 * and indeed performing the enable as a co-routine and
> > > not
> > > -	 * waiting synchronously upon the vblank.
> > > -	 *
> > >  	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
> > >  	 */
> > > -	wait_remaining_ms_from_jiffies(work->enable_jiffies,
> > > delay_jiffies);
> > > +	intel_wait_for_vblank(dev_priv->dev, crtc->pipe);
> > >  
> > >  	mutex_lock(&dev_priv->fbc.lock);
> > >  
> > > @@ -416,7 +410,7 @@ retry:
> > >  		goto out;
> > >  
> > >  	/* Were we delayed again while this function was sleeping?
> > > */
> > > -	if (time_after(work->enable_jiffies + delay_jiffies,
> > > jiffies)) {
> > > +	if (drm_crtc_vblank_get(&crtc->base) == work-
> > > >scheduled_vblank) {
> > >  		mutex_unlock(&dev_priv->fbc.lock);
> > >  		goto retry;
> > >  	}
> > > @@ -449,7 +443,7 @@ static void
> > > intel_fbc_schedule_activation(struct intel_crtc *crtc)
> > >  	 * jiffy count. */
> > >  	work->fb = crtc->base.primary->fb;
> > >  	work->scheduled = true;
> > > -	work->enable_jiffies = jiffies;
> > > +	work->scheduled_vblank = drm_crtc_vblank_count(&crtc-
> > > >base);
> > 
> > Isn't the frame counter only incrementing whilst the vblank IRQ is
> > enabled? Ville?
> 
> At the work function we call intel_wait_for_vblank(), which calls
> drm_wait_one_vblank(), which calls drm_vblank_get().

And drm_vblank_put...
-Chris
Ville Syrjala Nov. 13, 2015, 9:26 p.m. UTC | #5
On Fri, Nov 13, 2015 at 11:20:19PM +0200, Ville Syrjälä wrote:
> On Fri, Nov 13, 2015 at 09:03:43PM +0000, Chris Wilson wrote:
> > On Fri, Nov 13, 2015 at 05:53:41PM -0200, Paulo Zanoni wrote:
> > > Instead of waiting for 50ms, just wait until the next vblank, since
> > > it's the minimum requirement.
> > > 
> > > This moves PC7 residency on my specific BDW machine running Cinnamon
> > > from 60-70% to 84-89%. Without FBC, I get 20-25%. I'm using a
> > > 3200x1800 eDP panel. Notice: this was the case when the patch was
> > > originally proposed, the order of the FBC patches changed since then,
> > > so the actual numbers might be slightly different now.
> > > 
> > > v2:
> > >   - Rebase after changing the patch order.
> > >   - Update the commit message.
> > > 
> > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-
> > >  drivers/gpu/drm/i915/intel_fbc.c | 12 +++---------
> > >  2 files changed, 4 insertions(+), 10 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > > index 9418bd5..ea08714 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -919,9 +919,9 @@ struct i915_fbc {
> > >  
> > >  	struct intel_fbc_work {
> > >  		bool scheduled;
> > > +		u32 scheduled_vblank;
> > >  		struct work_struct work;
> > >  		struct drm_framebuffer *fb;
> > > -		unsigned long enable_jiffies;
> > >  	} work;
> > >  
> > >  	const char *no_fbc_reason;
> > > diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
> > > index aa82075..72de8a1 100644
> > > --- a/drivers/gpu/drm/i915/intel_fbc.c
> > > +++ b/drivers/gpu/drm/i915/intel_fbc.c
> > > @@ -391,7 +391,6 @@ static void intel_fbc_work_fn(struct work_struct *__work)
> > >  		container_of(__work, struct drm_i915_private, fbc.work.work);
> > >  	struct intel_fbc_work *work = &dev_priv->fbc.work;
> > >  	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > -	unsigned long delay_jiffies = msecs_to_jiffies(50);
> > >  
> > >  retry:
> > >  	/* Delay the actual enabling to let pageflipping cease and the
> > > @@ -400,14 +399,9 @@ retry:
> > >  	 * vblank to pass after disabling the FBC before we attempt
> > >  	 * to modify the control registers.
> > >  	 *
> > > -	 * A more complicated solution would involve tracking vblanks
> > > -	 * following the termination of the page-flipping sequence
> > > -	 * and indeed performing the enable as a co-routine and not
> > > -	 * waiting synchronously upon the vblank.
> > > -	 *
> > >  	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
> > >  	 */
> > > -	wait_remaining_ms_from_jiffies(work->enable_jiffies, delay_jiffies);
> > > +	intel_wait_for_vblank(dev_priv->dev, crtc->pipe);
> > >  
> > >  	mutex_lock(&dev_priv->fbc.lock);
> > >  
> > > @@ -416,7 +410,7 @@ retry:
> > >  		goto out;
> > >  
> > >  	/* Were we delayed again while this function was sleeping? */
> > > -	if (time_after(work->enable_jiffies + delay_jiffies, jiffies)) {
> > > +	if (drm_crtc_vblank_get(&crtc->base) == work->scheduled_vblank) {
> > >  		mutex_unlock(&dev_priv->fbc.lock);
> > >  		goto retry;
> > >  	}
> > > @@ -449,7 +443,7 @@ static void intel_fbc_schedule_activation(struct intel_crtc *crtc)
> > >  	 * jiffy count. */
> > >  	work->fb = crtc->base.primary->fb;
> > >  	work->scheduled = true;
> > > -	work->enable_jiffies = jiffies;
> > > +	work->scheduled_vblank = drm_crtc_vblank_count(&crtc->base);
> > 
> > Isn't the frame counter only incrementing whilst the vblank IRQ is
> > enabled? Ville?
> 
> I see a "+ if (drm_crtc_vblank_get(" earlier.

Hmm. Actually it's doing
"drm_crtc_vblank_get(&crtc->base) == work->scheduled_vblank)"
which looks rather like nonsense.

Not sure what the intention here was...
Zanoni, Paulo R Nov. 13, 2015, 9:38 p.m. UTC | #6
Em Sex, 2015-11-13 às 23:26 +0200, Ville Syrjälä escreveu:
> On Fri, Nov 13, 2015 at 11:20:19PM +0200, Ville Syrjälä wrote:

> > On Fri, Nov 13, 2015 at 09:03:43PM +0000, Chris Wilson wrote:

> > > On Fri, Nov 13, 2015 at 05:53:41PM -0200, Paulo Zanoni wrote:

> > > > Instead of waiting for 50ms, just wait until the next vblank,

> > > > since

> > > > it's the minimum requirement.

> > > > 

> > > > This moves PC7 residency on my specific BDW machine running

> > > > Cinnamon

> > > > from 60-70% to 84-89%. Without FBC, I get 20-25%. I'm using a

> > > > 3200x1800 eDP panel. Notice: this was the case when the patch

> > > > was

> > > > originally proposed, the order of the FBC patches changed since

> > > > then,

> > > > so the actual numbers might be slightly different now.

> > > > 

> > > > v2:

> > > >   - Rebase after changing the patch order.

> > > >   - Update the commit message.

> > > > 

> > > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

> > > > ---

> > > >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-

> > > >  drivers/gpu/drm/i915/intel_fbc.c | 12 +++---------

> > > >  2 files changed, 4 insertions(+), 10 deletions(-)

> > > > 

> > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h

> > > > b/drivers/gpu/drm/i915/i915_drv.h

> > > > index 9418bd5..ea08714 100644

> > > > --- a/drivers/gpu/drm/i915/i915_drv.h

> > > > +++ b/drivers/gpu/drm/i915/i915_drv.h

> > > > @@ -919,9 +919,9 @@ struct i915_fbc {

> > > >  

> > > >  	struct intel_fbc_work {

> > > >  		bool scheduled;

> > > > +		u32 scheduled_vblank;

> > > >  		struct work_struct work;

> > > >  		struct drm_framebuffer *fb;

> > > > -		unsigned long enable_jiffies;

> > > >  	} work;

> > > >  

> > > >  	const char *no_fbc_reason;

> > > > diff --git a/drivers/gpu/drm/i915/intel_fbc.c

> > > > b/drivers/gpu/drm/i915/intel_fbc.c

> > > > index aa82075..72de8a1 100644

> > > > --- a/drivers/gpu/drm/i915/intel_fbc.c

> > > > +++ b/drivers/gpu/drm/i915/intel_fbc.c

> > > > @@ -391,7 +391,6 @@ static void intel_fbc_work_fn(struct

> > > > work_struct *__work)

> > > >  		container_of(__work, struct drm_i915_private,

> > > > fbc.work.work);

> > > >  	struct intel_fbc_work *work = &dev_priv->fbc.work;

> > > >  	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > > > -	unsigned long delay_jiffies = msecs_to_jiffies(50);

> > > >  

> > > >  retry:

> > > >  	/* Delay the actual enabling to let pageflipping cease

> > > > and the

> > > > @@ -400,14 +399,9 @@ retry:

> > > >  	 * vblank to pass after disabling the FBC before we

> > > > attempt

> > > >  	 * to modify the control registers.

> > > >  	 *

> > > > -	 * A more complicated solution would involve tracking

> > > > vblanks

> > > > -	 * following the termination of the page-flipping

> > > > sequence

> > > > -	 * and indeed performing the enable as a co-routine

> > > > and not

> > > > -	 * waiting synchronously upon the vblank.

> > > > -	 *

> > > >  	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb

> > > >  	 */

> > > > -	wait_remaining_ms_from_jiffies(work->enable_jiffies,

> > > > delay_jiffies);

> > > > +	intel_wait_for_vblank(dev_priv->dev, crtc->pipe);

> > > >  

> > > >  	mutex_lock(&dev_priv->fbc.lock);

> > > >  

> > > > @@ -416,7 +410,7 @@ retry:

> > > >  		goto out;

> > > >  

> > > >  	/* Were we delayed again while this function was

> > > > sleeping? */

> > > > -	if (time_after(work->enable_jiffies + delay_jiffies,

> > > > jiffies)) {

> > > > +	if (drm_crtc_vblank_get(&crtc->base) == work-

> > > > >scheduled_vblank) {

> > > >  		mutex_unlock(&dev_priv->fbc.lock);

> > > >  		goto retry;

> > > >  	}

> > > > @@ -449,7 +443,7 @@ static void

> > > > intel_fbc_schedule_activation(struct intel_crtc *crtc)

> > > >  	 * jiffy count. */

> > > >  	work->fb = crtc->base.primary->fb;

> > > >  	work->scheduled = true;

> > > > -	work->enable_jiffies = jiffies;

> > > > +	work->scheduled_vblank = drm_crtc_vblank_count(&crtc-

> > > > >base);

> > > 

> > > Isn't the frame counter only incrementing whilst the vblank IRQ

> > > is

> > > enabled? Ville?

> > 

> > I see a "+ if (drm_crtc_vblank_get(" earlier.

> 

> Hmm. Actually it's doing

> "drm_crtc_vblank_get(&crtc->base) == work->scheduled_vblank)"

> which looks rather like nonsense.

> 

> Not sure what the intention here was...


Ouch. The intent was for that to be another call for
drm_crtc_vblank_count().

The code in discussion is completely based on the drm_wait_one_vblank()
code: call drm_vblank_count(), then call it again until it returns
something different. The difference is that we actually call
drm_wait_one_vblank() in the middle of the process, and that
scheduled_vblank may also be updated in the meantime, so so may have to
call drm_wait_one_vblank() again.

>
Ville Syrjala Nov. 13, 2015, 11:17 p.m. UTC | #7
On Fri, Nov 13, 2015 at 09:38:50PM +0000, Zanoni, Paulo R wrote:
> Em Sex, 2015-11-13 às 23:26 +0200, Ville Syrjälä escreveu:
> > On Fri, Nov 13, 2015 at 11:20:19PM +0200, Ville Syrjälä wrote:
> > > On Fri, Nov 13, 2015 at 09:03:43PM +0000, Chris Wilson wrote:
> > > > On Fri, Nov 13, 2015 at 05:53:41PM -0200, Paulo Zanoni wrote:
> > > > > Instead of waiting for 50ms, just wait until the next vblank,
> > > > > since
> > > > > it's the minimum requirement.
> > > > > 
> > > > > This moves PC7 residency on my specific BDW machine running
> > > > > Cinnamon
> > > > > from 60-70% to 84-89%. Without FBC, I get 20-25%. I'm using a
> > > > > 3200x1800 eDP panel. Notice: this was the case when the patch
> > > > > was
> > > > > originally proposed, the order of the FBC patches changed since
> > > > > then,
> > > > > so the actual numbers might be slightly different now.
> > > > > 
> > > > > v2:
> > > > >   - Rebase after changing the patch order.
> > > > >   - Update the commit message.
> > > > > 
> > > > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-
> > > > >  drivers/gpu/drm/i915/intel_fbc.c | 12 +++---------
> > > > >  2 files changed, 4 insertions(+), 10 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > > b/drivers/gpu/drm/i915/i915_drv.h
> > > > > index 9418bd5..ea08714 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > > @@ -919,9 +919,9 @@ struct i915_fbc {
> > > > >  
> > > > >  	struct intel_fbc_work {
> > > > >  		bool scheduled;
> > > > > +		u32 scheduled_vblank;
> > > > >  		struct work_struct work;
> > > > >  		struct drm_framebuffer *fb;
> > > > > -		unsigned long enable_jiffies;
> > > > >  	} work;
> > > > >  
> > > > >  	const char *no_fbc_reason;
> > > > > diff --git a/drivers/gpu/drm/i915/intel_fbc.c
> > > > > b/drivers/gpu/drm/i915/intel_fbc.c
> > > > > index aa82075..72de8a1 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_fbc.c
> > > > > +++ b/drivers/gpu/drm/i915/intel_fbc.c
> > > > > @@ -391,7 +391,6 @@ static void intel_fbc_work_fn(struct
> > > > > work_struct *__work)
> > > > >  		container_of(__work, struct drm_i915_private,
> > > > > fbc.work.work);
> > > > >  	struct intel_fbc_work *work = &dev_priv->fbc.work;
> > > > >  	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > > > -	unsigned long delay_jiffies = msecs_to_jiffies(50);
> > > > >  
> > > > >  retry:
> > > > >  	/* Delay the actual enabling to let pageflipping cease
> > > > > and the
> > > > > @@ -400,14 +399,9 @@ retry:
> > > > >  	 * vblank to pass after disabling the FBC before we
> > > > > attempt
> > > > >  	 * to modify the control registers.
> > > > >  	 *
> > > > > -	 * A more complicated solution would involve tracking
> > > > > vblanks
> > > > > -	 * following the termination of the page-flipping
> > > > > sequence
> > > > > -	 * and indeed performing the enable as a co-routine
> > > > > and not
> > > > > -	 * waiting synchronously upon the vblank.
> > > > > -	 *
> > > > >  	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
> > > > >  	 */
> > > > > -	wait_remaining_ms_from_jiffies(work->enable_jiffies,
> > > > > delay_jiffies);
> > > > > +	intel_wait_for_vblank(dev_priv->dev, crtc->pipe);
> > > > >  
> > > > >  	mutex_lock(&dev_priv->fbc.lock);
> > > > >  
> > > > > @@ -416,7 +410,7 @@ retry:
> > > > >  		goto out;
> > > > >  
> > > > >  	/* Were we delayed again while this function was
> > > > > sleeping? */
> > > > > -	if (time_after(work->enable_jiffies + delay_jiffies,
> > > > > jiffies)) {
> > > > > +	if (drm_crtc_vblank_get(&crtc->base) == work-
> > > > > >scheduled_vblank) {
> > > > >  		mutex_unlock(&dev_priv->fbc.lock);
> > > > >  		goto retry;
> > > > >  	}
> > > > > @@ -449,7 +443,7 @@ static void
> > > > > intel_fbc_schedule_activation(struct intel_crtc *crtc)
> > > > >  	 * jiffy count. */
> > > > >  	work->fb = crtc->base.primary->fb;
> > > > >  	work->scheduled = true;
> > > > > -	work->enable_jiffies = jiffies;
> > > > > +	work->scheduled_vblank = drm_crtc_vblank_count(&crtc-
> > > > > >base);
> > > > 
> > > > Isn't the frame counter only incrementing whilst the vblank IRQ
> > > > is
> > > > enabled? Ville?
> > > 
> > > I see a "+ if (drm_crtc_vblank_get(" earlier.
> > 
> > Hmm. Actually it's doing
> > "drm_crtc_vblank_get(&crtc->base) == work->scheduled_vblank)"
> > which looks rather like nonsense.
> > 
> > Not sure what the intention here was...
> 
> Ouch. The intent was for that to be another call for
> drm_crtc_vblank_count().
> 
> The code in discussion is completely based on the drm_wait_one_vblank()
> code: call drm_vblank_count(), then call it again until it returns
> something different. The difference is that we actually call
> drm_wait_one_vblank() in the middle of the process, and that
> scheduled_vblank may also be updated in the meantime, so so may have to
> call drm_wait_one_vblank() again.

You have no guarantees that drm_crtc_vblank_count() won't give you
something totally stale unless you have a vblank reference when calling
it. If you have a reference it's guaranteed to give you something fairly
recent, but the race I outlined in the earlier mail still exists. And yes,
drm_wait_one_vblank() is no good due to that same race.
Zanoni, Paulo R Nov. 17, 2015, 7:03 p.m. UTC | #8
Em Sáb, 2015-11-14 às 01:17 +0200, Ville Syrjälä escreveu:
> On Fri, Nov 13, 2015 at 09:38:50PM +0000, Zanoni, Paulo R wrote:

> > Em Sex, 2015-11-13 às 23:26 +0200, Ville Syrjälä escreveu:

> > > On Fri, Nov 13, 2015 at 11:20:19PM +0200, Ville Syrjälä wrote:

> > > > On Fri, Nov 13, 2015 at 09:03:43PM +0000, Chris Wilson wrote:

> > > > > On Fri, Nov 13, 2015 at 05:53:41PM -0200, Paulo Zanoni wrote:

> > > > > > Instead of waiting for 50ms, just wait until the next

> > > > > > vblank,

> > > > > > since

> > > > > > it's the minimum requirement.

> > > > > > 

> > > > > > This moves PC7 residency on my specific BDW machine running

> > > > > > Cinnamon

> > > > > > from 60-70% to 84-89%. Without FBC, I get 20-25%. I'm using

> > > > > > a

> > > > > > 3200x1800 eDP panel. Notice: this was the case when the

> > > > > > patch

> > > > > > was

> > > > > > originally proposed, the order of the FBC patches changed

> > > > > > since

> > > > > > then,

> > > > > > so the actual numbers might be slightly different now.

> > > > > > 

> > > > > > v2:

> > > > > >   - Rebase after changing the patch order.

> > > > > >   - Update the commit message.

> > > > > > 

> > > > > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

> > > > > > ---

> > > > > >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-

> > > > > >  drivers/gpu/drm/i915/intel_fbc.c | 12 +++---------

> > > > > >  2 files changed, 4 insertions(+), 10 deletions(-)

> > > > > > 

> > > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h

> > > > > > b/drivers/gpu/drm/i915/i915_drv.h

> > > > > > index 9418bd5..ea08714 100644

> > > > > > --- a/drivers/gpu/drm/i915/i915_drv.h

> > > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h

> > > > > > @@ -919,9 +919,9 @@ struct i915_fbc {

> > > > > >  

> > > > > >  	struct intel_fbc_work {

> > > > > >  		bool scheduled;

> > > > > > +		u32 scheduled_vblank;

> > > > > >  		struct work_struct work;

> > > > > >  		struct drm_framebuffer *fb;

> > > > > > -		unsigned long enable_jiffies;

> > > > > >  	} work;

> > > > > >  

> > > > > >  	const char *no_fbc_reason;

> > > > > > diff --git a/drivers/gpu/drm/i915/intel_fbc.c

> > > > > > b/drivers/gpu/drm/i915/intel_fbc.c

> > > > > > index aa82075..72de8a1 100644

> > > > > > --- a/drivers/gpu/drm/i915/intel_fbc.c

> > > > > > +++ b/drivers/gpu/drm/i915/intel_fbc.c

> > > > > > @@ -391,7 +391,6 @@ static void intel_fbc_work_fn(struct

> > > > > > work_struct *__work)

> > > > > >  		container_of(__work, struct

> > > > > > drm_i915_private,

> > > > > > fbc.work.work);

> > > > > >  	struct intel_fbc_work *work = &dev_priv->fbc.work;

> > > > > >  	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > > > > > -	unsigned long delay_jiffies =

> > > > > > msecs_to_jiffies(50);

> > > > > >  

> > > > > >  retry:

> > > > > >  	/* Delay the actual enabling to let pageflipping

> > > > > > cease

> > > > > > and the

> > > > > > @@ -400,14 +399,9 @@ retry:

> > > > > >  	 * vblank to pass after disabling the FBC before

> > > > > > we

> > > > > > attempt

> > > > > >  	 * to modify the control registers.

> > > > > >  	 *

> > > > > > -	 * A more complicated solution would involve

> > > > > > tracking

> > > > > > vblanks

> > > > > > -	 * following the termination of the page-flipping

> > > > > > sequence

> > > > > > -	 * and indeed performing the enable as a co-

> > > > > > routine

> > > > > > and not

> > > > > > -	 * waiting synchronously upon the vblank.

> > > > > > -	 *

> > > > > >  	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb

> > > > > >  	 */

> > > > > > -	wait_remaining_ms_from_jiffies(work-

> > > > > > >enable_jiffies,

> > > > > > delay_jiffies);

> > > > > > +	intel_wait_for_vblank(dev_priv->dev, crtc->pipe);

> > > > > >  

> > > > > >  	mutex_lock(&dev_priv->fbc.lock);

> > > > > >  

> > > > > > @@ -416,7 +410,7 @@ retry:

> > > > > >  		goto out;

> > > > > >  

> > > > > >  	/* Were we delayed again while this function was

> > > > > > sleeping? */

> > > > > > -	if (time_after(work->enable_jiffies +

> > > > > > delay_jiffies,

> > > > > > jiffies)) {

> > > > > > +	if (drm_crtc_vblank_get(&crtc->base) == work-

> > > > > > > scheduled_vblank) {

> > > > > >  		mutex_unlock(&dev_priv->fbc.lock);

> > > > > >  		goto retry;

> > > > > >  	}

> > > > > > @@ -449,7 +443,7 @@ static void

> > > > > > intel_fbc_schedule_activation(struct intel_crtc *crtc)

> > > > > >  	 * jiffy count. */

> > > > > >  	work->fb = crtc->base.primary->fb;

> > > > > >  	work->scheduled = true;

> > > > > > -	work->enable_jiffies = jiffies;

> > > > > > +	work->scheduled_vblank =

> > > > > > drm_crtc_vblank_count(&crtc-

> > > > > > > base);

> > > > > 

> > > > > Isn't the frame counter only incrementing whilst the vblank

> > > > > IRQ

> > > > > is

> > > > > enabled? Ville?

> > > > 

> > > > I see a "+ if (drm_crtc_vblank_get(" earlier.

> > > 

> > > Hmm. Actually it's doing

> > > "drm_crtc_vblank_get(&crtc->base) == work->scheduled_vblank)"

> > > which looks rather like nonsense.

> > > 

> > > Not sure what the intention here was...

> > 

> > Ouch. The intent was for that to be another call for

> > drm_crtc_vblank_count().

> > 

> > The code in discussion is completely based on the

> > drm_wait_one_vblank()

> > code: call drm_vblank_count(), then call it again until it returns

> > something different. The difference is that we actually call

> > drm_wait_one_vblank() in the middle of the process, and that

> > scheduled_vblank may also be updated in the meantime, so so may

> > have to

> > call drm_wait_one_vblank() again.

> 

> You have no guarantees that drm_crtc_vblank_count() won't give you

> something totally stale unless you have a vblank reference when

> calling

> it. If you have a reference it's guaranteed to give you something

> fairly

> recent,


All the points mentioned above are easily fixable.

> but the race I outlined in the earlier mail still exists.


I've been trying to understand this, and it seems
i915_get_vblank_counter() tries to work around that problem with the
logic behind the "pixel" and "vbl_start" variables. Isn't that enough?
If no, why?


>  And yes,

> drm_wait_one_vblank() is no good due to that same race.


So shouldn't we stop using intel_wait_for_vblank() on the affected
platforms? Or at least add a big comment explaining the problem, or add
a WARN(gen == 3 || gen == 4 && !is_g4x), or something else. I got a
little surprised when you said the function is unreliable (was the
previous implementation based on I915_READ also equally unreliable?).

Also, since we are already running this risk in other parts of the
code, I'm not sure what are the conditions for my patch to be
acceptable. Just fixing the original implementation, while keeping it
based on drm_wait_one_vblank() and maybe adding some TODO comment would
do? Or do you see an actual simple solution for the race you mentioned?

Thanks,
Paulo

>
Ville Syrjala Nov. 17, 2015, 7:29 p.m. UTC | #9
On Tue, Nov 17, 2015 at 07:03:13PM +0000, Zanoni, Paulo R wrote:
> Em Sáb, 2015-11-14 às 01:17 +0200, Ville Syrjälä escreveu:
> > On Fri, Nov 13, 2015 at 09:38:50PM +0000, Zanoni, Paulo R wrote:
> > > Em Sex, 2015-11-13 às 23:26 +0200, Ville Syrjälä escreveu:
> > > > On Fri, Nov 13, 2015 at 11:20:19PM +0200, Ville Syrjälä wrote:
> > > > > On Fri, Nov 13, 2015 at 09:03:43PM +0000, Chris Wilson wrote:
> > > > > > On Fri, Nov 13, 2015 at 05:53:41PM -0200, Paulo Zanoni wrote:
> > > > > > > Instead of waiting for 50ms, just wait until the next
> > > > > > > vblank,
> > > > > > > since
> > > > > > > it's the minimum requirement.
> > > > > > > 
> > > > > > > This moves PC7 residency on my specific BDW machine running
> > > > > > > Cinnamon
> > > > > > > from 60-70% to 84-89%. Without FBC, I get 20-25%. I'm using
> > > > > > > a
> > > > > > > 3200x1800 eDP panel. Notice: this was the case when the
> > > > > > > patch
> > > > > > > was
> > > > > > > originally proposed, the order of the FBC patches changed
> > > > > > > since
> > > > > > > then,
> > > > > > > so the actual numbers might be slightly different now.
> > > > > > > 
> > > > > > > v2:
> > > > > > >   - Rebase after changing the patch order.
> > > > > > >   - Update the commit message.
> > > > > > > 
> > > > > > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > > > > > ---
> > > > > > >  drivers/gpu/drm/i915/i915_drv.h  |  2 +-
> > > > > > >  drivers/gpu/drm/i915/intel_fbc.c | 12 +++---------
> > > > > > >  2 files changed, 4 insertions(+), 10 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > > > > b/drivers/gpu/drm/i915/i915_drv.h
> > > > > > > index 9418bd5..ea08714 100644
> > > > > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > > > > @@ -919,9 +919,9 @@ struct i915_fbc {
> > > > > > >  
> > > > > > >  	struct intel_fbc_work {
> > > > > > >  		bool scheduled;
> > > > > > > +		u32 scheduled_vblank;
> > > > > > >  		struct work_struct work;
> > > > > > >  		struct drm_framebuffer *fb;
> > > > > > > -		unsigned long enable_jiffies;
> > > > > > >  	} work;
> > > > > > >  
> > > > > > >  	const char *no_fbc_reason;
> > > > > > > diff --git a/drivers/gpu/drm/i915/intel_fbc.c
> > > > > > > b/drivers/gpu/drm/i915/intel_fbc.c
> > > > > > > index aa82075..72de8a1 100644
> > > > > > > --- a/drivers/gpu/drm/i915/intel_fbc.c
> > > > > > > +++ b/drivers/gpu/drm/i915/intel_fbc.c
> > > > > > > @@ -391,7 +391,6 @@ static void intel_fbc_work_fn(struct
> > > > > > > work_struct *__work)
> > > > > > >  		container_of(__work, struct
> > > > > > > drm_i915_private,
> > > > > > > fbc.work.work);
> > > > > > >  	struct intel_fbc_work *work = &dev_priv->fbc.work;
> > > > > > >  	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > > > > > -	unsigned long delay_jiffies =
> > > > > > > msecs_to_jiffies(50);
> > > > > > >  
> > > > > > >  retry:
> > > > > > >  	/* Delay the actual enabling to let pageflipping
> > > > > > > cease
> > > > > > > and the
> > > > > > > @@ -400,14 +399,9 @@ retry:
> > > > > > >  	 * vblank to pass after disabling the FBC before
> > > > > > > we
> > > > > > > attempt
> > > > > > >  	 * to modify the control registers.
> > > > > > >  	 *
> > > > > > > -	 * A more complicated solution would involve
> > > > > > > tracking
> > > > > > > vblanks
> > > > > > > -	 * following the termination of the page-flipping
> > > > > > > sequence
> > > > > > > -	 * and indeed performing the enable as a co-
> > > > > > > routine
> > > > > > > and not
> > > > > > > -	 * waiting synchronously upon the vblank.
> > > > > > > -	 *
> > > > > > >  	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
> > > > > > >  	 */
> > > > > > > -	wait_remaining_ms_from_jiffies(work-
> > > > > > > >enable_jiffies,
> > > > > > > delay_jiffies);
> > > > > > > +	intel_wait_for_vblank(dev_priv->dev, crtc->pipe);
> > > > > > >  
> > > > > > >  	mutex_lock(&dev_priv->fbc.lock);
> > > > > > >  
> > > > > > > @@ -416,7 +410,7 @@ retry:
> > > > > > >  		goto out;
> > > > > > >  
> > > > > > >  	/* Were we delayed again while this function was
> > > > > > > sleeping? */
> > > > > > > -	if (time_after(work->enable_jiffies +
> > > > > > > delay_jiffies,
> > > > > > > jiffies)) {
> > > > > > > +	if (drm_crtc_vblank_get(&crtc->base) == work-
> > > > > > > > scheduled_vblank) {
> > > > > > >  		mutex_unlock(&dev_priv->fbc.lock);
> > > > > > >  		goto retry;
> > > > > > >  	}
> > > > > > > @@ -449,7 +443,7 @@ static void
> > > > > > > intel_fbc_schedule_activation(struct intel_crtc *crtc)
> > > > > > >  	 * jiffy count. */
> > > > > > >  	work->fb = crtc->base.primary->fb;
> > > > > > >  	work->scheduled = true;
> > > > > > > -	work->enable_jiffies = jiffies;
> > > > > > > +	work->scheduled_vblank =
> > > > > > > drm_crtc_vblank_count(&crtc-
> > > > > > > > base);
> > > > > > 
> > > > > > Isn't the frame counter only incrementing whilst the vblank
> > > > > > IRQ
> > > > > > is
> > > > > > enabled? Ville?
> > > > > 
> > > > > I see a "+ if (drm_crtc_vblank_get(" earlier.
> > > > 
> > > > Hmm. Actually it's doing
> > > > "drm_crtc_vblank_get(&crtc->base) == work->scheduled_vblank)"
> > > > which looks rather like nonsense.
> > > > 
> > > > Not sure what the intention here was...
> > > 
> > > Ouch. The intent was for that to be another call for
> > > drm_crtc_vblank_count().
> > > 
> > > The code in discussion is completely based on the
> > > drm_wait_one_vblank()
> > > code: call drm_vblank_count(), then call it again until it returns
> > > something different. The difference is that we actually call
> > > drm_wait_one_vblank() in the middle of the process, and that
> > > scheduled_vblank may also be updated in the meantime, so so may
> > > have to
> > > call drm_wait_one_vblank() again.
> > 
> > You have no guarantees that drm_crtc_vblank_count() won't give you
> > something totally stale unless you have a vblank reference when
> > calling
> > it. If you have a reference it's guaranteed to give you something
> > fairly
> > recent,
> 
> All the points mentioned above are easily fixable.
> 
> > but the race I outlined in the earlier mail still exists.
> 
> I've been trying to understand this, and it seems
> i915_get_vblank_counter() tries to work around that problem with the
> logic behind the "pixel" and "vbl_start" variables. Isn't that enough?
> If no, why?

No i915_get_vblank_counter() is there for another reason; On gen3/4
the hw counter increments at the wrong time, so we have to convert
it to look like it incremented at the right time. Fortunately we
can read out the hw counter and the pixel counter atomically, so
there's no race or anything due to the conversion.

> >  And yes,
> > drm_wait_one_vblank() is no good due to that same race.
> 
> So shouldn't we stop using intel_wait_for_vblank() on the affected
> platforms?

All platforms are affected, just some are more susceptible than others.
We should fix intel_wait_for_vblank() to do the right thing.

> Or at least add a big comment explaining the problem, or add
> a WARN(gen == 3 || gen == 4 && !is_g4x), or something else. I got a
> little surprised when you said the function is unreliable (was the
> previous implementation based on I915_READ also equally unreliable?).

On g4x+ the polling implementation didn't suffer from this issue since
it was polling the hw frame counter. But the fact that it was polling
was sub-optimal.

On older platforms a similar race was there since it was polling the
"frame start" bit and not the "start of vblank" bit. In addition
it could also effectively eat the interrupt that someone was waiting
for (on gen2/3 since they use the "frame start" as the interrupt as
well).

> Also, since we are already running this risk in other parts of the
> code, I'm not sure what are the conditions for my patch to be
> acceptable.

The main problem in your patch is using drm_vblank_count() w/o a
drm_vblank_get/put(). So that should be fixed somehow.

I guess we can ignore the race with the vblank interrupt handler
for now since the same problem exists everywhere else already.

> Just fixing the original implementation, while keeping it
> based on drm_wait_one_vblank() and maybe adding some TODO comment would
> do? Or do you see an actual simple solution for the race you mentioned?
> 
> Thanks,
> Paulo
> 
> >
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 9418bd5..ea08714 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -919,9 +919,9 @@  struct i915_fbc {
 
 	struct intel_fbc_work {
 		bool scheduled;
+		u32 scheduled_vblank;
 		struct work_struct work;
 		struct drm_framebuffer *fb;
-		unsigned long enable_jiffies;
 	} work;
 
 	const char *no_fbc_reason;
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index aa82075..72de8a1 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -391,7 +391,6 @@  static void intel_fbc_work_fn(struct work_struct *__work)
 		container_of(__work, struct drm_i915_private, fbc.work.work);
 	struct intel_fbc_work *work = &dev_priv->fbc.work;
 	struct intel_crtc *crtc = dev_priv->fbc.crtc;
-	unsigned long delay_jiffies = msecs_to_jiffies(50);
 
 retry:
 	/* Delay the actual enabling to let pageflipping cease and the
@@ -400,14 +399,9 @@  retry:
 	 * vblank to pass after disabling the FBC before we attempt
 	 * to modify the control registers.
 	 *
-	 * A more complicated solution would involve tracking vblanks
-	 * following the termination of the page-flipping sequence
-	 * and indeed performing the enable as a co-routine and not
-	 * waiting synchronously upon the vblank.
-	 *
 	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
 	 */
-	wait_remaining_ms_from_jiffies(work->enable_jiffies, delay_jiffies);
+	intel_wait_for_vblank(dev_priv->dev, crtc->pipe);
 
 	mutex_lock(&dev_priv->fbc.lock);
 
@@ -416,7 +410,7 @@  retry:
 		goto out;
 
 	/* Were we delayed again while this function was sleeping? */
-	if (time_after(work->enable_jiffies + delay_jiffies, jiffies)) {
+	if (drm_crtc_vblank_get(&crtc->base) == work->scheduled_vblank) {
 		mutex_unlock(&dev_priv->fbc.lock);
 		goto retry;
 	}
@@ -449,7 +443,7 @@  static void intel_fbc_schedule_activation(struct intel_crtc *crtc)
 	 * jiffy count. */
 	work->fb = crtc->base.primary->fb;
 	work->scheduled = true;
-	work->enable_jiffies = jiffies;
+	work->scheduled_vblank = drm_crtc_vblank_count(&crtc->base);
 
 	schedule_work(&work->work);
 }