diff mbox

[02/26] drm/i915: don't stop+start FBC at every flip

Message ID 1445964628-30226-3-git-send-email-paulo.r.zanoni@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Zanoni, Paulo R Oct. 27, 2015, 4:50 p.m. UTC
The hardware already takes care of disabling and recompressing FBC
when we do a page flip, so all we need to do is to update the fence
registers and move on.

One of the important things to notice is that on the pre-gen6
platforms the fence is programmed on the FBC control register and the
documentation says we can't update the control register while FBC is
enabled. This would basically mean we'd have to to disable+enable FBC
at every flip in order to make sure the hardware tracking still works,
which doesn't seem to make too much sense. So I sent an email to the
hardware team requesting some clarification. The information I got was
this:

"I don't think any hardware is latching on to 0x100100, 0x100104, or
the old fence number in FBC_CTL. Instructions against changing on the
fly would be to simplify testing and ensure you don't miss any
invalidation that happened right at that time."

So I guess we're fine for flips. But I can't really say I tested FBC
on these ancient platforms - nor that I'll ever propose enabling FBC
by default on them exactly because of problems like these.

v2: Add comment at flush() (Chris).

Testcase: igt/kms_frontbuffer_tracking/fbc*-fliptrack
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |   1 +
 drivers/gpu/drm/i915/i915_reg.h          |   3 +
 drivers/gpu/drm/i915/intel_display.c     |   1 -
 drivers/gpu/drm/i915/intel_drv.h         |   2 +
 drivers/gpu/drm/i915/intel_fbc.c         | 103 ++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_frontbuffer.c |   1 +
 6 files changed, 108 insertions(+), 3 deletions(-)

Comments

Ville Syrjala Oct. 27, 2015, 6:32 p.m. UTC | #1
On Tue, Oct 27, 2015 at 02:50:04PM -0200, Paulo Zanoni wrote:
> The hardware already takes care of disabling and recompressing FBC
> when we do a page flip, so all we need to do is to update the fence
> registers and move on.
> 
> One of the important things to notice is that on the pre-gen6
> platforms the fence is programmed on the FBC control register and the
> documentation says we can't update the control register while FBC is
> enabled. This would basically mean we'd have to to disable+enable FBC
> at every flip in order to make sure the hardware tracking still works,
> which doesn't seem to make too much sense. So I sent an email to the
> hardware team requesting some clarification. The information I got was
> this:
> 
> "I don't think any hardware is latching on to 0x100100, 0x100104, or
> the old fence number in FBC_CTL. Instructions against changing on the
> fly would be to simplify testing and ensure you don't miss any
> invalidation that happened right at that time."
> 
> So I guess we're fine for flips. But I can't really say I tested FBC
> on these ancient platforms - nor that I'll ever propose enabling FBC
> by default on them exactly because of problems like these.

Yeah, I did this in my patches too, and IIRC it seemed to work just fine
back then.

> 
> v2: Add comment at flush() (Chris).
> 
> Testcase: igt/kms_frontbuffer_tracking/fbc*-fliptrack
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          |   1 +
>  drivers/gpu/drm/i915/i915_reg.h          |   3 +
>  drivers/gpu/drm/i915/intel_display.c     |   1 -
>  drivers/gpu/drm/i915/intel_drv.h         |   2 +
>  drivers/gpu/drm/i915/intel_fbc.c         | 103 ++++++++++++++++++++++++++++++-
>  drivers/gpu/drm/i915/intel_frontbuffer.c |   1 +
>  6 files changed, 108 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ee14962..77da500 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -928,6 +928,7 @@ struct i915_fbc {
>  	bool (*fbc_enabled)(struct drm_i915_private *dev_priv);
>  	void (*enable_fbc)(struct intel_crtc *crtc);
>  	void (*disable_fbc)(struct drm_i915_private *dev_priv);
> +	void (*flip_prepare)(struct drm_i915_private *dev_priv);
>  };
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8942532..3d598a2 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2028,6 +2028,7 @@ enum skl_disp_power_wells {
>  #define   FBC_CTL_C3_IDLE	(1<<13)
>  #define   FBC_CTL_STRIDE_SHIFT	(5)
>  #define   FBC_CTL_FENCENO_SHIFT	(0)
> +#define   FBC_CTL_FENCENO_MASK	0xF

It's only three bits on gen2. But bit 3 is MBZ and there are only 8
fence registers on those platforms, so this is OK.

>  #define FBC_COMMAND		0x0320c
>  #define   FBC_CMD_COMPRESS	(1<<0)
>  #define FBC_STATUS		0x03210
> @@ -2064,6 +2065,7 @@ enum skl_disp_power_wells {
>  #define   DPFC_CTL_LIMIT_1X	(0<<6)
>  #define   DPFC_CTL_LIMIT_2X	(1<<6)
>  #define   DPFC_CTL_LIMIT_4X	(2<<6)
> +#define   DPFC_CTL_FENCE_MASK	0xF
>  #define DPFC_RECOMP_CTL		0x320c
>  #define   DPFC_RECOMP_STALL_EN	(1<<27)
>  #define   DPFC_RECOMP_STALL_WM_SHIFT (16)
> @@ -2086,6 +2088,7 @@ enum skl_disp_power_wells {
>  #define   FBC_CTL_FALSE_COLOR	(1<<10)
>  /* The bit 28-8 is reserved */
>  #define   DPFC_RESERVED		(0x1FFFFF00)
> +#define   ILK_DPFC_FENCE_MASK	0xF
>  #define ILK_DPFC_RECOMP_CTL	0x4320c
>  #define ILK_DPFC_STATUS		0x43210
>  #define ILK_DPFC_FENCE_YOFF	0x43218
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index bc1907e..d9378c3 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -11502,7 +11502,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
>  			  to_intel_plane(primary)->frontbuffer_bit);
>  	mutex_unlock(&dev->struct_mutex);
>  
> -	intel_fbc_disable_crtc(intel_crtc);
>  	intel_frontbuffer_flip_prepare(dev,
>  				       to_intel_plane(primary)->frontbuffer_bit);
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 08847d0..9065a8f 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1305,6 +1305,8 @@ void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
>  			  enum fb_op_origin origin);
>  void intel_fbc_flush(struct drm_i915_private *dev_priv,
>  		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
> +void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,
> +			    unsigned int frontbuffer_bits);
>  void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
>  
>  /* intel_hdmi.c */
> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
> index d9d7e54..62f158b 100644
> --- a/drivers/gpu/drm/i915/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/intel_fbc.c
> @@ -82,6 +82,22 @@ static void i8xx_fbc_disable(struct drm_i915_private *dev_priv)
>  	DRM_DEBUG_KMS("disabled FBC\n");
>  }
>  
> +static void i8xx_fbc_flip_prepare(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> +	uint32_t val;
> +
> +	/* Although the documentation suggests we can't change DPFC_CONTROL
> +	 * while compression is enabled, the hardware guys said that updating
> +	 * the fence register bits during a flip is fine. */
> +	val = I915_READ(FBC_CONTROL);
> +	val &= ~FBC_CTL_FENCENO_MASK;
> +	val |= obj->fence_reg;
> +	I915_WRITE(FBC_CONTROL, val);
> +}

IIRC I just called the enable function to reconstruct the entire
register contents to avoid code duplication. But maybe that's not
possible without reowrking the fbc state handling entirely
(which I had done).

> +
>  static void i8xx_fbc_enable(struct intel_crtc *crtc)
>  {
>  	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
> @@ -161,6 +177,22 @@ static void g4x_fbc_enable(struct intel_crtc *crtc)
>  	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane));
>  }
>  
> +static void g4x_fbc_flip_prepare(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> +	uint32_t val;
> +
> +	/* Although the documentation suggests we can't change DPFC_CONTROL
> +	 * while compression is enabled, the hardware guys said that updating
> +	 * the fence register bits during a flip is fine. */
> +	val = I915_READ(DPFC_CONTROL);
> +	val &= ~DPFC_CTL_FENCE_MASK;
> +	val |= obj->fence_reg;
> +	I915_WRITE(DPFC_CONTROL, val);
> +}
> +
>  static void g4x_fbc_disable(struct drm_i915_private *dev_priv)
>  {
>  	u32 dpfc_ctl;
> @@ -236,6 +268,31 @@ static void ilk_fbc_enable(struct intel_crtc *crtc)
>  	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane));
>  }
>  
> +static void ilk_fbc_flip_prepare(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> +	uint32_t val;
> +
> +	/* Although the documentation suggests we can't change DPFC_CONTROL
> +	 * while compression is enabled, the hardware guys said that updating
> +	 * the fence register bits during a flip is fine. */
> +	val = I915_READ(ILK_DPFC_CONTROL);
> +	val &= ~ILK_DPFC_FENCE_MASK;
> +	val |= obj->fence_reg;
> +	I915_WRITE(ILK_DPFC_CONTROL, val);
> +}
> +
> +static void snb_fbc_flip_prepare(struct drm_i915_private *dev_priv)
> +{
> +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> +
> +	I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | obj->fence_reg);
> +}
> +
>  static void ilk_fbc_disable(struct drm_i915_private *dev_priv)
>  {
>  	u32 dpfc_ctl;
> @@ -1021,13 +1078,48 @@ void intel_fbc_flush(struct drm_i915_private *dev_priv,
>  	if (origin == ORIGIN_GTT)
>  		return;
>  
> +	/* Hardware tracking already recompresses the CFB (nuke) for us if FBC
> +	 * is enabled and we do a page flip, so we can safely ignore it here.
> +	 * FBC may be disabled in case we got an invalidate() before the
> +	 * flush(), so we'll still have to check that case below. */
> +	if (origin == ORIGIN_FLIP && dev_priv->fbc.enabled)
> +		return;
> +
>  	mutex_lock(&dev_priv->fbc.lock);
>  
>  	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
>  
>  	if (!dev_priv->fbc.busy_bits) {
> -		__intel_fbc_disable(dev_priv);
> -		__intel_fbc_update(dev_priv);
> +		if (origin == ORIGIN_FLIP) {
> +			__intel_fbc_update(dev_priv);
> +		} else {
> +			__intel_fbc_disable(dev_priv);
> +			__intel_fbc_update(dev_priv);
> +		}
> +	}
> +
> +	mutex_unlock(&dev_priv->fbc.lock);
> +}
> +
> +void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,
> +			    unsigned int frontbuffer_bits)
> +{
> +	unsigned int fbc_bits;
> +
> +	if (!fbc_supported(dev_priv))
> +		return;
> +
> +	mutex_lock(&dev_priv->fbc.lock);
> +
> +	if (dev_priv->fbc.enabled) {
> +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);

primary->frontbuffer_bit would seem better.

> +		if (fbc_bits & frontbuffer_bits)
> +			dev_priv->fbc.flip_prepare(dev_priv);

You would still have to disable+reenable if you need to eg. reallocate
the compressed buffer, of if the new fb isn't suitable for fbc, or maybe
if you need to change anything else in the control register.

What I had was:
"
static bool intel_fbc_need_reinit(struct intel_crtc *crtc)
{
	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
	struct drm_framebuffer *fb = crtc->base.primary->fb;
	struct drm_i915_gem_object *obj = to_intel_framebuffer(fb)->obj;

	return crtc != dev_priv->fbc.crtc ||
	       obj->base.size > dev_priv->fbc.size ||
	       drm_format_plane_cpp(fb->pixel_format, 0) !=
	       drm_format_plane_cpp(dev_priv->fbc.pixel_format, 0) ||
	       fb->pitches[0] != dev_priv->fbc.pitch;
}
intel_fbc_pre_page_flip()
{
	...
	intel_fbc_update_pending_score(crtc);

	/*
	 * If fbc was already possible we can update immediately,
	 * otherwise we will wait until the flip is finished.
	 */
	if (crtc->fbc.score != 0)
		crtc->fbc.score = crtc->fbc.pending_score;

	/*
	 * Disable fbc if we're not (yet) capable, or if
	 * we just need a full disable+enable reinit.
	 */
	if (crtc->fbc.score == 0 || intel_fbc_need_reinit(crtc))
		__intel_fbc_disable(crtc);
	...
}
"

> +	} else if (dev_priv->fbc.fbc_work) {
> +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
> +				dev_priv->fbc.fbc_work->crtc->pipe);
> +		if (fbc_bits & frontbuffer_bits)
> +			__intel_fbc_disable(dev_priv);
>  	}
>  
>  	mutex_unlock(&dev_priv->fbc.lock);
> @@ -1063,18 +1155,25 @@ void intel_fbc_init(struct drm_i915_private *dev_priv)
>  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
>  		dev_priv->fbc.enable_fbc = gen7_fbc_enable;
>  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
> +		dev_priv->fbc.flip_prepare = snb_fbc_flip_prepare;
>  	} else if (INTEL_INFO(dev_priv)->gen >= 5) {
>  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
>  		dev_priv->fbc.enable_fbc = ilk_fbc_enable;
>  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
> +		if (INTEL_INFO(dev_priv)->gen == 5)
> +			dev_priv->fbc.flip_prepare = ilk_fbc_flip_prepare;
> +		else
> +			dev_priv->fbc.flip_prepare = snb_fbc_flip_prepare;
>  	} else if (IS_GM45(dev_priv)) {
>  		dev_priv->fbc.fbc_enabled = g4x_fbc_enabled;
>  		dev_priv->fbc.enable_fbc = g4x_fbc_enable;
>  		dev_priv->fbc.disable_fbc = g4x_fbc_disable;
> +		dev_priv->fbc.flip_prepare = g4x_fbc_flip_prepare;
>  	} else {
>  		dev_priv->fbc.fbc_enabled = i8xx_fbc_enabled;
>  		dev_priv->fbc.enable_fbc = i8xx_fbc_enable;
>  		dev_priv->fbc.disable_fbc = i8xx_fbc_disable;
> +		dev_priv->fbc.flip_prepare = i8xx_fbc_flip_prepare;
>  
>  		/* This value was pulled out of someone's hat */
>  		I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT);
> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
> index ac85357..31a1ad3 100644
> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> @@ -192,6 +192,7 @@ void intel_frontbuffer_flip_prepare(struct drm_device *dev,
>  	mutex_unlock(&dev_priv->fb_tracking.lock);
>  
>  	intel_psr_single_frame_update(dev, frontbuffer_bits);
> +	intel_fbc_flip_prepare(dev_priv, frontbuffer_bits);
>  }
>  
>  /**
> -- 
> 2.6.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Oct. 27, 2015, 7:50 p.m. UTC | #2
On Tue, Oct 27, 2015 at 02:50:04PM -0200, Paulo Zanoni wrote:
> @@ -1021,13 +1078,48 @@ void intel_fbc_flush(struct drm_i915_private *dev_priv,
>  	if (origin == ORIGIN_GTT)
>  		return;
>  
> +	/* Hardware tracking already recompresses the CFB (nuke) for us if FBC
> +	 * is enabled and we do a page flip, so we can safely ignore it here.
> +	 * FBC may be disabled in case we got an invalidate() before the
> +	 * flush(), so we'll still have to check that case below. */

I feel like I understand what is going on a bit better now, thanks!

> +	if (origin == ORIGIN_FLIP && dev_priv->fbc.enabled)
> +		return;
> +
>  	mutex_lock(&dev_priv->fbc.lock);
>  
>  	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
>  
>  	if (!dev_priv->fbc.busy_bits) {
> -		__intel_fbc_disable(dev_priv);
> -		__intel_fbc_update(dev_priv);
> +		if (origin == ORIGIN_FLIP) {

Note this test here is redundant.

We know that for an origin == FLIP to be here, FBC is disabled and
calling intel_fbc_disable() is then a no-op.

So it turns out that the origin two lines of disable(); update() works
just as well. Or is there some later patch that tweaks this branch
further?
-Chris
Zanoni, Paulo R Oct. 28, 2015, 4:56 p.m. UTC | #3
Em Ter, 2015-10-27 às 20:32 +0200, Ville Syrjälä escreveu:
> On Tue, Oct 27, 2015 at 02:50:04PM -0200, Paulo Zanoni wrote:

> > The hardware already takes care of disabling and recompressing FBC

> > when we do a page flip, so all we need to do is to update the fence

> > registers and move on.

> > 

> > One of the important things to notice is that on the pre-gen6

> > platforms the fence is programmed on the FBC control register and

> > the

> > documentation says we can't update the control register while FBC

> > is

> > enabled. This would basically mean we'd have to to disable+enable

> > FBC

> > at every flip in order to make sure the hardware tracking still

> > works,

> > which doesn't seem to make too much sense. So I sent an email to

> > the

> > hardware team requesting some clarification. The information I got

> > was

> > this:

> > 

> > "I don't think any hardware is latching on to 0x100100, 0x100104,

> > or

> > the old fence number in FBC_CTL. Instructions against changing on

> > the

> > fly would be to simplify testing and ensure you don't miss any

> > invalidation that happened right at that time."

> > 

> > So I guess we're fine for flips. But I can't really say I tested

> > FBC

> > on these ancient platforms - nor that I'll ever propose enabling

> > FBC

> > by default on them exactly because of problems like these.

> 

> Yeah, I did this in my patches too, and IIRC it seemed to work just

> fine

> back then.

> 

> > 

> > v2: Add comment at flush() (Chris).

> > 

> > Testcase: igt/kms_frontbuffer_tracking/fbc*-fliptrack

> > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

> > ---

> >  drivers/gpu/drm/i915/i915_drv.h          |   1 +

> >  drivers/gpu/drm/i915/i915_reg.h          |   3 +

> >  drivers/gpu/drm/i915/intel_display.c     |   1 -

> >  drivers/gpu/drm/i915/intel_drv.h         |   2 +

> >  drivers/gpu/drm/i915/intel_fbc.c         | 103

> > ++++++++++++++++++++++++++++++-

> >  drivers/gpu/drm/i915/intel_frontbuffer.c |   1 +

> >  6 files changed, 108 insertions(+), 3 deletions(-)

> > 

> > diff --git a/drivers/gpu/drm/i915/i915_drv.h

> > b/drivers/gpu/drm/i915/i915_drv.h

> > index ee14962..77da500 100644

> > --- a/drivers/gpu/drm/i915/i915_drv.h

> > +++ b/drivers/gpu/drm/i915/i915_drv.h

> > @@ -928,6 +928,7 @@ struct i915_fbc {

> >  	bool (*fbc_enabled)(struct drm_i915_private *dev_priv);

> >  	void (*enable_fbc)(struct intel_crtc *crtc);

> >  	void (*disable_fbc)(struct drm_i915_private *dev_priv);

> > +	void (*flip_prepare)(struct drm_i915_private *dev_priv);

> >  };

> >  

> >  /**

> > diff --git a/drivers/gpu/drm/i915/i915_reg.h

> > b/drivers/gpu/drm/i915/i915_reg.h

> > index 8942532..3d598a2 100644

> > --- a/drivers/gpu/drm/i915/i915_reg.h

> > +++ b/drivers/gpu/drm/i915/i915_reg.h

> > @@ -2028,6 +2028,7 @@ enum skl_disp_power_wells {

> >  #define   FBC_CTL_C3_IDLE	(1<<13)

> >  #define   FBC_CTL_STRIDE_SHIFT	(5)

> >  #define   FBC_CTL_FENCENO_SHIFT	(0)

> > +#define   FBC_CTL_FENCENO_MASK	0xF

> 

> It's only three bits on gen2. But bit 3 is MBZ and there are only 8

> fence registers on those platforms, so this is OK.

> 

> >  #define FBC_COMMAND		0x0320c

> >  #define   FBC_CMD_COMPRESS	(1<<0)

> >  #define FBC_STATUS		0x03210

> > @@ -2064,6 +2065,7 @@ enum skl_disp_power_wells {

> >  #define   DPFC_CTL_LIMIT_1X	(0<<6)

> >  #define   DPFC_CTL_LIMIT_2X	(1<<6)

> >  #define   DPFC_CTL_LIMIT_4X	(2<<6)

> > +#define   DPFC_CTL_FENCE_MASK	0xF

> >  #define DPFC_RECOMP_CTL		0x320c

> >  #define   DPFC_RECOMP_STALL_EN	(1<<27)

> >  #define   DPFC_RECOMP_STALL_WM_SHIFT (16)

> > @@ -2086,6 +2088,7 @@ enum skl_disp_power_wells {

> >  #define   FBC_CTL_FALSE_COLOR	(1<<10)

> >  /* The bit 28-8 is reserved */

> >  #define   DPFC_RESERVED		(0x1FFFFF00)

> > +#define   ILK_DPFC_FENCE_MASK	0xF

> >  #define ILK_DPFC_RECOMP_CTL	0x4320c

> >  #define ILK_DPFC_STATUS		0x43210

> >  #define ILK_DPFC_FENCE_YOFF	0x43218

> > diff --git a/drivers/gpu/drm/i915/intel_display.c

> > b/drivers/gpu/drm/i915/intel_display.c

> > index bc1907e..d9378c3 100644

> > --- a/drivers/gpu/drm/i915/intel_display.c

> > +++ b/drivers/gpu/drm/i915/intel_display.c

> > @@ -11502,7 +11502,6 @@ static int intel_crtc_page_flip(struct

> > drm_crtc *crtc,

> >  			  to_intel_plane(primary)-

> > >frontbuffer_bit);

> >  	mutex_unlock(&dev->struct_mutex);

> >  

> > -	intel_fbc_disable_crtc(intel_crtc);

> >  	intel_frontbuffer_flip_prepare(dev,

> >  				       to_intel_plane(primary)-

> > >frontbuffer_bit);

> >  

> > diff --git a/drivers/gpu/drm/i915/intel_drv.h

> > b/drivers/gpu/drm/i915/intel_drv.h

> > index 08847d0..9065a8f 100644

> > --- a/drivers/gpu/drm/i915/intel_drv.h

> > +++ b/drivers/gpu/drm/i915/intel_drv.h

> > @@ -1305,6 +1305,8 @@ void intel_fbc_invalidate(struct

> > drm_i915_private *dev_priv,

> >  			  enum fb_op_origin origin);

> >  void intel_fbc_flush(struct drm_i915_private *dev_priv,

> >  		     unsigned int frontbuffer_bits, enum

> > fb_op_origin origin);

> > +void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,

> > +			    unsigned int frontbuffer_bits);

> >  void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);

> >  

> >  /* intel_hdmi.c */

> > diff --git a/drivers/gpu/drm/i915/intel_fbc.c

> > b/drivers/gpu/drm/i915/intel_fbc.c

> > index d9d7e54..62f158b 100644

> > --- a/drivers/gpu/drm/i915/intel_fbc.c

> > +++ b/drivers/gpu/drm/i915/intel_fbc.c

> > @@ -82,6 +82,22 @@ static void i8xx_fbc_disable(struct

> > drm_i915_private *dev_priv)

> >  	DRM_DEBUG_KMS("disabled FBC\n");

> >  }

> >  

> > +static void i8xx_fbc_flip_prepare(struct drm_i915_private

> > *dev_priv)

> > +{

> > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > +	struct drm_framebuffer *fb = crtc->base.primary->fb;

> > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);

> > +	uint32_t val;

> > +

> > +	/* Although the documentation suggests we can't change

> > DPFC_CONTROL

> > +	 * while compression is enabled, the hardware guys said

> > that updating

> > +	 * the fence register bits during a flip is fine. */

> > +	val = I915_READ(FBC_CONTROL);

> > +	val &= ~FBC_CTL_FENCENO_MASK;

> > +	val |= obj->fence_reg;

> > +	I915_WRITE(FBC_CONTROL, val);

> > +}

> 

> IIRC I just called the enable function to reconstruct the entire

> register contents to avoid code duplication. But maybe that's not

> possible without reowrking the fbc state handling entirely

> (which I had done).

> > +

> >  static void i8xx_fbc_enable(struct intel_crtc *crtc)

> >  {

> >  	struct drm_i915_private *dev_priv = crtc->base.dev-

> > >dev_private;

> > @@ -161,6 +177,22 @@ static void g4x_fbc_enable(struct intel_crtc

> > *crtc)

> >  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",

> > plane_name(crtc->plane));

> >  }

> >  

> > +static void g4x_fbc_flip_prepare(struct drm_i915_private

> > *dev_priv)

> > +{

> > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > +	struct drm_framebuffer *fb = crtc->base.primary->fb;

> > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);

> > +	uint32_t val;

> > +

> > +	/* Although the documentation suggests we can't change

> > DPFC_CONTROL

> > +	 * while compression is enabled, the hardware guys said

> > that updating

> > +	 * the fence register bits during a flip is fine. */

> > +	val = I915_READ(DPFC_CONTROL);

> > +	val &= ~DPFC_CTL_FENCE_MASK;

> > +	val |= obj->fence_reg;

> > +	I915_WRITE(DPFC_CONTROL, val);

> > +}

> > +

> >  static void g4x_fbc_disable(struct drm_i915_private *dev_priv)

> >  {

> >  	u32 dpfc_ctl;

> > @@ -236,6 +268,31 @@ static void ilk_fbc_enable(struct intel_crtc

> > *crtc)

> >  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",

> > plane_name(crtc->plane));

> >  }

> >  

> > +static void ilk_fbc_flip_prepare(struct drm_i915_private

> > *dev_priv)

> > +{

> > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > +	struct drm_framebuffer *fb = crtc->base.primary->fb;

> > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);

> > +	uint32_t val;

> > +

> > +	/* Although the documentation suggests we can't change

> > DPFC_CONTROL

> > +	 * while compression is enabled, the hardware guys said

> > that updating

> > +	 * the fence register bits during a flip is fine. */

> > +	val = I915_READ(ILK_DPFC_CONTROL);

> > +	val &= ~ILK_DPFC_FENCE_MASK;

> > +	val |= obj->fence_reg;

> > +	I915_WRITE(ILK_DPFC_CONTROL, val);

> > +}

> > +

> > +static void snb_fbc_flip_prepare(struct drm_i915_private

> > *dev_priv)

> > +{

> > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > +	struct drm_framebuffer *fb = crtc->base.primary->fb;

> > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);

> > +

> > +	I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | obj-

> > >fence_reg);

> > +}

> > +

> >  static void ilk_fbc_disable(struct drm_i915_private *dev_priv)

> >  {

> >  	u32 dpfc_ctl;

> > @@ -1021,13 +1078,48 @@ void intel_fbc_flush(struct

> > drm_i915_private *dev_priv,

> >  	if (origin == ORIGIN_GTT)

> >  		return;

> >  

> > +	/* Hardware tracking already recompresses the CFB (nuke)

> > for us if FBC

> > +	 * is enabled and we do a page flip, so we can safely

> > ignore it here.

> > +	 * FBC may be disabled in case we got an invalidate()

> > before the

> > +	 * flush(), so we'll still have to check that case below.

> > */

> > +	if (origin == ORIGIN_FLIP && dev_priv->fbc.enabled)

> > +		return;

> > +

> >  	mutex_lock(&dev_priv->fbc.lock);

> >  

> >  	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;

> >  

> >  	if (!dev_priv->fbc.busy_bits) {

> > -		__intel_fbc_disable(dev_priv);

> > -		__intel_fbc_update(dev_priv);

> > +		if (origin == ORIGIN_FLIP) {

> > +			__intel_fbc_update(dev_priv);

> > +		} else {

> > +			__intel_fbc_disable(dev_priv);

> > +			__intel_fbc_update(dev_priv);

> > +		}

> > +	}

> > +

> > +	mutex_unlock(&dev_priv->fbc.lock);

> > +}

> > +

> > +void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,

> > +			    unsigned int frontbuffer_bits)

> > +{

> > +	unsigned int fbc_bits;

> > +

> > +	if (!fbc_supported(dev_priv))

> > +		return;

> > +

> > +	mutex_lock(&dev_priv->fbc.lock);

> > +

> > +	if (dev_priv->fbc.enabled) {

> > +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv-

> > >fbc.crtc->pipe);

> 

> primary->frontbuffer_bit would seem better.


Why?

> 

> > +		if (fbc_bits & frontbuffer_bits)

> > +			dev_priv->fbc.flip_prepare(dev_priv);

> 

> You would still have to disable+reenable if you need to eg.

> reallocate

> the compressed buffer, of if the new fb isn't suitable for fbc, or

> maybe

> if you need to change anything else in the control register.


Yes, but as far as I understand the frontbuffer tracking, this case
won't happen here. I'm assuming we'll always go through
intel_fbc_update() on these cases.

> 

> What I had was:

> "

> static bool intel_fbc_need_reinit(struct intel_crtc *crtc)

> {

> 	struct drm_i915_private *dev_priv = crtc->base.dev-

> >dev_private;

> 	struct drm_framebuffer *fb = crtc->base.primary->fb;

> 	struct drm_i915_gem_object *obj = to_intel_framebuffer(fb)-

> >obj;

> 

> 	return crtc != dev_priv->fbc.crtc ||

> 	       obj->base.size > dev_priv->fbc.size ||

> 	       drm_format_plane_cpp(fb->pixel_format, 0) !=

> 	       drm_format_plane_cpp(dev_priv->fbc.pixel_format, 0) ||

> 	       fb->pitches[0] != dev_priv->fbc.pitch;

> }

> intel_fbc_pre_page_flip()

> {

> 	...

> 	intel_fbc_update_pending_score(crtc);

> 

> 	/*

> 	 * If fbc was already possible we can update immediately,

> 	 * otherwise we will wait until the flip is finished.

> 	 */

> 	if (crtc->fbc.score != 0)

> 		crtc->fbc.score = crtc->fbc.pending_score;

> 

> 	/*

> 	 * Disable fbc if we're not (yet) capable, or if

> 	 * we just need a full disable+enable reinit.

> 	 */

> 	if (crtc->fbc.score == 0 || intel_fbc_need_reinit(crtc))

> 		__intel_fbc_disable(crtc);

> 	...

> }

> "

> 

> > +	} else if (dev_priv->fbc.fbc_work) {

> > +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(

> > +				dev_priv->fbc.fbc_work->crtc-

> > >pipe);

> > +		if (fbc_bits & frontbuffer_bits)

> > +			__intel_fbc_disable(dev_priv);

> >  	}

> >  

> >  	mutex_unlock(&dev_priv->fbc.lock);

> > @@ -1063,18 +1155,25 @@ void intel_fbc_init(struct drm_i915_private

> > *dev_priv)

> >  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;

> >  		dev_priv->fbc.enable_fbc = gen7_fbc_enable;

> >  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;

> > +		dev_priv->fbc.flip_prepare = snb_fbc_flip_prepare;

> >  	} else if (INTEL_INFO(dev_priv)->gen >= 5) {

> >  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;

> >  		dev_priv->fbc.enable_fbc = ilk_fbc_enable;

> >  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;

> > +		if (INTEL_INFO(dev_priv)->gen == 5)

> > +			dev_priv->fbc.flip_prepare =

> > ilk_fbc_flip_prepare;

> > +		else

> > +			dev_priv->fbc.flip_prepare =

> > snb_fbc_flip_prepare;

> >  	} else if (IS_GM45(dev_priv)) {

> >  		dev_priv->fbc.fbc_enabled = g4x_fbc_enabled;

> >  		dev_priv->fbc.enable_fbc = g4x_fbc_enable;

> >  		dev_priv->fbc.disable_fbc = g4x_fbc_disable;

> > +		dev_priv->fbc.flip_prepare = g4x_fbc_flip_prepare;

> >  	} else {

> >  		dev_priv->fbc.fbc_enabled = i8xx_fbc_enabled;

> >  		dev_priv->fbc.enable_fbc = i8xx_fbc_enable;

> >  		dev_priv->fbc.disable_fbc = i8xx_fbc_disable;

> > +		dev_priv->fbc.flip_prepare =

> > i8xx_fbc_flip_prepare;

> >  

> >  		/* This value was pulled out of someone's hat */

> >  		I915_WRITE(FBC_CONTROL, 500 <<

> > FBC_CTL_INTERVAL_SHIFT);

> > diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c

> > b/drivers/gpu/drm/i915/intel_frontbuffer.c

> > index ac85357..31a1ad3 100644

> > --- a/drivers/gpu/drm/i915/intel_frontbuffer.c

> > +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c

> > @@ -192,6 +192,7 @@ void intel_frontbuffer_flip_prepare(struct

> > drm_device *dev,

> >  	mutex_unlock(&dev_priv->fb_tracking.lock);

> >  

> >  	intel_psr_single_frame_update(dev, frontbuffer_bits);

> > +	intel_fbc_flip_prepare(dev_priv, frontbuffer_bits);

> >  }

> >  

> >  /**

> > -- 

> > 2.6.1

> > 

> > _______________________________________________

> > Intel-gfx mailing list

> > Intel-gfx@lists.freedesktop.org

> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx

>
Zanoni, Paulo R Oct. 28, 2015, 4:58 p.m. UTC | #4
Em Ter, 2015-10-27 às 19:50 +0000, Chris Wilson escreveu:
> On Tue, Oct 27, 2015 at 02:50:04PM -0200, Paulo Zanoni wrote:

> > @@ -1021,13 +1078,48 @@ void intel_fbc_flush(struct

> > drm_i915_private *dev_priv,

> >  	if (origin == ORIGIN_GTT)

> >  		return;

> >  

> > +	/* Hardware tracking already recompresses the CFB (nuke)

> > for us if FBC

> > +	 * is enabled and we do a page flip, so we can safely

> > ignore it here.

> > +	 * FBC may be disabled in case we got an invalidate()

> > before the

> > +	 * flush(), so we'll still have to check that case below.

> > */

> 

> I feel like I understand what is going on a bit better now, thanks!

> 

> > +	if (origin == ORIGIN_FLIP && dev_priv->fbc.enabled)

> > +		return;

> > +

> >  	mutex_lock(&dev_priv->fbc.lock);

> >  

> >  	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;

> >  

> >  	if (!dev_priv->fbc.busy_bits) {

> > -		__intel_fbc_disable(dev_priv);

> > -		__intel_fbc_update(dev_priv);

> > +		if (origin == ORIGIN_FLIP) {

> 

> Note this test here is redundant.

> 

> We know that for an origin == FLIP to be here, FBC is disabled and

> calling intel_fbc_disable() is then a no-op.

> 

> So it turns out that the origin two lines of disable(); update()

> works

> just as well. Or is there some later patch that tweaks this branch

> further?


Right, you commented about this on the patch that is currently 04/26 an
I only fixed it there.

> -Chris

>
Ville Syrjala Oct. 28, 2015, 5:20 p.m. UTC | #5
On Wed, Oct 28, 2015 at 04:56:39PM +0000, Zanoni, Paulo R wrote:
> Em Ter, 2015-10-27 às 20:32 +0200, Ville Syrjälä escreveu:
> > On Tue, Oct 27, 2015 at 02:50:04PM -0200, Paulo Zanoni wrote:
> > > The hardware already takes care of disabling and recompressing FBC
> > > when we do a page flip, so all we need to do is to update the fence
> > > registers and move on.
> > > 
> > > One of the important things to notice is that on the pre-gen6
> > > platforms the fence is programmed on the FBC control register and
> > > the
> > > documentation says we can't update the control register while FBC
> > > is
> > > enabled. This would basically mean we'd have to to disable+enable
> > > FBC
> > > at every flip in order to make sure the hardware tracking still
> > > works,
> > > which doesn't seem to make too much sense. So I sent an email to
> > > the
> > > hardware team requesting some clarification. The information I got
> > > was
> > > this:
> > > 
> > > "I don't think any hardware is latching on to 0x100100, 0x100104,
> > > or
> > > the old fence number in FBC_CTL. Instructions against changing on
> > > the
> > > fly would be to simplify testing and ensure you don't miss any
> > > invalidation that happened right at that time."
> > > 
> > > So I guess we're fine for flips. But I can't really say I tested
> > > FBC
> > > on these ancient platforms - nor that I'll ever propose enabling
> > > FBC
> > > by default on them exactly because of problems like these.
> > 
> > Yeah, I did this in my patches too, and IIRC it seemed to work just
> > fine
> > back then.
> > 
> > > 
> > > v2: Add comment at flush() (Chris).
> > > 
> > > Testcase: igt/kms_frontbuffer_tracking/fbc*-fliptrack
> > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_drv.h          |   1 +
> > >  drivers/gpu/drm/i915/i915_reg.h          |   3 +
> > >  drivers/gpu/drm/i915/intel_display.c     |   1 -
> > >  drivers/gpu/drm/i915/intel_drv.h         |   2 +
> > >  drivers/gpu/drm/i915/intel_fbc.c         | 103
> > > ++++++++++++++++++++++++++++++-
> > >  drivers/gpu/drm/i915/intel_frontbuffer.c |   1 +
> > >  6 files changed, 108 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > b/drivers/gpu/drm/i915/i915_drv.h
> > > index ee14962..77da500 100644
> > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > @@ -928,6 +928,7 @@ struct i915_fbc {
> > >  	bool (*fbc_enabled)(struct drm_i915_private *dev_priv);
> > >  	void (*enable_fbc)(struct intel_crtc *crtc);
> > >  	void (*disable_fbc)(struct drm_i915_private *dev_priv);
> > > +	void (*flip_prepare)(struct drm_i915_private *dev_priv);
> > >  };
> > >  
> > >  /**
> > > diff --git a/drivers/gpu/drm/i915/i915_reg.h
> > > b/drivers/gpu/drm/i915/i915_reg.h
> > > index 8942532..3d598a2 100644
> > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > @@ -2028,6 +2028,7 @@ enum skl_disp_power_wells {
> > >  #define   FBC_CTL_C3_IDLE	(1<<13)
> > >  #define   FBC_CTL_STRIDE_SHIFT	(5)
> > >  #define   FBC_CTL_FENCENO_SHIFT	(0)
> > > +#define   FBC_CTL_FENCENO_MASK	0xF
> > 
> > It's only three bits on gen2. But bit 3 is MBZ and there are only 8
> > fence registers on those platforms, so this is OK.
> > 
> > >  #define FBC_COMMAND		0x0320c
> > >  #define   FBC_CMD_COMPRESS	(1<<0)
> > >  #define FBC_STATUS		0x03210
> > > @@ -2064,6 +2065,7 @@ enum skl_disp_power_wells {
> > >  #define   DPFC_CTL_LIMIT_1X	(0<<6)
> > >  #define   DPFC_CTL_LIMIT_2X	(1<<6)
> > >  #define   DPFC_CTL_LIMIT_4X	(2<<6)
> > > +#define   DPFC_CTL_FENCE_MASK	0xF
> > >  #define DPFC_RECOMP_CTL		0x320c
> > >  #define   DPFC_RECOMP_STALL_EN	(1<<27)
> > >  #define   DPFC_RECOMP_STALL_WM_SHIFT (16)
> > > @@ -2086,6 +2088,7 @@ enum skl_disp_power_wells {
> > >  #define   FBC_CTL_FALSE_COLOR	(1<<10)
> > >  /* The bit 28-8 is reserved */
> > >  #define   DPFC_RESERVED		(0x1FFFFF00)
> > > +#define   ILK_DPFC_FENCE_MASK	0xF
> > >  #define ILK_DPFC_RECOMP_CTL	0x4320c
> > >  #define ILK_DPFC_STATUS		0x43210
> > >  #define ILK_DPFC_FENCE_YOFF	0x43218
> > > diff --git a/drivers/gpu/drm/i915/intel_display.c
> > > b/drivers/gpu/drm/i915/intel_display.c
> > > index bc1907e..d9378c3 100644
> > > --- a/drivers/gpu/drm/i915/intel_display.c
> > > +++ b/drivers/gpu/drm/i915/intel_display.c
> > > @@ -11502,7 +11502,6 @@ static int intel_crtc_page_flip(struct
> > > drm_crtc *crtc,
> > >  			  to_intel_plane(primary)-
> > > >frontbuffer_bit);
> > >  	mutex_unlock(&dev->struct_mutex);
> > >  
> > > -	intel_fbc_disable_crtc(intel_crtc);
> > >  	intel_frontbuffer_flip_prepare(dev,
> > >  				       to_intel_plane(primary)-
> > > >frontbuffer_bit);
> > >  
> > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > b/drivers/gpu/drm/i915/intel_drv.h
> > > index 08847d0..9065a8f 100644
> > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > @@ -1305,6 +1305,8 @@ void intel_fbc_invalidate(struct
> > > drm_i915_private *dev_priv,
> > >  			  enum fb_op_origin origin);
> > >  void intel_fbc_flush(struct drm_i915_private *dev_priv,
> > >  		     unsigned int frontbuffer_bits, enum
> > > fb_op_origin origin);
> > > +void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,
> > > +			    unsigned int frontbuffer_bits);
> > >  void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
> > >  
> > >  /* intel_hdmi.c */
> > > diff --git a/drivers/gpu/drm/i915/intel_fbc.c
> > > b/drivers/gpu/drm/i915/intel_fbc.c
> > > index d9d7e54..62f158b 100644
> > > --- a/drivers/gpu/drm/i915/intel_fbc.c
> > > +++ b/drivers/gpu/drm/i915/intel_fbc.c
> > > @@ -82,6 +82,22 @@ static void i8xx_fbc_disable(struct
> > > drm_i915_private *dev_priv)
> > >  	DRM_DEBUG_KMS("disabled FBC\n");
> > >  }
> > >  
> > > +static void i8xx_fbc_flip_prepare(struct drm_i915_private
> > > *dev_priv)
> > > +{
> > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > +	uint32_t val;
> > > +
> > > +	/* Although the documentation suggests we can't change
> > > DPFC_CONTROL
> > > +	 * while compression is enabled, the hardware guys said
> > > that updating
> > > +	 * the fence register bits during a flip is fine. */
> > > +	val = I915_READ(FBC_CONTROL);
> > > +	val &= ~FBC_CTL_FENCENO_MASK;
> > > +	val |= obj->fence_reg;
> > > +	I915_WRITE(FBC_CONTROL, val);
> > > +}
> > 
> > IIRC I just called the enable function to reconstruct the entire
> > register contents to avoid code duplication. But maybe that's not
> > possible without reowrking the fbc state handling entirely
> > (which I had done).
> > > +
> > >  static void i8xx_fbc_enable(struct intel_crtc *crtc)
> > >  {
> > >  	struct drm_i915_private *dev_priv = crtc->base.dev-
> > > >dev_private;
> > > @@ -161,6 +177,22 @@ static void g4x_fbc_enable(struct intel_crtc
> > > *crtc)
> > >  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",
> > > plane_name(crtc->plane));
> > >  }
> > >  
> > > +static void g4x_fbc_flip_prepare(struct drm_i915_private
> > > *dev_priv)
> > > +{
> > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > +	uint32_t val;
> > > +
> > > +	/* Although the documentation suggests we can't change
> > > DPFC_CONTROL
> > > +	 * while compression is enabled, the hardware guys said
> > > that updating
> > > +	 * the fence register bits during a flip is fine. */
> > > +	val = I915_READ(DPFC_CONTROL);
> > > +	val &= ~DPFC_CTL_FENCE_MASK;
> > > +	val |= obj->fence_reg;
> > > +	I915_WRITE(DPFC_CONTROL, val);
> > > +}
> > > +
> > >  static void g4x_fbc_disable(struct drm_i915_private *dev_priv)
> > >  {
> > >  	u32 dpfc_ctl;
> > > @@ -236,6 +268,31 @@ static void ilk_fbc_enable(struct intel_crtc
> > > *crtc)
> > >  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",
> > > plane_name(crtc->plane));
> > >  }
> > >  
> > > +static void ilk_fbc_flip_prepare(struct drm_i915_private
> > > *dev_priv)
> > > +{
> > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > +	uint32_t val;
> > > +
> > > +	/* Although the documentation suggests we can't change
> > > DPFC_CONTROL
> > > +	 * while compression is enabled, the hardware guys said
> > > that updating
> > > +	 * the fence register bits during a flip is fine. */
> > > +	val = I915_READ(ILK_DPFC_CONTROL);
> > > +	val &= ~ILK_DPFC_FENCE_MASK;
> > > +	val |= obj->fence_reg;
> > > +	I915_WRITE(ILK_DPFC_CONTROL, val);
> > > +}
> > > +
> > > +static void snb_fbc_flip_prepare(struct drm_i915_private
> > > *dev_priv)
> > > +{
> > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > +
> > > +	I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | obj-
> > > >fence_reg);
> > > +}
> > > +
> > >  static void ilk_fbc_disable(struct drm_i915_private *dev_priv)
> > >  {
> > >  	u32 dpfc_ctl;
> > > @@ -1021,13 +1078,48 @@ void intel_fbc_flush(struct
> > > drm_i915_private *dev_priv,
> > >  	if (origin == ORIGIN_GTT)
> > >  		return;
> > >  
> > > +	/* Hardware tracking already recompresses the CFB (nuke)
> > > for us if FBC
> > > +	 * is enabled and we do a page flip, so we can safely
> > > ignore it here.
> > > +	 * FBC may be disabled in case we got an invalidate()
> > > before the
> > > +	 * flush(), so we'll still have to check that case below.
> > > */
> > > +	if (origin == ORIGIN_FLIP && dev_priv->fbc.enabled)
> > > +		return;
> > > +
> > >  	mutex_lock(&dev_priv->fbc.lock);
> > >  
> > >  	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
> > >  
> > >  	if (!dev_priv->fbc.busy_bits) {
> > > -		__intel_fbc_disable(dev_priv);
> > > -		__intel_fbc_update(dev_priv);
> > > +		if (origin == ORIGIN_FLIP) {
> > > +			__intel_fbc_update(dev_priv);
> > > +		} else {
> > > +			__intel_fbc_disable(dev_priv);
> > > +			__intel_fbc_update(dev_priv);
> > > +		}
> > > +	}
> > > +
> > > +	mutex_unlock(&dev_priv->fbc.lock);
> > > +}
> > > +
> > > +void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,
> > > +			    unsigned int frontbuffer_bits)
> > > +{
> > > +	unsigned int fbc_bits;
> > > +
> > > +	if (!fbc_supported(dev_priv))
> > > +		return;
> > > +
> > > +	mutex_lock(&dev_priv->fbc.lock);
> > > +
> > > +	if (dev_priv->fbc.enabled) {
> > > +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv-
> > > >fbc.crtc->pipe);
> > 
> > primary->frontbuffer_bit would seem better.
> 
> Why?

Why not? It's there already stashed away for you to use. No need to
sprinkle these INTEL_FRONTBUFFER... things all over the place.

> 
> > 
> > > +		if (fbc_bits & frontbuffer_bits)
> > > +			dev_priv->fbc.flip_prepare(dev_priv);
> > 
> > You would still have to disable+reenable if you need to eg.
> > reallocate
> > the compressed buffer, of if the new fb isn't suitable for fbc, or
> > maybe
> > if you need to change anything else in the control register.
> 
> Yes, but as far as I understand the frontbuffer tracking, this case
> won't happen here. I'm assuming we'll always go through
> intel_fbc_update() on these cases.

Perhaps, but more by luck than by design. With the setplane and atomic
APIs there's no difference between a page flip and other plane updates.
So if you want to take advantage of the hw flip nuke, you would need to
check how the plane state is going to change, and based on that decide
whether a fence update is enough or disable+re-enable is needed. And in
fact I don't see any fbc_disable before the flip, just an fbc_update
after the fact. So seems to me the disable really should be in your
flip_prepare hook in this case. And in fact the whole flip_prepare
thing isn't even called from the setplane/atomic path.

> 
> > 
> > What I had was:
> > "
> > static bool intel_fbc_need_reinit(struct intel_crtc *crtc)
> > {
> > 	struct drm_i915_private *dev_priv = crtc->base.dev-
> > >dev_private;
> > 	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > 	struct drm_i915_gem_object *obj = to_intel_framebuffer(fb)-
> > >obj;
> > 
> > 	return crtc != dev_priv->fbc.crtc ||
> > 	       obj->base.size > dev_priv->fbc.size ||
> > 	       drm_format_plane_cpp(fb->pixel_format, 0) !=
> > 	       drm_format_plane_cpp(dev_priv->fbc.pixel_format, 0) ||
> > 	       fb->pitches[0] != dev_priv->fbc.pitch;
> > }
> > intel_fbc_pre_page_flip()
> > {
> > 	...
> > 	intel_fbc_update_pending_score(crtc);
> > 
> > 	/*
> > 	 * If fbc was already possible we can update immediately,
> > 	 * otherwise we will wait until the flip is finished.
> > 	 */
> > 	if (crtc->fbc.score != 0)
> > 		crtc->fbc.score = crtc->fbc.pending_score;
> > 
> > 	/*
> > 	 * Disable fbc if we're not (yet) capable, or if
> > 	 * we just need a full disable+enable reinit.
> > 	 */
> > 	if (crtc->fbc.score == 0 || intel_fbc_need_reinit(crtc))
> > 		__intel_fbc_disable(crtc);
> > 	...
> > }
> > "
> > 
> > > +	} else if (dev_priv->fbc.fbc_work) {
> > > +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
> > > +				dev_priv->fbc.fbc_work->crtc-
> > > >pipe);
> > > +		if (fbc_bits & frontbuffer_bits)
> > > +			__intel_fbc_disable(dev_priv);
> > >  	}
> > >  
> > >  	mutex_unlock(&dev_priv->fbc.lock);
> > > @@ -1063,18 +1155,25 @@ void intel_fbc_init(struct drm_i915_private
> > > *dev_priv)
> > >  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
> > >  		dev_priv->fbc.enable_fbc = gen7_fbc_enable;
> > >  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
> > > +		dev_priv->fbc.flip_prepare = snb_fbc_flip_prepare;
> > >  	} else if (INTEL_INFO(dev_priv)->gen >= 5) {
> > >  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
> > >  		dev_priv->fbc.enable_fbc = ilk_fbc_enable;
> > >  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
> > > +		if (INTEL_INFO(dev_priv)->gen == 5)
> > > +			dev_priv->fbc.flip_prepare =
> > > ilk_fbc_flip_prepare;
> > > +		else
> > > +			dev_priv->fbc.flip_prepare =
> > > snb_fbc_flip_prepare;
> > >  	} else if (IS_GM45(dev_priv)) {
> > >  		dev_priv->fbc.fbc_enabled = g4x_fbc_enabled;
> > >  		dev_priv->fbc.enable_fbc = g4x_fbc_enable;
> > >  		dev_priv->fbc.disable_fbc = g4x_fbc_disable;
> > > +		dev_priv->fbc.flip_prepare = g4x_fbc_flip_prepare;
> > >  	} else {
> > >  		dev_priv->fbc.fbc_enabled = i8xx_fbc_enabled;
> > >  		dev_priv->fbc.enable_fbc = i8xx_fbc_enable;
> > >  		dev_priv->fbc.disable_fbc = i8xx_fbc_disable;
> > > +		dev_priv->fbc.flip_prepare =
> > > i8xx_fbc_flip_prepare;
> > >  
> > >  		/* This value was pulled out of someone's hat */
> > >  		I915_WRITE(FBC_CONTROL, 500 <<
> > > FBC_CTL_INTERVAL_SHIFT);
> > > diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > b/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > index ac85357..31a1ad3 100644
> > > --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > @@ -192,6 +192,7 @@ void intel_frontbuffer_flip_prepare(struct
> > > drm_device *dev,
> > >  	mutex_unlock(&dev_priv->fb_tracking.lock);
> > >  
> > >  	intel_psr_single_frame_update(dev, frontbuffer_bits);
> > > +	intel_fbc_flip_prepare(dev_priv, frontbuffer_bits);
> > >  }
> > >  
> > >  /**
> > > -- 
> > > 2.6.1
> > > 
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@lists.freedesktop.org
> > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
Maarten Lankhorst Oct. 29, 2015, 12:05 p.m. UTC | #6
Op 28-10-15 om 18:20 schreef Ville Syrjälä:
> On Wed, Oct 28, 2015 at 04:56:39PM +0000, Zanoni, Paulo R wrote:
>> Em Ter, 2015-10-27 às 20:32 +0200, Ville Syrjälä escreveu:
>>> On Tue, Oct 27, 2015 at 02:50:04PM -0200, Paulo Zanoni wrote:
>>>> The hardware already takes care of disabling and recompressing FBC
>>>> when we do a page flip, so all we need to do is to update the fence
>>>> registers and move on.
>>>>
>>>> One of the important things to notice is that on the pre-gen6
>>>> platforms the fence is programmed on the FBC control register and
>>>> the
>>>> documentation says we can't update the control register while FBC
>>>> is
>>>> enabled. This would basically mean we'd have to to disable+enable
>>>> FBC
>>>> at every flip in order to make sure the hardware tracking still
>>>> works,
>>>> which doesn't seem to make too much sense. So I sent an email to
>>>> the
>>>> hardware team requesting some clarification. The information I got
>>>> was
>>>> this:
>>>>
>>>> "I don't think any hardware is latching on to 0x100100, 0x100104,
>>>> or
>>>> the old fence number in FBC_CTL. Instructions against changing on
>>>> the
>>>> fly would be to simplify testing and ensure you don't miss any
>>>> invalidation that happened right at that time."
>>>>
>>>> So I guess we're fine for flips. But I can't really say I tested
>>>> FBC
>>>> on these ancient platforms - nor that I'll ever propose enabling
>>>> FBC
>>>> by default on them exactly because of problems like these.
>>> Yeah, I did this in my patches too, and IIRC it seemed to work just
>>> fine
>>> back then.
>>>
>>>> v2: Add comment at flush() (Chris).
>>>>
>>>> Testcase: igt/kms_frontbuffer_tracking/fbc*-fliptrack
>>>> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>>> ---
>>>>  drivers/gpu/drm/i915/i915_drv.h          |   1 +
>>>>  drivers/gpu/drm/i915/i915_reg.h          |   3 +
>>>>  drivers/gpu/drm/i915/intel_display.c     |   1 -
>>>>  drivers/gpu/drm/i915/intel_drv.h         |   2 +
>>>>  drivers/gpu/drm/i915/intel_fbc.c         | 103
>>>> ++++++++++++++++++++++++++++++-
>>>>  drivers/gpu/drm/i915/intel_frontbuffer.c |   1 +
>>>>  6 files changed, 108 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>> index ee14962..77da500 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -928,6 +928,7 @@ struct i915_fbc {
>>>>  	bool (*fbc_enabled)(struct drm_i915_private *dev_priv);
>>>>  	void (*enable_fbc)(struct intel_crtc *crtc);
>>>>  	void (*disable_fbc)(struct drm_i915_private *dev_priv);
>>>> +	void (*flip_prepare)(struct drm_i915_private *dev_priv);
>>>>  };
>>>>  
>>>>  /**
>>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
>>>> b/drivers/gpu/drm/i915/i915_reg.h
>>>> index 8942532..3d598a2 100644
>>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>>> @@ -2028,6 +2028,7 @@ enum skl_disp_power_wells {
>>>>  #define   FBC_CTL_C3_IDLE	(1<<13)
>>>>  #define   FBC_CTL_STRIDE_SHIFT	(5)
>>>>  #define   FBC_CTL_FENCENO_SHIFT	(0)
>>>> +#define   FBC_CTL_FENCENO_MASK	0xF
>>> It's only three bits on gen2. But bit 3 is MBZ and there are only 8
>>> fence registers on those platforms, so this is OK.
>>>
>>>>  #define FBC_COMMAND		0x0320c
>>>>  #define   FBC_CMD_COMPRESS	(1<<0)
>>>>  #define FBC_STATUS		0x03210
>>>> @@ -2064,6 +2065,7 @@ enum skl_disp_power_wells {
>>>>  #define   DPFC_CTL_LIMIT_1X	(0<<6)
>>>>  #define   DPFC_CTL_LIMIT_2X	(1<<6)
>>>>  #define   DPFC_CTL_LIMIT_4X	(2<<6)
>>>> +#define   DPFC_CTL_FENCE_MASK	0xF
>>>>  #define DPFC_RECOMP_CTL		0x320c
>>>>  #define   DPFC_RECOMP_STALL_EN	(1<<27)
>>>>  #define   DPFC_RECOMP_STALL_WM_SHIFT (16)
>>>> @@ -2086,6 +2088,7 @@ enum skl_disp_power_wells {
>>>>  #define   FBC_CTL_FALSE_COLOR	(1<<10)
>>>>  /* The bit 28-8 is reserved */
>>>>  #define   DPFC_RESERVED		(0x1FFFFF00)
>>>> +#define   ILK_DPFC_FENCE_MASK	0xF
>>>>  #define ILK_DPFC_RECOMP_CTL	0x4320c
>>>>  #define ILK_DPFC_STATUS		0x43210
>>>>  #define ILK_DPFC_FENCE_YOFF	0x43218
>>>> diff --git a/drivers/gpu/drm/i915/intel_display.c
>>>> b/drivers/gpu/drm/i915/intel_display.c
>>>> index bc1907e..d9378c3 100644
>>>> --- a/drivers/gpu/drm/i915/intel_display.c
>>>> +++ b/drivers/gpu/drm/i915/intel_display.c
>>>> @@ -11502,7 +11502,6 @@ static int intel_crtc_page_flip(struct
>>>> drm_crtc *crtc,
>>>>  			  to_intel_plane(primary)-
>>>>> frontbuffer_bit);
>>>>  	mutex_unlock(&dev->struct_mutex);
>>>>  
>>>> -	intel_fbc_disable_crtc(intel_crtc);
>>>>  	intel_frontbuffer_flip_prepare(dev,
>>>>  				       to_intel_plane(primary)-
>>>>> frontbuffer_bit);
>>>>  
>>>> diff --git a/drivers/gpu/drm/i915/intel_drv.h
>>>> b/drivers/gpu/drm/i915/intel_drv.h
>>>> index 08847d0..9065a8f 100644
>>>> --- a/drivers/gpu/drm/i915/intel_drv.h
>>>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>>>> @@ -1305,6 +1305,8 @@ void intel_fbc_invalidate(struct
>>>> drm_i915_private *dev_priv,
>>>>  			  enum fb_op_origin origin);
>>>>  void intel_fbc_flush(struct drm_i915_private *dev_priv,
>>>>  		     unsigned int frontbuffer_bits, enum
>>>> fb_op_origin origin);
>>>> +void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,
>>>> +			    unsigned int frontbuffer_bits);
>>>>  void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
>>>>  
>>>>  /* intel_hdmi.c */
>>>> diff --git a/drivers/gpu/drm/i915/intel_fbc.c
>>>> b/drivers/gpu/drm/i915/intel_fbc.c
>>>> index d9d7e54..62f158b 100644
>>>> --- a/drivers/gpu/drm/i915/intel_fbc.c
>>>> +++ b/drivers/gpu/drm/i915/intel_fbc.c
>>>> @@ -82,6 +82,22 @@ static void i8xx_fbc_disable(struct
>>>> drm_i915_private *dev_priv)
>>>>  	DRM_DEBUG_KMS("disabled FBC\n");
>>>>  }
>>>>  
>>>> +static void i8xx_fbc_flip_prepare(struct drm_i915_private
>>>> *dev_priv)
>>>> +{
>>>> +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
>>>> +	struct drm_framebuffer *fb = crtc->base.primary->fb;
>>>> +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
>>>> +	uint32_t val;
>>>> +
>>>> +	/* Although the documentation suggests we can't change
>>>> DPFC_CONTROL
>>>> +	 * while compression is enabled, the hardware guys said
>>>> that updating
>>>> +	 * the fence register bits during a flip is fine. */
>>>> +	val = I915_READ(FBC_CONTROL);
>>>> +	val &= ~FBC_CTL_FENCENO_MASK;
>>>> +	val |= obj->fence_reg;
>>>> +	I915_WRITE(FBC_CONTROL, val);
>>>> +}
>>> IIRC I just called the enable function to reconstruct the entire
>>> register contents to avoid code duplication. But maybe that's not
>>> possible without reowrking the fbc state handling entirely
>>> (which I had done).
>>>> +
>>>>  static void i8xx_fbc_enable(struct intel_crtc *crtc)
>>>>  {
>>>>  	struct drm_i915_private *dev_priv = crtc->base.dev-
>>>>> dev_private;
>>>> @@ -161,6 +177,22 @@ static void g4x_fbc_enable(struct intel_crtc
>>>> *crtc)
>>>>  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",
>>>> plane_name(crtc->plane));
>>>>  }
>>>>  
>>>> +static void g4x_fbc_flip_prepare(struct drm_i915_private
>>>> *dev_priv)
>>>> +{
>>>> +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
>>>> +	struct drm_framebuffer *fb = crtc->base.primary->fb;
>>>> +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
>>>> +	uint32_t val;
>>>> +
>>>> +	/* Although the documentation suggests we can't change
>>>> DPFC_CONTROL
>>>> +	 * while compression is enabled, the hardware guys said
>>>> that updating
>>>> +	 * the fence register bits during a flip is fine. */
>>>> +	val = I915_READ(DPFC_CONTROL);
>>>> +	val &= ~DPFC_CTL_FENCE_MASK;
>>>> +	val |= obj->fence_reg;
>>>> +	I915_WRITE(DPFC_CONTROL, val);
>>>> +}
>>>> +
>>>>  static void g4x_fbc_disable(struct drm_i915_private *dev_priv)
>>>>  {
>>>>  	u32 dpfc_ctl;
>>>> @@ -236,6 +268,31 @@ static void ilk_fbc_enable(struct intel_crtc
>>>> *crtc)
>>>>  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",
>>>> plane_name(crtc->plane));
>>>>  }
>>>>  
>>>> +static void ilk_fbc_flip_prepare(struct drm_i915_private
>>>> *dev_priv)
>>>> +{
>>>> +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
>>>> +	struct drm_framebuffer *fb = crtc->base.primary->fb;
>>>> +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
>>>> +	uint32_t val;
>>>> +
>>>> +	/* Although the documentation suggests we can't change
>>>> DPFC_CONTROL
>>>> +	 * while compression is enabled, the hardware guys said
>>>> that updating
>>>> +	 * the fence register bits during a flip is fine. */
>>>> +	val = I915_READ(ILK_DPFC_CONTROL);
>>>> +	val &= ~ILK_DPFC_FENCE_MASK;
>>>> +	val |= obj->fence_reg;
>>>> +	I915_WRITE(ILK_DPFC_CONTROL, val);
>>>> +}
>>>> +
>>>> +static void snb_fbc_flip_prepare(struct drm_i915_private
>>>> *dev_priv)
>>>> +{
>>>> +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
>>>> +	struct drm_framebuffer *fb = crtc->base.primary->fb;
>>>> +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
>>>> +
>>>> +	I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | obj-
>>>>> fence_reg);
>>>> +}
>>>> +
>>>>  static void ilk_fbc_disable(struct drm_i915_private *dev_priv)
>>>>  {
>>>>  	u32 dpfc_ctl;
>>>> @@ -1021,13 +1078,48 @@ void intel_fbc_flush(struct
>>>> drm_i915_private *dev_priv,
>>>>  	if (origin == ORIGIN_GTT)
>>>>  		return;
>>>>  
>>>> +	/* Hardware tracking already recompresses the CFB (nuke)
>>>> for us if FBC
>>>> +	 * is enabled and we do a page flip, so we can safely
>>>> ignore it here.
>>>> +	 * FBC may be disabled in case we got an invalidate()
>>>> before the
>>>> +	 * flush(), so we'll still have to check that case below.
>>>> */
>>>> +	if (origin == ORIGIN_FLIP && dev_priv->fbc.enabled)
>>>> +		return;
>>>> +
>>>>  	mutex_lock(&dev_priv->fbc.lock);
>>>>  
>>>>  	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
>>>>  
>>>>  	if (!dev_priv->fbc.busy_bits) {
>>>> -		__intel_fbc_disable(dev_priv);
>>>> -		__intel_fbc_update(dev_priv);
>>>> +		if (origin == ORIGIN_FLIP) {
>>>> +			__intel_fbc_update(dev_priv);
>>>> +		} else {
>>>> +			__intel_fbc_disable(dev_priv);
>>>> +			__intel_fbc_update(dev_priv);
>>>> +		}
>>>> +	}
>>>> +
>>>> +	mutex_unlock(&dev_priv->fbc.lock);
>>>> +}
>>>> +
>>>> +void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,
>>>> +			    unsigned int frontbuffer_bits)
>>>> +{
>>>> +	unsigned int fbc_bits;
>>>> +
>>>> +	if (!fbc_supported(dev_priv))
>>>> +		return;
>>>> +
>>>> +	mutex_lock(&dev_priv->fbc.lock);
>>>> +
>>>> +	if (dev_priv->fbc.enabled) {
>>>> +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv-
>>>>> fbc.crtc->pipe);
>>> primary->frontbuffer_bit would seem better.
>> Why?
> Why not? It's there already stashed away for you to use. No need to
> sprinkle these INTEL_FRONTBUFFER... things all over the place.
>
>>>> +		if (fbc_bits & frontbuffer_bits)
>>>> +			dev_priv->fbc.flip_prepare(dev_priv);
>>> You would still have to disable+reenable if you need to eg.
>>> reallocate
>>> the compressed buffer, of if the new fb isn't suitable for fbc, or
>>> maybe
>>> if you need to change anything else in the control register.
>> Yes, but as far as I understand the frontbuffer tracking, this case
>> won't happen here. I'm assuming we'll always go through
>> intel_fbc_update() on these cases.
> Perhaps, but more by luck than by design. With the setplane and atomic
> APIs there's no difference between a page flip and other plane updates.
> So if you want to take advantage of the hw flip nuke, you would need to
> check how the plane state is going to change, and based on that decide
> whether a fence update is enough or disable+re-enable is needed. And in
> fact I don't see any fbc_disable before the flip, just an fbc_update
> after the fact. So seems to me the disable really should be in your
> flip_prepare hook in this case. And in fact the whole flip_prepare
> thing isn't even called from the setplane/atomic path.
I'm working on fixing that, but that still requires a few more patches until page_flip and atomic_commit are unified. :-)

If it's useful we could do a simple patch to do the same in atomic commit. I'm moving fb_bits to crtc_state anyway.

~Maarten
Ville Syrjala Oct. 29, 2015, 5:30 p.m. UTC | #7
On Wed, Oct 28, 2015 at 07:20:12PM +0200, Ville Syrjälä wrote:
> On Wed, Oct 28, 2015 at 04:56:39PM +0000, Zanoni, Paulo R wrote:
> > Em Ter, 2015-10-27 às 20:32 +0200, Ville Syrjälä escreveu:
> > > On Tue, Oct 27, 2015 at 02:50:04PM -0200, Paulo Zanoni wrote:
> > > > The hardware already takes care of disabling and recompressing FBC
> > > > when we do a page flip, so all we need to do is to update the fence
> > > > registers and move on.
> > > > 
> > > > One of the important things to notice is that on the pre-gen6
> > > > platforms the fence is programmed on the FBC control register and
> > > > the
> > > > documentation says we can't update the control register while FBC
> > > > is
> > > > enabled. This would basically mean we'd have to to disable+enable
> > > > FBC
> > > > at every flip in order to make sure the hardware tracking still
> > > > works,
> > > > which doesn't seem to make too much sense. So I sent an email to
> > > > the
> > > > hardware team requesting some clarification. The information I got
> > > > was
> > > > this:
> > > > 
> > > > "I don't think any hardware is latching on to 0x100100, 0x100104,
> > > > or
> > > > the old fence number in FBC_CTL. Instructions against changing on
> > > > the
> > > > fly would be to simplify testing and ensure you don't miss any
> > > > invalidation that happened right at that time."
> > > > 
> > > > So I guess we're fine for flips. But I can't really say I tested
> > > > FBC
> > > > on these ancient platforms - nor that I'll ever propose enabling
> > > > FBC
> > > > by default on them exactly because of problems like these.
> > > 
> > > Yeah, I did this in my patches too, and IIRC it seemed to work just
> > > fine
> > > back then.
> > > 
> > > > 
> > > > v2: Add comment at flush() (Chris).
> > > > 
> > > > Testcase: igt/kms_frontbuffer_tracking/fbc*-fliptrack
> > > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_drv.h          |   1 +
> > > >  drivers/gpu/drm/i915/i915_reg.h          |   3 +
> > > >  drivers/gpu/drm/i915/intel_display.c     |   1 -
> > > >  drivers/gpu/drm/i915/intel_drv.h         |   2 +
> > > >  drivers/gpu/drm/i915/intel_fbc.c         | 103
> > > > ++++++++++++++++++++++++++++++-
> > > >  drivers/gpu/drm/i915/intel_frontbuffer.c |   1 +
> > > >  6 files changed, 108 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > b/drivers/gpu/drm/i915/i915_drv.h
> > > > index ee14962..77da500 100644
> > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > @@ -928,6 +928,7 @@ struct i915_fbc {
> > > >  	bool (*fbc_enabled)(struct drm_i915_private *dev_priv);
> > > >  	void (*enable_fbc)(struct intel_crtc *crtc);
> > > >  	void (*disable_fbc)(struct drm_i915_private *dev_priv);
> > > > +	void (*flip_prepare)(struct drm_i915_private *dev_priv);
> > > >  };
> > > >  
> > > >  /**
> > > > diff --git a/drivers/gpu/drm/i915/i915_reg.h
> > > > b/drivers/gpu/drm/i915/i915_reg.h
> > > > index 8942532..3d598a2 100644
> > > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > > @@ -2028,6 +2028,7 @@ enum skl_disp_power_wells {
> > > >  #define   FBC_CTL_C3_IDLE	(1<<13)
> > > >  #define   FBC_CTL_STRIDE_SHIFT	(5)
> > > >  #define   FBC_CTL_FENCENO_SHIFT	(0)
> > > > +#define   FBC_CTL_FENCENO_MASK	0xF
> > > 
> > > It's only three bits on gen2. But bit 3 is MBZ and there are only 8
> > > fence registers on those platforms, so this is OK.
> > > 
> > > >  #define FBC_COMMAND		0x0320c
> > > >  #define   FBC_CMD_COMPRESS	(1<<0)
> > > >  #define FBC_STATUS		0x03210
> > > > @@ -2064,6 +2065,7 @@ enum skl_disp_power_wells {
> > > >  #define   DPFC_CTL_LIMIT_1X	(0<<6)
> > > >  #define   DPFC_CTL_LIMIT_2X	(1<<6)
> > > >  #define   DPFC_CTL_LIMIT_4X	(2<<6)
> > > > +#define   DPFC_CTL_FENCE_MASK	0xF
> > > >  #define DPFC_RECOMP_CTL		0x320c
> > > >  #define   DPFC_RECOMP_STALL_EN	(1<<27)
> > > >  #define   DPFC_RECOMP_STALL_WM_SHIFT (16)
> > > > @@ -2086,6 +2088,7 @@ enum skl_disp_power_wells {
> > > >  #define   FBC_CTL_FALSE_COLOR	(1<<10)
> > > >  /* The bit 28-8 is reserved */
> > > >  #define   DPFC_RESERVED		(0x1FFFFF00)
> > > > +#define   ILK_DPFC_FENCE_MASK	0xF
> > > >  #define ILK_DPFC_RECOMP_CTL	0x4320c
> > > >  #define ILK_DPFC_STATUS		0x43210
> > > >  #define ILK_DPFC_FENCE_YOFF	0x43218
> > > > diff --git a/drivers/gpu/drm/i915/intel_display.c
> > > > b/drivers/gpu/drm/i915/intel_display.c
> > > > index bc1907e..d9378c3 100644
> > > > --- a/drivers/gpu/drm/i915/intel_display.c
> > > > +++ b/drivers/gpu/drm/i915/intel_display.c
> > > > @@ -11502,7 +11502,6 @@ static int intel_crtc_page_flip(struct
> > > > drm_crtc *crtc,
> > > >  			  to_intel_plane(primary)-
> > > > >frontbuffer_bit);
> > > >  	mutex_unlock(&dev->struct_mutex);
> > > >  
> > > > -	intel_fbc_disable_crtc(intel_crtc);
> > > >  	intel_frontbuffer_flip_prepare(dev,
> > > >  				       to_intel_plane(primary)-
> > > > >frontbuffer_bit);
> > > >  
> > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > index 08847d0..9065a8f 100644
> > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > @@ -1305,6 +1305,8 @@ void intel_fbc_invalidate(struct
> > > > drm_i915_private *dev_priv,
> > > >  			  enum fb_op_origin origin);
> > > >  void intel_fbc_flush(struct drm_i915_private *dev_priv,
> > > >  		     unsigned int frontbuffer_bits, enum
> > > > fb_op_origin origin);
> > > > +void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,
> > > > +			    unsigned int frontbuffer_bits);
> > > >  void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
> > > >  
> > > >  /* intel_hdmi.c */
> > > > diff --git a/drivers/gpu/drm/i915/intel_fbc.c
> > > > b/drivers/gpu/drm/i915/intel_fbc.c
> > > > index d9d7e54..62f158b 100644
> > > > --- a/drivers/gpu/drm/i915/intel_fbc.c
> > > > +++ b/drivers/gpu/drm/i915/intel_fbc.c
> > > > @@ -82,6 +82,22 @@ static void i8xx_fbc_disable(struct
> > > > drm_i915_private *dev_priv)
> > > >  	DRM_DEBUG_KMS("disabled FBC\n");
> > > >  }
> > > >  
> > > > +static void i8xx_fbc_flip_prepare(struct drm_i915_private
> > > > *dev_priv)
> > > > +{
> > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > > +	uint32_t val;
> > > > +
> > > > +	/* Although the documentation suggests we can't change
> > > > DPFC_CONTROL
> > > > +	 * while compression is enabled, the hardware guys said
> > > > that updating
> > > > +	 * the fence register bits during a flip is fine. */
> > > > +	val = I915_READ(FBC_CONTROL);
> > > > +	val &= ~FBC_CTL_FENCENO_MASK;
> > > > +	val |= obj->fence_reg;
> > > > +	I915_WRITE(FBC_CONTROL, val);
> > > > +}
> > > 
> > > IIRC I just called the enable function to reconstruct the entire
> > > register contents to avoid code duplication. But maybe that's not
> > > possible without reowrking the fbc state handling entirely
> > > (which I had done).
> > > > +
> > > >  static void i8xx_fbc_enable(struct intel_crtc *crtc)
> > > >  {
> > > >  	struct drm_i915_private *dev_priv = crtc->base.dev-
> > > > >dev_private;
> > > > @@ -161,6 +177,22 @@ static void g4x_fbc_enable(struct intel_crtc
> > > > *crtc)
> > > >  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",
> > > > plane_name(crtc->plane));
> > > >  }
> > > >  
> > > > +static void g4x_fbc_flip_prepare(struct drm_i915_private
> > > > *dev_priv)
> > > > +{
> > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > > +	uint32_t val;
> > > > +
> > > > +	/* Although the documentation suggests we can't change
> > > > DPFC_CONTROL
> > > > +	 * while compression is enabled, the hardware guys said
> > > > that updating
> > > > +	 * the fence register bits during a flip is fine. */
> > > > +	val = I915_READ(DPFC_CONTROL);
> > > > +	val &= ~DPFC_CTL_FENCE_MASK;
> > > > +	val |= obj->fence_reg;
> > > > +	I915_WRITE(DPFC_CONTROL, val);
> > > > +}
> > > > +
> > > >  static void g4x_fbc_disable(struct drm_i915_private *dev_priv)
> > > >  {
> > > >  	u32 dpfc_ctl;
> > > > @@ -236,6 +268,31 @@ static void ilk_fbc_enable(struct intel_crtc
> > > > *crtc)
> > > >  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",
> > > > plane_name(crtc->plane));
> > > >  }
> > > >  
> > > > +static void ilk_fbc_flip_prepare(struct drm_i915_private
> > > > *dev_priv)
> > > > +{
> > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > > +	uint32_t val;
> > > > +
> > > > +	/* Although the documentation suggests we can't change
> > > > DPFC_CONTROL
> > > > +	 * while compression is enabled, the hardware guys said
> > > > that updating
> > > > +	 * the fence register bits during a flip is fine. */
> > > > +	val = I915_READ(ILK_DPFC_CONTROL);
> > > > +	val &= ~ILK_DPFC_FENCE_MASK;
> > > > +	val |= obj->fence_reg;
> > > > +	I915_WRITE(ILK_DPFC_CONTROL, val);
> > > > +}
> > > > +
> > > > +static void snb_fbc_flip_prepare(struct drm_i915_private
> > > > *dev_priv)
> > > > +{
> > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > > +
> > > > +	I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | obj-
> > > > >fence_reg);
> > > > +}
> > > > +
> > > >  static void ilk_fbc_disable(struct drm_i915_private *dev_priv)
> > > >  {
> > > >  	u32 dpfc_ctl;
> > > > @@ -1021,13 +1078,48 @@ void intel_fbc_flush(struct
> > > > drm_i915_private *dev_priv,
> > > >  	if (origin == ORIGIN_GTT)
> > > >  		return;
> > > >  
> > > > +	/* Hardware tracking already recompresses the CFB (nuke)
> > > > for us if FBC
> > > > +	 * is enabled and we do a page flip, so we can safely
> > > > ignore it here.
> > > > +	 * FBC may be disabled in case we got an invalidate()
> > > > before the
> > > > +	 * flush(), so we'll still have to check that case below.
> > > > */
> > > > +	if (origin == ORIGIN_FLIP && dev_priv->fbc.enabled)
> > > > +		return;
> > > > +
> > > >  	mutex_lock(&dev_priv->fbc.lock);
> > > >  
> > > >  	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
> > > >  
> > > >  	if (!dev_priv->fbc.busy_bits) {
> > > > -		__intel_fbc_disable(dev_priv);
> > > > -		__intel_fbc_update(dev_priv);
> > > > +		if (origin == ORIGIN_FLIP) {
> > > > +			__intel_fbc_update(dev_priv);
> > > > +		} else {
> > > > +			__intel_fbc_disable(dev_priv);
> > > > +			__intel_fbc_update(dev_priv);
> > > > +		}
> > > > +	}
> > > > +
> > > > +	mutex_unlock(&dev_priv->fbc.lock);
> > > > +}
> > > > +
> > > > +void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,
> > > > +			    unsigned int frontbuffer_bits)
> > > > +{
> > > > +	unsigned int fbc_bits;
> > > > +
> > > > +	if (!fbc_supported(dev_priv))
> > > > +		return;
> > > > +
> > > > +	mutex_lock(&dev_priv->fbc.lock);
> > > > +
> > > > +	if (dev_priv->fbc.enabled) {
> > > > +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv-
> > > > >fbc.crtc->pipe);
> > > 
> > > primary->frontbuffer_bit would seem better.
> > 
> > Why?
> 
> Why not? It's there already stashed away for you to use. No need to
> sprinkle these INTEL_FRONTBUFFER... things all over the place.
> 
> > 
> > > 
> > > > +		if (fbc_bits & frontbuffer_bits)
> > > > +			dev_priv->fbc.flip_prepare(dev_priv);
> > > 
> > > You would still have to disable+reenable if you need to eg.
> > > reallocate
> > > the compressed buffer, of if the new fb isn't suitable for fbc, or
> > > maybe
> > > if you need to change anything else in the control register.
> > 
> > Yes, but as far as I understand the frontbuffer tracking, this case
> > won't happen here. I'm assuming we'll always go through
> > intel_fbc_update() on these cases.
> 
> Perhaps, but more by luck than by design. With the setplane and atomic
> APIs there's no difference between a page flip and other plane updates.
> So if you want to take advantage of the hw flip nuke, you would need to
> check how the plane state is going to change, and based on that decide
> whether a fence update is enough or disable+re-enable is needed. And in
> fact I don't see any fbc_disable before the flip, just an fbc_update
> after the fact. So seems to me the disable really should be in your
> flip_prepare hook in this case. And in fact the whole flip_prepare
> thing isn't even called from the setplane/atomic path.

Oh and another related scenario also came to mind. Consider for example
the follow sequence of events:

1. fbc enabled on pipe A
2. flip scheduled on pipe A which requires fbc to be disabled
   -> fbc gets disabled
4. modeset on pipe B calls fbc_update
   -> fbc gets re-enabled on pipe A before the flip has finished, oops!

So we also need something to keep fbc reliably disabled until the
flip has occured.

> 
> > 
> > > 
> > > What I had was:
> > > "
> > > static bool intel_fbc_need_reinit(struct intel_crtc *crtc)
> > > {
> > > 	struct drm_i915_private *dev_priv = crtc->base.dev-
> > > >dev_private;
> > > 	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > 	struct drm_i915_gem_object *obj = to_intel_framebuffer(fb)-
> > > >obj;
> > > 
> > > 	return crtc != dev_priv->fbc.crtc ||
> > > 	       obj->base.size > dev_priv->fbc.size ||
> > > 	       drm_format_plane_cpp(fb->pixel_format, 0) !=
> > > 	       drm_format_plane_cpp(dev_priv->fbc.pixel_format, 0) ||
> > > 	       fb->pitches[0] != dev_priv->fbc.pitch;
> > > }
> > > intel_fbc_pre_page_flip()
> > > {
> > > 	...
> > > 	intel_fbc_update_pending_score(crtc);
> > > 
> > > 	/*
> > > 	 * If fbc was already possible we can update immediately,
> > > 	 * otherwise we will wait until the flip is finished.
> > > 	 */
> > > 	if (crtc->fbc.score != 0)
> > > 		crtc->fbc.score = crtc->fbc.pending_score;
> > > 
> > > 	/*
> > > 	 * Disable fbc if we're not (yet) capable, or if
> > > 	 * we just need a full disable+enable reinit.
> > > 	 */
> > > 	if (crtc->fbc.score == 0 || intel_fbc_need_reinit(crtc))
> > > 		__intel_fbc_disable(crtc);
> > > 	...
> > > }
> > > "
> > > 
> > > > +	} else if (dev_priv->fbc.fbc_work) {
> > > > +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
> > > > +				dev_priv->fbc.fbc_work->crtc-
> > > > >pipe);
> > > > +		if (fbc_bits & frontbuffer_bits)
> > > > +			__intel_fbc_disable(dev_priv);
> > > >  	}
> > > >  
> > > >  	mutex_unlock(&dev_priv->fbc.lock);
> > > > @@ -1063,18 +1155,25 @@ void intel_fbc_init(struct drm_i915_private
> > > > *dev_priv)
> > > >  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
> > > >  		dev_priv->fbc.enable_fbc = gen7_fbc_enable;
> > > >  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
> > > > +		dev_priv->fbc.flip_prepare = snb_fbc_flip_prepare;
> > > >  	} else if (INTEL_INFO(dev_priv)->gen >= 5) {
> > > >  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
> > > >  		dev_priv->fbc.enable_fbc = ilk_fbc_enable;
> > > >  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
> > > > +		if (INTEL_INFO(dev_priv)->gen == 5)
> > > > +			dev_priv->fbc.flip_prepare =
> > > > ilk_fbc_flip_prepare;
> > > > +		else
> > > > +			dev_priv->fbc.flip_prepare =
> > > > snb_fbc_flip_prepare;
> > > >  	} else if (IS_GM45(dev_priv)) {
> > > >  		dev_priv->fbc.fbc_enabled = g4x_fbc_enabled;
> > > >  		dev_priv->fbc.enable_fbc = g4x_fbc_enable;
> > > >  		dev_priv->fbc.disable_fbc = g4x_fbc_disable;
> > > > +		dev_priv->fbc.flip_prepare = g4x_fbc_flip_prepare;
> > > >  	} else {
> > > >  		dev_priv->fbc.fbc_enabled = i8xx_fbc_enabled;
> > > >  		dev_priv->fbc.enable_fbc = i8xx_fbc_enable;
> > > >  		dev_priv->fbc.disable_fbc = i8xx_fbc_disable;
> > > > +		dev_priv->fbc.flip_prepare =
> > > > i8xx_fbc_flip_prepare;
> > > >  
> > > >  		/* This value was pulled out of someone's hat */
> > > >  		I915_WRITE(FBC_CONTROL, 500 <<
> > > > FBC_CTL_INTERVAL_SHIFT);
> > > > diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > > b/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > > index ac85357..31a1ad3 100644
> > > > --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > > +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > > @@ -192,6 +192,7 @@ void intel_frontbuffer_flip_prepare(struct
> > > > drm_device *dev,
> > > >  	mutex_unlock(&dev_priv->fb_tracking.lock);
> > > >  
> > > >  	intel_psr_single_frame_update(dev, frontbuffer_bits);
> > > > +	intel_fbc_flip_prepare(dev_priv, frontbuffer_bits);
> > > >  }
> > > >  
> > > >  /**
> > > > -- 
> > > > 2.6.1
> > > > 
> > > > _______________________________________________
> > > > Intel-gfx mailing list
> > > > Intel-gfx@lists.freedesktop.org
> > > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > > 
> 
> -- 
> Ville Syrjälä
> Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Zanoni, Paulo R Oct. 29, 2015, 5:52 p.m. UTC | #8
Em Qui, 2015-10-29 às 19:30 +0200, Ville Syrjälä escreveu:
> On Wed, Oct 28, 2015 at 07:20:12PM +0200, Ville Syrjälä wrote:

> > On Wed, Oct 28, 2015 at 04:56:39PM +0000, Zanoni, Paulo R wrote:

> > > Em Ter, 2015-10-27 às 20:32 +0200, Ville Syrjälä escreveu:

> > > > On Tue, Oct 27, 2015 at 02:50:04PM -0200, Paulo Zanoni wrote:

> > > > > The hardware already takes care of disabling and

> > > > > recompressing FBC

> > > > > when we do a page flip, so all we need to do is to update the

> > > > > fence

> > > > > registers and move on.

> > > > > 

> > > > > One of the important things to notice is that on the pre-gen6

> > > > > platforms the fence is programmed on the FBC control register

> > > > > and

> > > > > the

> > > > > documentation says we can't update the control register while

> > > > > FBC

> > > > > is

> > > > > enabled. This would basically mean we'd have to to

> > > > > disable+enable

> > > > > FBC

> > > > > at every flip in order to make sure the hardware tracking

> > > > > still

> > > > > works,

> > > > > which doesn't seem to make too much sense. So I sent an email

> > > > > to

> > > > > the

> > > > > hardware team requesting some clarification. The information

> > > > > I got

> > > > > was

> > > > > this:

> > > > > 

> > > > > "I don't think any hardware is latching on to 0x100100,

> > > > > 0x100104,

> > > > > or

> > > > > the old fence number in FBC_CTL. Instructions against

> > > > > changing on

> > > > > the

> > > > > fly would be to simplify testing and ensure you don't miss

> > > > > any

> > > > > invalidation that happened right at that time."

> > > > > 

> > > > > So I guess we're fine for flips. But I can't really say I

> > > > > tested

> > > > > FBC

> > > > > on these ancient platforms - nor that I'll ever propose

> > > > > enabling

> > > > > FBC

> > > > > by default on them exactly because of problems like these.

> > > > 

> > > > Yeah, I did this in my patches too, and IIRC it seemed to work

> > > > just

> > > > fine

> > > > back then.

> > > > 

> > > > > 

> > > > > v2: Add comment at flush() (Chris).

> > > > > 

> > > > > Testcase: igt/kms_frontbuffer_tracking/fbc*-fliptrack

> > > > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

> > > > > ---

> > > > >  drivers/gpu/drm/i915/i915_drv.h          |   1 +

> > > > >  drivers/gpu/drm/i915/i915_reg.h          |   3 +

> > > > >  drivers/gpu/drm/i915/intel_display.c     |   1 -

> > > > >  drivers/gpu/drm/i915/intel_drv.h         |   2 +

> > > > >  drivers/gpu/drm/i915/intel_fbc.c         | 103

> > > > > ++++++++++++++++++++++++++++++-

> > > > >  drivers/gpu/drm/i915/intel_frontbuffer.c |   1 +

> > > > >  6 files changed, 108 insertions(+), 3 deletions(-)

> > > > > 

> > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h

> > > > > b/drivers/gpu/drm/i915/i915_drv.h

> > > > > index ee14962..77da500 100644

> > > > > --- a/drivers/gpu/drm/i915/i915_drv.h

> > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h

> > > > > @@ -928,6 +928,7 @@ struct i915_fbc {

> > > > >  	bool (*fbc_enabled)(struct drm_i915_private

> > > > > *dev_priv);

> > > > >  	void (*enable_fbc)(struct intel_crtc *crtc);

> > > > >  	void (*disable_fbc)(struct drm_i915_private

> > > > > *dev_priv);

> > > > > +	void (*flip_prepare)(struct drm_i915_private

> > > > > *dev_priv);

> > > > >  };

> > > > >  

> > > > >  /**

> > > > > diff --git a/drivers/gpu/drm/i915/i915_reg.h

> > > > > b/drivers/gpu/drm/i915/i915_reg.h

> > > > > index 8942532..3d598a2 100644

> > > > > --- a/drivers/gpu/drm/i915/i915_reg.h

> > > > > +++ b/drivers/gpu/drm/i915/i915_reg.h

> > > > > @@ -2028,6 +2028,7 @@ enum skl_disp_power_wells {

> > > > >  #define   FBC_CTL_C3_IDLE	(1<<13)

> > > > >  #define   FBC_CTL_STRIDE_SHIFT	(5)

> > > > >  #define   FBC_CTL_FENCENO_SHIFT	(0)

> > > > > +#define   FBC_CTL_FENCENO_MASK	0xF

> > > > 

> > > > It's only three bits on gen2. But bit 3 is MBZ and there are

> > > > only 8

> > > > fence registers on those platforms, so this is OK.

> > > > 

> > > > >  #define FBC_COMMAND		0x0320c

> > > > >  #define   FBC_CMD_COMPRESS	(1<<0)

> > > > >  #define FBC_STATUS		0x03210

> > > > > @@ -2064,6 +2065,7 @@ enum skl_disp_power_wells {

> > > > >  #define   DPFC_CTL_LIMIT_1X	(0<<6)

> > > > >  #define   DPFC_CTL_LIMIT_2X	(1<<6)

> > > > >  #define   DPFC_CTL_LIMIT_4X	(2<<6)

> > > > > +#define   DPFC_CTL_FENCE_MASK	0xF

> > > > >  #define DPFC_RECOMP_CTL		0x320c

> > > > >  #define   DPFC_RECOMP_STALL_EN	(1<<27)

> > > > >  #define   DPFC_RECOMP_STALL_WM_SHIFT (16)

> > > > > @@ -2086,6 +2088,7 @@ enum skl_disp_power_wells {

> > > > >  #define   FBC_CTL_FALSE_COLOR	(1<<10)

> > > > >  /* The bit 28-8 is reserved */

> > > > >  #define   DPFC_RESERVED		(0x1FFFFF00)

> > > > > +#define   ILK_DPFC_FENCE_MASK	0xF

> > > > >  #define ILK_DPFC_RECOMP_CTL	0x4320c

> > > > >  #define ILK_DPFC_STATUS		0x43210

> > > > >  #define ILK_DPFC_FENCE_YOFF	0x43218

> > > > > diff --git a/drivers/gpu/drm/i915/intel_display.c

> > > > > b/drivers/gpu/drm/i915/intel_display.c

> > > > > index bc1907e..d9378c3 100644

> > > > > --- a/drivers/gpu/drm/i915/intel_display.c

> > > > > +++ b/drivers/gpu/drm/i915/intel_display.c

> > > > > @@ -11502,7 +11502,6 @@ static int

> > > > > intel_crtc_page_flip(struct

> > > > > drm_crtc *crtc,

> > > > >  			  to_intel_plane(primary)-

> > > > > > frontbuffer_bit);

> > > > >  	mutex_unlock(&dev->struct_mutex);

> > > > >  

> > > > > -	intel_fbc_disable_crtc(intel_crtc);

> > > > >  	intel_frontbuffer_flip_prepare(dev,

> > > > >  				       to_intel_plane(primar

> > > > > y)-

> > > > > > frontbuffer_bit);

> > > > >  

> > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h

> > > > > b/drivers/gpu/drm/i915/intel_drv.h

> > > > > index 08847d0..9065a8f 100644

> > > > > --- a/drivers/gpu/drm/i915/intel_drv.h

> > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h

> > > > > @@ -1305,6 +1305,8 @@ void intel_fbc_invalidate(struct

> > > > > drm_i915_private *dev_priv,

> > > > >  			  enum fb_op_origin origin);

> > > > >  void intel_fbc_flush(struct drm_i915_private *dev_priv,

> > > > >  		     unsigned int frontbuffer_bits, enum

> > > > > fb_op_origin origin);

> > > > > +void intel_fbc_flip_prepare(struct drm_i915_private

> > > > > *dev_priv,

> > > > > +			    unsigned int frontbuffer_bits);

> > > > >  void intel_fbc_cleanup_cfb(struct drm_i915_private

> > > > > *dev_priv);

> > > > >  

> > > > >  /* intel_hdmi.c */

> > > > > diff --git a/drivers/gpu/drm/i915/intel_fbc.c

> > > > > b/drivers/gpu/drm/i915/intel_fbc.c

> > > > > index d9d7e54..62f158b 100644

> > > > > --- a/drivers/gpu/drm/i915/intel_fbc.c

> > > > > +++ b/drivers/gpu/drm/i915/intel_fbc.c

> > > > > @@ -82,6 +82,22 @@ static void i8xx_fbc_disable(struct

> > > > > drm_i915_private *dev_priv)

> > > > >  	DRM_DEBUG_KMS("disabled FBC\n");

> > > > >  }

> > > > >  

> > > > > +static void i8xx_fbc_flip_prepare(struct drm_i915_private

> > > > > *dev_priv)

> > > > > +{

> > > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;

> > > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);

> > > > > +	uint32_t val;

> > > > > +

> > > > > +	/* Although the documentation suggests we can't

> > > > > change

> > > > > DPFC_CONTROL

> > > > > +	 * while compression is enabled, the hardware guys

> > > > > said

> > > > > that updating

> > > > > +	 * the fence register bits during a flip is fine. */

> > > > > +	val = I915_READ(FBC_CONTROL);

> > > > > +	val &= ~FBC_CTL_FENCENO_MASK;

> > > > > +	val |= obj->fence_reg;

> > > > > +	I915_WRITE(FBC_CONTROL, val);

> > > > > +}

> > > > 

> > > > IIRC I just called the enable function to reconstruct the

> > > > entire

> > > > register contents to avoid code duplication. But maybe that's

> > > > not

> > > > possible without reowrking the fbc state handling entirely

> > > > (which I had done).

> > > > > +

> > > > >  static void i8xx_fbc_enable(struct intel_crtc *crtc)

> > > > >  {

> > > > >  	struct drm_i915_private *dev_priv = crtc->base.dev-

> > > > > > dev_private;

> > > > > @@ -161,6 +177,22 @@ static void g4x_fbc_enable(struct

> > > > > intel_crtc

> > > > > *crtc)

> > > > >  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",

> > > > > plane_name(crtc->plane));

> > > > >  }

> > > > >  

> > > > > +static void g4x_fbc_flip_prepare(struct drm_i915_private

> > > > > *dev_priv)

> > > > > +{

> > > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;

> > > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);

> > > > > +	uint32_t val;

> > > > > +

> > > > > +	/* Although the documentation suggests we can't

> > > > > change

> > > > > DPFC_CONTROL

> > > > > +	 * while compression is enabled, the hardware guys

> > > > > said

> > > > > that updating

> > > > > +	 * the fence register bits during a flip is fine. */

> > > > > +	val = I915_READ(DPFC_CONTROL);

> > > > > +	val &= ~DPFC_CTL_FENCE_MASK;

> > > > > +	val |= obj->fence_reg;

> > > > > +	I915_WRITE(DPFC_CONTROL, val);

> > > > > +}

> > > > > +

> > > > >  static void g4x_fbc_disable(struct drm_i915_private

> > > > > *dev_priv)

> > > > >  {

> > > > >  	u32 dpfc_ctl;

> > > > > @@ -236,6 +268,31 @@ static void ilk_fbc_enable(struct

> > > > > intel_crtc

> > > > > *crtc)

> > > > >  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",

> > > > > plane_name(crtc->plane));

> > > > >  }

> > > > >  

> > > > > +static void ilk_fbc_flip_prepare(struct drm_i915_private

> > > > > *dev_priv)

> > > > > +{

> > > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;

> > > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);

> > > > > +	uint32_t val;

> > > > > +

> > > > > +	/* Although the documentation suggests we can't

> > > > > change

> > > > > DPFC_CONTROL

> > > > > +	 * while compression is enabled, the hardware guys

> > > > > said

> > > > > that updating

> > > > > +	 * the fence register bits during a flip is fine. */

> > > > > +	val = I915_READ(ILK_DPFC_CONTROL);

> > > > > +	val &= ~ILK_DPFC_FENCE_MASK;

> > > > > +	val |= obj->fence_reg;

> > > > > +	I915_WRITE(ILK_DPFC_CONTROL, val);

> > > > > +}

> > > > > +

> > > > > +static void snb_fbc_flip_prepare(struct drm_i915_private

> > > > > *dev_priv)

> > > > > +{

> > > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;

> > > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;

> > > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);

> > > > > +

> > > > > +	I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE |

> > > > > obj-

> > > > > > fence_reg);

> > > > > +}

> > > > > +

> > > > >  static void ilk_fbc_disable(struct drm_i915_private

> > > > > *dev_priv)

> > > > >  {

> > > > >  	u32 dpfc_ctl;

> > > > > @@ -1021,13 +1078,48 @@ void intel_fbc_flush(struct

> > > > > drm_i915_private *dev_priv,

> > > > >  	if (origin == ORIGIN_GTT)

> > > > >  		return;

> > > > >  

> > > > > +	/* Hardware tracking already recompresses the CFB

> > > > > (nuke)

> > > > > for us if FBC

> > > > > +	 * is enabled and we do a page flip, so we can

> > > > > safely

> > > > > ignore it here.

> > > > > +	 * FBC may be disabled in case we got an

> > > > > invalidate()

> > > > > before the

> > > > > +	 * flush(), so we'll still have to check that case

> > > > > below.

> > > > > */

> > > > > +	if (origin == ORIGIN_FLIP && dev_priv->fbc.enabled)

> > > > > +		return;

> > > > > +

> > > > >  	mutex_lock(&dev_priv->fbc.lock);

> > > > >  

> > > > >  	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;

> > > > >  

> > > > >  	if (!dev_priv->fbc.busy_bits) {

> > > > > -		__intel_fbc_disable(dev_priv);

> > > > > -		__intel_fbc_update(dev_priv);

> > > > > +		if (origin == ORIGIN_FLIP) {

> > > > > +			__intel_fbc_update(dev_priv);

> > > > > +		} else {

> > > > > +			__intel_fbc_disable(dev_priv);

> > > > > +			__intel_fbc_update(dev_priv);

> > > > > +		}

> > > > > +	}

> > > > > +

> > > > > +	mutex_unlock(&dev_priv->fbc.lock);

> > > > > +}

> > > > > +

> > > > > +void intel_fbc_flip_prepare(struct drm_i915_private

> > > > > *dev_priv,

> > > > > +			    unsigned int frontbuffer_bits)

> > > > > +{

> > > > > +	unsigned int fbc_bits;

> > > > > +

> > > > > +	if (!fbc_supported(dev_priv))

> > > > > +		return;

> > > > > +

> > > > > +	mutex_lock(&dev_priv->fbc.lock);

> > > > > +

> > > > > +	if (dev_priv->fbc.enabled) {

> > > > > +		fbc_bits =

> > > > > INTEL_FRONTBUFFER_PRIMARY(dev_priv-

> > > > > > fbc.crtc->pipe);

> > > > 

> > > > primary->frontbuffer_bit would seem better.

> > > 

> > > Why?

> > 

> > Why not? It's there already stashed away for you to use. No need to

> > sprinkle these INTEL_FRONTBUFFER... things all over the place.

> > 

> > > 

> > > > 

> > > > > +		if (fbc_bits & frontbuffer_bits)

> > > > > +			dev_priv-

> > > > > >fbc.flip_prepare(dev_priv);

> > > > 

> > > > You would still have to disable+reenable if you need to eg.

> > > > reallocate

> > > > the compressed buffer, of if the new fb isn't suitable for fbc,

> > > > or

> > > > maybe

> > > > if you need to change anything else in the control register.

> > > 

> > > Yes, but as far as I understand the frontbuffer tracking, this

> > > case

> > > won't happen here. I'm assuming we'll always go through

> > > intel_fbc_update() on these cases.

> > 

> > Perhaps, but more by luck than by design. With the setplane and

> > atomic

> > APIs there's no difference between a page flip and other plane

> > updates.


I am aware that some code paths are not the same as the full modeset
path, but there is still difference from the plain pageflip IOCTL. And
so far I have seen intel_fbc_update() being called everywhere, so this
should prevent the problems you seem to be pointing. Can you please
point a specific case that would break here involving a single pipe?
Multiple pipes are discussed below. 

I've been trying to analyze what you said and convert this to a proper
IGT subtest, but I just can't see the bug here. Please be a little more
specific on your description, like you just did for the multi-pipe
case.

> > So if you want to take advantage of the hw flip nuke, you would

> > need to

> > check how the plane state is going to change, and based on that

> > decide

> > whether a fence update is enough or disable+re-enable is needed.

> > And in

> > fact I don't see any fbc_disable before the flip, just an

> > fbc_update

> > after the fact. So seems to me the disable really should be in your

> > flip_prepare hook in this case


The flip_prepare hook is only for cases where the disable+update is not
needed since we're taking advantage of the HW tracking.

Besides, update() without a previous disable() is fine.

> > . And in fact the whole flip_prepare

> > thing isn't even called from the setplane/atomic path.


If update() is called we don't need flip_prepare() since flip_prepare()
is only for the fast path where we just update the fence.

> 

> Oh and another related scenario also came to mind. Consider for

> example

> the follow sequence of events:

> 

> 1. fbc enabled on pipe A

> 2. flip scheduled on pipe A which requires fbc to be disabled

>    -> fbc gets disabled

> 4. modeset on pipe B calls fbc_update

>    -> fbc gets re-enabled on pipe A before the flip has finished,

> oops!


If we have FBC disabled on pipe A before the pipe B modeset, then pipe
B calls fbc_update(), I just can't see how FBC may get re-enabled on
pipe A. Can you please elaborate more?


I know we should avoid regressions in the middle of the series, but as
a side-comment, the multi-pipe scenario changes even more later in the
series at the point where we introduce
enable/disable+activate/deactivate. So we'd only get FBC on pipe B if
we get a ilk_crtc_disable(pipe_a) first.

Besides, my plan here is to enable FBC only on HSW+, and these
platforms restrict FBC to pipe A. Of course I'm trying to not introduce
any new bugs, but I also think we shouldn't block patches just because
some ILK-IVB bug was not addressed.

> 

> So we also need something to keep fbc reliably disabled until the

> flip has occured.

> 

> > 

> > > 

> > > > 

> > > > What I had was:

> > > > "

> > > > static bool intel_fbc_need_reinit(struct intel_crtc *crtc)

> > > > {

> > > > 	struct drm_i915_private *dev_priv = crtc->base.dev-

> > > > > dev_private;

> > > > 	struct drm_framebuffer *fb = crtc->base.primary->fb;

> > > > 	struct drm_i915_gem_object *obj =

> > > > to_intel_framebuffer(fb)-

> > > > > obj;

> > > > 

> > > > 	return crtc != dev_priv->fbc.crtc ||

> > > > 	       obj->base.size > dev_priv->fbc.size ||

> > > > 	       drm_format_plane_cpp(fb->pixel_format, 0) !=

> > > > 	       drm_format_plane_cpp(dev_priv->fbc.pixel_format, 

> > > > 0) ||

> > > > 	       fb->pitches[0] != dev_priv->fbc.pitch;

> > > > }

> > > > intel_fbc_pre_page_flip()

> > > > {

> > > > 	...

> > > > 	intel_fbc_update_pending_score(crtc);

> > > > 

> > > > 	/*

> > > > 	 * If fbc was already possible we can update

> > > > immediately,

> > > > 	 * otherwise we will wait until the flip is finished.

> > > > 	 */

> > > > 	if (crtc->fbc.score != 0)

> > > > 		crtc->fbc.score = crtc->fbc.pending_score;

> > > > 

> > > > 	/*

> > > > 	 * Disable fbc if we're not (yet) capable, or if

> > > > 	 * we just need a full disable+enable reinit.

> > > > 	 */

> > > > 	if (crtc->fbc.score == 0 ||

> > > > intel_fbc_need_reinit(crtc))

> > > > 		__intel_fbc_disable(crtc);

> > > > 	...

> > > > }

> > > > "

> > > > 

> > > > > +	} else if (dev_priv->fbc.fbc_work) {

> > > > > +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(

> > > > > +				dev_priv->fbc.fbc_work-

> > > > > >crtc-

> > > > > > pipe);

> > > > > +		if (fbc_bits & frontbuffer_bits)

> > > > > +			__intel_fbc_disable(dev_priv);

> > > > >  	}

> > > > >  

> > > > >  	mutex_unlock(&dev_priv->fbc.lock);

> > > > > @@ -1063,18 +1155,25 @@ void intel_fbc_init(struct

> > > > > drm_i915_private

> > > > > *dev_priv)

> > > > >  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;

> > > > >  		dev_priv->fbc.enable_fbc = gen7_fbc_enable;

> > > > >  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;

> > > > > +		dev_priv->fbc.flip_prepare =

> > > > > snb_fbc_flip_prepare;

> > > > >  	} else if (INTEL_INFO(dev_priv)->gen >= 5) {

> > > > >  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;

> > > > >  		dev_priv->fbc.enable_fbc = ilk_fbc_enable;

> > > > >  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;

> > > > > +		if (INTEL_INFO(dev_priv)->gen == 5)

> > > > > +			dev_priv->fbc.flip_prepare =

> > > > > ilk_fbc_flip_prepare;

> > > > > +		else

> > > > > +			dev_priv->fbc.flip_prepare =

> > > > > snb_fbc_flip_prepare;

> > > > >  	} else if (IS_GM45(dev_priv)) {

> > > > >  		dev_priv->fbc.fbc_enabled = g4x_fbc_enabled;

> > > > >  		dev_priv->fbc.enable_fbc = g4x_fbc_enable;

> > > > >  		dev_priv->fbc.disable_fbc = g4x_fbc_disable;

> > > > > +		dev_priv->fbc.flip_prepare =

> > > > > g4x_fbc_flip_prepare;

> > > > >  	} else {

> > > > >  		dev_priv->fbc.fbc_enabled =

> > > > > i8xx_fbc_enabled;

> > > > >  		dev_priv->fbc.enable_fbc = i8xx_fbc_enable;

> > > > >  		dev_priv->fbc.disable_fbc =

> > > > > i8xx_fbc_disable;

> > > > > +		dev_priv->fbc.flip_prepare =

> > > > > i8xx_fbc_flip_prepare;

> > > > >  

> > > > >  		/* This value was pulled out of someone's

> > > > > hat */

> > > > >  		I915_WRITE(FBC_CONTROL, 500 <<

> > > > > FBC_CTL_INTERVAL_SHIFT);

> > > > > diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c

> > > > > b/drivers/gpu/drm/i915/intel_frontbuffer.c

> > > > > index ac85357..31a1ad3 100644

> > > > > --- a/drivers/gpu/drm/i915/intel_frontbuffer.c

> > > > > +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c

> > > > > @@ -192,6 +192,7 @@ void

> > > > > intel_frontbuffer_flip_prepare(struct

> > > > > drm_device *dev,

> > > > >  	mutex_unlock(&dev_priv->fb_tracking.lock);

> > > > >  

> > > > >  	intel_psr_single_frame_update(dev,

> > > > > frontbuffer_bits);

> > > > > +	intel_fbc_flip_prepare(dev_priv, frontbuffer_bits);

> > > > >  }

> > > > >  

> > > > >  /**

> > > > > -- 

> > > > > 2.6.1

> > > > > 

> > > > > _______________________________________________

> > > > > Intel-gfx mailing list

> > > > > Intel-gfx@lists.freedesktop.org

> > > > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx

> > > > 

> > 

> > -- 

> > Ville Syrjälä

> > Intel OTC

> > _______________________________________________

> > Intel-gfx mailing list

> > Intel-gfx@lists.freedesktop.org

> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx

>
Ville Syrjala Oct. 29, 2015, 6:14 p.m. UTC | #9
On Thu, Oct 29, 2015 at 05:52:26PM +0000, Zanoni, Paulo R wrote:
> Em Qui, 2015-10-29 às 19:30 +0200, Ville Syrjälä escreveu:
> > On Wed, Oct 28, 2015 at 07:20:12PM +0200, Ville Syrjälä wrote:
> > > On Wed, Oct 28, 2015 at 04:56:39PM +0000, Zanoni, Paulo R wrote:
> > > > Em Ter, 2015-10-27 às 20:32 +0200, Ville Syrjälä escreveu:
> > > > > On Tue, Oct 27, 2015 at 02:50:04PM -0200, Paulo Zanoni wrote:
> > > > > > The hardware already takes care of disabling and
> > > > > > recompressing FBC
> > > > > > when we do a page flip, so all we need to do is to update the
> > > > > > fence
> > > > > > registers and move on.
> > > > > > 
> > > > > > One of the important things to notice is that on the pre-gen6
> > > > > > platforms the fence is programmed on the FBC control register
> > > > > > and
> > > > > > the
> > > > > > documentation says we can't update the control register while
> > > > > > FBC
> > > > > > is
> > > > > > enabled. This would basically mean we'd have to to
> > > > > > disable+enable
> > > > > > FBC
> > > > > > at every flip in order to make sure the hardware tracking
> > > > > > still
> > > > > > works,
> > > > > > which doesn't seem to make too much sense. So I sent an email
> > > > > > to
> > > > > > the
> > > > > > hardware team requesting some clarification. The information
> > > > > > I got
> > > > > > was
> > > > > > this:
> > > > > > 
> > > > > > "I don't think any hardware is latching on to 0x100100,
> > > > > > 0x100104,
> > > > > > or
> > > > > > the old fence number in FBC_CTL. Instructions against
> > > > > > changing on
> > > > > > the
> > > > > > fly would be to simplify testing and ensure you don't miss
> > > > > > any
> > > > > > invalidation that happened right at that time."
> > > > > > 
> > > > > > So I guess we're fine for flips. But I can't really say I
> > > > > > tested
> > > > > > FBC
> > > > > > on these ancient platforms - nor that I'll ever propose
> > > > > > enabling
> > > > > > FBC
> > > > > > by default on them exactly because of problems like these.
> > > > > 
> > > > > Yeah, I did this in my patches too, and IIRC it seemed to work
> > > > > just
> > > > > fine
> > > > > back then.
> > > > > 
> > > > > > 
> > > > > > v2: Add comment at flush() (Chris).
> > > > > > 
> > > > > > Testcase: igt/kms_frontbuffer_tracking/fbc*-fliptrack
> > > > > > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > > > > > ---
> > > > > >  drivers/gpu/drm/i915/i915_drv.h          |   1 +
> > > > > >  drivers/gpu/drm/i915/i915_reg.h          |   3 +
> > > > > >  drivers/gpu/drm/i915/intel_display.c     |   1 -
> > > > > >  drivers/gpu/drm/i915/intel_drv.h         |   2 +
> > > > > >  drivers/gpu/drm/i915/intel_fbc.c         | 103
> > > > > > ++++++++++++++++++++++++++++++-
> > > > > >  drivers/gpu/drm/i915/intel_frontbuffer.c |   1 +
> > > > > >  6 files changed, 108 insertions(+), 3 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> > > > > > b/drivers/gpu/drm/i915/i915_drv.h
> > > > > > index ee14962..77da500 100644
> > > > > > --- a/drivers/gpu/drm/i915/i915_drv.h
> > > > > > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > > > > > @@ -928,6 +928,7 @@ struct i915_fbc {
> > > > > >  	bool (*fbc_enabled)(struct drm_i915_private
> > > > > > *dev_priv);
> > > > > >  	void (*enable_fbc)(struct intel_crtc *crtc);
> > > > > >  	void (*disable_fbc)(struct drm_i915_private
> > > > > > *dev_priv);
> > > > > > +	void (*flip_prepare)(struct drm_i915_private
> > > > > > *dev_priv);
> > > > > >  };
> > > > > >  
> > > > > >  /**
> > > > > > diff --git a/drivers/gpu/drm/i915/i915_reg.h
> > > > > > b/drivers/gpu/drm/i915/i915_reg.h
> > > > > > index 8942532..3d598a2 100644
> > > > > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > > > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > > > > @@ -2028,6 +2028,7 @@ enum skl_disp_power_wells {
> > > > > >  #define   FBC_CTL_C3_IDLE	(1<<13)
> > > > > >  #define   FBC_CTL_STRIDE_SHIFT	(5)
> > > > > >  #define   FBC_CTL_FENCENO_SHIFT	(0)
> > > > > > +#define   FBC_CTL_FENCENO_MASK	0xF
> > > > > 
> > > > > It's only three bits on gen2. But bit 3 is MBZ and there are
> > > > > only 8
> > > > > fence registers on those platforms, so this is OK.
> > > > > 
> > > > > >  #define FBC_COMMAND		0x0320c
> > > > > >  #define   FBC_CMD_COMPRESS	(1<<0)
> > > > > >  #define FBC_STATUS		0x03210
> > > > > > @@ -2064,6 +2065,7 @@ enum skl_disp_power_wells {
> > > > > >  #define   DPFC_CTL_LIMIT_1X	(0<<6)
> > > > > >  #define   DPFC_CTL_LIMIT_2X	(1<<6)
> > > > > >  #define   DPFC_CTL_LIMIT_4X	(2<<6)
> > > > > > +#define   DPFC_CTL_FENCE_MASK	0xF
> > > > > >  #define DPFC_RECOMP_CTL		0x320c
> > > > > >  #define   DPFC_RECOMP_STALL_EN	(1<<27)
> > > > > >  #define   DPFC_RECOMP_STALL_WM_SHIFT (16)
> > > > > > @@ -2086,6 +2088,7 @@ enum skl_disp_power_wells {
> > > > > >  #define   FBC_CTL_FALSE_COLOR	(1<<10)
> > > > > >  /* The bit 28-8 is reserved */
> > > > > >  #define   DPFC_RESERVED		(0x1FFFFF00)
> > > > > > +#define   ILK_DPFC_FENCE_MASK	0xF
> > > > > >  #define ILK_DPFC_RECOMP_CTL	0x4320c
> > > > > >  #define ILK_DPFC_STATUS		0x43210
> > > > > >  #define ILK_DPFC_FENCE_YOFF	0x43218
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_display.c
> > > > > > b/drivers/gpu/drm/i915/intel_display.c
> > > > > > index bc1907e..d9378c3 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_display.c
> > > > > > +++ b/drivers/gpu/drm/i915/intel_display.c
> > > > > > @@ -11502,7 +11502,6 @@ static int
> > > > > > intel_crtc_page_flip(struct
> > > > > > drm_crtc *crtc,
> > > > > >  			  to_intel_plane(primary)-
> > > > > > > frontbuffer_bit);
> > > > > >  	mutex_unlock(&dev->struct_mutex);
> > > > > >  
> > > > > > -	intel_fbc_disable_crtc(intel_crtc);
> > > > > >  	intel_frontbuffer_flip_prepare(dev,
> > > > > >  				       to_intel_plane(primar
> > > > > > y)-
> > > > > > > frontbuffer_bit);
> > > > > >  
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > index 08847d0..9065a8f 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_drv.h
> > > > > > +++ b/drivers/gpu/drm/i915/intel_drv.h
> > > > > > @@ -1305,6 +1305,8 @@ void intel_fbc_invalidate(struct
> > > > > > drm_i915_private *dev_priv,
> > > > > >  			  enum fb_op_origin origin);
> > > > > >  void intel_fbc_flush(struct drm_i915_private *dev_priv,
> > > > > >  		     unsigned int frontbuffer_bits, enum
> > > > > > fb_op_origin origin);
> > > > > > +void intel_fbc_flip_prepare(struct drm_i915_private
> > > > > > *dev_priv,
> > > > > > +			    unsigned int frontbuffer_bits);
> > > > > >  void intel_fbc_cleanup_cfb(struct drm_i915_private
> > > > > > *dev_priv);
> > > > > >  
> > > > > >  /* intel_hdmi.c */
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_fbc.c
> > > > > > b/drivers/gpu/drm/i915/intel_fbc.c
> > > > > > index d9d7e54..62f158b 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_fbc.c
> > > > > > +++ b/drivers/gpu/drm/i915/intel_fbc.c
> > > > > > @@ -82,6 +82,22 @@ static void i8xx_fbc_disable(struct
> > > > > > drm_i915_private *dev_priv)
> > > > > >  	DRM_DEBUG_KMS("disabled FBC\n");
> > > > > >  }
> > > > > >  
> > > > > > +static void i8xx_fbc_flip_prepare(struct drm_i915_private
> > > > > > *dev_priv)
> > > > > > +{
> > > > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > > > > +	uint32_t val;
> > > > > > +
> > > > > > +	/* Although the documentation suggests we can't
> > > > > > change
> > > > > > DPFC_CONTROL
> > > > > > +	 * while compression is enabled, the hardware guys
> > > > > > said
> > > > > > that updating
> > > > > > +	 * the fence register bits during a flip is fine. */
> > > > > > +	val = I915_READ(FBC_CONTROL);
> > > > > > +	val &= ~FBC_CTL_FENCENO_MASK;
> > > > > > +	val |= obj->fence_reg;
> > > > > > +	I915_WRITE(FBC_CONTROL, val);
> > > > > > +}
> > > > > 
> > > > > IIRC I just called the enable function to reconstruct the
> > > > > entire
> > > > > register contents to avoid code duplication. But maybe that's
> > > > > not
> > > > > possible without reowrking the fbc state handling entirely
> > > > > (which I had done).
> > > > > > +
> > > > > >  static void i8xx_fbc_enable(struct intel_crtc *crtc)
> > > > > >  {
> > > > > >  	struct drm_i915_private *dev_priv = crtc->base.dev-
> > > > > > > dev_private;
> > > > > > @@ -161,6 +177,22 @@ static void g4x_fbc_enable(struct
> > > > > > intel_crtc
> > > > > > *crtc)
> > > > > >  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",
> > > > > > plane_name(crtc->plane));
> > > > > >  }
> > > > > >  
> > > > > > +static void g4x_fbc_flip_prepare(struct drm_i915_private
> > > > > > *dev_priv)
> > > > > > +{
> > > > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > > > > +	uint32_t val;
> > > > > > +
> > > > > > +	/* Although the documentation suggests we can't
> > > > > > change
> > > > > > DPFC_CONTROL
> > > > > > +	 * while compression is enabled, the hardware guys
> > > > > > said
> > > > > > that updating
> > > > > > +	 * the fence register bits during a flip is fine. */
> > > > > > +	val = I915_READ(DPFC_CONTROL);
> > > > > > +	val &= ~DPFC_CTL_FENCE_MASK;
> > > > > > +	val |= obj->fence_reg;
> > > > > > +	I915_WRITE(DPFC_CONTROL, val);
> > > > > > +}
> > > > > > +
> > > > > >  static void g4x_fbc_disable(struct drm_i915_private
> > > > > > *dev_priv)
> > > > > >  {
> > > > > >  	u32 dpfc_ctl;
> > > > > > @@ -236,6 +268,31 @@ static void ilk_fbc_enable(struct
> > > > > > intel_crtc
> > > > > > *crtc)
> > > > > >  	DRM_DEBUG_KMS("enabled fbc on plane %c\n",
> > > > > > plane_name(crtc->plane));
> > > > > >  }
> > > > > >  
> > > > > > +static void ilk_fbc_flip_prepare(struct drm_i915_private
> > > > > > *dev_priv)
> > > > > > +{
> > > > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > > > > +	uint32_t val;
> > > > > > +
> > > > > > +	/* Although the documentation suggests we can't
> > > > > > change
> > > > > > DPFC_CONTROL
> > > > > > +	 * while compression is enabled, the hardware guys
> > > > > > said
> > > > > > that updating
> > > > > > +	 * the fence register bits during a flip is fine. */
> > > > > > +	val = I915_READ(ILK_DPFC_CONTROL);
> > > > > > +	val &= ~ILK_DPFC_FENCE_MASK;
> > > > > > +	val |= obj->fence_reg;
> > > > > > +	I915_WRITE(ILK_DPFC_CONTROL, val);
> > > > > > +}
> > > > > > +
> > > > > > +static void snb_fbc_flip_prepare(struct drm_i915_private
> > > > > > *dev_priv)
> > > > > > +{
> > > > > > +	struct intel_crtc *crtc = dev_priv->fbc.crtc;
> > > > > > +	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > > > > +	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
> > > > > > +
> > > > > > +	I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE |
> > > > > > obj-
> > > > > > > fence_reg);
> > > > > > +}
> > > > > > +
> > > > > >  static void ilk_fbc_disable(struct drm_i915_private
> > > > > > *dev_priv)
> > > > > >  {
> > > > > >  	u32 dpfc_ctl;
> > > > > > @@ -1021,13 +1078,48 @@ void intel_fbc_flush(struct
> > > > > > drm_i915_private *dev_priv,
> > > > > >  	if (origin == ORIGIN_GTT)
> > > > > >  		return;
> > > > > >  
> > > > > > +	/* Hardware tracking already recompresses the CFB
> > > > > > (nuke)
> > > > > > for us if FBC
> > > > > > +	 * is enabled and we do a page flip, so we can
> > > > > > safely
> > > > > > ignore it here.
> > > > > > +	 * FBC may be disabled in case we got an
> > > > > > invalidate()
> > > > > > before the
> > > > > > +	 * flush(), so we'll still have to check that case
> > > > > > below.
> > > > > > */
> > > > > > +	if (origin == ORIGIN_FLIP && dev_priv->fbc.enabled)
> > > > > > +		return;
> > > > > > +
> > > > > >  	mutex_lock(&dev_priv->fbc.lock);
> > > > > >  
> > > > > >  	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
> > > > > >  
> > > > > >  	if (!dev_priv->fbc.busy_bits) {
> > > > > > -		__intel_fbc_disable(dev_priv);
> > > > > > -		__intel_fbc_update(dev_priv);
> > > > > > +		if (origin == ORIGIN_FLIP) {
> > > > > > +			__intel_fbc_update(dev_priv);
> > > > > > +		} else {
> > > > > > +			__intel_fbc_disable(dev_priv);
> > > > > > +			__intel_fbc_update(dev_priv);
> > > > > > +		}
> > > > > > +	}
> > > > > > +
> > > > > > +	mutex_unlock(&dev_priv->fbc.lock);
> > > > > > +}
> > > > > > +
> > > > > > +void intel_fbc_flip_prepare(struct drm_i915_private
> > > > > > *dev_priv,
> > > > > > +			    unsigned int frontbuffer_bits)
> > > > > > +{
> > > > > > +	unsigned int fbc_bits;
> > > > > > +
> > > > > > +	if (!fbc_supported(dev_priv))
> > > > > > +		return;
> > > > > > +
> > > > > > +	mutex_lock(&dev_priv->fbc.lock);
> > > > > > +
> > > > > > +	if (dev_priv->fbc.enabled) {
> > > > > > +		fbc_bits =
> > > > > > INTEL_FRONTBUFFER_PRIMARY(dev_priv-
> > > > > > > fbc.crtc->pipe);
> > > > > 
> > > > > primary->frontbuffer_bit would seem better.
> > > > 
> > > > Why?
> > > 
> > > Why not? It's there already stashed away for you to use. No need to
> > > sprinkle these INTEL_FRONTBUFFER... things all over the place.
> > > 
> > > > 
> > > > > 
> > > > > > +		if (fbc_bits & frontbuffer_bits)
> > > > > > +			dev_priv-
> > > > > > >fbc.flip_prepare(dev_priv);
> > > > > 
> > > > > You would still have to disable+reenable if you need to eg.
> > > > > reallocate
> > > > > the compressed buffer, of if the new fb isn't suitable for fbc,
> > > > > or
> > > > > maybe
> > > > > if you need to change anything else in the control register.
> > > > 
> > > > Yes, but as far as I understand the frontbuffer tracking, this
> > > > case
> > > > won't happen here. I'm assuming we'll always go through
> > > > intel_fbc_update() on these cases.
> > > 
> > > Perhaps, but more by luck than by design. With the setplane and
> > > atomic
> > > APIs there's no difference between a page flip and other plane
> > > updates.
> 
> I am aware that some code paths are not the same as the full modeset
> path, but there is still difference from the plain pageflip IOCTL. And
> so far I have seen intel_fbc_update() being called everywhere, so this
> should prevent the problems you seem to be pointing. Can you please
> point a specific case that would break here involving a single pipe?
> Multiple pipes are discussed below. 
> 
> I've been trying to analyze what you said and convert this to a proper
> IGT subtest, but I just can't see the bug here. Please be a little more
> specific on your description, like you just did for the multi-pipe
> case.
> 
> > > So if you want to take advantage of the hw flip nuke, you would
> > > need to
> > > check how the plane state is going to change, and based on that
> > > decide
> > > whether a fence update is enough or disable+re-enable is needed.
> > > And in
> > > fact I don't see any fbc_disable before the flip, just an
> > > fbc_update
> > > after the fact. So seems to me the disable really should be in your
> > > flip_prepare hook in this case
> 
> The flip_prepare hook is only for cases where the disable+update is not
> needed since we're taking advantage of the HW tracking.

That may be OK for the legacy page flip path since we artifically
restrict what it can change. The setplane/atomic path can change
anything, so more checks are needed there to see if fbc needs to be
disabled or if the fence update is enough.

Oh, actually we do allow the page flip ioctl to change the tiling
mode, so you do need to take this into account even there.

> 
> Besides, update() without a previous disable() is fine.

Not if the flip already happened while fbc was still enabled and
configured based on the old plane state.

> 
> > > . And in fact the whole flip_prepare
> > > thing isn't even called from the setplane/atomic path.
> 
> If update() is called we don't need flip_prepare() since flip_prepare()
> is only for the fast path where we just update the fence.
> 
> > 
> > Oh and another related scenario also came to mind. Consider for
> > example
> > the follow sequence of events:
> > 
> > 1. fbc enabled on pipe A
> > 2. flip scheduled on pipe A which requires fbc to be disabled
> >    -> fbc gets disabled
> > 4. modeset on pipe B calls fbc_update
> >    -> fbc gets re-enabled on pipe A before the flip has finished,
> > oops!
> 
> If we have FBC disabled on pipe A before the pipe B modeset, then pipe
> B calls fbc_update(), I just can't see how FBC may get re-enabled on
> pipe A. Can you please elaborate more?

Last time I looked fbc_update() just went rummaging through all crtcs
and picked one of them. Maybe it no longer does that? Or maybe it still
disables fbc entirely when multiple pipes are active?

> 
> 
> I know we should avoid regressions in the middle of the series, but as
> a side-comment, the multi-pipe scenario changes even more later in the
> series at the point where we introduce
> enable/disable+activate/deactivate. So we'd only get FBC on pipe B if
> we get a ilk_crtc_disable(pipe_a) first.
> 
> Besides, my plan here is to enable FBC only on HSW+, and these
> platforms restrict FBC to pipe A. Of course I'm trying to not introduce
> any new bugs, but I also think we shouldn't block patches just because
> some ILK-IVB bug was not addressed.
> 
> > 
> > So we also need something to keep fbc reliably disabled until the
> > flip has occured.
> > 
> > > 
> > > > 
> > > > > 
> > > > > What I had was:
> > > > > "
> > > > > static bool intel_fbc_need_reinit(struct intel_crtc *crtc)
> > > > > {
> > > > > 	struct drm_i915_private *dev_priv = crtc->base.dev-
> > > > > > dev_private;
> > > > > 	struct drm_framebuffer *fb = crtc->base.primary->fb;
> > > > > 	struct drm_i915_gem_object *obj =
> > > > > to_intel_framebuffer(fb)-
> > > > > > obj;
> > > > > 
> > > > > 	return crtc != dev_priv->fbc.crtc ||
> > > > > 	       obj->base.size > dev_priv->fbc.size ||
> > > > > 	       drm_format_plane_cpp(fb->pixel_format, 0) !=
> > > > > 	       drm_format_plane_cpp(dev_priv->fbc.pixel_format, 
> > > > > 0) ||
> > > > > 	       fb->pitches[0] != dev_priv->fbc.pitch;
> > > > > }
> > > > > intel_fbc_pre_page_flip()
> > > > > {
> > > > > 	...
> > > > > 	intel_fbc_update_pending_score(crtc);
> > > > > 
> > > > > 	/*
> > > > > 	 * If fbc was already possible we can update
> > > > > immediately,
> > > > > 	 * otherwise we will wait until the flip is finished.
> > > > > 	 */
> > > > > 	if (crtc->fbc.score != 0)
> > > > > 		crtc->fbc.score = crtc->fbc.pending_score;
> > > > > 
> > > > > 	/*
> > > > > 	 * Disable fbc if we're not (yet) capable, or if
> > > > > 	 * we just need a full disable+enable reinit.
> > > > > 	 */
> > > > > 	if (crtc->fbc.score == 0 ||
> > > > > intel_fbc_need_reinit(crtc))
> > > > > 		__intel_fbc_disable(crtc);
> > > > > 	...
> > > > > }
> > > > > "
> > > > > 
> > > > > > +	} else if (dev_priv->fbc.fbc_work) {
> > > > > > +		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
> > > > > > +				dev_priv->fbc.fbc_work-
> > > > > > >crtc-
> > > > > > > pipe);
> > > > > > +		if (fbc_bits & frontbuffer_bits)
> > > > > > +			__intel_fbc_disable(dev_priv);
> > > > > >  	}
> > > > > >  
> > > > > >  	mutex_unlock(&dev_priv->fbc.lock);
> > > > > > @@ -1063,18 +1155,25 @@ void intel_fbc_init(struct
> > > > > > drm_i915_private
> > > > > > *dev_priv)
> > > > > >  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
> > > > > >  		dev_priv->fbc.enable_fbc = gen7_fbc_enable;
> > > > > >  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
> > > > > > +		dev_priv->fbc.flip_prepare =
> > > > > > snb_fbc_flip_prepare;
> > > > > >  	} else if (INTEL_INFO(dev_priv)->gen >= 5) {
> > > > > >  		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
> > > > > >  		dev_priv->fbc.enable_fbc = ilk_fbc_enable;
> > > > > >  		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
> > > > > > +		if (INTEL_INFO(dev_priv)->gen == 5)
> > > > > > +			dev_priv->fbc.flip_prepare =
> > > > > > ilk_fbc_flip_prepare;
> > > > > > +		else
> > > > > > +			dev_priv->fbc.flip_prepare =
> > > > > > snb_fbc_flip_prepare;
> > > > > >  	} else if (IS_GM45(dev_priv)) {
> > > > > >  		dev_priv->fbc.fbc_enabled = g4x_fbc_enabled;
> > > > > >  		dev_priv->fbc.enable_fbc = g4x_fbc_enable;
> > > > > >  		dev_priv->fbc.disable_fbc = g4x_fbc_disable;
> > > > > > +		dev_priv->fbc.flip_prepare =
> > > > > > g4x_fbc_flip_prepare;
> > > > > >  	} else {
> > > > > >  		dev_priv->fbc.fbc_enabled =
> > > > > > i8xx_fbc_enabled;
> > > > > >  		dev_priv->fbc.enable_fbc = i8xx_fbc_enable;
> > > > > >  		dev_priv->fbc.disable_fbc =
> > > > > > i8xx_fbc_disable;
> > > > > > +		dev_priv->fbc.flip_prepare =
> > > > > > i8xx_fbc_flip_prepare;
> > > > > >  
> > > > > >  		/* This value was pulled out of someone's
> > > > > > hat */
> > > > > >  		I915_WRITE(FBC_CONTROL, 500 <<
> > > > > > FBC_CTL_INTERVAL_SHIFT);
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > > > > b/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > > > > index ac85357..31a1ad3 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > > > > +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> > > > > > @@ -192,6 +192,7 @@ void
> > > > > > intel_frontbuffer_flip_prepare(struct
> > > > > > drm_device *dev,
> > > > > >  	mutex_unlock(&dev_priv->fb_tracking.lock);
> > > > > >  
> > > > > >  	intel_psr_single_frame_update(dev,
> > > > > > frontbuffer_bits);
> > > > > > +	intel_fbc_flip_prepare(dev_priv, frontbuffer_bits);
> > > > > >  }
> > > > > >  
> > > > > >  /**
> > > > > > -- 
> > > > > > 2.6.1
> > > > > > 
> > > > > > _______________________________________________
> > > > > > Intel-gfx mailing list
> > > > > > Intel-gfx@lists.freedesktop.org
> > > > > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> > > > > 
> > > 
> > > -- 
> > > Ville Syrjälä
> > > Intel OTC
> > > _______________________________________________
> > > Intel-gfx mailing list
> > > Intel-gfx@lists.freedesktop.org
> > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ee14962..77da500 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -928,6 +928,7 @@  struct i915_fbc {
 	bool (*fbc_enabled)(struct drm_i915_private *dev_priv);
 	void (*enable_fbc)(struct intel_crtc *crtc);
 	void (*disable_fbc)(struct drm_i915_private *dev_priv);
+	void (*flip_prepare)(struct drm_i915_private *dev_priv);
 };
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 8942532..3d598a2 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2028,6 +2028,7 @@  enum skl_disp_power_wells {
 #define   FBC_CTL_C3_IDLE	(1<<13)
 #define   FBC_CTL_STRIDE_SHIFT	(5)
 #define   FBC_CTL_FENCENO_SHIFT	(0)
+#define   FBC_CTL_FENCENO_MASK	0xF
 #define FBC_COMMAND		0x0320c
 #define   FBC_CMD_COMPRESS	(1<<0)
 #define FBC_STATUS		0x03210
@@ -2064,6 +2065,7 @@  enum skl_disp_power_wells {
 #define   DPFC_CTL_LIMIT_1X	(0<<6)
 #define   DPFC_CTL_LIMIT_2X	(1<<6)
 #define   DPFC_CTL_LIMIT_4X	(2<<6)
+#define   DPFC_CTL_FENCE_MASK	0xF
 #define DPFC_RECOMP_CTL		0x320c
 #define   DPFC_RECOMP_STALL_EN	(1<<27)
 #define   DPFC_RECOMP_STALL_WM_SHIFT (16)
@@ -2086,6 +2088,7 @@  enum skl_disp_power_wells {
 #define   FBC_CTL_FALSE_COLOR	(1<<10)
 /* The bit 28-8 is reserved */
 #define   DPFC_RESERVED		(0x1FFFFF00)
+#define   ILK_DPFC_FENCE_MASK	0xF
 #define ILK_DPFC_RECOMP_CTL	0x4320c
 #define ILK_DPFC_STATUS		0x43210
 #define ILK_DPFC_FENCE_YOFF	0x43218
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index bc1907e..d9378c3 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -11502,7 +11502,6 @@  static int intel_crtc_page_flip(struct drm_crtc *crtc,
 			  to_intel_plane(primary)->frontbuffer_bit);
 	mutex_unlock(&dev->struct_mutex);
 
-	intel_fbc_disable_crtc(intel_crtc);
 	intel_frontbuffer_flip_prepare(dev,
 				       to_intel_plane(primary)->frontbuffer_bit);
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 08847d0..9065a8f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1305,6 +1305,8 @@  void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
 			  enum fb_op_origin origin);
 void intel_fbc_flush(struct drm_i915_private *dev_priv,
 		     unsigned int frontbuffer_bits, enum fb_op_origin origin);
+void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,
+			    unsigned int frontbuffer_bits);
 void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
 
 /* intel_hdmi.c */
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index d9d7e54..62f158b 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -82,6 +82,22 @@  static void i8xx_fbc_disable(struct drm_i915_private *dev_priv)
 	DRM_DEBUG_KMS("disabled FBC\n");
 }
 
+static void i8xx_fbc_flip_prepare(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc = dev_priv->fbc.crtc;
+	struct drm_framebuffer *fb = crtc->base.primary->fb;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	uint32_t val;
+
+	/* Although the documentation suggests we can't change DPFC_CONTROL
+	 * while compression is enabled, the hardware guys said that updating
+	 * the fence register bits during a flip is fine. */
+	val = I915_READ(FBC_CONTROL);
+	val &= ~FBC_CTL_FENCENO_MASK;
+	val |= obj->fence_reg;
+	I915_WRITE(FBC_CONTROL, val);
+}
+
 static void i8xx_fbc_enable(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
@@ -161,6 +177,22 @@  static void g4x_fbc_enable(struct intel_crtc *crtc)
 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane));
 }
 
+static void g4x_fbc_flip_prepare(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc = dev_priv->fbc.crtc;
+	struct drm_framebuffer *fb = crtc->base.primary->fb;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	uint32_t val;
+
+	/* Although the documentation suggests we can't change DPFC_CONTROL
+	 * while compression is enabled, the hardware guys said that updating
+	 * the fence register bits during a flip is fine. */
+	val = I915_READ(DPFC_CONTROL);
+	val &= ~DPFC_CTL_FENCE_MASK;
+	val |= obj->fence_reg;
+	I915_WRITE(DPFC_CONTROL, val);
+}
+
 static void g4x_fbc_disable(struct drm_i915_private *dev_priv)
 {
 	u32 dpfc_ctl;
@@ -236,6 +268,31 @@  static void ilk_fbc_enable(struct intel_crtc *crtc)
 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(crtc->plane));
 }
 
+static void ilk_fbc_flip_prepare(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc = dev_priv->fbc.crtc;
+	struct drm_framebuffer *fb = crtc->base.primary->fb;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+	uint32_t val;
+
+	/* Although the documentation suggests we can't change DPFC_CONTROL
+	 * while compression is enabled, the hardware guys said that updating
+	 * the fence register bits during a flip is fine. */
+	val = I915_READ(ILK_DPFC_CONTROL);
+	val &= ~ILK_DPFC_FENCE_MASK;
+	val |= obj->fence_reg;
+	I915_WRITE(ILK_DPFC_CONTROL, val);
+}
+
+static void snb_fbc_flip_prepare(struct drm_i915_private *dev_priv)
+{
+	struct intel_crtc *crtc = dev_priv->fbc.crtc;
+	struct drm_framebuffer *fb = crtc->base.primary->fb;
+	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
+
+	I915_WRITE(SNB_DPFC_CTL_SA, SNB_CPU_FENCE_ENABLE | obj->fence_reg);
+}
+
 static void ilk_fbc_disable(struct drm_i915_private *dev_priv)
 {
 	u32 dpfc_ctl;
@@ -1021,13 +1078,48 @@  void intel_fbc_flush(struct drm_i915_private *dev_priv,
 	if (origin == ORIGIN_GTT)
 		return;
 
+	/* Hardware tracking already recompresses the CFB (nuke) for us if FBC
+	 * is enabled and we do a page flip, so we can safely ignore it here.
+	 * FBC may be disabled in case we got an invalidate() before the
+	 * flush(), so we'll still have to check that case below. */
+	if (origin == ORIGIN_FLIP && dev_priv->fbc.enabled)
+		return;
+
 	mutex_lock(&dev_priv->fbc.lock);
 
 	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
 
 	if (!dev_priv->fbc.busy_bits) {
-		__intel_fbc_disable(dev_priv);
-		__intel_fbc_update(dev_priv);
+		if (origin == ORIGIN_FLIP) {
+			__intel_fbc_update(dev_priv);
+		} else {
+			__intel_fbc_disable(dev_priv);
+			__intel_fbc_update(dev_priv);
+		}
+	}
+
+	mutex_unlock(&dev_priv->fbc.lock);
+}
+
+void intel_fbc_flip_prepare(struct drm_i915_private *dev_priv,
+			    unsigned int frontbuffer_bits)
+{
+	unsigned int fbc_bits;
+
+	if (!fbc_supported(dev_priv))
+		return;
+
+	mutex_lock(&dev_priv->fbc.lock);
+
+	if (dev_priv->fbc.enabled) {
+		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);
+		if (fbc_bits & frontbuffer_bits)
+			dev_priv->fbc.flip_prepare(dev_priv);
+	} else if (dev_priv->fbc.fbc_work) {
+		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
+				dev_priv->fbc.fbc_work->crtc->pipe);
+		if (fbc_bits & frontbuffer_bits)
+			__intel_fbc_disable(dev_priv);
 	}
 
 	mutex_unlock(&dev_priv->fbc.lock);
@@ -1063,18 +1155,25 @@  void intel_fbc_init(struct drm_i915_private *dev_priv)
 		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
 		dev_priv->fbc.enable_fbc = gen7_fbc_enable;
 		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
+		dev_priv->fbc.flip_prepare = snb_fbc_flip_prepare;
 	} else if (INTEL_INFO(dev_priv)->gen >= 5) {
 		dev_priv->fbc.fbc_enabled = ilk_fbc_enabled;
 		dev_priv->fbc.enable_fbc = ilk_fbc_enable;
 		dev_priv->fbc.disable_fbc = ilk_fbc_disable;
+		if (INTEL_INFO(dev_priv)->gen == 5)
+			dev_priv->fbc.flip_prepare = ilk_fbc_flip_prepare;
+		else
+			dev_priv->fbc.flip_prepare = snb_fbc_flip_prepare;
 	} else if (IS_GM45(dev_priv)) {
 		dev_priv->fbc.fbc_enabled = g4x_fbc_enabled;
 		dev_priv->fbc.enable_fbc = g4x_fbc_enable;
 		dev_priv->fbc.disable_fbc = g4x_fbc_disable;
+		dev_priv->fbc.flip_prepare = g4x_fbc_flip_prepare;
 	} else {
 		dev_priv->fbc.fbc_enabled = i8xx_fbc_enabled;
 		dev_priv->fbc.enable_fbc = i8xx_fbc_enable;
 		dev_priv->fbc.disable_fbc = i8xx_fbc_disable;
+		dev_priv->fbc.flip_prepare = i8xx_fbc_flip_prepare;
 
 		/* This value was pulled out of someone's hat */
 		I915_WRITE(FBC_CONTROL, 500 << FBC_CTL_INTERVAL_SHIFT);
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index ac85357..31a1ad3 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -192,6 +192,7 @@  void intel_frontbuffer_flip_prepare(struct drm_device *dev,
 	mutex_unlock(&dev_priv->fb_tracking.lock);
 
 	intel_psr_single_frame_update(dev, frontbuffer_bits);
+	intel_fbc_flip_prepare(dev_priv, frontbuffer_bits);
 }
 
 /**