diff mbox

[6/7] drm/i915: add frontbuffer tracking to FBC

Message ID 1423855427-2098-7-git-send-email-przanoni@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paulo Zanoni Feb. 13, 2015, 7:23 p.m. UTC
From: Paulo Zanoni <paulo.r.zanoni@intel.com>

Kill the blt/render tracking we currently have and use the frontbuffer
tracking infrastructure.

Don't enable things by default yet.

v2: (Rodrigo) Fix small conflict on rebase and typo at subject.
v3: (Paulo) Rebase on RENDER_CS change.
v4: (Paulo) Rebase.
v5: (Paulo) Simplify: flushes don't have origin (Daniel).
            Also rebase due to patch order changes.

Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          | 10 +---
 drivers/gpu/drm/i915/intel_drv.h         |  6 ++-
 drivers/gpu/drm/i915/intel_fbc.c         | 91 +++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_frontbuffer.c | 14 +----
 drivers/gpu/drm/i915/intel_ringbuffer.c  | 41 +-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 -
 6 files changed, 65 insertions(+), 98 deletions(-)

Comments

Rodrigo Vivi March 4, 2015, 12:57 a.m. UTC | #1
On Fri, Feb 13, 2015 at 11:23 AM, Paulo Zanoni <przanoni@gmail.com> wrote:
> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
>
> Kill the blt/render tracking we currently have and use the frontbuffer
> tracking infrastructure.
>
> Don't enable things by default yet.
>
> v2: (Rodrigo) Fix small conflict on rebase and typo at subject.
> v3: (Paulo) Rebase on RENDER_CS change.
> v4: (Paulo) Rebase.
> v5: (Paulo) Simplify: flushes don't have origin (Daniel).
>             Also rebase due to patch order changes.
>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          | 10 +---
>  drivers/gpu/drm/i915/intel_drv.h         |  6 ++-
>  drivers/gpu/drm/i915/intel_fbc.c         | 91 +++++++++++++++++++-------------
>  drivers/gpu/drm/i915/intel_frontbuffer.c | 14 +----
>  drivers/gpu/drm/i915/intel_ringbuffer.c  | 41 +-------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 -
>  6 files changed, 65 insertions(+), 98 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 30aaa8e..7680ca0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -783,6 +783,8 @@ struct i915_fbc {
>         unsigned long uncompressed_size;
>         unsigned threshold;
>         unsigned int fb_id;
> +       unsigned int possible_framebuffer_bits;
> +       unsigned int busy_bits;

I'm not sure if I understood why you keep 2 variables here?
Is it because we keep enabling/disabling fbc with updates all over the code?
In this case it is ok we just need to kill it when we kill update_fbc...

>         struct intel_crtc *crtc;
>         int y;
>
> @@ -795,14 +797,6 @@ struct i915_fbc {
>          * possible. */
>         bool enabled;
>
> -       /* On gen8 some rings cannont perform fbc clean operation so for now
> -        * we are doing this on SW with mmio.
> -        * This variable works in the opposite information direction
> -        * of ring->fbc_dirty telling software on frontbuffer tracking
> -        * to perform the cache clean on sw side.
> -        */
> -       bool need_sw_cache_clean;
> -
>         struct intel_fbc_work {
>                 struct delayed_work work;
>                 struct drm_crtc *crtc;
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index 05d0a43f..571174f 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1091,7 +1091,11 @@ bool intel_fbc_enabled(struct drm_device *dev);
>  void intel_fbc_update(struct drm_device *dev);
>  void intel_fbc_init(struct drm_i915_private *dev_priv);
>  void intel_fbc_disable(struct drm_device *dev);
> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value);
> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
> +                         unsigned int frontbuffer_bits,
> +                         enum fb_op_origin origin);
> +void intel_fbc_flush(struct drm_i915_private *dev_priv,
> +                    unsigned int frontbuffer_bits);
>
>  /* intel_hdmi.c */
>  void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port);
> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
> index 618f7bd..9fcf446 100644
> --- a/drivers/gpu/drm/i915/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/intel_fbc.c
> @@ -174,29 +174,10 @@ static bool g4x_fbc_enabled(struct drm_device *dev)
>         return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
>  }
>
> -static void snb_fbc_blit_update(struct drm_device *dev)
> +static void intel_fbc_nuke(struct drm_i915_private *dev_priv)
>  {
> -       struct drm_i915_private *dev_priv = dev->dev_private;
> -       u32 blt_ecoskpd;
> -
> -       /* Make sure blitter notifies FBC of writes */
> -
> -       /* Blitter is part of Media powerwell on VLV. No impact of
> -        * his param in other platforms for now */
> -       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
> -
> -       blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
> -       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
> -               GEN6_BLITTER_LOCK_SHIFT;
> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
> -       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
> -       blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
> -                        GEN6_BLITTER_LOCK_SHIFT);
> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
> -       POSTING_READ(GEN6_BLITTER_ECOSKPD);
> -
> -       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);

Are you sure this continue working on snb? or should we fully remove
fbc support for snb and older?

Anyway I believe this could be in a different patch.

> +       I915_WRITE(MSG_FBC_REND_STATE, FBC_REND_NUKE);
> +       POSTING_READ(MSG_FBC_REND_STATE);
>  }
>
>  static void ilk_fbc_enable(struct drm_crtc *crtc)
> @@ -239,9 +220,10 @@ static void ilk_fbc_enable(struct drm_crtc *crtc)
>                 I915_WRITE(SNB_DPFC_CTL_SA,
>                            SNB_CPU_FENCE_ENABLE | obj->fence_reg);
>                 I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
> -               snb_fbc_blit_update(dev);
>         }
>
> +       intel_fbc_nuke(dev_priv);
> +
>         DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
>  }
>
> @@ -320,7 +302,7 @@ static void gen7_fbc_enable(struct drm_crtc *crtc)
>                    SNB_CPU_FENCE_ENABLE | obj->fence_reg);
>         I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
>
> -       snb_fbc_blit_update(dev);
> +       intel_fbc_nuke(dev_priv);
>
>         DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
>  }
> @@ -340,19 +322,6 @@ bool intel_fbc_enabled(struct drm_device *dev)
>         return dev_priv->fbc.enabled;
>  }
>
> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value)
> -{
> -       struct drm_i915_private *dev_priv = dev->dev_private;
> -
> -       if (!IS_GEN8(dev))
> -               return;
> -
> -       if (!intel_fbc_enabled(dev))
> -               return;
> -
> -       I915_WRITE(MSG_FBC_REND_STATE, value);
> -}
> -
>  static void intel_fbc_work_fn(struct work_struct *__work)
>  {
>         struct intel_fbc_work *work =
> @@ -685,6 +654,44 @@ out_disable:
>         i915_gem_stolen_cleanup_compression(dev);
>  }
>
> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
> +                         unsigned int frontbuffer_bits,
> +                         enum fb_op_origin origin)
> +{
> +       struct drm_device *dev = dev_priv->dev;
> +       unsigned int fbc_bits;
> +
> +       if (origin == ORIGIN_GTT)
> +               return;
> +
> +       if (dev_priv->fbc.enabled)
> +               fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);
> +       else if (dev_priv->fbc.fbc_work)
> +               fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
> +                       to_intel_crtc(dev_priv->fbc.fbc_work->crtc)->pipe);
> +       else
> +               fbc_bits = dev_priv->fbc.possible_framebuffer_bits;
> +
> +       dev_priv->fbc.busy_bits |= (fbc_bits & frontbuffer_bits);
> +
> +       if (dev_priv->fbc.busy_bits)
> +               intel_fbc_disable(dev);
> +}
> +
> +void intel_fbc_flush(struct drm_i915_private *dev_priv,
> +                    unsigned int frontbuffer_bits)
> +{
> +       struct drm_device *dev = dev_priv->dev;
> +
> +       if (!dev_priv->fbc.busy_bits)
> +               return;
> +
> +       dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
> +
> +       if (!dev_priv->fbc.busy_bits)
> +               intel_fbc_update(dev);
> +}
> +
>  /**
>   * intel_fbc_init - Initialize FBC
>   * @dev_priv: the i915 device
> @@ -693,12 +700,22 @@ out_disable:
>   */
>  void intel_fbc_init(struct drm_i915_private *dev_priv)
>  {
> +       enum pipe pipe;
> +
>         if (!HAS_FBC(dev_priv)) {
>                 dev_priv->fbc.enabled = false;
>                 dev_priv->fbc.no_fbc_reason = FBC_UNSUPPORTED;
>                 return;
>         }
>
> +       for_each_pipe(dev_priv, pipe) {
> +               dev_priv->fbc.possible_framebuffer_bits |=
> +                               INTEL_FRONTBUFFER_PRIMARY(pipe);
> +
> +               if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
> +                       break;
> +       }
> +
>         if (INTEL_INFO(dev_priv)->gen >= 7) {
>                 dev_priv->display.fbc_enabled = ilk_fbc_enabled;
>                 dev_priv->display.enable_fbc = gen7_fbc_enable;
> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
> index 5da73f0..0a1bac8 100644
> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> @@ -118,8 +118,6 @@ static void intel_mark_fb_busy(struct drm_device *dev,
>                         continue;
>
>                 intel_increase_pllclock(dev, pipe);
> -               if (ring && intel_fbc_enabled(dev))
> -                       ring->fbc_dirty = true;
>         }
>  }
>
> @@ -160,6 +158,7 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
>
>         intel_psr_invalidate(dev, obj->frontbuffer_bits);
>         intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits);
> +       intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin);
>  }
>
>  /**
> @@ -187,16 +186,7 @@ void intel_frontbuffer_flush(struct drm_device *dev,
>
>         intel_edp_drrs_flush(dev, frontbuffer_bits);
>         intel_psr_flush(dev, frontbuffer_bits);
> -
> -       /*
> -        * FIXME: Unconditional fbc flushing here is a rather gross hack and
> -        * needs to be reworked into a proper frontbuffer tracking scheme like
> -        * psr employs.
> -        */
> -       if (dev_priv->fbc.need_sw_cache_clean) {
> -               dev_priv->fbc.need_sw_cache_clean = false;
> -               bdw_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN);
> -       }
> +       intel_fbc_flush(dev_priv, frontbuffer_bits);
>  }
>
>  /**
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index d17e76d..fed6284 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -317,29 +317,6 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
>         return 0;
>  }
>
> -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
> -{
> -       int ret;
> -
> -       if (!ring->fbc_dirty)
> -               return 0;
> -
> -       ret = intel_ring_begin(ring, 6);
> -       if (ret)
> -               return ret;
> -       /* WaFbcNukeOn3DBlt:ivb/hsw */
> -       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> -       intel_ring_emit(ring, MSG_FBC_REND_STATE);
> -       intel_ring_emit(ring, value);
> -       intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
> -       intel_ring_emit(ring, MSG_FBC_REND_STATE);
> -       intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
> -       intel_ring_advance(ring);
> -
> -       ring->fbc_dirty = false;
> -       return 0;
> -}
> -
>  static int
>  gen7_render_ring_flush(struct intel_engine_cs *ring,
>                        u32 invalidate_domains, u32 flush_domains)
> @@ -398,9 +375,6 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
>         intel_ring_emit(ring, 0);
>         intel_ring_advance(ring);
>
> -       if (!invalidate_domains && flush_domains)
> -               return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
> -
>         return 0;
>  }
>
> @@ -462,9 +436,6 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
>         if (ret)
>                 return ret;
>
> -       if (!invalidate_domains && flush_domains)
> -               return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
> -
>         return 0;
>  }
>
> @@ -2420,7 +2391,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>                            u32 invalidate, u32 flush)
>  {
>         struct drm_device *dev = ring->dev;
> -       struct drm_i915_private *dev_priv = dev->dev_private;
>         uint32_t cmd;
>         int ret;
>
> @@ -2429,7 +2399,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>                 return ret;
>
>         cmd = MI_FLUSH_DW;
> -       if (INTEL_INFO(ring->dev)->gen >= 8)
> +       if (INTEL_INFO(dev)->gen >= 8)
>                 cmd += 1;
>
>         /* We always require a command barrier so that subsequent
> @@ -2449,7 +2419,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>                 cmd |= MI_INVALIDATE_TLB;
>         intel_ring_emit(ring, cmd);
>         intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
> -       if (INTEL_INFO(ring->dev)->gen >= 8) {
> +       if (INTEL_INFO(dev)->gen >= 8) {
>                 intel_ring_emit(ring, 0); /* upper addr */
>                 intel_ring_emit(ring, 0); /* value */
>         } else  {
> @@ -2458,13 +2428,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>         }
>         intel_ring_advance(ring);
>
> -       if (!invalidate && flush) {
> -               if (IS_GEN7(dev))
> -                       return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
> -               else if (IS_BROADWELL(dev))
> -                       dev_priv->fbc.need_sw_cache_clean = true;
> -       }
> -
>         return 0;
>  }
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index b6c484f..2ac2de2 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -267,7 +267,6 @@ struct  intel_engine_cs {
>          */
>         struct drm_i915_gem_request *outstanding_lazy_request;
>         bool gpu_caches_dirty;
> -       bool fbc_dirty;
>
>         wait_queue_head_t irq_queue;
>
> --
> 2.1.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Thanks,
Paulo Zanoni March 4, 2015, 6:03 p.m. UTC | #2
2015-03-03 21:57 GMT-03:00 Rodrigo Vivi <rodrigo.vivi@gmail.com>:
> On Fri, Feb 13, 2015 at 11:23 AM, Paulo Zanoni <przanoni@gmail.com> wrote:
>> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
>>
>> Kill the blt/render tracking we currently have and use the frontbuffer
>> tracking infrastructure.
>>
>> Don't enable things by default yet.
>>
>> v2: (Rodrigo) Fix small conflict on rebase and typo at subject.
>> v3: (Paulo) Rebase on RENDER_CS change.
>> v4: (Paulo) Rebase.
>> v5: (Paulo) Simplify: flushes don't have origin (Daniel).
>>             Also rebase due to patch order changes.
>>
>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
>> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h          | 10 +---
>>  drivers/gpu/drm/i915/intel_drv.h         |  6 ++-
>>  drivers/gpu/drm/i915/intel_fbc.c         | 91 +++++++++++++++++++-------------
>>  drivers/gpu/drm/i915/intel_frontbuffer.c | 14 +----
>>  drivers/gpu/drm/i915/intel_ringbuffer.c  | 41 +-------------
>>  drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 -
>>  6 files changed, 65 insertions(+), 98 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 30aaa8e..7680ca0 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -783,6 +783,8 @@ struct i915_fbc {
>>         unsigned long uncompressed_size;
>>         unsigned threshold;
>>         unsigned int fb_id;
>> +       unsigned int possible_framebuffer_bits;
>> +       unsigned int busy_bits;
>
> I'm not sure if I understood why you keep 2 variables here?

After Daniel's last suggestion, the possible_framebuffer_bits could
probably be removed and left as a local variable at intel_fbc_validate
now: it's just a matter of coding style. Some platforms support FBC on
more than just pipe A, so this variable is used to simplify checks
related to this.

The busy_bits was also part of Daniel's last request: we need to store
which bits triggered intel_fbc_invalidate calls, and then check them
when intel_fbc_flush is called.

> Is it because we keep enabling/disabling fbc with updates all over the code?
> In this case it is ok we just need to kill it when we kill update_fbc...

I don't understand what you mean here. Please clarify.

>
>>         struct intel_crtc *crtc;
>>         int y;
>>
>> @@ -795,14 +797,6 @@ struct i915_fbc {
>>          * possible. */
>>         bool enabled;
>>
>> -       /* On gen8 some rings cannont perform fbc clean operation so for now
>> -        * we are doing this on SW with mmio.
>> -        * This variable works in the opposite information direction
>> -        * of ring->fbc_dirty telling software on frontbuffer tracking
>> -        * to perform the cache clean on sw side.
>> -        */
>> -       bool need_sw_cache_clean;
>> -
>>         struct intel_fbc_work {
>>                 struct delayed_work work;
>>                 struct drm_crtc *crtc;
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> index 05d0a43f..571174f 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -1091,7 +1091,11 @@ bool intel_fbc_enabled(struct drm_device *dev);
>>  void intel_fbc_update(struct drm_device *dev);
>>  void intel_fbc_init(struct drm_i915_private *dev_priv);
>>  void intel_fbc_disable(struct drm_device *dev);
>> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value);
>> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
>> +                         unsigned int frontbuffer_bits,
>> +                         enum fb_op_origin origin);
>> +void intel_fbc_flush(struct drm_i915_private *dev_priv,
>> +                    unsigned int frontbuffer_bits);
>>
>>  /* intel_hdmi.c */
>>  void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port);
>> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
>> index 618f7bd..9fcf446 100644
>> --- a/drivers/gpu/drm/i915/intel_fbc.c
>> +++ b/drivers/gpu/drm/i915/intel_fbc.c
>> @@ -174,29 +174,10 @@ static bool g4x_fbc_enabled(struct drm_device *dev)
>>         return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
>>  }
>>
>> -static void snb_fbc_blit_update(struct drm_device *dev)
>> +static void intel_fbc_nuke(struct drm_i915_private *dev_priv)
>>  {
>> -       struct drm_i915_private *dev_priv = dev->dev_private;
>> -       u32 blt_ecoskpd;
>> -
>> -       /* Make sure blitter notifies FBC of writes */
>> -
>> -       /* Blitter is part of Media powerwell on VLV. No impact of
>> -        * his param in other platforms for now */
>> -       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
>> -
>> -       blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
>> -       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
>> -               GEN6_BLITTER_LOCK_SHIFT;
>> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
>> -       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
>> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
>> -       blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
>> -                        GEN6_BLITTER_LOCK_SHIFT);
>> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
>> -       POSTING_READ(GEN6_BLITTER_ECOSKPD);
>> -
>> -       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
>
> Are you sure this continue working on snb? or should we fully remove
> fbc support for snb and older?
>
> Anyway I believe this could be in a different patch.

Well, the whole idea of using the sofware-based frontbuffer tracking
mechanism is to get rid of all the hardware-based stuff.

>
>> +       I915_WRITE(MSG_FBC_REND_STATE, FBC_REND_NUKE);
>> +       POSTING_READ(MSG_FBC_REND_STATE);
>>  }
>>
>>  static void ilk_fbc_enable(struct drm_crtc *crtc)
>> @@ -239,9 +220,10 @@ static void ilk_fbc_enable(struct drm_crtc *crtc)
>>                 I915_WRITE(SNB_DPFC_CTL_SA,
>>                            SNB_CPU_FENCE_ENABLE | obj->fence_reg);
>>                 I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
>> -               snb_fbc_blit_update(dev);
>>         }
>>
>> +       intel_fbc_nuke(dev_priv);
>> +
>>         DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
>>  }
>>
>> @@ -320,7 +302,7 @@ static void gen7_fbc_enable(struct drm_crtc *crtc)
>>                    SNB_CPU_FENCE_ENABLE | obj->fence_reg);
>>         I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
>>
>> -       snb_fbc_blit_update(dev);
>> +       intel_fbc_nuke(dev_priv);
>>
>>         DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
>>  }
>> @@ -340,19 +322,6 @@ bool intel_fbc_enabled(struct drm_device *dev)
>>         return dev_priv->fbc.enabled;
>>  }
>>
>> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value)
>> -{
>> -       struct drm_i915_private *dev_priv = dev->dev_private;
>> -
>> -       if (!IS_GEN8(dev))
>> -               return;
>> -
>> -       if (!intel_fbc_enabled(dev))
>> -               return;
>> -
>> -       I915_WRITE(MSG_FBC_REND_STATE, value);
>> -}
>> -
>>  static void intel_fbc_work_fn(struct work_struct *__work)
>>  {
>>         struct intel_fbc_work *work =
>> @@ -685,6 +654,44 @@ out_disable:
>>         i915_gem_stolen_cleanup_compression(dev);
>>  }
>>
>> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
>> +                         unsigned int frontbuffer_bits,
>> +                         enum fb_op_origin origin)
>> +{
>> +       struct drm_device *dev = dev_priv->dev;
>> +       unsigned int fbc_bits;
>> +
>> +       if (origin == ORIGIN_GTT)
>> +               return;
>> +
>> +       if (dev_priv->fbc.enabled)
>> +               fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);
>> +       else if (dev_priv->fbc.fbc_work)
>> +               fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
>> +                       to_intel_crtc(dev_priv->fbc.fbc_work->crtc)->pipe);
>> +       else
>> +               fbc_bits = dev_priv->fbc.possible_framebuffer_bits;
>> +
>> +       dev_priv->fbc.busy_bits |= (fbc_bits & frontbuffer_bits);
>> +
>> +       if (dev_priv->fbc.busy_bits)
>> +               intel_fbc_disable(dev);
>> +}
>> +
>> +void intel_fbc_flush(struct drm_i915_private *dev_priv,
>> +                    unsigned int frontbuffer_bits)
>> +{
>> +       struct drm_device *dev = dev_priv->dev;
>> +
>> +       if (!dev_priv->fbc.busy_bits)
>> +               return;
>> +
>> +       dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
>> +
>> +       if (!dev_priv->fbc.busy_bits)
>> +               intel_fbc_update(dev);
>> +}
>> +
>>  /**
>>   * intel_fbc_init - Initialize FBC
>>   * @dev_priv: the i915 device
>> @@ -693,12 +700,22 @@ out_disable:
>>   */
>>  void intel_fbc_init(struct drm_i915_private *dev_priv)
>>  {
>> +       enum pipe pipe;
>> +
>>         if (!HAS_FBC(dev_priv)) {
>>                 dev_priv->fbc.enabled = false;
>>                 dev_priv->fbc.no_fbc_reason = FBC_UNSUPPORTED;
>>                 return;
>>         }
>>
>> +       for_each_pipe(dev_priv, pipe) {
>> +               dev_priv->fbc.possible_framebuffer_bits |=
>> +                               INTEL_FRONTBUFFER_PRIMARY(pipe);
>> +
>> +               if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
>> +                       break;
>> +       }
>> +
>>         if (INTEL_INFO(dev_priv)->gen >= 7) {
>>                 dev_priv->display.fbc_enabled = ilk_fbc_enabled;
>>                 dev_priv->display.enable_fbc = gen7_fbc_enable;
>> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
>> index 5da73f0..0a1bac8 100644
>> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
>> @@ -118,8 +118,6 @@ static void intel_mark_fb_busy(struct drm_device *dev,
>>                         continue;
>>
>>                 intel_increase_pllclock(dev, pipe);
>> -               if (ring && intel_fbc_enabled(dev))
>> -                       ring->fbc_dirty = true;
>>         }
>>  }
>>
>> @@ -160,6 +158,7 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
>>
>>         intel_psr_invalidate(dev, obj->frontbuffer_bits);
>>         intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits);
>> +       intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin);
>>  }
>>
>>  /**
>> @@ -187,16 +186,7 @@ void intel_frontbuffer_flush(struct drm_device *dev,
>>
>>         intel_edp_drrs_flush(dev, frontbuffer_bits);
>>         intel_psr_flush(dev, frontbuffer_bits);
>> -
>> -       /*
>> -        * FIXME: Unconditional fbc flushing here is a rather gross hack and
>> -        * needs to be reworked into a proper frontbuffer tracking scheme like
>> -        * psr employs.
>> -        */
>> -       if (dev_priv->fbc.need_sw_cache_clean) {
>> -               dev_priv->fbc.need_sw_cache_clean = false;
>> -               bdw_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN);
>> -       }
>> +       intel_fbc_flush(dev_priv, frontbuffer_bits);
>>  }
>>
>>  /**
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index d17e76d..fed6284 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -317,29 +317,6 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
>>         return 0;
>>  }
>>
>> -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
>> -{
>> -       int ret;
>> -
>> -       if (!ring->fbc_dirty)
>> -               return 0;
>> -
>> -       ret = intel_ring_begin(ring, 6);
>> -       if (ret)
>> -               return ret;
>> -       /* WaFbcNukeOn3DBlt:ivb/hsw */
>> -       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> -       intel_ring_emit(ring, MSG_FBC_REND_STATE);
>> -       intel_ring_emit(ring, value);
>> -       intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
>> -       intel_ring_emit(ring, MSG_FBC_REND_STATE);
>> -       intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
>> -       intel_ring_advance(ring);
>> -
>> -       ring->fbc_dirty = false;
>> -       return 0;
>> -}
>> -
>>  static int
>>  gen7_render_ring_flush(struct intel_engine_cs *ring,
>>                        u32 invalidate_domains, u32 flush_domains)
>> @@ -398,9 +375,6 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
>>         intel_ring_emit(ring, 0);
>>         intel_ring_advance(ring);
>>
>> -       if (!invalidate_domains && flush_domains)
>> -               return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
>> -
>>         return 0;
>>  }
>>
>> @@ -462,9 +436,6 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
>>         if (ret)
>>                 return ret;
>>
>> -       if (!invalidate_domains && flush_domains)
>> -               return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
>> -
>>         return 0;
>>  }
>>
>> @@ -2420,7 +2391,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>>                            u32 invalidate, u32 flush)
>>  {
>>         struct drm_device *dev = ring->dev;
>> -       struct drm_i915_private *dev_priv = dev->dev_private;
>>         uint32_t cmd;
>>         int ret;
>>
>> @@ -2429,7 +2399,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>>                 return ret;
>>
>>         cmd = MI_FLUSH_DW;
>> -       if (INTEL_INFO(ring->dev)->gen >= 8)
>> +       if (INTEL_INFO(dev)->gen >= 8)
>>                 cmd += 1;
>>
>>         /* We always require a command barrier so that subsequent
>> @@ -2449,7 +2419,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>>                 cmd |= MI_INVALIDATE_TLB;
>>         intel_ring_emit(ring, cmd);
>>         intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
>> -       if (INTEL_INFO(ring->dev)->gen >= 8) {
>> +       if (INTEL_INFO(dev)->gen >= 8) {
>>                 intel_ring_emit(ring, 0); /* upper addr */
>>                 intel_ring_emit(ring, 0); /* value */
>>         } else  {
>> @@ -2458,13 +2428,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>>         }
>>         intel_ring_advance(ring);
>>
>> -       if (!invalidate && flush) {
>> -               if (IS_GEN7(dev))
>> -                       return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
>> -               else if (IS_BROADWELL(dev))
>> -                       dev_priv->fbc.need_sw_cache_clean = true;
>> -       }
>> -
>>         return 0;
>>  }
>>
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index b6c484f..2ac2de2 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -267,7 +267,6 @@ struct  intel_engine_cs {
>>          */
>>         struct drm_i915_gem_request *outstanding_lazy_request;
>>         bool gpu_caches_dirty;
>> -       bool fbc_dirty;
>>
>>         wait_queue_head_t irq_queue;
>>
>> --
>> 2.1.4
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> Thanks,
>
> --
> Rodrigo Vivi
> Blog: http://blog.vivi.eng.br
Rodrigo Vivi March 4, 2015, 8:54 p.m. UTC | #3
On Wed, 2015-03-04 at 15:03 -0300, Paulo Zanoni wrote:
> 2015-03-03 21:57 GMT-03:00 Rodrigo Vivi <rodrigo.vivi@gmail.com>:

> > On Fri, Feb 13, 2015 at 11:23 AM, Paulo Zanoni <przanoni@gmail.com> wrote:

> >> From: Paulo Zanoni <paulo.r.zanoni@intel.com>

> >>

> >> Kill the blt/render tracking we currently have and use the frontbuffer

> >> tracking infrastructure.

> >>

> >> Don't enable things by default yet.

> >>

> >> v2: (Rodrigo) Fix small conflict on rebase and typo at subject.

> >> v3: (Paulo) Rebase on RENDER_CS change.

> >> v4: (Paulo) Rebase.

> >> v5: (Paulo) Simplify: flushes don't have origin (Daniel).

> >>             Also rebase due to patch order changes.

> >>

> >> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>

> >> ---

> >>  drivers/gpu/drm/i915/i915_drv.h          | 10 +---

> >>  drivers/gpu/drm/i915/intel_drv.h         |  6 ++-

> >>  drivers/gpu/drm/i915/intel_fbc.c         | 91 +++++++++++++++++++-------------

> >>  drivers/gpu/drm/i915/intel_frontbuffer.c | 14 +----

> >>  drivers/gpu/drm/i915/intel_ringbuffer.c  | 41 +-------------

> >>  drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 -

> >>  6 files changed, 65 insertions(+), 98 deletions(-)

> >>

> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

> >> index 30aaa8e..7680ca0 100644

> >> --- a/drivers/gpu/drm/i915/i915_drv.h

> >> +++ b/drivers/gpu/drm/i915/i915_drv.h

> >> @@ -783,6 +783,8 @@ struct i915_fbc {

> >>         unsigned long uncompressed_size;

> >>         unsigned threshold;

> >>         unsigned int fb_id;

> >> +       unsigned int possible_framebuffer_bits;

> >> +       unsigned int busy_bits;

> >

> > I'm not sure if I understood why you keep 2 variables here?

> 

> After Daniel's last suggestion, the possible_framebuffer_bits could

> probably be removed and left as a local variable at intel_fbc_validate

> now: it's just a matter of coding style. Some platforms support FBC on

> more than just pipe A, so this variable is used to simplify checks

> related to this.

> 

> The busy_bits was also part of Daniel's last request: we need to store

> which bits triggered intel_fbc_invalidate calls, and then check them

> when intel_fbc_flush is called.


Got it.
Thanks for explaining it.

> 

> > Is it because we keep enabling/disabling fbc with updates all over the code?

> > In this case it is ok we just need to kill it when we kill update_fbc...

> 

> I don't understand what you mean here. Please clarify.


I had missunderstood the reason of possible_frontbuffer bits, sorry.
But about killing update_fbc I believe that after frontbuffer rework is
in place we don't need to use that update_fbc function that disable and
enable fbc on the cases we knew it could fail. With frontbuffer bits we
could let it always enabled and kicking with frontbuffer bits.

> 

> >

> >>         struct intel_crtc *crtc;

> >>         int y;

> >>

> >> @@ -795,14 +797,6 @@ struct i915_fbc {

> >>          * possible. */

> >>         bool enabled;

> >>

> >> -       /* On gen8 some rings cannont perform fbc clean operation so for now

> >> -        * we are doing this on SW with mmio.

> >> -        * This variable works in the opposite information direction

> >> -        * of ring->fbc_dirty telling software on frontbuffer tracking

> >> -        * to perform the cache clean on sw side.

> >> -        */

> >> -       bool need_sw_cache_clean;

> >> -

> >>         struct intel_fbc_work {

> >>                 struct delayed_work work;

> >>                 struct drm_crtc *crtc;

> >> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h

> >> index 05d0a43f..571174f 100644

> >> --- a/drivers/gpu/drm/i915/intel_drv.h

> >> +++ b/drivers/gpu/drm/i915/intel_drv.h

> >> @@ -1091,7 +1091,11 @@ bool intel_fbc_enabled(struct drm_device *dev);

> >>  void intel_fbc_update(struct drm_device *dev);

> >>  void intel_fbc_init(struct drm_i915_private *dev_priv);

> >>  void intel_fbc_disable(struct drm_device *dev);

> >> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value);

> >> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,

> >> +                         unsigned int frontbuffer_bits,

> >> +                         enum fb_op_origin origin);

> >> +void intel_fbc_flush(struct drm_i915_private *dev_priv,

> >> +                    unsigned int frontbuffer_bits);

> >>

> >>  /* intel_hdmi.c */

> >>  void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port);

> >> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c

> >> index 618f7bd..9fcf446 100644

> >> --- a/drivers/gpu/drm/i915/intel_fbc.c

> >> +++ b/drivers/gpu/drm/i915/intel_fbc.c

> >> @@ -174,29 +174,10 @@ static bool g4x_fbc_enabled(struct drm_device *dev)

> >>         return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;

> >>  }

> >>

> >> -static void snb_fbc_blit_update(struct drm_device *dev)

> >> +static void intel_fbc_nuke(struct drm_i915_private *dev_priv)

> >>  {

> >> -       struct drm_i915_private *dev_priv = dev->dev_private;

> >> -       u32 blt_ecoskpd;

> >> -

> >> -       /* Make sure blitter notifies FBC of writes */

> >> -

> >> -       /* Blitter is part of Media powerwell on VLV. No impact of

> >> -        * his param in other platforms for now */

> >> -       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);

> >> -

> >> -       blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);

> >> -       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<

> >> -               GEN6_BLITTER_LOCK_SHIFT;

> >> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);

> >> -       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;

> >> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);

> >> -       blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<

> >> -                        GEN6_BLITTER_LOCK_SHIFT);

> >> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);

> >> -       POSTING_READ(GEN6_BLITTER_ECOSKPD);

> >> -

> >> -       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);

> >

> > Are you sure this continue working on snb? or should we fully remove

> > fbc support for snb and older?

> >

> > Anyway I believe this could be in a different patch.

> 

> Well, the whole idea of using the sofware-based frontbuffer tracking

> mechanism is to get rid of all the hardware-based stuff.


Hm, I still have a concern here. We would need to test on those
platforms to be certain. The reason for that is that HW tracking still
works so it can let the buffer in a stage that compression is unable to
start... same reason that we still need both lines below on BDW.

Anyway this wouldn't break anything and it this rework was a great
improvement so feel free to use

Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>


and we continue any other necessary improvement or fix on top.
> 

> >

> >> +       I915_WRITE(MSG_FBC_REND_STATE, FBC_REND_NUKE);

> >> +       POSTING_READ(MSG_FBC_REND_STATE);

> >>  }

> >>

> >>  static void ilk_fbc_enable(struct drm_crtc *crtc)

> >> @@ -239,9 +220,10 @@ static void ilk_fbc_enable(struct drm_crtc *crtc)

> >>                 I915_WRITE(SNB_DPFC_CTL_SA,

> >>                            SNB_CPU_FENCE_ENABLE | obj->fence_reg);

> >>                 I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);

> >> -               snb_fbc_blit_update(dev);

> >>         }

> >>

> >> +       intel_fbc_nuke(dev_priv);

> >> +

> >>         DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));

> >>  }

> >>

> >> @@ -320,7 +302,7 @@ static void gen7_fbc_enable(struct drm_crtc *crtc)

> >>                    SNB_CPU_FENCE_ENABLE | obj->fence_reg);

> >>         I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);

> >>

> >> -       snb_fbc_blit_update(dev);

> >> +       intel_fbc_nuke(dev_priv);

> >>

> >>         DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));

> >>  }

> >> @@ -340,19 +322,6 @@ bool intel_fbc_enabled(struct drm_device *dev)

> >>         return dev_priv->fbc.enabled;

> >>  }

> >>

> >> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value)

> >> -{

> >> -       struct drm_i915_private *dev_priv = dev->dev_private;

> >> -

> >> -       if (!IS_GEN8(dev))

> >> -               return;

> >> -

> >> -       if (!intel_fbc_enabled(dev))

> >> -               return;

> >> -

> >> -       I915_WRITE(MSG_FBC_REND_STATE, value);

> >> -}

> >> -

> >>  static void intel_fbc_work_fn(struct work_struct *__work)

> >>  {

> >>         struct intel_fbc_work *work =

> >> @@ -685,6 +654,44 @@ out_disable:

> >>         i915_gem_stolen_cleanup_compression(dev);

> >>  }

> >>

> >> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,

> >> +                         unsigned int frontbuffer_bits,

> >> +                         enum fb_op_origin origin)

> >> +{

> >> +       struct drm_device *dev = dev_priv->dev;

> >> +       unsigned int fbc_bits;

> >> +

> >> +       if (origin == ORIGIN_GTT)

> >> +               return;

> >> +

> >> +       if (dev_priv->fbc.enabled)

> >> +               fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);

> >> +       else if (dev_priv->fbc.fbc_work)

> >> +               fbc_bits = INTEL_FRONTBUFFER_PRIMARY(

> >> +                       to_intel_crtc(dev_priv->fbc.fbc_work->crtc)->pipe);

> >> +       else

> >> +               fbc_bits = dev_priv->fbc.possible_framebuffer_bits;

> >> +

> >> +       dev_priv->fbc.busy_bits |= (fbc_bits & frontbuffer_bits);

> >> +

> >> +       if (dev_priv->fbc.busy_bits)

> >> +               intel_fbc_disable(dev);

> >> +}

> >> +

> >> +void intel_fbc_flush(struct drm_i915_private *dev_priv,

> >> +                    unsigned int frontbuffer_bits)

> >> +{

> >> +       struct drm_device *dev = dev_priv->dev;

> >> +

> >> +       if (!dev_priv->fbc.busy_bits)

> >> +               return;

> >> +

> >> +       dev_priv->fbc.busy_bits &= ~frontbuffer_bits;

> >> +

> >> +       if (!dev_priv->fbc.busy_bits)

> >> +               intel_fbc_update(dev);

> >> +}

> >> +

> >>  /**

> >>   * intel_fbc_init - Initialize FBC

> >>   * @dev_priv: the i915 device

> >> @@ -693,12 +700,22 @@ out_disable:

> >>   */

> >>  void intel_fbc_init(struct drm_i915_private *dev_priv)

> >>  {

> >> +       enum pipe pipe;

> >> +

> >>         if (!HAS_FBC(dev_priv)) {

> >>                 dev_priv->fbc.enabled = false;

> >>                 dev_priv->fbc.no_fbc_reason = FBC_UNSUPPORTED;

> >>                 return;

> >>         }

> >>

> >> +       for_each_pipe(dev_priv, pipe) {

> >> +               dev_priv->fbc.possible_framebuffer_bits |=

> >> +                               INTEL_FRONTBUFFER_PRIMARY(pipe);

> >> +

> >> +               if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)

> >> +                       break;

> >> +       }

> >> +

> >>         if (INTEL_INFO(dev_priv)->gen >= 7) {

> >>                 dev_priv->display.fbc_enabled = ilk_fbc_enabled;

> >>                 dev_priv->display.enable_fbc = gen7_fbc_enable;

> >> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c

> >> index 5da73f0..0a1bac8 100644

> >> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c

> >> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c

> >> @@ -118,8 +118,6 @@ static void intel_mark_fb_busy(struct drm_device *dev,

> >>                         continue;

> >>

> >>                 intel_increase_pllclock(dev, pipe);

> >> -               if (ring && intel_fbc_enabled(dev))

> >> -                       ring->fbc_dirty = true;

> >>         }

> >>  }

> >>

> >> @@ -160,6 +158,7 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,

> >>

> >>         intel_psr_invalidate(dev, obj->frontbuffer_bits);

> >>         intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits);

> >> +       intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin);

> >>  }

> >>

> >>  /**

> >> @@ -187,16 +186,7 @@ void intel_frontbuffer_flush(struct drm_device *dev,

> >>

> >>         intel_edp_drrs_flush(dev, frontbuffer_bits);

> >>         intel_psr_flush(dev, frontbuffer_bits);

> >> -

> >> -       /*

> >> -        * FIXME: Unconditional fbc flushing here is a rather gross hack and

> >> -        * needs to be reworked into a proper frontbuffer tracking scheme like

> >> -        * psr employs.

> >> -        */

> >> -       if (dev_priv->fbc.need_sw_cache_clean) {

> >> -               dev_priv->fbc.need_sw_cache_clean = false;

> >> -               bdw_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN);

> >> -       }

> >> +       intel_fbc_flush(dev_priv, frontbuffer_bits);

> >>  }

> >>

> >>  /**

> >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c

> >> index d17e76d..fed6284 100644

> >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c

> >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c

> >> @@ -317,29 +317,6 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)

> >>         return 0;

> >>  }

> >>

> >> -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)

> >> -{

> >> -       int ret;

> >> -

> >> -       if (!ring->fbc_dirty)

> >> -               return 0;

> >> -

> >> -       ret = intel_ring_begin(ring, 6);

> >> -       if (ret)

> >> -               return ret;

> >> -       /* WaFbcNukeOn3DBlt:ivb/hsw */

> >> -       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));

> >> -       intel_ring_emit(ring, MSG_FBC_REND_STATE);

> >> -       intel_ring_emit(ring, value);

> >> -       intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);

> >> -       intel_ring_emit(ring, MSG_FBC_REND_STATE);

> >> -       intel_ring_emit(ring, ring->scratch.gtt_offset + 256);

> >> -       intel_ring_advance(ring);

> >> -

> >> -       ring->fbc_dirty = false;

> >> -       return 0;

> >> -}

> >> -

> >>  static int

> >>  gen7_render_ring_flush(struct intel_engine_cs *ring,

> >>                        u32 invalidate_domains, u32 flush_domains)

> >> @@ -398,9 +375,6 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,

> >>         intel_ring_emit(ring, 0);

> >>         intel_ring_advance(ring);

> >>

> >> -       if (!invalidate_domains && flush_domains)

> >> -               return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);

> >> -

> >>         return 0;

> >>  }

> >>

> >> @@ -462,9 +436,6 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,

> >>         if (ret)

> >>                 return ret;

> >>

> >> -       if (!invalidate_domains && flush_domains)

> >> -               return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);

> >> -

> >>         return 0;

> >>  }

> >>

> >> @@ -2420,7 +2391,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,

> >>                            u32 invalidate, u32 flush)

> >>  {

> >>         struct drm_device *dev = ring->dev;

> >> -       struct drm_i915_private *dev_priv = dev->dev_private;

> >>         uint32_t cmd;

> >>         int ret;

> >>

> >> @@ -2429,7 +2399,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,

> >>                 return ret;

> >>

> >>         cmd = MI_FLUSH_DW;

> >> -       if (INTEL_INFO(ring->dev)->gen >= 8)

> >> +       if (INTEL_INFO(dev)->gen >= 8)

> >>                 cmd += 1;

> >>

> >>         /* We always require a command barrier so that subsequent

> >> @@ -2449,7 +2419,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,

> >>                 cmd |= MI_INVALIDATE_TLB;

> >>         intel_ring_emit(ring, cmd);

> >>         intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);

> >> -       if (INTEL_INFO(ring->dev)->gen >= 8) {

> >> +       if (INTEL_INFO(dev)->gen >= 8) {

> >>                 intel_ring_emit(ring, 0); /* upper addr */

> >>                 intel_ring_emit(ring, 0); /* value */

> >>         } else  {

> >> @@ -2458,13 +2428,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,

> >>         }

> >>         intel_ring_advance(ring);

> >>

> >> -       if (!invalidate && flush) {

> >> -               if (IS_GEN7(dev))

> >> -                       return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);

> >> -               else if (IS_BROADWELL(dev))

> >> -                       dev_priv->fbc.need_sw_cache_clean = true;

> >> -       }

> >> -

> >>         return 0;

> >>  }

> >>

> >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h

> >> index b6c484f..2ac2de2 100644

> >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h

> >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h

> >> @@ -267,7 +267,6 @@ struct  intel_engine_cs {

> >>          */

> >>         struct drm_i915_gem_request *outstanding_lazy_request;

> >>         bool gpu_caches_dirty;

> >> -       bool fbc_dirty;

> >>

> >>         wait_queue_head_t irq_queue;

> >>

> >> --

> >> 2.1.4

> >>

> >> _______________________________________________

> >> Intel-gfx mailing list

> >> Intel-gfx@lists.freedesktop.org

> >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

> >

> > Thanks,

> >

> > --

> > Rodrigo Vivi

> > Blog: http://blog.vivi.eng.br

> 

> 

>
Daniel Vetter March 5, 2015, 11:52 a.m. UTC | #4
On Wed, Mar 04, 2015 at 08:54:21PM +0000, Vivi, Rodrigo wrote:
> On Wed, 2015-03-04 at 15:03 -0300, Paulo Zanoni wrote:
> > 2015-03-03 21:57 GMT-03:00 Rodrigo Vivi <rodrigo.vivi@gmail.com>:
> > > On Fri, Feb 13, 2015 at 11:23 AM, Paulo Zanoni <przanoni@gmail.com> wrote:
> > >> From: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > >>
> > >> Kill the blt/render tracking we currently have and use the frontbuffer
> > >> tracking infrastructure.
> > >>
> > >> Don't enable things by default yet.
> > >>
> > >> v2: (Rodrigo) Fix small conflict on rebase and typo at subject.
> > >> v3: (Paulo) Rebase on RENDER_CS change.
> > >> v4: (Paulo) Rebase.
> > >> v5: (Paulo) Simplify: flushes don't have origin (Daniel).
> > >>             Also rebase due to patch order changes.
> > >>
> > >> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
> > >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > >> ---
> > >>  drivers/gpu/drm/i915/i915_drv.h          | 10 +---
> > >>  drivers/gpu/drm/i915/intel_drv.h         |  6 ++-
> > >>  drivers/gpu/drm/i915/intel_fbc.c         | 91 +++++++++++++++++++-------------
> > >>  drivers/gpu/drm/i915/intel_frontbuffer.c | 14 +----
> > >>  drivers/gpu/drm/i915/intel_ringbuffer.c  | 41 +-------------
> > >>  drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 -
> > >>  6 files changed, 65 insertions(+), 98 deletions(-)
> > >>
> > >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > >> index 30aaa8e..7680ca0 100644
> > >> --- a/drivers/gpu/drm/i915/i915_drv.h
> > >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> > >> @@ -783,6 +783,8 @@ struct i915_fbc {
> > >>         unsigned long uncompressed_size;
> > >>         unsigned threshold;
> > >>         unsigned int fb_id;
> > >> +       unsigned int possible_framebuffer_bits;
> > >> +       unsigned int busy_bits;
> > >
> > > I'm not sure if I understood why you keep 2 variables here?
> > 
> > After Daniel's last suggestion, the possible_framebuffer_bits could
> > probably be removed and left as a local variable at intel_fbc_validate
> > now: it's just a matter of coding style. Some platforms support FBC on
> > more than just pipe A, so this variable is used to simplify checks
> > related to this.
> > 
> > The busy_bits was also part of Daniel's last request: we need to store
> > which bits triggered intel_fbc_invalidate calls, and then check them
> > when intel_fbc_flush is called.
> 
> Got it.
> Thanks for explaining it.
> 
> > 
> > > Is it because we keep enabling/disabling fbc with updates all over the code?
> > > In this case it is ok we just need to kill it when we kill update_fbc...
> > 
> > I don't understand what you mean here. Please clarify.
> 
> I had missunderstood the reason of possible_frontbuffer bits, sorry.
> But about killing update_fbc I believe that after frontbuffer rework is
> in place we don't need to use that update_fbc function that disable and
> enable fbc on the cases we knew it could fail. With frontbuffer bits we
> could let it always enabled and kicking with frontbuffer bits.
> 
> > 
> > >
> > >>         struct intel_crtc *crtc;
> > >>         int y;
> > >>
> > >> @@ -795,14 +797,6 @@ struct i915_fbc {
> > >>          * possible. */
> > >>         bool enabled;
> > >>
> > >> -       /* On gen8 some rings cannont perform fbc clean operation so for now
> > >> -        * we are doing this on SW with mmio.
> > >> -        * This variable works in the opposite information direction
> > >> -        * of ring->fbc_dirty telling software on frontbuffer tracking
> > >> -        * to perform the cache clean on sw side.
> > >> -        */
> > >> -       bool need_sw_cache_clean;
> > >> -
> > >>         struct intel_fbc_work {
> > >>                 struct delayed_work work;
> > >>                 struct drm_crtc *crtc;
> > >> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> > >> index 05d0a43f..571174f 100644
> > >> --- a/drivers/gpu/drm/i915/intel_drv.h
> > >> +++ b/drivers/gpu/drm/i915/intel_drv.h
> > >> @@ -1091,7 +1091,11 @@ bool intel_fbc_enabled(struct drm_device *dev);
> > >>  void intel_fbc_update(struct drm_device *dev);
> > >>  void intel_fbc_init(struct drm_i915_private *dev_priv);
> > >>  void intel_fbc_disable(struct drm_device *dev);
> > >> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value);
> > >> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
> > >> +                         unsigned int frontbuffer_bits,
> > >> +                         enum fb_op_origin origin);
> > >> +void intel_fbc_flush(struct drm_i915_private *dev_priv,
> > >> +                    unsigned int frontbuffer_bits);
> > >>
> > >>  /* intel_hdmi.c */
> > >>  void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port);
> > >> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
> > >> index 618f7bd..9fcf446 100644
> > >> --- a/drivers/gpu/drm/i915/intel_fbc.c
> > >> +++ b/drivers/gpu/drm/i915/intel_fbc.c
> > >> @@ -174,29 +174,10 @@ static bool g4x_fbc_enabled(struct drm_device *dev)
> > >>         return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
> > >>  }
> > >>
> > >> -static void snb_fbc_blit_update(struct drm_device *dev)
> > >> +static void intel_fbc_nuke(struct drm_i915_private *dev_priv)
> > >>  {
> > >> -       struct drm_i915_private *dev_priv = dev->dev_private;
> > >> -       u32 blt_ecoskpd;
> > >> -
> > >> -       /* Make sure blitter notifies FBC of writes */
> > >> -
> > >> -       /* Blitter is part of Media powerwell on VLV. No impact of
> > >> -        * his param in other platforms for now */
> > >> -       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
> > >> -
> > >> -       blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
> > >> -       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
> > >> -               GEN6_BLITTER_LOCK_SHIFT;
> > >> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
> > >> -       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
> > >> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
> > >> -       blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
> > >> -                        GEN6_BLITTER_LOCK_SHIFT);
> > >> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
> > >> -       POSTING_READ(GEN6_BLITTER_ECOSKPD);
> > >> -
> > >> -       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
> > >
> > > Are you sure this continue working on snb? or should we fully remove
> > > fbc support for snb and older?
> > >
> > > Anyway I believe this could be in a different patch.
> > 
> > Well, the whole idea of using the sofware-based frontbuffer tracking
> > mechanism is to get rid of all the hardware-based stuff.
> 
> Hm, I still have a concern here. We would need to test on those
> platforms to be certain. The reason for that is that HW tracking still
> works so it can let the buffer in a stage that compression is unable to
> start... same reason that we still need both lines below on BDW.

Afaik the fence based tracking only invalidates specific lines. Everything
else shouldn't be required any more with this patch.

> Anyway this wouldn't break anything and it this rework was a great
> improvement so feel free to use
> 
> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>

Queued for -next, thanks for the patch.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 30aaa8e..7680ca0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -783,6 +783,8 @@  struct i915_fbc {
 	unsigned long uncompressed_size;
 	unsigned threshold;
 	unsigned int fb_id;
+	unsigned int possible_framebuffer_bits;
+	unsigned int busy_bits;
 	struct intel_crtc *crtc;
 	int y;
 
@@ -795,14 +797,6 @@  struct i915_fbc {
 	 * possible. */
 	bool enabled;
 
-	/* On gen8 some rings cannont perform fbc clean operation so for now
-	 * we are doing this on SW with mmio.
-	 * This variable works in the opposite information direction
-	 * of ring->fbc_dirty telling software on frontbuffer tracking
-	 * to perform the cache clean on sw side.
-	 */
-	bool need_sw_cache_clean;
-
 	struct intel_fbc_work {
 		struct delayed_work work;
 		struct drm_crtc *crtc;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 05d0a43f..571174f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1091,7 +1091,11 @@  bool intel_fbc_enabled(struct drm_device *dev);
 void intel_fbc_update(struct drm_device *dev);
 void intel_fbc_init(struct drm_i915_private *dev_priv);
 void intel_fbc_disable(struct drm_device *dev);
-void bdw_fbc_sw_flush(struct drm_device *dev, u32 value);
+void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
+			  unsigned int frontbuffer_bits,
+			  enum fb_op_origin origin);
+void intel_fbc_flush(struct drm_i915_private *dev_priv,
+		     unsigned int frontbuffer_bits);
 
 /* intel_hdmi.c */
 void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port);
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 618f7bd..9fcf446 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -174,29 +174,10 @@  static bool g4x_fbc_enabled(struct drm_device *dev)
 	return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
 }
 
-static void snb_fbc_blit_update(struct drm_device *dev)
+static void intel_fbc_nuke(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 blt_ecoskpd;
-
-	/* Make sure blitter notifies FBC of writes */
-
-	/* Blitter is part of Media powerwell on VLV. No impact of
-	 * his param in other platforms for now */
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
-
-	blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
-	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
-		GEN6_BLITTER_LOCK_SHIFT;
-	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
-	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
-	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
-	blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
-			 GEN6_BLITTER_LOCK_SHIFT);
-	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
-	POSTING_READ(GEN6_BLITTER_ECOSKPD);
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
+	I915_WRITE(MSG_FBC_REND_STATE, FBC_REND_NUKE);
+	POSTING_READ(MSG_FBC_REND_STATE);
 }
 
 static void ilk_fbc_enable(struct drm_crtc *crtc)
@@ -239,9 +220,10 @@  static void ilk_fbc_enable(struct drm_crtc *crtc)
 		I915_WRITE(SNB_DPFC_CTL_SA,
 			   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
 		I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
-		snb_fbc_blit_update(dev);
 	}
 
+	intel_fbc_nuke(dev_priv);
+
 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
 }
 
@@ -320,7 +302,7 @@  static void gen7_fbc_enable(struct drm_crtc *crtc)
 		   SNB_CPU_FENCE_ENABLE | obj->fence_reg);
 	I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
 
-	snb_fbc_blit_update(dev);
+	intel_fbc_nuke(dev_priv);
 
 	DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
 }
@@ -340,19 +322,6 @@  bool intel_fbc_enabled(struct drm_device *dev)
 	return dev_priv->fbc.enabled;
 }
 
-void bdw_fbc_sw_flush(struct drm_device *dev, u32 value)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (!IS_GEN8(dev))
-		return;
-
-	if (!intel_fbc_enabled(dev))
-		return;
-
-	I915_WRITE(MSG_FBC_REND_STATE, value);
-}
-
 static void intel_fbc_work_fn(struct work_struct *__work)
 {
 	struct intel_fbc_work *work =
@@ -685,6 +654,44 @@  out_disable:
 	i915_gem_stolen_cleanup_compression(dev);
 }
 
+void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
+			  unsigned int frontbuffer_bits,
+			  enum fb_op_origin origin)
+{
+	struct drm_device *dev = dev_priv->dev;
+	unsigned int fbc_bits;
+
+	if (origin == ORIGIN_GTT)
+		return;
+
+	if (dev_priv->fbc.enabled)
+		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);
+	else if (dev_priv->fbc.fbc_work)
+		fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
+			to_intel_crtc(dev_priv->fbc.fbc_work->crtc)->pipe);
+	else
+		fbc_bits = dev_priv->fbc.possible_framebuffer_bits;
+
+	dev_priv->fbc.busy_bits |= (fbc_bits & frontbuffer_bits);
+
+	if (dev_priv->fbc.busy_bits)
+		intel_fbc_disable(dev);
+}
+
+void intel_fbc_flush(struct drm_i915_private *dev_priv,
+		     unsigned int frontbuffer_bits)
+{
+	struct drm_device *dev = dev_priv->dev;
+
+	if (!dev_priv->fbc.busy_bits)
+		return;
+
+	dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
+
+	if (!dev_priv->fbc.busy_bits)
+		intel_fbc_update(dev);
+}
+
 /**
  * intel_fbc_init - Initialize FBC
  * @dev_priv: the i915 device
@@ -693,12 +700,22 @@  out_disable:
  */
 void intel_fbc_init(struct drm_i915_private *dev_priv)
 {
+	enum pipe pipe;
+
 	if (!HAS_FBC(dev_priv)) {
 		dev_priv->fbc.enabled = false;
 		dev_priv->fbc.no_fbc_reason = FBC_UNSUPPORTED;
 		return;
 	}
 
+	for_each_pipe(dev_priv, pipe) {
+		dev_priv->fbc.possible_framebuffer_bits |=
+				INTEL_FRONTBUFFER_PRIMARY(pipe);
+
+		if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
+			break;
+	}
+
 	if (INTEL_INFO(dev_priv)->gen >= 7) {
 		dev_priv->display.fbc_enabled = ilk_fbc_enabled;
 		dev_priv->display.enable_fbc = gen7_fbc_enable;
diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
index 5da73f0..0a1bac8 100644
--- a/drivers/gpu/drm/i915/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
@@ -118,8 +118,6 @@  static void intel_mark_fb_busy(struct drm_device *dev,
 			continue;
 
 		intel_increase_pllclock(dev, pipe);
-		if (ring && intel_fbc_enabled(dev))
-			ring->fbc_dirty = true;
 	}
 }
 
@@ -160,6 +158,7 @@  void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
 
 	intel_psr_invalidate(dev, obj->frontbuffer_bits);
 	intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits);
+	intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin);
 }
 
 /**
@@ -187,16 +186,7 @@  void intel_frontbuffer_flush(struct drm_device *dev,
 
 	intel_edp_drrs_flush(dev, frontbuffer_bits);
 	intel_psr_flush(dev, frontbuffer_bits);
-
-	/*
-	 * FIXME: Unconditional fbc flushing here is a rather gross hack and
-	 * needs to be reworked into a proper frontbuffer tracking scheme like
-	 * psr employs.
-	 */
-	if (dev_priv->fbc.need_sw_cache_clean) {
-		dev_priv->fbc.need_sw_cache_clean = false;
-		bdw_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN);
-	}
+	intel_fbc_flush(dev_priv, frontbuffer_bits);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d17e76d..fed6284 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -317,29 +317,6 @@  gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
 	return 0;
 }
 
-static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
-{
-	int ret;
-
-	if (!ring->fbc_dirty)
-		return 0;
-
-	ret = intel_ring_begin(ring, 6);
-	if (ret)
-		return ret;
-	/* WaFbcNukeOn3DBlt:ivb/hsw */
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, MSG_FBC_REND_STATE);
-	intel_ring_emit(ring, value);
-	intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
-	intel_ring_emit(ring, MSG_FBC_REND_STATE);
-	intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
-	intel_ring_advance(ring);
-
-	ring->fbc_dirty = false;
-	return 0;
-}
-
 static int
 gen7_render_ring_flush(struct intel_engine_cs *ring,
 		       u32 invalidate_domains, u32 flush_domains)
@@ -398,9 +375,6 @@  gen7_render_ring_flush(struct intel_engine_cs *ring,
 	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
 
-	if (!invalidate_domains && flush_domains)
-		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
-
 	return 0;
 }
 
@@ -462,9 +436,6 @@  gen8_render_ring_flush(struct intel_engine_cs *ring,
 	if (ret)
 		return ret;
 
-	if (!invalidate_domains && flush_domains)
-		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
-
 	return 0;
 }
 
@@ -2420,7 +2391,6 @@  static int gen6_ring_flush(struct intel_engine_cs *ring,
 			   u32 invalidate, u32 flush)
 {
 	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t cmd;
 	int ret;
 
@@ -2429,7 +2399,7 @@  static int gen6_ring_flush(struct intel_engine_cs *ring,
 		return ret;
 
 	cmd = MI_FLUSH_DW;
-	if (INTEL_INFO(ring->dev)->gen >= 8)
+	if (INTEL_INFO(dev)->gen >= 8)
 		cmd += 1;
 
 	/* We always require a command barrier so that subsequent
@@ -2449,7 +2419,7 @@  static int gen6_ring_flush(struct intel_engine_cs *ring,
 		cmd |= MI_INVALIDATE_TLB;
 	intel_ring_emit(ring, cmd);
 	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
-	if (INTEL_INFO(ring->dev)->gen >= 8) {
+	if (INTEL_INFO(dev)->gen >= 8) {
 		intel_ring_emit(ring, 0); /* upper addr */
 		intel_ring_emit(ring, 0); /* value */
 	} else  {
@@ -2458,13 +2428,6 @@  static int gen6_ring_flush(struct intel_engine_cs *ring,
 	}
 	intel_ring_advance(ring);
 
-	if (!invalidate && flush) {
-		if (IS_GEN7(dev))
-			return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
-		else if (IS_BROADWELL(dev))
-			dev_priv->fbc.need_sw_cache_clean = true;
-	}
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b6c484f..2ac2de2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -267,7 +267,6 @@  struct  intel_engine_cs {
 	 */
 	struct drm_i915_gem_request *outstanding_lazy_request;
 	bool gpu_caches_dirty;
-	bool fbc_dirty;
 
 	wait_queue_head_t irq_queue;