diff mbox

[v2,1/2] drm/i915: Initialize bdw workarounds in logical ring mode too

Message ID 1411560132-9086-1-git-send-email-michel.thierry@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michel Thierry Sept. 24, 2014, 12:02 p.m. UTC
Following the legacy ring submission example, update the
ring->init_context() hook to support the execlist submission mode.

Workarounds are defined in bdw_emit_workarounds(), but the emit
now depends on the ring submission mode.

v2: Updated after "Cleanup pre prod workarounds"

For: VIZ-4092
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
 drivers/gpu/drm/i915/intel_lrc.c        | 66 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_ringbuffer.c | 75 +++++++++++++++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++++-
 4 files changed, 120 insertions(+), 34 deletions(-)

Comments

Rodrigo Vivi Nov. 4, 2014, 7:23 p.m. UTC | #1
These patches got listed to -collector but got a huge conflict. If it
is still relevant please rebase it.

Also my bikeshed is to findo better names to help on differentiate
them at least.

On Wed, Sep 24, 2014 at 5:02 AM, Michel Thierry
<michel.thierry@intel.com> wrote:
> Following the legacy ring submission example, update the
> ring->init_context() hook to support the execlist submission mode.
>
> Workarounds are defined in bdw_emit_workarounds(), but the emit
> now depends on the ring submission mode.
>
> v2: Updated after "Cleanup pre prod workarounds"
>
> For: VIZ-4092
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
>  drivers/gpu/drm/i915/intel_lrc.c        | 66 +++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 75 +++++++++++++++++++--------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++++-
>  4 files changed, 120 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 7b73b36..d1ed21a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -657,7 +657,7 @@ done:
>
>         if (uninitialized) {
>                 if (ring->init_context) {
> -                       ret = ring->init_context(ring);
> +                       ret = ring->init_context(ring->buffer);
>                         if (ret)
>                                 DRM_ERROR("ring init context: %d\n", ret);
>                 }
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index d64d518..a0aa3f0 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1020,6 +1020,62 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
>         return 0;
>  }
>
> +static inline void intel_logical_ring_emit_wa(struct intel_ringbuffer *ringbuf,
> +                                      u32 addr, u32 value)
> +{
> +       struct intel_engine_cs *ring = ringbuf->ring;
> +       struct drm_device *dev = ring->dev;
> +       struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +       if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
> +               return;
> +
> +       intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
> +       intel_logical_ring_emit(ringbuf, addr);
> +       intel_logical_ring_emit(ringbuf, value);
> +
> +       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
> +       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
> +       /* value is updated with the status of remaining bits of this
> +        * register when it is read from debugfs file
> +        */
> +       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
> +       dev_priv->num_wa_regs++;
> +}
> +
> +static int bdw_init_logical_workarounds(struct intel_ringbuffer *ringbuf)
> +{
> +       int ret;
> +       struct intel_engine_cs *ring = ringbuf->ring;
> +       struct drm_device *dev = ring->dev;
> +       struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +       /*
> +        * workarounds applied in this fn are part of register state context,
> +        * they need to be re-initialized followed by gpu reset, suspend/resume,
> +        * module reload.
> +        */
> +       dev_priv->num_wa_regs = 0;
> +       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> +
> +       /*
> +        * update the number of dwords required based on the
> +        * actual number of workarounds applied
> +        */
> +       ret = intel_logical_ring_begin(ringbuf, BDW_WA_DWORDS_SIZE);
> +       if (ret)
> +               return ret;
> +
> +       bdw_emit_workarounds(ringbuf);
> +
> +       intel_logical_ring_advance(ringbuf);
> +
> +       DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
> +                        dev_priv->num_wa_regs);
> +
> +       return 0;
> +}
> +
>  static int gen8_init_common_ring(struct intel_engine_cs *ring)
>  {
>         struct drm_device *dev = ring->dev;
> @@ -1315,6 +1371,10 @@ static int logical_render_ring_init(struct drm_device *dev)
>         if (HAS_L3_DPF(dev))
>                 ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
>
> +       if (IS_BROADWELL(dev))
> +               ring->init_context = bdw_init_logical_workarounds;
> +       ring->emit_wa = intel_logical_ring_emit_wa;
> +
>         ring->init = gen8_init_render_ring;
>         ring->cleanup = intel_fini_pipe_control;
>         ring->get_seqno = gen8_get_seqno;
> @@ -1802,6 +1862,12 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>         }
>
>         if (ring->id == RCS && !ctx->rcs_initialized) {
> +               if (ring->init_context) {
> +                       ret = ring->init_context(ringbuf);
> +                       if (ret)
> +                               DRM_ERROR("ring init context: %d\n", ret);
> +               }
> +
>                 ret = intel_lr_context_render_state_init(ring, ctx);
>                 if (ret) {
>                         DRM_ERROR("Init render state failed: %d\n", ret);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 395f926..e6ac913 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -677,9 +677,10 @@ err:
>         return ret;
>  }
>
> -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
> +static inline void intel_ring_emit_wa(struct intel_ringbuffer *ringbuf,
>                                        u32 addr, u32 value)
>  {
> +       struct intel_engine_cs *ring = ringbuf->ring;
>         struct drm_device *dev = ring->dev;
>         struct drm_i915_private *dev_priv = dev->dev_private;
>
> @@ -701,51 +702,33 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
>         return;
>  }
>
> -static int bdw_init_workarounds(struct intel_engine_cs *ring)
> +void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf)
>  {
> -       int ret;
> -       struct drm_device *dev = ring->dev;
> -       struct drm_i915_private *dev_priv = dev->dev_private;
> -
> -       /*
> -        * workarounds applied in this fn are part of register state context,
> -        * they need to be re-initialized followed by gpu reset, suspend/resume,
> -        * module reload.
> -        */
> -       dev_priv->num_wa_regs = 0;
> -       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> -
> -       /*
> -        * update the number of dwords required based on the
> -        * actual number of workarounds applied
> -        */
> -       ret = intel_ring_begin(ring, 18);
> -       if (ret)
> -               return ret;
> +       struct intel_engine_cs *ring = ringbuf->ring;
>
>         /* WaDisablePartialInstShootdown:bdw */
>         /* WaDisableThreadStallDopClockGating:bdw */
>         /* FIXME: Unclear whether we really need this on production bdw. */
> -       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> +       ring->emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>                            _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
>                                              | STALL_DOP_GATING_DISABLE));
>
>         /* WaDisableDopClockGating:bdw May not be needed for production */
> -       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
> +       ring->emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
>                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>
> -       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
> +       ring->emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
>                            _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>
>         /* Use Force Non-Coherent whenever executing a 3D context. This is a
>          * workaround for for a possible hang in the unlikely event a TLB
>          * invalidation occurs during a PSD flush.
>          */
> -       intel_ring_emit_wa(ring, HDC_CHICKEN0,
> +       ring->emit_wa(ringbuf, HDC_CHICKEN0,
>                            _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
>
>         /* Wa4x4STCOptimizationDisable:bdw */
> -       intel_ring_emit_wa(ring, CACHE_MODE_1,
> +       ring->emit_wa(ringbuf, CACHE_MODE_1,
>                            _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
>
>         /*
> @@ -756,8 +739,34 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
>          * disable bit, which we don't touch here, but it's good
>          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>          */
> -       intel_ring_emit_wa(ring, GEN7_GT_MODE,
> +       ring->emit_wa(ringbuf, GEN7_GT_MODE,
>                            GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
> +}
> +
> +static int bdw_init_workarounds(struct intel_ringbuffer *ringbuf)
> +{
> +       int ret;
> +       struct intel_engine_cs *ring = ringbuf->ring;
> +       struct drm_device *dev = ring->dev;
> +       struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +       /*
> +        * workarounds applied in this fn are part of register state context,
> +        * they need to be re-initialized followed by gpu reset, suspend/resume,
> +        * module reload.
> +        */
> +       dev_priv->num_wa_regs = 0;
> +       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
> +
> +       /*
> +        * update the number of dwords required based on the
> +        * actual number of workarounds applied
> +        */
> +       ret = intel_ring_begin(ring, BDW_WA_DWORDS_SIZE);
> +       if (ret)
> +               return ret;
> +
> +       bdw_emit_workarounds(ringbuf);
>
>         intel_ring_advance(ring);
>
> @@ -767,9 +776,10 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
>         return 0;
>  }
>
> -static int chv_init_workarounds(struct intel_engine_cs *ring)
> +static int chv_init_workarounds(struct intel_ringbuffer *ringbuf)
>  {
>         int ret;
> +       struct intel_engine_cs *ring = ringbuf->ring;
>         struct drm_device *dev = ring->dev;
>         struct drm_i915_private *dev_priv = dev->dev_private;
>
> @@ -786,19 +796,19 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
>                 return ret;
>
>         /* WaDisablePartialInstShootdown:chv */
> -       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> +       intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>                            _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
>
>         /* WaDisableThreadStallDopClockGating:chv */
> -       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
> +       intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>                            _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
>
>         /* WaDisableDopClockGating:chv (pre-production hw) */
> -       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
> +       intel_ring_emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
>                            _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>
>         /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
> -       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
> +       intel_ring_emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
>                            _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>
>         intel_ring_advance(ring);
> @@ -2310,6 +2320,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>                         ring->init_context = chv_init_workarounds;
>                 else
>                         ring->init_context = bdw_init_workarounds;
> +               ring->emit_wa = intel_ring_emit_wa;
>                 ring->add_request = gen6_add_request;
>                 ring->flush = gen8_render_ring_flush;
>                 ring->irq_get = gen8_ring_get_irq;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 07f66d4..417aa09 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -12,6 +12,11 @@
>   */
>  #define CACHELINE_BYTES 64
>
> +/* Number of dwords required based on the
> + * actual number of workarounds applied
> + */
> +#define BDW_WA_DWORDS_SIZE 18
> +
>  /*
>   * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
>   * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
> @@ -148,7 +153,10 @@ struct  intel_engine_cs {
>
>         int             (*init)(struct intel_engine_cs *ring);
>
> -       int             (*init_context)(struct intel_engine_cs *ring);
> +       int             (*init_context)(struct intel_ringbuffer *ringbuf);
> +
> +       void    (*emit_wa)(struct intel_ringbuffer *ringbuf,
> +                      u32 addr, u32 value);
>
>         void            (*write_tail)(struct intel_engine_cs *ring,
>                                       u32 value);
> @@ -427,6 +435,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev);
>
>  u64 intel_ring_get_active_head(struct intel_engine_cs *ring);
>  void intel_ring_setup_status_page(struct intel_engine_cs *ring);
> +void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf);
>
>  static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>  {
> --
> 2.0.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
arun.siluvery@linux.intel.com Nov. 5, 2014, 9:54 a.m. UTC | #2
On 04/11/2014 19:23, Rodrigo Vivi wrote:
> These patches got listed to -collector but got a huge conflict. If it
> is still relevant please rebase it.
>
This patch is currently not relevant, rebased version is already sent to 
the list for review.

https://patchwork.kernel.org/patch/5178771/

regards
Arun

> Also my bikeshed is to findo better names to help on differentiate
> them at least.
>
> On Wed, Sep 24, 2014 at 5:02 AM, Michel Thierry
> <michel.thierry@intel.com> wrote:
>> Following the legacy ring submission example, update the
>> ring->init_context() hook to support the execlist submission mode.
>>
>> Workarounds are defined in bdw_emit_workarounds(), but the emit
>> now depends on the ring submission mode.
>>
>> v2: Updated after "Cleanup pre prod workarounds"
>>
>> For: VIZ-4092
>> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
>>   drivers/gpu/drm/i915/intel_lrc.c        | 66 +++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 75 +++++++++++++++++++--------------
>>   drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++++-
>>   4 files changed, 120 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 7b73b36..d1ed21a 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -657,7 +657,7 @@ done:
>>
>>          if (uninitialized) {
>>                  if (ring->init_context) {
>> -                       ret = ring->init_context(ring);
>> +                       ret = ring->init_context(ring->buffer);
>>                          if (ret)
>>                                  DRM_ERROR("ring init context: %d\n", ret);
>>                  }
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index d64d518..a0aa3f0 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -1020,6 +1020,62 @@ int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
>>          return 0;
>>   }
>>
>> +static inline void intel_logical_ring_emit_wa(struct intel_ringbuffer *ringbuf,
>> +                                      u32 addr, u32 value)
>> +{
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>> +       struct drm_device *dev = ring->dev;
>> +       struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +       if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
>> +               return;
>> +
>> +       intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
>> +       intel_logical_ring_emit(ringbuf, addr);
>> +       intel_logical_ring_emit(ringbuf, value);
>> +
>> +       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
>> +       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
>> +       /* value is updated with the status of remaining bits of this
>> +        * register when it is read from debugfs file
>> +        */
>> +       dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
>> +       dev_priv->num_wa_regs++;
>> +}
>> +
>> +static int bdw_init_logical_workarounds(struct intel_ringbuffer *ringbuf)
>> +{
>> +       int ret;
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>> +       struct drm_device *dev = ring->dev;
>> +       struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +       /*
>> +        * workarounds applied in this fn are part of register state context,
>> +        * they need to be re-initialized followed by gpu reset, suspend/resume,
>> +        * module reload.
>> +        */
>> +       dev_priv->num_wa_regs = 0;
>> +       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
>> +
>> +       /*
>> +        * update the number of dwords required based on the
>> +        * actual number of workarounds applied
>> +        */
>> +       ret = intel_logical_ring_begin(ringbuf, BDW_WA_DWORDS_SIZE);
>> +       if (ret)
>> +               return ret;
>> +
>> +       bdw_emit_workarounds(ringbuf);
>> +
>> +       intel_logical_ring_advance(ringbuf);
>> +
>> +       DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
>> +                        dev_priv->num_wa_regs);
>> +
>> +       return 0;
>> +}
>> +
>>   static int gen8_init_common_ring(struct intel_engine_cs *ring)
>>   {
>>          struct drm_device *dev = ring->dev;
>> @@ -1315,6 +1371,10 @@ static int logical_render_ring_init(struct drm_device *dev)
>>          if (HAS_L3_DPF(dev))
>>                  ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
>>
>> +       if (IS_BROADWELL(dev))
>> +               ring->init_context = bdw_init_logical_workarounds;
>> +       ring->emit_wa = intel_logical_ring_emit_wa;
>> +
>>          ring->init = gen8_init_render_ring;
>>          ring->cleanup = intel_fini_pipe_control;
>>          ring->get_seqno = gen8_get_seqno;
>> @@ -1802,6 +1862,12 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>>          }
>>
>>          if (ring->id == RCS && !ctx->rcs_initialized) {
>> +               if (ring->init_context) {
>> +                       ret = ring->init_context(ringbuf);
>> +                       if (ret)
>> +                               DRM_ERROR("ring init context: %d\n", ret);
>> +               }
>> +
>>                  ret = intel_lr_context_render_state_init(ring, ctx);
>>                  if (ret) {
>>                          DRM_ERROR("Init render state failed: %d\n", ret);
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 395f926..e6ac913 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -677,9 +677,10 @@ err:
>>          return ret;
>>   }
>>
>> -static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
>> +static inline void intel_ring_emit_wa(struct intel_ringbuffer *ringbuf,
>>                                         u32 addr, u32 value)
>>   {
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>>          struct drm_device *dev = ring->dev;
>>          struct drm_i915_private *dev_priv = dev->dev_private;
>>
>> @@ -701,51 +702,33 @@ static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
>>          return;
>>   }
>>
>> -static int bdw_init_workarounds(struct intel_engine_cs *ring)
>> +void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf)
>>   {
>> -       int ret;
>> -       struct drm_device *dev = ring->dev;
>> -       struct drm_i915_private *dev_priv = dev->dev_private;
>> -
>> -       /*
>> -        * workarounds applied in this fn are part of register state context,
>> -        * they need to be re-initialized followed by gpu reset, suspend/resume,
>> -        * module reload.
>> -        */
>> -       dev_priv->num_wa_regs = 0;
>> -       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
>> -
>> -       /*
>> -        * update the number of dwords required based on the
>> -        * actual number of workarounds applied
>> -        */
>> -       ret = intel_ring_begin(ring, 18);
>> -       if (ret)
>> -               return ret;
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>>
>>          /* WaDisablePartialInstShootdown:bdw */
>>          /* WaDisableThreadStallDopClockGating:bdw */
>>          /* FIXME: Unclear whether we really need this on production bdw. */
>> -       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
>> +       ring->emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>>                             _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
>>                                               | STALL_DOP_GATING_DISABLE));
>>
>>          /* WaDisableDopClockGating:bdw May not be needed for production */
>> -       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
>> +       ring->emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
>>                             _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>>
>> -       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
>> +       ring->emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
>>                             _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>>
>>          /* Use Force Non-Coherent whenever executing a 3D context. This is a
>>           * workaround for for a possible hang in the unlikely event a TLB
>>           * invalidation occurs during a PSD flush.
>>           */
>> -       intel_ring_emit_wa(ring, HDC_CHICKEN0,
>> +       ring->emit_wa(ringbuf, HDC_CHICKEN0,
>>                             _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
>>
>>          /* Wa4x4STCOptimizationDisable:bdw */
>> -       intel_ring_emit_wa(ring, CACHE_MODE_1,
>> +       ring->emit_wa(ringbuf, CACHE_MODE_1,
>>                             _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
>>
>>          /*
>> @@ -756,8 +739,34 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
>>           * disable bit, which we don't touch here, but it's good
>>           * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
>>           */
>> -       intel_ring_emit_wa(ring, GEN7_GT_MODE,
>> +       ring->emit_wa(ringbuf, GEN7_GT_MODE,
>>                             GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
>> +}
>> +
>> +static int bdw_init_workarounds(struct intel_ringbuffer *ringbuf)
>> +{
>> +       int ret;
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>> +       struct drm_device *dev = ring->dev;
>> +       struct drm_i915_private *dev_priv = dev->dev_private;
>> +
>> +       /*
>> +        * workarounds applied in this fn are part of register state context,
>> +        * they need to be re-initialized followed by gpu reset, suspend/resume,
>> +        * module reload.
>> +        */
>> +       dev_priv->num_wa_regs = 0;
>> +       memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
>> +
>> +       /*
>> +        * update the number of dwords required based on the
>> +        * actual number of workarounds applied
>> +        */
>> +       ret = intel_ring_begin(ring, BDW_WA_DWORDS_SIZE);
>> +       if (ret)
>> +               return ret;
>> +
>> +       bdw_emit_workarounds(ringbuf);
>>
>>          intel_ring_advance(ring);
>>
>> @@ -767,9 +776,10 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)
>>          return 0;
>>   }
>>
>> -static int chv_init_workarounds(struct intel_engine_cs *ring)
>> +static int chv_init_workarounds(struct intel_ringbuffer *ringbuf)
>>   {
>>          int ret;
>> +       struct intel_engine_cs *ring = ringbuf->ring;
>>          struct drm_device *dev = ring->dev;
>>          struct drm_i915_private *dev_priv = dev->dev_private;
>>
>> @@ -786,19 +796,19 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)
>>                  return ret;
>>
>>          /* WaDisablePartialInstShootdown:chv */
>> -       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
>> +       intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>>                             _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
>>
>>          /* WaDisableThreadStallDopClockGating:chv */
>> -       intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
>> +       intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
>>                             _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
>>
>>          /* WaDisableDopClockGating:chv (pre-production hw) */
>> -       intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
>> +       intel_ring_emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
>>                             _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
>>
>>          /* WaDisableSamplerPowerBypass:chv (pre-production hw) */
>> -       intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
>> +       intel_ring_emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
>>                             _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
>>
>>          intel_ring_advance(ring);
>> @@ -2310,6 +2320,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
>>                          ring->init_context = chv_init_workarounds;
>>                  else
>>                          ring->init_context = bdw_init_workarounds;
>> +               ring->emit_wa = intel_ring_emit_wa;
>>                  ring->add_request = gen6_add_request;
>>                  ring->flush = gen8_render_ring_flush;
>>                  ring->irq_get = gen8_ring_get_irq;
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index 07f66d4..417aa09 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -12,6 +12,11 @@
>>    */
>>   #define CACHELINE_BYTES 64
>>
>> +/* Number of dwords required based on the
>> + * actual number of workarounds applied
>> + */
>> +#define BDW_WA_DWORDS_SIZE 18
>> +
>>   /*
>>    * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
>>    * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
>> @@ -148,7 +153,10 @@ struct  intel_engine_cs {
>>
>>          int             (*init)(struct intel_engine_cs *ring);
>>
>> -       int             (*init_context)(struct intel_engine_cs *ring);
>> +       int             (*init_context)(struct intel_ringbuffer *ringbuf);
>> +
>> +       void    (*emit_wa)(struct intel_ringbuffer *ringbuf,
>> +                      u32 addr, u32 value);
>>
>>          void            (*write_tail)(struct intel_engine_cs *ring,
>>                                        u32 value);
>> @@ -427,6 +435,7 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev);
>>
>>   u64 intel_ring_get_active_head(struct intel_engine_cs *ring);
>>   void intel_ring_setup_status_page(struct intel_engine_cs *ring);
>> +void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf);
>>
>>   static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
>>   {
>> --
>> 2.0.3
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
>
>
Michel Thierry Nov. 5, 2014, 9:55 a.m. UTC | #3
On 11/4/2014 7:23 PM, Rodrigo Vivi wrote:
> These patches got listed to -collector but got a huge conflict. If it
> is still relevant please rebase it.
>
> Also my bikeshed is to findo better names to help on differentiate
> them at least.
>
> On Wed, Sep 24, 2014 at 5:02 AM, Michel Thierry
> <michel.thierry@intel.com> wrote:
>> Following the legacy ring submission example, update the
>> ring->init_context() hook to support the execlist submission mode.
>>
>> Workarounds are defined in bdw_emit_workarounds(), but the emit
>> now depends on the ring submission mode.
>>
>> v2: Updated after "Cleanup pre prod workarounds"
>>
>> For: VIZ-4092
>> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gem_context.c |  2 +-
>>   drivers/gpu/drm/i915/intel_lrc.c        | 66 +++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/intel_ringbuffer.c | 75 +++++++++++++++++++--------------
>>   drivers/gpu/drm/i915/intel_ringbuffer.h | 11 ++++-
>>   4 files changed, 120 insertions(+), 34 deletions(-)
Hi Rodrigo,

This patch is no longer needed, and was superseded by Arun's patch:
<1414516762-27254-1-git-send-email-arun.siluvery@linux.intel.com>

Thanks,

-Michel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 7b73b36..d1ed21a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -657,7 +657,7 @@  done:
 
 	if (uninitialized) {
 		if (ring->init_context) {
-			ret = ring->init_context(ring);
+			ret = ring->init_context(ring->buffer);
 			if (ret)
 				DRM_ERROR("ring init context: %d\n", ret);
 		}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d64d518..a0aa3f0 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1020,6 +1020,62 @@  int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
 	return 0;
 }
 
+static inline void intel_logical_ring_emit_wa(struct intel_ringbuffer *ringbuf,
+				       u32 addr, u32 value)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (WARN_ON(dev_priv->num_wa_regs >= I915_MAX_WA_REGS))
+		return;
+
+	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
+	intel_logical_ring_emit(ringbuf, addr);
+	intel_logical_ring_emit(ringbuf, value);
+
+	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].addr = addr;
+	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].mask = value & 0xFFFF;
+	/* value is updated with the status of remaining bits of this
+	 * register when it is read from debugfs file
+	 */
+	dev_priv->intel_wa_regs[dev_priv->num_wa_regs].value = value;
+	dev_priv->num_wa_regs++;
+}
+
+static int bdw_init_logical_workarounds(struct intel_ringbuffer *ringbuf)
+{
+	int ret;
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/*
+	 * workarounds applied in this fn are part of register state context,
+	 * they need to be re-initialized followed by gpu reset, suspend/resume,
+	 * module reload.
+	 */
+	dev_priv->num_wa_regs = 0;
+	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
+
+	/*
+	 * update the number of dwords required based on the
+	 * actual number of workarounds applied
+	 */
+	ret = intel_logical_ring_begin(ringbuf, BDW_WA_DWORDS_SIZE);
+	if (ret)
+		return ret;
+
+	bdw_emit_workarounds(ringbuf);
+
+	intel_logical_ring_advance(ringbuf);
+
+	DRM_DEBUG_DRIVER("Number of Workarounds applied: %d\n",
+			 dev_priv->num_wa_regs);
+
+	return 0;
+}
+
 static int gen8_init_common_ring(struct intel_engine_cs *ring)
 {
 	struct drm_device *dev = ring->dev;
@@ -1315,6 +1371,10 @@  static int logical_render_ring_init(struct drm_device *dev)
 	if (HAS_L3_DPF(dev))
 		ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
 
+	if (IS_BROADWELL(dev))
+		ring->init_context = bdw_init_logical_workarounds;
+	ring->emit_wa = intel_logical_ring_emit_wa;
+
 	ring->init = gen8_init_render_ring;
 	ring->cleanup = intel_fini_pipe_control;
 	ring->get_seqno = gen8_get_seqno;
@@ -1802,6 +1862,12 @@  int intel_lr_context_deferred_create(struct intel_context *ctx,
 	}
 
 	if (ring->id == RCS && !ctx->rcs_initialized) {
+		if (ring->init_context) {
+			ret = ring->init_context(ringbuf);
+			if (ret)
+				DRM_ERROR("ring init context: %d\n", ret);
+		}
+
 		ret = intel_lr_context_render_state_init(ring, ctx);
 		if (ret) {
 			DRM_ERROR("Init render state failed: %d\n", ret);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 395f926..e6ac913 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -677,9 +677,10 @@  err:
 	return ret;
 }
 
-static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
+static inline void intel_ring_emit_wa(struct intel_ringbuffer *ringbuf,
 				       u32 addr, u32 value)
 {
+	struct intel_engine_cs *ring = ringbuf->ring;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -701,51 +702,33 @@  static inline void intel_ring_emit_wa(struct intel_engine_cs *ring,
 	return;
 }
 
-static int bdw_init_workarounds(struct intel_engine_cs *ring)
+void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf)
 {
-	int ret;
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	/*
-	 * workarounds applied in this fn are part of register state context,
-	 * they need to be re-initialized followed by gpu reset, suspend/resume,
-	 * module reload.
-	 */
-	dev_priv->num_wa_regs = 0;
-	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
-
-	/*
-	 * update the number of dwords required based on the
-	 * actual number of workarounds applied
-	 */
-	ret = intel_ring_begin(ring, 18);
-	if (ret)
-		return ret;
+	struct intel_engine_cs *ring = ringbuf->ring;
 
 	/* WaDisablePartialInstShootdown:bdw */
 	/* WaDisableThreadStallDopClockGating:bdw */
 	/* FIXME: Unclear whether we really need this on production bdw. */
-	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
+	ring->emit_wa(ringbuf, GEN8_ROW_CHICKEN,
 			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE
 					     | STALL_DOP_GATING_DISABLE));
 
 	/* WaDisableDopClockGating:bdw May not be needed for production */
-	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
+	ring->emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
 			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
 
-	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
+	ring->emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
 			   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
 
 	/* Use Force Non-Coherent whenever executing a 3D context. This is a
 	 * workaround for for a possible hang in the unlikely event a TLB
 	 * invalidation occurs during a PSD flush.
 	 */
-	intel_ring_emit_wa(ring, HDC_CHICKEN0,
+	ring->emit_wa(ringbuf, HDC_CHICKEN0,
 			   _MASKED_BIT_ENABLE(HDC_FORCE_NON_COHERENT));
 
 	/* Wa4x4STCOptimizationDisable:bdw */
-	intel_ring_emit_wa(ring, CACHE_MODE_1,
+	ring->emit_wa(ringbuf, CACHE_MODE_1,
 			   _MASKED_BIT_ENABLE(GEN8_4x4_STC_OPTIMIZATION_DISABLE));
 
 	/*
@@ -756,8 +739,34 @@  static int bdw_init_workarounds(struct intel_engine_cs *ring)
 	 * disable bit, which we don't touch here, but it's good
 	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
 	 */
-	intel_ring_emit_wa(ring, GEN7_GT_MODE,
+	ring->emit_wa(ringbuf, GEN7_GT_MODE,
 			   GEN6_WIZ_HASHING_MASK | GEN6_WIZ_HASHING_16x4);
+}
+
+static int bdw_init_workarounds(struct intel_ringbuffer *ringbuf)
+{
+	int ret;
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/*
+	 * workarounds applied in this fn are part of register state context,
+	 * they need to be re-initialized followed by gpu reset, suspend/resume,
+	 * module reload.
+	 */
+	dev_priv->num_wa_regs = 0;
+	memset(dev_priv->intel_wa_regs, 0, sizeof(dev_priv->intel_wa_regs));
+
+	/*
+	 * update the number of dwords required based on the
+	 * actual number of workarounds applied
+	 */
+	ret = intel_ring_begin(ring, BDW_WA_DWORDS_SIZE);
+	if (ret)
+		return ret;
+
+	bdw_emit_workarounds(ringbuf);
 
 	intel_ring_advance(ring);
 
@@ -767,9 +776,10 @@  static int bdw_init_workarounds(struct intel_engine_cs *ring)
 	return 0;
 }
 
-static int chv_init_workarounds(struct intel_engine_cs *ring)
+static int chv_init_workarounds(struct intel_ringbuffer *ringbuf)
 {
 	int ret;
+	struct intel_engine_cs *ring = ringbuf->ring;
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -786,19 +796,19 @@  static int chv_init_workarounds(struct intel_engine_cs *ring)
 		return ret;
 
 	/* WaDisablePartialInstShootdown:chv */
-	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
+	intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
 			   _MASKED_BIT_ENABLE(PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE));
 
 	/* WaDisableThreadStallDopClockGating:chv */
-	intel_ring_emit_wa(ring, GEN8_ROW_CHICKEN,
+	intel_ring_emit_wa(ringbuf, GEN8_ROW_CHICKEN,
 			   _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
 
 	/* WaDisableDopClockGating:chv (pre-production hw) */
-	intel_ring_emit_wa(ring, GEN7_ROW_CHICKEN2,
+	intel_ring_emit_wa(ringbuf, GEN7_ROW_CHICKEN2,
 			   _MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
 
 	/* WaDisableSamplerPowerBypass:chv (pre-production hw) */
-	intel_ring_emit_wa(ring, HALF_SLICE_CHICKEN3,
+	intel_ring_emit_wa(ringbuf, HALF_SLICE_CHICKEN3,
 			   _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS));
 
 	intel_ring_advance(ring);
@@ -2310,6 +2320,7 @@  int intel_init_render_ring_buffer(struct drm_device *dev)
 			ring->init_context = chv_init_workarounds;
 		else
 			ring->init_context = bdw_init_workarounds;
+		ring->emit_wa = intel_ring_emit_wa;
 		ring->add_request = gen6_add_request;
 		ring->flush = gen8_render_ring_flush;
 		ring->irq_get = gen8_ring_get_irq;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 07f66d4..417aa09 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -12,6 +12,11 @@ 
  */
 #define CACHELINE_BYTES 64
 
+/* Number of dwords required based on the
+ * actual number of workarounds applied
+ */
+#define BDW_WA_DWORDS_SIZE 18
+
 /*
  * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
  * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
@@ -148,7 +153,10 @@  struct  intel_engine_cs {
 
 	int		(*init)(struct intel_engine_cs *ring);
 
-	int		(*init_context)(struct intel_engine_cs *ring);
+	int		(*init_context)(struct intel_ringbuffer *ringbuf);
+
+	void	(*emit_wa)(struct intel_ringbuffer *ringbuf,
+		       u32 addr, u32 value);
 
 	void		(*write_tail)(struct intel_engine_cs *ring,
 				      u32 value);
@@ -427,6 +435,7 @@  int intel_init_vebox_ring_buffer(struct drm_device *dev);
 
 u64 intel_ring_get_active_head(struct intel_engine_cs *ring);
 void intel_ring_setup_status_page(struct intel_engine_cs *ring);
+void bdw_emit_workarounds(struct intel_ringbuffer *ringbuf);
 
 static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
 {