diff mbox

[06/10] drm/i915: Extract "emit write" part of emit breadcrumb functions

Message ID 20171005092005.10559-1-michal.winiarski@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michał Winiarski Oct. 5, 2017, 9:20 a.m. UTC
Let's separate the "emit" part from touching any internal structures,
this way we can have a generic "emit coherent GGTT write" function.
We would like to reuse this functionality for emitting HWSP write, to
confirm that preempt-to-idle has finished.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 21 +++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h | 38 +++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 16 deletions(-)

Comments

Chris Wilson Oct. 5, 2017, 9:47 a.m. UTC | #1
Quoting Michał Winiarski (2017-10-05 10:20:01)
> Let's separate the "emit" part from touching any internal structures,
> this way we can have a generic "emit coherent GGTT write" function.
> We would like to reuse this functionality for emitting HWSP write, to
> confirm that preempt-to-idle has finished.
> 
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        | 21 +++++-------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 38 +++++++++++++++++++++++++++++++++
>  2 files changed, 43 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 12956f2ba699..e6bbdc5dd01b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1766,10 +1766,8 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
>         /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
>         BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
>  
> -       *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
> -       *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
> -       *cs++ = 0;
> -       *cs++ = request->global_seqno;
> +       cs = gen8_emit_ggtt_write(intel_hws_seqno_address(request->engine),
> +                                 request->global_seqno, cs);
>         *cs++ = MI_USER_INTERRUPT;
>         *cs++ = MI_NOOP;
>         request->tail = intel_ring_offset(request, cs);
> @@ -1785,18 +1783,9 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
>         /* We're using qword write, seqno should be aligned to 8 bytes. */
>         BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
>  
> -       /* w/a for post sync ops following a GPGPU operation we
> -        * need a prior CS_STALL, which is emitted by the flush
> -        * following the batch.
> -        */
> -       *cs++ = GFX_OP_PIPE_CONTROL(6);
> -       *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
> -               PIPE_CONTROL_QW_WRITE;
> -       *cs++ = intel_hws_seqno_address(request->engine);
> -       *cs++ = 0;
> -       *cs++ = request->global_seqno;
> -       /* We're thrashing one dword of HWS. */
> -       *cs++ = 0;
> +       cs = gen8_emit_ggtt_write_render(
> +                                     intel_hws_seqno_address(request->engine),
> +                                     request->global_seqno, cs);

Would this be less problematic if we s/render/rcs/ ?

>         *cs++ = MI_USER_INTERRUPT;
>         *cs++ = MI_NOOP;
>         request->tail = intel_ring_offset(request, cs);
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 0fedda17488c..e9650552d3c2 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -831,6 +831,44 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
>         return batch + 6;
>  }
>  
> +static inline u32 *
> +gen8_emit_ggtt_write_render(u32 gtt_offset, u32 value, u32 *cs)

I prefer cs to the be first param. I just checked we did
emit_pipe_control in that fashion as well...

In principle,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 12956f2ba699..e6bbdc5dd01b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1766,10 +1766,8 @@  static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
 	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
 
-	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
-	*cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT;
-	*cs++ = 0;
-	*cs++ = request->global_seqno;
+	cs = gen8_emit_ggtt_write(intel_hws_seqno_address(request->engine),
+				  request->global_seqno, cs);
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 	request->tail = intel_ring_offset(request, cs);
@@ -1785,18 +1783,9 @@  static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
 	/* We're using qword write, seqno should be aligned to 8 bytes. */
 	BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
 
-	/* w/a for post sync ops following a GPGPU operation we
-	 * need a prior CS_STALL, which is emitted by the flush
-	 * following the batch.
-	 */
-	*cs++ = GFX_OP_PIPE_CONTROL(6);
-	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
-		PIPE_CONTROL_QW_WRITE;
-	*cs++ = intel_hws_seqno_address(request->engine);
-	*cs++ = 0;
-	*cs++ = request->global_seqno;
-	/* We're thrashing one dword of HWS. */
-	*cs++ = 0;
+	cs = gen8_emit_ggtt_write_render(
+				      intel_hws_seqno_address(request->engine),
+				      request->global_seqno, cs);
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 	request->tail = intel_ring_offset(request, cs);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0fedda17488c..e9650552d3c2 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -831,6 +831,44 @@  static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 	return batch + 6;
 }
 
+static inline u32 *
+gen8_emit_ggtt_write_render(u32 gtt_offset, u32 value, u32 *cs)
+{
+	/* We're using qword write, offset should be aligned to 8 bytes. */
+	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+	/* w/a for post sync ops following a GPGPU operation we
+	 * need a prior CS_STALL, which is emitted by the flush
+	 * following the batch.
+	 */
+	*cs++ = GFX_OP_PIPE_CONTROL(6);
+	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
+		PIPE_CONTROL_QW_WRITE;
+	*cs++ = gtt_offset;
+	*cs++ = 0;
+	*cs++ = value;
+	/* We're thrashing one dword of HWS. */
+	*cs++ = 0;
+
+	return cs;
+}
+
+static inline u32 *
+gen8_emit_ggtt_write(u32 gtt_offset, u32 value, u32 *cs)
+{
+	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
+	GEM_BUG_ON(gtt_offset & (1 << 5));
+	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
+	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
+	*cs++ = 0;
+	*cs++ = value;
+
+	return cs;
+}
+
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);