Message ID | 20171005092005.10559-1-michal.winiarski@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Quoting Michał Winiarski (2017-10-05 10:20:01) > Let's separate the "emit" part from touching any internal structures, > this way we can have a generic "emit coherent GGTT write" function. > We would like to reuse this functionality for emitting HWSP write, to > confirm that preempt-to-idle has finished. > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com > Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> > --- > drivers/gpu/drm/i915/intel_lrc.c | 21 +++++------------- > drivers/gpu/drm/i915/intel_ringbuffer.h | 38 +++++++++++++++++++++++++++++++++ > 2 files changed, 43 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 12956f2ba699..e6bbdc5dd01b 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -1766,10 +1766,8 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) > /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ > BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); > > - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; > - *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; > - *cs++ = 0; > - *cs++ = request->global_seqno; > + cs = gen8_emit_ggtt_write(intel_hws_seqno_address(request->engine), > + request->global_seqno, cs); > *cs++ = MI_USER_INTERRUPT; > *cs++ = MI_NOOP; > request->tail = intel_ring_offset(request, cs); > @@ -1785,18 +1783,9 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, > /* We're using qword write, seqno should be aligned to 8 bytes. */ > BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); > > - /* w/a for post sync ops following a GPGPU operation we > - * need a prior CS_STALL, which is emitted by the flush > - * following the batch. > - */ > - *cs++ = GFX_OP_PIPE_CONTROL(6); > - *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | > - PIPE_CONTROL_QW_WRITE; > - *cs++ = intel_hws_seqno_address(request->engine); > - *cs++ = 0; > - *cs++ = request->global_seqno; > - /* We're thrashing one dword of HWS. */ > - *cs++ = 0; > + cs = gen8_emit_ggtt_write_render( > + intel_hws_seqno_address(request->engine), > + request->global_seqno, cs); Would this be less problematic if we s/render/rcs/ ? > *cs++ = MI_USER_INTERRUPT; > *cs++ = MI_NOOP; > request->tail = intel_ring_offset(request, cs); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 0fedda17488c..e9650552d3c2 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -831,6 +831,44 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) > return batch + 6; > } > > +static inline u32 * > +gen8_emit_ggtt_write_render(u32 gtt_offset, u32 value, u32 *cs) I prefer cs to the be first param. I just checked we did emit_pipe_control in that fashion as well... In principle, Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> -Chris
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 12956f2ba699..e6bbdc5dd01b 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1766,10 +1766,8 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; - *cs++ = 0; - *cs++ = request->global_seqno; + cs = gen8_emit_ggtt_write(intel_hws_seqno_address(request->engine), + request->global_seqno, cs); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); @@ -1785,18 +1783,9 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); - /* w/a for post sync ops following a GPGPU operation we - * need a prior CS_STALL, which is emitted by the flush - * following the batch. - */ - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE; - *cs++ = intel_hws_seqno_address(request->engine); - *cs++ = 0; - *cs++ = request->global_seqno; - /* We're thrashing one dword of HWS. */ - *cs++ = 0; + cs = gen8_emit_ggtt_write_render( + intel_hws_seqno_address(request->engine), + request->global_seqno, cs); *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0fedda17488c..e9650552d3c2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -831,6 +831,44 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) return batch + 6; } +static inline u32 * +gen8_emit_ggtt_write_render(u32 gtt_offset, u32 value, u32 *cs) +{ + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + /* w/a for post sync ops following a GPGPU operation we + * need a prior CS_STALL, which is emitted by the flush + * following the batch. + */ + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = gtt_offset; + *cs++ = 0; + *cs++ = value; + /* We're thrashing one dword of HWS. */ + *cs++ = 0; + + return cs; +} + +static inline u32 * +gen8_emit_ggtt_write(u32 gtt_offset, u32 value, u32 *cs) +{ + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ + GEM_BUG_ON(gtt_offset & (1 << 5)); + /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = value; + + return cs; +} + bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
Let's separate the "emit" part from touching any internal structures, this way we can have a generic "emit coherent GGTT write" function. We would like to reuse this functionality for emitting HWSP write, to confirm that preempt-to-idle has finished. Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com Signed-off-by: Michał Winiarski <michal.winiarski@intel.com> --- drivers/gpu/drm/i915/intel_lrc.c | 21 +++++------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 16 deletions(-)