Message ID | 20180226140000.13320-1-michal.winiarski@intel.com (mailing list archive)
---|---
State | New, archived
Quoting Michał Winiarski (2018-02-26 13:59:59)
> Since we're inhibiting context save of preempt context, we're no longer
> tracking the position of HEAD/TAIL. With GuC, we're adding a new
> breadcrumb for each preemption, which means that the HW will do more and
> more breadcrumb writes. Eventually the ring is filled, and we're
> submitting the preemption context with HEAD==TAIL==0, which won't result
> in breadcrumb write, but will trigger hangcheck instead.
> Instead of writing a new preempt breadcrumb for each preemption, let's
> just fill the ring once at init time (which also saves a couple of
> instructions in the tasklet).
>
> Fixes: 517aaffe0c1b ("drm/i915/execlists: Inhibit context save/restore for the fake preempt context")
> Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Michel Thierry <michel.thierry@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_guc_submission.c | 68 +++++++++++++++++------------
>  1 file changed, 41 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
> index 586dde579903..89e5b036061d 100644
> --- a/drivers/gpu/drm/i915/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/intel_guc_submission.c
> @@ -28,6 +28,10 @@
>  #include "intel_guc_submission.h"
>  #include "i915_drv.h"
>
> +#define GUC_PREEMPT_FINISHED 0x1
> +#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
> +#define GUC_PREEMPT_BREADCRUMB_BYTES (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
> +
>  /**
>   * DOC: GuC-based command submission
>   *
> @@ -535,8 +539,6 @@ static void flush_ggtt_writes(struct i915_vma *vma)
>  	POSTING_READ_FW(GUC_STATUS);
>  }
>
> -#define GUC_PREEMPT_FINISHED 0x1
> -#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
>  static void inject_preempt_context(struct work_struct *work)
>  {
>  	struct guc_preempt_work *preempt_work =
> @@ -546,37 +548,13 @@ static void inject_preempt_context(struct work_struct *work)
>  			     preempt_work[engine->id]);
>  	struct intel_guc_client *client = guc->preempt_client;
>  	struct guc_stage_desc *stage_desc = __get_stage_desc(client);
> -	struct intel_ring *ring = client->owner->engine[engine->id].ring;
>  	u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner,
>  								 engine));
> -	u32 *cs = ring->vaddr + ring->tail;
>  	u32 data[7];
>
> -	if (engine->id == RCS) {
> -		cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED,
> -				intel_hws_preempt_done_address(engine));
> -	} else {
> -		cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
> -				intel_hws_preempt_done_address(engine));
> -		*cs++ = MI_NOOP;
> -		*cs++ = MI_NOOP;
> -	}
> -	*cs++ = MI_USER_INTERRUPT;
> -	*cs++ = MI_NOOP;
> -
> -	GEM_BUG_ON(!IS_ALIGNED(ring->size,
> -			       GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)));
> -	GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) !=
> -		   GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32));
> -
> -	ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32);
> -	ring->tail &= (ring->size - 1);
> -
> -	flush_ggtt_writes(ring->vma);
> -
>  	spin_lock_irq(&client->wq_lock);
>  	guc_wq_item_append(client, engine->guc_id, ctx_desc,
> -			   ring->tail / sizeof(u64), 0);
> +			   GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64), 0);
>  	spin_unlock_irq(&client->wq_lock);
>
>  	/*
> @@ -972,6 +950,40 @@ static void guc_client_free(struct intel_guc_client *client)
>  	kfree(client);
>  }
>
> +static void guc_fill_preempt_context(struct intel_guc *guc)
> +{
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	struct intel_guc_client *client = guc->preempt_client;
> +	struct intel_engine_cs *engine;
> +	struct intel_ring *ring;
> +	enum intel_engine_id id;
> +	u32 *cs;
> +

Whether to use preempt_client or preempt_context is your call.

> +	for_each_engine(engine, dev_priv, id) {

		struct intel_context *ce = &client->owner->engine[id];

		/* The preempt context must be pinned on each engine */
		GEM_BUG_ON(!ce->pin_count);

		/*
		 * We rely on the context image *not* being saved after
		 * preemption. This ensures that the RING_HEAD / RING_TAIL
		 * do not advance and they remain pointing at this command
		 * sequence forever.
		 */
		GEM_BUG_ON(!(ce->reg_state[SR] & CTX_SR_SAVE_INHIBIT));

> +		ring = client->owner->engine[id].ring;
> +		cs = ring->vaddr;
> +
> +		if (id == RCS) {
> +			cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED,
> +					intel_hws_preempt_done_address(engine));
> +		} else {
> +			cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
> +					intel_hws_preempt_done_address(engine));
> +			*cs++ = MI_NOOP;
> +			*cs++ = MI_NOOP;
> +		}
> +		*cs++ = MI_USER_INTERRUPT;
> +		*cs++ = MI_NOOP;
> +
> +		GEM_BUG_ON(!IS_ALIGNED(ring->size,
> +				       GUC_PREEMPT_BREADCRUMB_BYTES));
> +		GEM_BUG_ON((void *)cs - ring->vaddr !=
> +			   GUC_PREEMPT_BREADCRUMB_BYTES);

We don't care about these anymore as we don't have to worry about
instructions wrapping. Similarly, we don't need the NOOP padding after
MI_FLUSH_DW.

Keep setting ring->tail here so we can avoid the hardcoding inside the
submission. That will allow us to adapt this with ease.
-Chris
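[For illustration, a minimal sketch of the "keep setting ring->tail" suggestion inside the fill loop. This is hypothetical code, not what eventually landed; a later reply in this thread explains why the idea was dropped.]

	/*
	 * Sketch only: record the packet size in ring->tail so the
	 * submission side could use ring->tail / sizeof(u64) instead of
	 * the hardcoded GUC_PREEMPT_BREADCRUMB_BYTES. As noted later in
	 * the thread, intel_lr_context_resume() resets ring->tail on
	 * resume, which is why this approach was abandoned.
	 */
	ring->tail = (void *)cs - ring->vaddr;
	GEM_BUG_ON(ring->tail != GUC_PREEMPT_BREADCRUMB_BYTES);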
Quoting Chris Wilson (2018-02-26 14:28:36)
> Quoting Michał Winiarski (2018-02-26 13:59:59)
> > +	for_each_engine(engine, dev_priv, id) {
>
> 		struct intel_context *ce = &client->owner->engine[id];
>
> 		/* The preempt context must be pinned on each engine */
> 		GEM_BUG_ON(!ce->pin_count);
>
> 		/*
> 		 * We rely on the context image *not* being saved after

s/the context/this context/

> 		 * preemption. This ensures that the RING_HEAD / RING_TAIL
> 		 * do not advance and they remain pointing at this command
> 		 * sequence forever.
> 		 */
> 		GEM_BUG_ON(!(ce->reg_state[SR] & CTX_SR_SAVE_INHIBIT));
-Chris
Quoting Chris Wilson (2018-02-26 14:28:36)
> Quoting Michał Winiarski (2018-02-26 13:59:59)
> > +	for_each_engine(engine, dev_priv, id) {
>
> 		struct intel_context *ce = &client->owner->engine[id];
>
> 		/* The preempt context must be pinned on each engine */
> 		GEM_BUG_ON(!ce->pin_count);
>
> 		/*
> 		 * We rely on the context image *not* being saved after
> 		 * preemption. This ensures that the RING_HEAD / RING_TAIL
> 		 * do not advance and they remain pointing at this command
> 		 * sequence forever.
> 		 */

Hmm, this is not true. See intel_lr_context_resume().

Does this patch justify a test case for gem_exec_schedule+suspend
specifically? Or do we just repeat every test after a S&R cycle? I think
we could do with the latter!
-Chris
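[As a rough illustration of the "repeat every test after a S&R cycle" idea, an IGT-flavoured sketch: run_preempt_test() is a hypothetical stand-in for an existing gem_exec_schedule subtest body, while igt_system_suspend_autoresume() is the existing IGT helper.]

	static void preempt_after_suspend(int fd, unsigned int ring)
	{
		/* Exercise preemption once, consuming preempt breadcrumbs. */
		run_preempt_test(fd, ring);	/* hypothetical helper */

		/* Suspend/resume, which resets the logical ring contexts. */
		igt_system_suspend_autoresume(SUSPEND_STATE_MEM,
					      SUSPEND_TEST_NONE);

		/* Preemption must still work after the S&R cycle. */
		run_preempt_test(fd, ring);
	}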
Quoting Chris Wilson (2018-02-26 14:38:20)
> Quoting Chris Wilson (2018-02-26 14:28:36)
> > Quoting Michał Winiarski (2018-02-26 13:59:59)
> > > +	for_each_engine(engine, dev_priv, id) {
> >
> > 		struct intel_context *ce = &client->owner->engine[id];
> >
> > 		/* The preempt context must be pinned on each engine */
> > 		GEM_BUG_ON(!ce->pin_count);
> >
> > 		/*
> > 		 * We rely on the context image *not* being saved after
> > 		 * preemption. This ensures that the RING_HEAD / RING_TAIL
> > 		 * do not advance and they remain pointing at this command
> > 		 * sequence forever.
> > 		 */
>
> Hmm, this is not true. See intel_lr_context_resume().

Continuing this chain of thought, that doesn't matter. The reg state is
reset to 0, which is what we expect with context-save-inhibit anyway.

What it does do is reset ring->tail to 0 as well. That doesn't play well
with my idea to use ring->tail.
-Chris
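[For context, the effect of intel_lr_context_resume() being referred to, paraphrased from the intel_lrc.c of this era rather than quoted verbatim.]

	/* Paraphrase (not verbatim): on resume, every context image is
	 * wiped back to defaults and the software ring pointers are
	 * reset, clobbering any ring->tail value set up at init.
	 */
	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
		for_each_engine(engine, dev_priv, id) {
			struct intel_context *ce = &ctx->engine[id];

			if (!ce->state)
				continue;

			ce->ring->head = 0;
			ce->ring->tail = 0;
		}
	}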
Quoting Chris Wilson (2018-02-26 14:28:36)
> Quoting Michał Winiarski (2018-02-26 13:59:59)
> > +		if (id == RCS) {
> > +			cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED,
> > +					intel_hws_preempt_done_address(engine));
> > +		} else {
> > +			cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
> > +					intel_hws_preempt_done_address(engine));
> > +			*cs++ = MI_NOOP;
> > +			*cs++ = MI_NOOP;
> > +		}
> > +		*cs++ = MI_USER_INTERRUPT;
> > +		*cs++ = MI_NOOP;
> > +
> > +		GEM_BUG_ON(!IS_ALIGNED(ring->size,
> > +				       GUC_PREEMPT_BREADCRUMB_BYTES));
> > +		GEM_BUG_ON((void *)cs - ring->vaddr !=
> > +			   GUC_PREEMPT_BREADCRUMB_BYTES);
>
> We don't care about these anymore as we don't have to worry about
> instructions wrapping. Similarly, we don't need the NOOP padding after
> MI_FLUSH_DW.
>
> Keep setting ring->tail here so we can avoid the hardcoding inside the
> submission. That will allow us to adapt this with ease.

Quick digression later, intel_lr_context_resume() breaks this idea to
keep ring->tail set. So that means we need to keep the fixed size
command packet and the hardcoded length. (But we can still remove the
asserts as we do not require magic alignments to avoid wraparound issues
anymore.)

But we do want a comment here for something like
GEM_BUG_ON(cs != GUC_PREEMPT_BREADCRUMB_BYTES); to tie the magic
constant here to the wq submission. And maybe a comment in
inject_preempt_context() to explain that we submit a command packet
that writes GUC_PREEMPT_FINISHED.
-Chris
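[Spelled out, the comments being asked for might read something like this; the wording is illustrative, but the names are the ones already used in the patch.]

	/* In guc_fill_preempt_context(): tie the size of the emitted
	 * packet to the wq item length hardcoded in
	 * inject_preempt_context().
	 */
	GEM_BUG_ON((void *)cs - ring->vaddr != GUC_PREEMPT_BREADCRUMB_BYTES);

	/* In inject_preempt_context(): the preempt ring was filled once
	 * at init with a fixed-size packet that writes
	 * GUC_PREEMPT_FINISHED; submit exactly that packet.
	 */
	guc_wq_item_append(client, engine->guc_id, ctx_desc,
			   GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64), 0);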
Quoting Chris Wilson (2018-02-26 14:28:36)
> Quoting Michał Winiarski (2018-02-26 13:59:59)
> > +	for_each_engine(engine, dev_priv, id) {
>
> 		struct intel_context *ce = &client->owner->engine[id];
>
> 		/* The preempt context must be pinned on each engine */
> 		GEM_BUG_ON(!ce->pin_count);
>
> 		/*
> 		 * We rely on the context image *not* being saved after
> 		 * preemption. This ensures that the RING_HEAD / RING_TAIL
> 		 * do not advance and they remain pointing at this command
> 		 * sequence forever.
> 		 */
> 		GEM_BUG_ON(!(ce->reg_state[SR] & CTX_SR_SAVE_INHIBIT));

In fact, don't bug out, just set it here. Then it won't break again when
icl enabling lands.
-Chris
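[A minimal sketch of "just set it here", reusing the names introduced by commit 517aaffe0c1b; the reg_state indexing follows the intel_lrc.c of this era and may differ on later kernels.]

	u32 *regs = ce->lrc_reg_state;

	/* Force save-inhibit instead of asserting it, so that new
	 * platform enabling (e.g. icl) cannot silently regress it.
	 */
	regs[CTX_CONTEXT_CONTROL + 1] |=
		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT);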
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 586dde579903..89e5b036061d 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -28,6 +28,10 @@
 #include "intel_guc_submission.h"
 #include "i915_drv.h"
 
+#define GUC_PREEMPT_FINISHED 0x1
+#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
+#define GUC_PREEMPT_BREADCRUMB_BYTES (sizeof(u32) * GUC_PREEMPT_BREADCRUMB_DWORDS)
+
 /**
  * DOC: GuC-based command submission
  *
@@ -535,8 +539,6 @@ static void flush_ggtt_writes(struct i915_vma *vma)
 	POSTING_READ_FW(GUC_STATUS);
 }
 
-#define GUC_PREEMPT_FINISHED 0x1
-#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
 static void inject_preempt_context(struct work_struct *work)
 {
 	struct guc_preempt_work *preempt_work =
@@ -546,37 +548,13 @@ static void inject_preempt_context(struct work_struct *work)
 			     preempt_work[engine->id]);
 	struct intel_guc_client *client = guc->preempt_client;
 	struct guc_stage_desc *stage_desc = __get_stage_desc(client);
-	struct intel_ring *ring = client->owner->engine[engine->id].ring;
 	u32 ctx_desc = lower_32_bits(intel_lr_context_descriptor(client->owner,
 								 engine));
-	u32 *cs = ring->vaddr + ring->tail;
 	u32 data[7];
 
-	if (engine->id == RCS) {
-		cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED,
-				intel_hws_preempt_done_address(engine));
-	} else {
-		cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
-				intel_hws_preempt_done_address(engine));
-		*cs++ = MI_NOOP;
-		*cs++ = MI_NOOP;
-	}
-	*cs++ = MI_USER_INTERRUPT;
-	*cs++ = MI_NOOP;
-
-	GEM_BUG_ON(!IS_ALIGNED(ring->size,
-			       GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32)));
-	GEM_BUG_ON((void *)cs - (ring->vaddr + ring->tail) !=
-		   GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32));
-
-	ring->tail += GUC_PREEMPT_BREADCRUMB_DWORDS * sizeof(u32);
-	ring->tail &= (ring->size - 1);
-
-	flush_ggtt_writes(ring->vma);
-
 	spin_lock_irq(&client->wq_lock);
 	guc_wq_item_append(client, engine->guc_id, ctx_desc,
-			   ring->tail / sizeof(u64), 0);
+			   GUC_PREEMPT_BREADCRUMB_BYTES / sizeof(u64), 0);
 	spin_unlock_irq(&client->wq_lock);
 
 	/*
@@ -972,6 +950,40 @@ static void guc_client_free(struct intel_guc_client *client)
 	kfree(client);
 }
 
+static void guc_fill_preempt_context(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_guc_client *client = guc->preempt_client;
+	struct intel_engine_cs *engine;
+	struct intel_ring *ring;
+	enum intel_engine_id id;
+	u32 *cs;
+
+	for_each_engine(engine, dev_priv, id) {
+		ring = client->owner->engine[id].ring;
+		cs = ring->vaddr;
+
+		if (id == RCS) {
+			cs = gen8_emit_ggtt_write_rcs(cs, GUC_PREEMPT_FINISHED,
+					intel_hws_preempt_done_address(engine));
+		} else {
+			cs = gen8_emit_ggtt_write(cs, GUC_PREEMPT_FINISHED,
+					intel_hws_preempt_done_address(engine));
+			*cs++ = MI_NOOP;
+			*cs++ = MI_NOOP;
+		}
+		*cs++ = MI_USER_INTERRUPT;
+		*cs++ = MI_NOOP;
+
+		GEM_BUG_ON(!IS_ALIGNED(ring->size,
+				       GUC_PREEMPT_BREADCRUMB_BYTES));
+		GEM_BUG_ON((void *)cs - ring->vaddr !=
+			   GUC_PREEMPT_BREADCRUMB_BYTES);
+
+		flush_ggtt_writes(ring->vma);
+	}
+}
+
 static int guc_clients_create(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
@@ -1002,6 +1014,8 @@ static int guc_clients_create(struct intel_guc *guc)
 			return PTR_ERR(client);
 		}
 		guc->preempt_client = client;
+
+		guc_fill_preempt_context(guc);
 	}
 
 	return 0;
Since we're inhibiting context save of preempt context, we're no longer
tracking the position of HEAD/TAIL. With GuC, we're adding a new
breadcrumb for each preemption, which means that the HW will do more and
more breadcrumb writes. Eventually the ring is filled, and we're
submitting the preemption context with HEAD==TAIL==0, which won't result
in breadcrumb write, but will trigger hangcheck instead.
Instead of writing a new preempt breadcrumb for each preemption, let's
just fill the ring once at init time (which also saves a couple of
instructions in the tasklet).

Fixes: 517aaffe0c1b ("drm/i915/execlists: Inhibit context save/restore for the fake preempt context")
Signed-off-by: Michał Winiarski <michal.winiarski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/intel_guc_submission.c | 68 +++++++++++++++++------------
 1 file changed, 41 insertions(+), 27 deletions(-)
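[To make the failure mode concrete: each preemption emits GUC_PREEMPT_BREADCRUMB_DWORDS = 8 dwords, i.e. 32 bytes, so assuming a 4 KiB preempt ring (the actual ring size may differ), TAIL wraps back onto HEAD after 4096 / 32 = 128 preemptions, and the subsequent HEAD==TAIL submission writes no breadcrumb, leaving hangcheck to fire.]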