Message ID | 1469124942-17943-6-git-send-email-david.s.gordon@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 21/07/16 19:15, Dave Gordon wrote: > Now that host structures are indexed by host engine-id rather than > guc_id, we can usefully convert some for_each_engine() loops to use > for_each_engine_id() and avoid multiple dereferences of engine->id. > > Also a few related tweaks to cache structure members locally wherever > they're used more than once or twice, hopefully eliminating memory > references. > > Signed-off-by: Dave Gordon <david.s.gordon@intel.com> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++++-------- > drivers/gpu/drm/i915/i915_guc_submission.c | 22 +++++++++++++--------- > 2 files changed, 22 insertions(+), 17 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 793b1d9..2106766 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -2535,6 +2535,7 @@ static void i915_guc_client_info(struct seq_file *m, > struct i915_guc_client *client) > { > struct intel_engine_cs *engine; > + enum intel_engine_id id; > uint64_t tot = 0; > > seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", > @@ -2549,11 +2550,11 @@ static void i915_guc_client_info(struct seq_file *m, > seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); > seq_printf(m, "\tLast submission result: %d\n", client->retcode); > > - for_each_engine(engine, dev_priv) { > + for_each_engine_id(engine, dev_priv, id) { > + u64 submissions = client->submissions[id]; > + tot += submissions; > seq_printf(m, "\tSubmissions: %llu %s\n", > - client->submissions[engine->id], > - engine->name); > - tot += client->submissions[engine->id]; > + submissions, engine->name); > } > seq_printf(m, "\tTotal: %llu\n", tot); > } > @@ -2598,11 +2599,11 @@ static int i915_guc_info(struct seq_file *m, void *data) > seq_printf(m, "GuC last action error code: %d\n", guc.action_err); > > seq_printf(m, "\nGuC submissions:\n"); > - for_each_engine(engine, dev_priv) { > + for_each_engine_id(engine, dev_priv, id) { > + u64 submissions = guc.submissions[id]; > + total += submissions; > seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", > - engine->name, guc.submissions[engine->id], > - guc.last_seqno[engine->id]); > - total += guc.submissions[engine->id]; > + engine->name, submissions, guc.last_seqno[id]); > } > seq_printf(m, "\t%s: %llu\n", "Total", total); > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index 6756db0..ece3479 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc, > > for_each_engine_masked(engine, dev_priv, client->engines) { > struct intel_context *ce = &ctx->engine[engine->id]; > - struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; > + uint32_t guc_engine_id = engine->guc_id; > + struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; > struct drm_i915_gem_object *obj; > > /* TODO: We have a design issue to be solved here. Only when we > @@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, > gfx_addr = i915_gem_obj_ggtt_offset(ce->state); > lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; > lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | > - (engine->guc_id << GUC_ELC_ENGINE_OFFSET); > + (guc_engine_id << GUC_ELC_ENGINE_OFFSET); > > obj = ce->ringbuf->obj; > gfx_addr = i915_gem_obj_ggtt_offset(obj); > @@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, > lrc->ring_next_free_location = gfx_addr; > lrc->ring_current_tail_pointer_value = 0; > > - desc.engines_used |= (1 << engine->guc_id); > + desc.engines_used |= (1 << guc_engine_id); > } > > DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", > @@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, > /* wqi_len is in DWords, and does not include the one-word header */ > const size_t wqi_size = sizeof(struct guc_wq_item); > const u32 wqi_len = wqi_size/sizeof(u32) - 1; > + struct intel_engine_cs *engine = rq->engine; > struct guc_process_desc *desc; > struct guc_wq_item *wqi; > void *base; > @@ -502,12 +504,11 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, > /* Now fill in the 4-word work queue item */ > wqi->header = WQ_TYPE_INORDER | > (wqi_len << WQ_LEN_SHIFT) | > - (rq->engine->guc_id << WQ_TARGET_SHIFT) | > + (engine->guc_id << WQ_TARGET_SHIFT) | > WQ_NO_WCFLUSH_WAIT; > > /* The GuC wants only the low-order word of the context descriptor */ > - wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, > - rq->engine); > + wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine); > > wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; > wqi->fence_id = rq->fence.seqno; > @@ -1035,11 +1036,14 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) > void i915_guc_submission_disable(struct drm_i915_private *dev_priv) > { > struct intel_guc *guc = &dev_priv->guc; > + struct i915_guc_client **gc_ptr; > struct intel_engine_cs *engine; > + enum intel_engine_id engine_id; > > - for_each_engine(engine, dev_priv) { > - guc_client_free(dev_priv, guc->exec_clients[engine->id]); > - guc->exec_clients[engine->id] = NULL; > + for_each_engine_id(engine, dev_priv, engine_id) { > + gc_ptr = &guc->exec_clients[engine_id]; > + guc_client_free(dev_priv, *gc_ptr); > + *gc_ptr = NULL; > } > } > > Didn't spot a difference from v1 which I reviewed before. What is new in v2? Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
On 22/07/16 11:04, Tvrtko Ursulin wrote: > > On 21/07/16 19:15, Dave Gordon wrote: >> Now that host structures are indexed by host engine-id rather than >> guc_id, we can usefully convert some for_each_engine() loops to use >> for_each_engine_id() and avoid multiple dereferences of engine->id. >> >> Also a few related tweaks to cache structure members locally wherever >> they're used more than once or twice, hopefully eliminating memory >> references. >> >> Signed-off-by: Dave Gordon <david.s.gordon@intel.com> >> --- >> drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++++-------- >> drivers/gpu/drm/i915/i915_guc_submission.c | 22 +++++++++++++--------- >> 2 files changed, 22 insertions(+), 17 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c >> b/drivers/gpu/drm/i915/i915_debugfs.c >> index 793b1d9..2106766 100644 >> --- a/drivers/gpu/drm/i915/i915_debugfs.c >> +++ b/drivers/gpu/drm/i915/i915_debugfs.c >> @@ -2535,6 +2535,7 @@ static void i915_guc_client_info(struct seq_file >> *m, >> struct i915_guc_client *client) >> { >> struct intel_engine_cs *engine; >> + enum intel_engine_id id; >> uint64_t tot = 0; >> >> seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", >> @@ -2549,11 +2550,11 @@ static void i915_guc_client_info(struct >> seq_file *m, >> seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); >> seq_printf(m, "\tLast submission result: %d\n", client->retcode); >> >> - for_each_engine(engine, dev_priv) { >> + for_each_engine_id(engine, dev_priv, id) { >> + u64 submissions = client->submissions[id]; >> + tot += submissions; >> seq_printf(m, "\tSubmissions: %llu %s\n", >> - client->submissions[engine->id], >> - engine->name); >> - tot += client->submissions[engine->id]; >> + submissions, engine->name); >> } >> seq_printf(m, "\tTotal: %llu\n", tot); >> } >> @@ -2598,11 +2599,11 @@ static int i915_guc_info(struct seq_file *m, >> void *data) >> seq_printf(m, "GuC last action error code: %d\n", guc.action_err); >> >> seq_printf(m, "\nGuC submissions:\n"); >> - for_each_engine(engine, dev_priv) { >> + for_each_engine_id(engine, dev_priv, id) { >> + u64 submissions = guc.submissions[id]; >> + total += submissions; >> seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", >> - engine->name, guc.submissions[engine->id], >> - guc.last_seqno[engine->id]); >> - total += guc.submissions[engine->id]; >> + engine->name, submissions, guc.last_seqno[id]); >> } >> seq_printf(m, "\t%s: %llu\n", "Total", total); >> >> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c >> b/drivers/gpu/drm/i915/i915_guc_submission.c >> index 6756db0..ece3479 100644 >> --- a/drivers/gpu/drm/i915/i915_guc_submission.c >> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c >> @@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc, >> >> for_each_engine_masked(engine, dev_priv, client->engines) { >> struct intel_context *ce = &ctx->engine[engine->id]; >> - struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; >> + uint32_t guc_engine_id = engine->guc_id; >> + struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; >> struct drm_i915_gem_object *obj; >> >> /* TODO: We have a design issue to be solved here. Only when we >> @@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, >> gfx_addr = i915_gem_obj_ggtt_offset(ce->state); >> lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; >> lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | >> - (engine->guc_id << GUC_ELC_ENGINE_OFFSET); >> + (guc_engine_id << GUC_ELC_ENGINE_OFFSET); >> >> obj = ce->ringbuf->obj; >> gfx_addr = i915_gem_obj_ggtt_offset(obj); >> @@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, >> lrc->ring_next_free_location = gfx_addr; >> lrc->ring_current_tail_pointer_value = 0; >> >> - desc.engines_used |= (1 << engine->guc_id); >> + desc.engines_used |= (1 << guc_engine_id); >> } >> >> DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", >> @@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct >> i915_guc_client *gc, >> /* wqi_len is in DWords, and does not include the one-word >> header */ >> const size_t wqi_size = sizeof(struct guc_wq_item); >> const u32 wqi_len = wqi_size/sizeof(u32) - 1; >> + struct intel_engine_cs *engine = rq->engine; >> struct guc_process_desc *desc; >> struct guc_wq_item *wqi; >> void *base; >> @@ -502,12 +504,11 @@ static void guc_add_workqueue_item(struct >> i915_guc_client *gc, >> /* Now fill in the 4-word work queue item */ >> wqi->header = WQ_TYPE_INORDER | >> (wqi_len << WQ_LEN_SHIFT) | >> - (rq->engine->guc_id << WQ_TARGET_SHIFT) | >> + (engine->guc_id << WQ_TARGET_SHIFT) | >> WQ_NO_WCFLUSH_WAIT; >> >> /* The GuC wants only the low-order word of the context >> descriptor */ >> - wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, >> - rq->engine); >> + wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, >> engine); >> >> wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; >> wqi->fence_id = rq->fence.seqno; >> @@ -1035,11 +1036,14 @@ int i915_guc_submission_enable(struct >> drm_i915_private *dev_priv) >> void i915_guc_submission_disable(struct drm_i915_private *dev_priv) >> { >> struct intel_guc *guc = &dev_priv->guc; >> + struct i915_guc_client **gc_ptr; >> struct intel_engine_cs *engine; >> + enum intel_engine_id engine_id; >> >> - for_each_engine(engine, dev_priv) { >> - guc_client_free(dev_priv, guc->exec_clients[engine->id]); >> - guc->exec_clients[engine->id] = NULL; >> + for_each_engine_id(engine, dev_priv, engine_id) { >> + gc_ptr = &guc->exec_clients[engine_id]; >> + guc_client_free(dev_priv, *gc_ptr); >> + *gc_ptr = NULL; >> } >> } >> >> > > Didn't spot a difference from v1 which I reviewed before. What is new in > v2? > > Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Regards, > Tvrtko Nothing different in this patch; it's v2 of the set, not of each patch within it. Notice in particular the presence of a cover letter (as it's more than a single patch), as agreed at yesterday's Tech Forum. But I only found R-B's from you for v1 3/5 and 4/5; we discussed 1/5 and 2/5 offline but AFAICT you didn't post an R-B for them? .Dave.
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 793b1d9..2106766 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2535,6 +2535,7 @@ static void i915_guc_client_info(struct seq_file *m, struct i915_guc_client *client) { struct intel_engine_cs *engine; + enum intel_engine_id id; uint64_t tot = 0; seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", @@ -2549,11 +2550,11 @@ static void i915_guc_client_info(struct seq_file *m, seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); seq_printf(m, "\tLast submission result: %d\n", client->retcode); - for_each_engine(engine, dev_priv) { + for_each_engine_id(engine, dev_priv, id) { + u64 submissions = client->submissions[id]; + tot += submissions; seq_printf(m, "\tSubmissions: %llu %s\n", - client->submissions[engine->id], - engine->name); - tot += client->submissions[engine->id]; + submissions, engine->name); } seq_printf(m, "\tTotal: %llu\n", tot); } @@ -2598,11 +2599,11 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "GuC last action error code: %d\n", guc.action_err); seq_printf(m, "\nGuC submissions:\n"); - for_each_engine(engine, dev_priv) { + for_each_engine_id(engine, dev_priv, id) { + u64 submissions = guc.submissions[id]; + total += submissions; seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", - engine->name, guc.submissions[engine->id], - guc.last_seqno[engine->id]); - total += guc.submissions[engine->id]; + engine->name, submissions, guc.last_seqno[id]); } seq_printf(m, "\t%s: %llu\n", "Total", total); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 6756db0..ece3479 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc, for_each_engine_masked(engine, dev_priv, client->engines) { struct intel_context *ce = &ctx->engine[engine->id]; - struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; + uint32_t guc_engine_id = engine->guc_id; + struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; struct drm_i915_gem_object *obj; /* TODO: We have a design issue to be solved here. Only when we @@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, gfx_addr = i915_gem_obj_ggtt_offset(ce->state); lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | - (engine->guc_id << GUC_ELC_ENGINE_OFFSET); + (guc_engine_id << GUC_ELC_ENGINE_OFFSET); obj = ce->ringbuf->obj; gfx_addr = i915_gem_obj_ggtt_offset(obj); @@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->ring_next_free_location = gfx_addr; lrc->ring_current_tail_pointer_value = 0; - desc.engines_used |= (1 << engine->guc_id); + desc.engines_used |= (1 << guc_engine_id); } DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", @@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size/sizeof(u32) - 1; + struct intel_engine_cs *engine = rq->engine; struct guc_process_desc *desc; struct guc_wq_item *wqi; void *base; @@ -502,12 +504,11 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, /* Now fill in the 4-word work queue item */ wqi->header = WQ_TYPE_INORDER | (wqi_len << WQ_LEN_SHIFT) | - (rq->engine->guc_id << WQ_TARGET_SHIFT) | + (engine->guc_id << WQ_TARGET_SHIFT) | WQ_NO_WCFLUSH_WAIT; /* The GuC wants only the low-order word of the context descriptor */ - wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, - rq->engine); + wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine); wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; wqi->fence_id = rq->fence.seqno; @@ -1035,11 +1036,14 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) void i915_guc_submission_disable(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; + struct i915_guc_client **gc_ptr; struct intel_engine_cs *engine; + enum intel_engine_id engine_id; - for_each_engine(engine, dev_priv) { - guc_client_free(dev_priv, guc->exec_clients[engine->id]); - guc->exec_clients[engine->id] = NULL; + for_each_engine_id(engine, dev_priv, engine_id) { + gc_ptr = &guc->exec_clients[engine_id]; + guc_client_free(dev_priv, *gc_ptr); + *gc_ptr = NULL; } }
Now that host structures are indexed by host engine-id rather than guc_id, we can usefully convert some for_each_engine() loops to use for_each_engine_id() and avoid multiple dereferences of engine->id. Also a few related tweaks to cache structure members locally wherever they're used more than once or twice, hopefully eliminating memory references. Signed-off-by: Dave Gordon <david.s.gordon@intel.com> --- drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++++-------- drivers/gpu/drm/i915/i915_guc_submission.c | 22 +++++++++++++--------- 2 files changed, 22 insertions(+), 17 deletions(-)