diff mbox

[v2,5/6] drm/i915/guc: use for_each_engine_id() where appropriate

Message ID 1469124942-17943-6-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon July 21, 2016, 6:15 p.m. UTC
Now that host structures are indexed by host engine-id rather than
guc_id, we can usefully convert some for_each_engine() loops to use
for_each_engine_id() and avoid multiple dereferences of engine->id.

Also a few related tweaks to cache structure members locally wherever
they're used more than once or twice, hopefully eliminating memory
references.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        | 17 +++++++++--------
 drivers/gpu/drm/i915/i915_guc_submission.c | 22 +++++++++++++---------
 2 files changed, 22 insertions(+), 17 deletions(-)

Comments

Tvrtko Ursulin July 22, 2016, 10:04 a.m. UTC | #1
On 21/07/16 19:15, Dave Gordon wrote:
> Now that host structures are indexed by host engine-id rather than
> guc_id, we can usefully convert some for_each_engine() loops to use
> for_each_engine_id() and avoid multiple dereferences of engine->id.
>
> Also a few related tweaks to cache structure members locally wherever
> they're used more than once or twice, hopefully eliminating memory
> references.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        | 17 +++++++++--------
>   drivers/gpu/drm/i915/i915_guc_submission.c | 22 +++++++++++++---------
>   2 files changed, 22 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 793b1d9..2106766 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2535,6 +2535,7 @@ static void i915_guc_client_info(struct seq_file *m,
>   				 struct i915_guc_client *client)
>   {
>   	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
>   	uint64_t tot = 0;
>
>   	seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
> @@ -2549,11 +2550,11 @@ static void i915_guc_client_info(struct seq_file *m,
>   	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>   	seq_printf(m, "\tLast submission result: %d\n", client->retcode);
>
> -	for_each_engine(engine, dev_priv) {
> +	for_each_engine_id(engine, dev_priv, id) {
> +		u64 submissions = client->submissions[id];
> +		tot += submissions;
>   		seq_printf(m, "\tSubmissions: %llu %s\n",
> -				client->submissions[engine->id],
> -				engine->name);
> -		tot += client->submissions[engine->id];
> +				submissions, engine->name);
>   	}
>   	seq_printf(m, "\tTotal: %llu\n", tot);
>   }
> @@ -2598,11 +2599,11 @@ static int i915_guc_info(struct seq_file *m, void *data)
>   	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
>
>   	seq_printf(m, "\nGuC submissions:\n");
> -	for_each_engine(engine, dev_priv) {
> +	for_each_engine_id(engine, dev_priv, id) {
> +		u64 submissions = guc.submissions[id];
> +		total += submissions;
>   		seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
> -			engine->name, guc.submissions[engine->id],
> -			guc.last_seqno[engine->id]);
> -		total += guc.submissions[engine->id];
> +			engine->name, submissions, guc.last_seqno[id]);
>   	}
>   	seq_printf(m, "\t%s: %llu\n", "Total", total);
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 6756db0..ece3479 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>
>   	for_each_engine_masked(engine, dev_priv, client->engines) {
>   		struct intel_context *ce = &ctx->engine[engine->id];
> -		struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
> +		uint32_t guc_engine_id = engine->guc_id;
> +		struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
>   		struct drm_i915_gem_object *obj;
>
>   		/* TODO: We have a design issue to be solved here. Only when we
> @@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>   		gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
>   		lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
>   		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
> -				(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
> +				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
>
>   		obj = ce->ringbuf->obj;
>   		gfx_addr = i915_gem_obj_ggtt_offset(obj);
> @@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>   		lrc->ring_next_free_location = gfx_addr;
>   		lrc->ring_current_tail_pointer_value = 0;
>
> -		desc.engines_used |= (1 << engine->guc_id);
> +		desc.engines_used |= (1 << guc_engine_id);
>   	}
>
>   	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
> @@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
>   	/* wqi_len is in DWords, and does not include the one-word header */
>   	const size_t wqi_size = sizeof(struct guc_wq_item);
>   	const u32 wqi_len = wqi_size/sizeof(u32) - 1;
> +	struct intel_engine_cs *engine = rq->engine;
>   	struct guc_process_desc *desc;
>   	struct guc_wq_item *wqi;
>   	void *base;
> @@ -502,12 +504,11 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
>   	/* Now fill in the 4-word work queue item */
>   	wqi->header = WQ_TYPE_INORDER |
>   			(wqi_len << WQ_LEN_SHIFT) |
> -			(rq->engine->guc_id << WQ_TARGET_SHIFT) |
> +			(engine->guc_id << WQ_TARGET_SHIFT) |
>   			WQ_NO_WCFLUSH_WAIT;
>
>   	/* The GuC wants only the low-order word of the context descriptor */
> -	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
> -							     rq->engine);
> +	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
>
>   	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
>   	wqi->fence_id = rq->fence.seqno;
> @@ -1035,11 +1036,14 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>   void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_guc *guc = &dev_priv->guc;
> +	struct i915_guc_client **gc_ptr;
>   	struct intel_engine_cs *engine;
> +	enum intel_engine_id engine_id;
>
> -	for_each_engine(engine, dev_priv) {
> -		guc_client_free(dev_priv, guc->exec_clients[engine->id]);
> -		guc->exec_clients[engine->id] = NULL;
> +	for_each_engine_id(engine, dev_priv, engine_id) {
> +		gc_ptr = &guc->exec_clients[engine_id];
> +		guc_client_free(dev_priv, *gc_ptr);
> +		*gc_ptr = NULL;
>   	}
>   }
>
>

Didn't spot a difference from v1 which I reviewed before. What is new in v2?

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Dave Gordon July 22, 2016, 10:45 a.m. UTC | #2
On 22/07/16 11:04, Tvrtko Ursulin wrote:
>
> On 21/07/16 19:15, Dave Gordon wrote:
>> Now that host structures are indexed by host engine-id rather than
>> guc_id, we can usefully convert some for_each_engine() loops to use
>> for_each_engine_id() and avoid multiple dereferences of engine->id.
>>
>> Also a few related tweaks to cache structure members locally wherever
>> they're used more than once or twice, hopefully eliminating memory
>> references.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c        | 17 +++++++++--------
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 22 +++++++++++++---------
>>   2 files changed, 22 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 793b1d9..2106766 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2535,6 +2535,7 @@ static void i915_guc_client_info(struct seq_file
>> *m,
>>                    struct i915_guc_client *client)
>>   {
>>       struct intel_engine_cs *engine;
>> +    enum intel_engine_id id;
>>       uint64_t tot = 0;
>>
>>       seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
>> @@ -2549,11 +2550,11 @@ static void i915_guc_client_info(struct
>> seq_file *m,
>>       seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
>>       seq_printf(m, "\tLast submission result: %d\n", client->retcode);
>>
>> -    for_each_engine(engine, dev_priv) {
>> +    for_each_engine_id(engine, dev_priv, id) {
>> +        u64 submissions = client->submissions[id];
>> +        tot += submissions;
>>           seq_printf(m, "\tSubmissions: %llu %s\n",
>> -                client->submissions[engine->id],
>> -                engine->name);
>> -        tot += client->submissions[engine->id];
>> +                submissions, engine->name);
>>       }
>>       seq_printf(m, "\tTotal: %llu\n", tot);
>>   }
>> @@ -2598,11 +2599,11 @@ static int i915_guc_info(struct seq_file *m,
>> void *data)
>>       seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
>>
>>       seq_printf(m, "\nGuC submissions:\n");
>> -    for_each_engine(engine, dev_priv) {
>> +    for_each_engine_id(engine, dev_priv, id) {
>> +        u64 submissions = guc.submissions[id];
>> +        total += submissions;
>>           seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
>> -            engine->name, guc.submissions[engine->id],
>> -            guc.last_seqno[engine->id]);
>> -        total += guc.submissions[engine->id];
>> +            engine->name, submissions, guc.last_seqno[id]);
>>       }
>>       seq_printf(m, "\t%s: %llu\n", "Total", total);
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 6756db0..ece3479 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>>
>>       for_each_engine_masked(engine, dev_priv, client->engines) {
>>           struct intel_context *ce = &ctx->engine[engine->id];
>> -        struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
>> +        uint32_t guc_engine_id = engine->guc_id;
>> +        struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
>>           struct drm_i915_gem_object *obj;
>>
>>           /* TODO: We have a design issue to be solved here. Only when we
>> @@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>>           gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
>>           lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
>>           lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
>> -                (engine->guc_id << GUC_ELC_ENGINE_OFFSET);
>> +                (guc_engine_id << GUC_ELC_ENGINE_OFFSET);
>>
>>           obj = ce->ringbuf->obj;
>>           gfx_addr = i915_gem_obj_ggtt_offset(obj);
>> @@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
>>           lrc->ring_next_free_location = gfx_addr;
>>           lrc->ring_current_tail_pointer_value = 0;
>>
>> -        desc.engines_used |= (1 << engine->guc_id);
>> +        desc.engines_used |= (1 << guc_engine_id);
>>       }
>>
>>       DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
>> @@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct
>> i915_guc_client *gc,
>>       /* wqi_len is in DWords, and does not include the one-word
>> header */
>>       const size_t wqi_size = sizeof(struct guc_wq_item);
>>       const u32 wqi_len = wqi_size/sizeof(u32) - 1;
>> +    struct intel_engine_cs *engine = rq->engine;
>>       struct guc_process_desc *desc;
>>       struct guc_wq_item *wqi;
>>       void *base;
>> @@ -502,12 +504,11 @@ static void guc_add_workqueue_item(struct
>> i915_guc_client *gc,
>>       /* Now fill in the 4-word work queue item */
>>       wqi->header = WQ_TYPE_INORDER |
>>               (wqi_len << WQ_LEN_SHIFT) |
>> -            (rq->engine->guc_id << WQ_TARGET_SHIFT) |
>> +            (engine->guc_id << WQ_TARGET_SHIFT) |
>>               WQ_NO_WCFLUSH_WAIT;
>>
>>       /* The GuC wants only the low-order word of the context
>> descriptor */
>> -    wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
>> -                                 rq->engine);
>> +    wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
>> engine);
>>
>>       wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
>>       wqi->fence_id = rq->fence.seqno;
>> @@ -1035,11 +1036,14 @@ int i915_guc_submission_enable(struct
>> drm_i915_private *dev_priv)
>>   void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
>>   {
>>       struct intel_guc *guc = &dev_priv->guc;
>> +    struct i915_guc_client **gc_ptr;
>>       struct intel_engine_cs *engine;
>> +    enum intel_engine_id engine_id;
>>
>> -    for_each_engine(engine, dev_priv) {
>> -        guc_client_free(dev_priv, guc->exec_clients[engine->id]);
>> -        guc->exec_clients[engine->id] = NULL;
>> +    for_each_engine_id(engine, dev_priv, engine_id) {
>> +        gc_ptr = &guc->exec_clients[engine_id];
>> +        guc_client_free(dev_priv, *gc_ptr);
>> +        *gc_ptr = NULL;
>>       }
>>   }
>>
>>
>
> Didn't spot a difference from v1 which I reviewed before. What is new in
> v2?
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Regards,
> Tvrtko

Nothing different in this patch; it's v2 of the set, not of each patch 
within it. Notice in particular the presence of a cover letter (as it's 
more than a single patch), as agreed at yesterday's Tech Forum.

But I only found R-B's from you for v1 3/5 and 4/5; we discussed 1/5 and 
2/5 offline but AFAICT you didn't post an R-B for them?

.Dave.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 793b1d9..2106766 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2535,6 +2535,7 @@  static void i915_guc_client_info(struct seq_file *m,
 				 struct i915_guc_client *client)
 {
 	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	uint64_t tot = 0;
 
 	seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
@@ -2549,11 +2550,11 @@  static void i915_guc_client_info(struct seq_file *m,
 	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
 	seq_printf(m, "\tLast submission result: %d\n", client->retcode);
 
-	for_each_engine(engine, dev_priv) {
+	for_each_engine_id(engine, dev_priv, id) {
+		u64 submissions = client->submissions[id];
+		tot += submissions;
 		seq_printf(m, "\tSubmissions: %llu %s\n",
-				client->submissions[engine->id],
-				engine->name);
-		tot += client->submissions[engine->id];
+				submissions, engine->name);
 	}
 	seq_printf(m, "\tTotal: %llu\n", tot);
 }
@@ -2598,11 +2599,11 @@  static int i915_guc_info(struct seq_file *m, void *data)
 	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
 
 	seq_printf(m, "\nGuC submissions:\n");
-	for_each_engine(engine, dev_priv) {
+	for_each_engine_id(engine, dev_priv, id) {
+		u64 submissions = guc.submissions[id];
+		total += submissions;
 		seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
-			engine->name, guc.submissions[engine->id],
-			guc.last_seqno[engine->id]);
-		total += guc.submissions[engine->id];
+			engine->name, submissions, guc.last_seqno[id]);
 	}
 	seq_printf(m, "\t%s: %llu\n", "Total", total);
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 6756db0..ece3479 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -342,7 +342,8 @@  static void guc_init_ctx_desc(struct intel_guc *guc,
 
 	for_each_engine_masked(engine, dev_priv, client->engines) {
 		struct intel_context *ce = &ctx->engine[engine->id];
-		struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
+		uint32_t guc_engine_id = engine->guc_id;
+		struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
 		struct drm_i915_gem_object *obj;
 
 		/* TODO: We have a design issue to be solved here. Only when we
@@ -361,7 +362,7 @@  static void guc_init_ctx_desc(struct intel_guc *guc,
 		gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
 		lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
 		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
-				(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
+				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
 
 		obj = ce->ringbuf->obj;
 		gfx_addr = i915_gem_obj_ggtt_offset(obj);
@@ -371,7 +372,7 @@  static void guc_init_ctx_desc(struct intel_guc *guc,
 		lrc->ring_next_free_location = gfx_addr;
 		lrc->ring_current_tail_pointer_value = 0;
 
-		desc.engines_used |= (1 << engine->guc_id);
+		desc.engines_used |= (1 << guc_engine_id);
 	}
 
 	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
@@ -461,6 +462,7 @@  static void guc_add_workqueue_item(struct i915_guc_client *gc,
 	/* wqi_len is in DWords, and does not include the one-word header */
 	const size_t wqi_size = sizeof(struct guc_wq_item);
 	const u32 wqi_len = wqi_size/sizeof(u32) - 1;
+	struct intel_engine_cs *engine = rq->engine;
 	struct guc_process_desc *desc;
 	struct guc_wq_item *wqi;
 	void *base;
@@ -502,12 +504,11 @@  static void guc_add_workqueue_item(struct i915_guc_client *gc,
 	/* Now fill in the 4-word work queue item */
 	wqi->header = WQ_TYPE_INORDER |
 			(wqi_len << WQ_LEN_SHIFT) |
-			(rq->engine->guc_id << WQ_TARGET_SHIFT) |
+			(engine->guc_id << WQ_TARGET_SHIFT) |
 			WQ_NO_WCFLUSH_WAIT;
 
 	/* The GuC wants only the low-order word of the context descriptor */
-	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx,
-							     rq->engine);
+	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
 
 	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
 	wqi->fence_id = rq->fence.seqno;
@@ -1035,11 +1036,14 @@  int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc *guc = &dev_priv->guc;
+	struct i915_guc_client **gc_ptr;
 	struct intel_engine_cs *engine;
+	enum intel_engine_id engine_id;
 
-	for_each_engine(engine, dev_priv) {
-		guc_client_free(dev_priv, guc->exec_clients[engine->id]);
-		guc->exec_clients[engine->id] = NULL;
+	for_each_engine_id(engine, dev_priv, engine_id) {
+		gc_ptr = &guc->exec_clients[engine_id];
+		guc_client_free(dev_priv, *gc_ptr);
+		*gc_ptr = NULL;
 	}
 }