diff mbox

[3/5] drm/i915/guc: use a separate GuC client for each engine

Message ID 1468933157-22412-3-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon July 19, 2016, 12:59 p.m. UTC
When using a single GuC client for multiple engines, the i915 driver has
to merge all work items into a single work queue, which the GuC firmware
then demultiplexes into separate submission queues per engine. In
theory, this could lead to the single queue becoming a bottleneck in
which an excess of outstanding work for one or more engines might
prevent work for an idle engine reaching the hardware.

To reduce this risk, we can create one GuC client per engine. Each will
have its own workqueue, to be used only for work targeting a single
engine, so there will be no cross-engine contention for workqueue slots.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        | 25 ++++++++++++++++-----
 drivers/gpu/drm/i915/i915_guc_submission.c | 35 +++++++++++++++++++-----------
 drivers/gpu/drm/i915/intel_guc.h           |  2 +-
 3 files changed, 42 insertions(+), 20 deletions(-)

Comments

Tvrtko Ursulin July 19, 2016, 3:02 p.m. UTC | #1
On 19/07/16 13:59, Dave Gordon wrote:
> When using a single GuC client for multiple engines, the i915 driver has
> to merge all work items into a single work queue, which the GuC firmware
> then demultiplexes into separate submission queues per engine. In
> theory, this could lead to the single queue becoming a bottleneck in
> which an excess of outstanding work for one or more engines might
> prevent work for an idle engine reaching the hardware.
>
> To reduce this risk, we can create one GuC client per engine. Each will
> have its own workqueue, to be used only for work targeting a single
> engine, so there will be no cross-engine contention for workqueue slots.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        | 25 ++++++++++++++++-----
>   drivers/gpu/drm/i915/i915_guc_submission.c | 35 +++++++++++++++++++-----------
>   drivers/gpu/drm/i915/intel_guc.h           |  2 +-
>   3 files changed, 42 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 90aef45..5cbb8ef 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2570,20 +2570,26 @@ static int i915_guc_info(struct seq_file *m, void *data)
>   	struct drm_device *dev = node->minor->dev;
>   	struct drm_i915_private *dev_priv = to_i915(dev);
>   	struct intel_guc guc;
> -	struct i915_guc_client client = {};
> +	struct i915_guc_client *clients;
>   	struct intel_engine_cs *engine;
> +	enum intel_engine_id id;
>   	u64 total = 0;
>
>   	if (!HAS_GUC_SCHED(dev_priv))
>   		return 0;
>
> +	clients = kcalloc(I915_NUM_ENGINES, sizeof(*clients), GFP_KERNEL);
> +	if (clients == NULL)
> +		return -ENOMEM;
> +
>   	if (mutex_lock_interruptible(&dev->struct_mutex))
> -		return 0;
> +		goto done;
>
>   	/* Take a local copy of the GuC data, so we can dump it at leisure */
>   	guc = dev_priv->guc;
> -	if (guc.execbuf_client)
> -		client = *guc.execbuf_client;
> +	for_each_engine_id(engine, dev_priv, id)
> +		if (guc.exec_clients[id])
> +			clients[id] = *guc.exec_clients[id];
>
>   	mutex_unlock(&dev->struct_mutex);
>
> @@ -2606,11 +2612,18 @@ static int i915_guc_info(struct seq_file *m, void *data)
>   	}
>   	seq_printf(m, "\t%s: %llu\n", "Total", total);
>
> -	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
> -	i915_guc_client_info(m, dev_priv, &client);
> +	for_each_engine_id(engine, dev_priv, id) {
> +		seq_printf(m, "\nGuC exec_client[%d] @ %p:\n",
> +				id, guc.exec_clients[id]);

Minor and not a blocker for this patch, but I would potentially 
re-consider if printing out the client pointer is useful.

> +		if (guc.exec_clients[id])
> +			i915_guc_client_info(m, dev_priv, &clients[id]);
> +	}
>
>   	/* Add more as required ... */
>
> +done:
> +	kfree(clients);
> +
>   	return 0;
>   }
>
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index dc5f485..b0f9945 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -434,7 +434,9 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>   {
>   	const size_t wqi_size = sizeof(struct guc_wq_item);
> -	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
> +	enum intel_engine_id engine_id = request->engine->id;
> +	struct intel_guc *guc = &request->i915->guc;
> +	struct i915_guc_client *gc = guc->exec_clients[engine_id];
>   	struct guc_process_desc *desc;
>   	u32 freespace;
>
> @@ -589,7 +591,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
>   {
>   	unsigned int engine_id = rq->engine->id;
>   	struct intel_guc *guc = &rq->i915->guc;
> -	struct i915_guc_client *client = guc->execbuf_client;
> +	struct i915_guc_client *client = guc->exec_clients[engine_id];
>   	int b_ret;
>
>   	guc_add_workqueue_item(client, rq);
> @@ -723,7 +725,7 @@ static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
>    */
>   static void guc_init_doorbell_hw(struct intel_guc *guc)
>   {
> -	struct i915_guc_client *client = guc->execbuf_client;
> +	struct i915_guc_client *client = guc->exec_clients[RCS];
>   	uint16_t db_id;
>   	int i, err;
>
> @@ -1004,17 +1006,21 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_guc *guc = &dev_priv->guc;
>   	struct i915_guc_client *client;
> +	struct intel_engine_cs *engine;
>
> -	/* client for execbuf submission */
> -	client = guc_client_alloc(dev_priv,
> -				  GUC_CTX_PRIORITY_KMD_NORMAL,
> -				  dev_priv->kernel_context);
> -	if (!client) {
> -		DRM_ERROR("Failed to create execbuf guc_client\n");
> -		return -ENOMEM;
> +	for_each_engine(engine, dev_priv) {
> +		/* client for execbuf submission */
> +		client = guc_client_alloc(dev_priv,
> +					  GUC_CTX_PRIORITY_KMD_NORMAL,
> +					  dev_priv->kernel_context);
> +		if (!client) {
> +			DRM_ERROR("Failed to create GuC client(s)\n");
> +			return -ENOMEM;
> +		}
> +
> +		guc->exec_clients[engine->id] = client;
>   	}
>
> -	guc->execbuf_client = client;
>   	host2guc_sample_forcewake(guc, client);
>   	guc_init_doorbell_hw(guc);
>
> @@ -1024,9 +1030,12 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
>   void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_guc *guc = &dev_priv->guc;
> +	struct intel_engine_cs *engine;
>
> -	guc_client_free(dev_priv, guc->execbuf_client);
> -	guc->execbuf_client = NULL;
> +	for_each_engine(engine, dev_priv) {
> +		guc_client_free(dev_priv, guc->exec_clients[engine->id]);
> +		guc->exec_clients[engine->id] = NULL;
> +	}
>   }
>
>   void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 3e3e743..7b4cc4d 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -132,7 +132,7 @@ struct intel_guc {
>   	struct drm_i915_gem_object *ctx_pool_obj;
>   	struct ida ctx_ids;
>
> -	struct i915_guc_client *execbuf_client;
> +	struct i915_guc_client *exec_clients[I915_NUM_ENGINES];
>
>   	DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS);
>   	uint32_t db_cacheline;		/* Cyclic counter mod pagesize	*/
>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Dave Gordon July 19, 2016, 3:13 p.m. UTC | #2
On 19/07/16 16:02, Tvrtko Ursulin wrote:
>
> On 19/07/16 13:59, Dave Gordon wrote:
>> When using a single GuC client for multiple engines, the i915 driver has
>> to merge all work items into a single work queue, which the GuC firmware
>> then demultiplexes into separate submission queues per engine. In
>> theory, this could lead to the single queue becoming a bottleneck in
>> which an excess of outstanding work for one or more engines might
>> prevent work for an idle engine reaching the hardware.
>>
>> To reduce this risk, we can create one GuC client per engine. Each will
>> have its own workqueue, to be used only for work targeting a single
>> engine, so there will be no cross-engine contention for workqueue slots.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c        | 25 ++++++++++++++++-----
>>   drivers/gpu/drm/i915/i915_guc_submission.c | 35
>> +++++++++++++++++++-----------
>>   drivers/gpu/drm/i915/intel_guc.h           |  2 +-
>>   3 files changed, 42 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 90aef45..5cbb8ef 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2570,20 +2570,26 @@ static int i915_guc_info(struct seq_file *m,
>> void *data)
>>       struct drm_device *dev = node->minor->dev;
>>       struct drm_i915_private *dev_priv = to_i915(dev);
>>       struct intel_guc guc;
>> -    struct i915_guc_client client = {};
>> +    struct i915_guc_client *clients;
>>       struct intel_engine_cs *engine;
>> +    enum intel_engine_id id;
>>       u64 total = 0;
>>
>>       if (!HAS_GUC_SCHED(dev_priv))
>>           return 0;
>>
>> +    clients = kcalloc(I915_NUM_ENGINES, sizeof(*clients), GFP_KERNEL);
>> +    if (clients == NULL)
>> +        return -ENOMEM;
>> +
>>       if (mutex_lock_interruptible(&dev->struct_mutex))
>> -        return 0;
>> +        goto done;
>>
>>       /* Take a local copy of the GuC data, so we can dump it at
>> leisure */
>>       guc = dev_priv->guc;
>> -    if (guc.execbuf_client)
>> -        client = *guc.execbuf_client;
>> +    for_each_engine_id(engine, dev_priv, id)
>> +        if (guc.exec_clients[id])
>> +            clients[id] = *guc.exec_clients[id];
>>
>>       mutex_unlock(&dev->struct_mutex);
>>
>> @@ -2606,11 +2612,18 @@ static int i915_guc_info(struct seq_file *m,
>> void *data)
>>       }
>>       seq_printf(m, "\t%s: %llu\n", "Total", total);
>>
>> -    seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
>> -    i915_guc_client_info(m, dev_priv, &client);
>> +    for_each_engine_id(engine, dev_priv, id) {
>> +        seq_printf(m, "\nGuC exec_client[%d] @ %p:\n",
>> +                id, guc.exec_clients[id]);
>
> Minor and not a blocker for this patch, but I would potentially
> re-consider if printing out the client pointer is useful.

It's really only useful to know whether it's NULL or not; but printing 
the pointer itself is simpler than printing a message saying that. This 
is a debugfs interface, so the content is pretty much ad-hoc.

.Dave.

>> +        if (guc.exec_clients[id])
>> +            i915_guc_client_info(m, dev_priv, &clients[id]);
>> +    }
>>
>>       /* Add more as required ... */
>>
>> +done:
>> +    kfree(clients);
>> +
>>       return 0;
>>   }
>>
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index dc5f485..b0f9945 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -434,7 +434,9 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
>>   int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
>>   {
>>       const size_t wqi_size = sizeof(struct guc_wq_item);
>> -    struct i915_guc_client *gc = request->i915->guc.execbuf_client;
>> +    enum intel_engine_id engine_id = request->engine->id;
>> +    struct intel_guc *guc = &request->i915->guc;
>> +    struct i915_guc_client *gc = guc->exec_clients[engine_id];
>>       struct guc_process_desc *desc;
>>       u32 freespace;
>>
>> @@ -589,7 +591,7 @@ int i915_guc_submit(struct drm_i915_gem_request *rq)
>>   {
>>       unsigned int engine_id = rq->engine->id;
>>       struct intel_guc *guc = &rq->i915->guc;
>> -    struct i915_guc_client *client = guc->execbuf_client;
>> +    struct i915_guc_client *client = guc->exec_clients[engine_id];
>>       int b_ret;
>>
>>       guc_add_workqueue_item(client, rq);
>> @@ -723,7 +725,7 @@ static bool guc_doorbell_check(struct intel_guc
>> *guc, uint16_t db_id)
>>    */
>>   static void guc_init_doorbell_hw(struct intel_guc *guc)
>>   {
>> -    struct i915_guc_client *client = guc->execbuf_client;
>> +    struct i915_guc_client *client = guc->exec_clients[RCS];
>>       uint16_t db_id;
>>       int i, err;
>>
>> @@ -1004,17 +1006,21 @@ int i915_guc_submission_enable(struct
>> drm_i915_private *dev_priv)
>>   {
>>       struct intel_guc *guc = &dev_priv->guc;
>>       struct i915_guc_client *client;
>> +    struct intel_engine_cs *engine;
>>
>> -    /* client for execbuf submission */
>> -    client = guc_client_alloc(dev_priv,
>> -                  GUC_CTX_PRIORITY_KMD_NORMAL,
>> -                  dev_priv->kernel_context);
>> -    if (!client) {
>> -        DRM_ERROR("Failed to create execbuf guc_client\n");
>> -        return -ENOMEM;
>> +    for_each_engine(engine, dev_priv) {
>> +        /* client for execbuf submission */
>> +        client = guc_client_alloc(dev_priv,
>> +                      GUC_CTX_PRIORITY_KMD_NORMAL,
>> +                      dev_priv->kernel_context);
>> +        if (!client) {
>> +            DRM_ERROR("Failed to create GuC client(s)\n");
>> +            return -ENOMEM;
>> +        }
>> +
>> +        guc->exec_clients[engine->id] = client;
>>       }
>>
>> -    guc->execbuf_client = client;
>>       host2guc_sample_forcewake(guc, client);
>>       guc_init_doorbell_hw(guc);
>>
>> @@ -1024,9 +1030,12 @@ int i915_guc_submission_enable(struct
>> drm_i915_private *dev_priv)
>>   void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
>>   {
>>       struct intel_guc *guc = &dev_priv->guc;
>> +    struct intel_engine_cs *engine;
>>
>> -    guc_client_free(dev_priv, guc->execbuf_client);
>> -    guc->execbuf_client = NULL;
>> +    for_each_engine(engine, dev_priv) {
>> +        guc_client_free(dev_priv, guc->exec_clients[engine->id]);
>> +        guc->exec_clients[engine->id] = NULL;
>> +    }
>>   }
>>
>>   void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 3e3e743..7b4cc4d 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -132,7 +132,7 @@ struct intel_guc {
>>       struct drm_i915_gem_object *ctx_pool_obj;
>>       struct ida ctx_ids;
>>
>> -    struct i915_guc_client *execbuf_client;
>> +    struct i915_guc_client *exec_clients[I915_NUM_ENGINES];
>>
>>       DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS);
>>       uint32_t db_cacheline;        /* Cyclic counter mod pagesize    */
>>
>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Regards,
> Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 90aef45..5cbb8ef 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2570,20 +2570,26 @@  static int i915_guc_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct intel_guc guc;
-	struct i915_guc_client client = {};
+	struct i915_guc_client *clients;
 	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
 	u64 total = 0;
 
 	if (!HAS_GUC_SCHED(dev_priv))
 		return 0;
 
+	clients = kcalloc(I915_NUM_ENGINES, sizeof(*clients), GFP_KERNEL);
+	if (clients == NULL)
+		return -ENOMEM;
+
 	if (mutex_lock_interruptible(&dev->struct_mutex))
-		return 0;
+		goto done;
 
 	/* Take a local copy of the GuC data, so we can dump it at leisure */
 	guc = dev_priv->guc;
-	if (guc.execbuf_client)
-		client = *guc.execbuf_client;
+	for_each_engine_id(engine, dev_priv, id)
+		if (guc.exec_clients[id])
+			clients[id] = *guc.exec_clients[id];
 
 	mutex_unlock(&dev->struct_mutex);
 
@@ -2606,11 +2612,18 @@  static int i915_guc_info(struct seq_file *m, void *data)
 	}
 	seq_printf(m, "\t%s: %llu\n", "Total", total);
 
-	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
-	i915_guc_client_info(m, dev_priv, &client);
+	for_each_engine_id(engine, dev_priv, id) {
+		seq_printf(m, "\nGuC exec_client[%d] @ %p:\n",
+				id, guc.exec_clients[id]);
+		if (guc.exec_clients[id])
+			i915_guc_client_info(m, dev_priv, &clients[id]);
+	}
 
 	/* Add more as required ... */
 
+done:
+	kfree(clients);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index dc5f485..b0f9945 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -434,7 +434,9 @@  static void guc_fini_ctx_desc(struct intel_guc *guc,
 int i915_guc_wq_check_space(struct drm_i915_gem_request *request)
 {
 	const size_t wqi_size = sizeof(struct guc_wq_item);
-	struct i915_guc_client *gc = request->i915->guc.execbuf_client;
+	enum intel_engine_id engine_id = request->engine->id;
+	struct intel_guc *guc = &request->i915->guc;
+	struct i915_guc_client *gc = guc->exec_clients[engine_id];
 	struct guc_process_desc *desc;
 	u32 freespace;
 
@@ -589,7 +591,7 @@  int i915_guc_submit(struct drm_i915_gem_request *rq)
 {
 	unsigned int engine_id = rq->engine->id;
 	struct intel_guc *guc = &rq->i915->guc;
-	struct i915_guc_client *client = guc->execbuf_client;
+	struct i915_guc_client *client = guc->exec_clients[engine_id];
 	int b_ret;
 
 	guc_add_workqueue_item(client, rq);
@@ -723,7 +725,7 @@  static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
  */
 static void guc_init_doorbell_hw(struct intel_guc *guc)
 {
-	struct i915_guc_client *client = guc->execbuf_client;
+	struct i915_guc_client *client = guc->exec_clients[RCS];
 	uint16_t db_id;
 	int i, err;
 
@@ -1004,17 +1006,21 @@  int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client;
+	struct intel_engine_cs *engine;
 
-	/* client for execbuf submission */
-	client = guc_client_alloc(dev_priv,
-				  GUC_CTX_PRIORITY_KMD_NORMAL,
-				  dev_priv->kernel_context);
-	if (!client) {
-		DRM_ERROR("Failed to create execbuf guc_client\n");
-		return -ENOMEM;
+	for_each_engine(engine, dev_priv) {
+		/* client for execbuf submission */
+		client = guc_client_alloc(dev_priv,
+					  GUC_CTX_PRIORITY_KMD_NORMAL,
+					  dev_priv->kernel_context);
+		if (!client) {
+			DRM_ERROR("Failed to create GuC client(s)\n");
+			return -ENOMEM;
+		}
+
+		guc->exec_clients[engine->id] = client;
 	}
 
-	guc->execbuf_client = client;
 	host2guc_sample_forcewake(guc, client);
 	guc_init_doorbell_hw(guc);
 
@@ -1024,9 +1030,12 @@  int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc *guc = &dev_priv->guc;
+	struct intel_engine_cs *engine;
 
-	guc_client_free(dev_priv, guc->execbuf_client);
-	guc->execbuf_client = NULL;
+	for_each_engine(engine, dev_priv) {
+		guc_client_free(dev_priv, guc->exec_clients[engine->id]);
+		guc->exec_clients[engine->id] = NULL;
+	}
 }
 
 void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 3e3e743..7b4cc4d 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -132,7 +132,7 @@  struct intel_guc {
 	struct drm_i915_gem_object *ctx_pool_obj;
 	struct ida ctx_ids;
 
-	struct i915_guc_client *execbuf_client;
+	struct i915_guc_client *exec_clients[I915_NUM_ENGINES];
 
 	DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS);
 	uint32_t db_cacheline;		/* Cyclic counter mod pagesize	*/