diff mbox

[14/15] drm/i915: Debugfs interface for GuC submission statistics

Message ID 1434393394-21002-15-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon June 15, 2015, 6:36 p.m. UTC
This provides a means of reading status and counts relating
to GuC actions and submissions.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Signed-off-by: Alex Dai <yu.dai@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c |   41 +++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

Comments

Chris Wilson June 16, 2015, 9:28 a.m. UTC | #1
On Mon, Jun 15, 2015 at 07:36:32PM +0100, Dave Gordon wrote:
> This provides a means of reading status and counts relating
> to GuC actions and submissions.

Anything that tends to ease debugging also tends to ease
postmortem error analysis...

> 
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Signed-off-by: Alex Dai <yu.dai@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c |   41 +++++++++++++++++++++++++++++++++++
>  1 file changed, 41 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c6e2582..e699b38 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2388,6 +2388,46 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>  	return 0;
>  }
>  
> +static int i915_guc_info(struct seq_file *m, void *data)
> +{
> +	struct drm_info_node *node = m->private;
> +	struct drm_device *dev = node->minor->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_guc guc;
> +	struct i915_guc_client client = { .client_obj = 0 };
> +
> +	if (!HAS_GUC_SCHED(dev_priv->dev))
> +		return 0;
> +
> +	/* Take a local copy of the GuC data, so we can dump it at leisure */
> +	spin_lock(&dev_priv->guc.host2guc_lock);
> +	guc = dev_priv->guc;
> +	if (guc.execbuf_client) {
> +		spin_lock(&guc.execbuf_client->wq_lock);
> +		client = *guc.execbuf_client;
> +		spin_unlock(&guc.execbuf_client->wq_lock);
> +	}
> +	spin_unlock(&dev_priv->guc.host2guc_lock);
> +
> +	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
> +	seq_printf(m, "GuC last action command: 0x%x\n", guc.action_cmd);
> +	seq_printf(m, "GuC last action status: 0x%x\n", guc.action_status);
> +
> +	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
> +	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);

If these had been a struct you could have minimised that copy.
Again, it would have been best if the debug interface had been added all
at once, so we could take the extra infrastructure or leave it out
altogether.

> +	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
> +	seq_printf(m, "\tTotal submissions: %llu\n", client.submissions);
> +	seq_printf(m, "\tFailed to queue: %u\n", client.q_fail);
> +	seq_printf(m, "\tFailed doorbell: %u\n", client.b_fail);
> +	seq_printf(m, "\tLast submission result: %d\n", client.retcode);
> +
> +	/* Add more as required ... */
> +	seq_puts(m, "\n");

Trailing newline, why?
-Chris
Dave Gordon June 24, 2015, 8:27 a.m. UTC | #2
On 16/06/15 10:28, Chris Wilson wrote:
> On Mon, Jun 15, 2015 at 07:36:32PM +0100, Dave Gordon wrote:
>> This provides a means of reading status and counts relating
>> to GuC actions and submissions.
> 
> Anything that tends to ease debugging also tends to ease
> postmortem error analysis...

So maybe someday we'll add GuC info to an error dump, though I haven't
yet seen any cases where it would have helped. We'll file this under
"future enhancements".

The GuC debugfs files remain accessible even when the GPU is hung, so
one can already capture the GuC statistics alongside the error dump.

>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> Signed-off-by: Alex Dai <yu.dai@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_debugfs.c |   41 +++++++++++++++++++++++++++++++++++
>>  1 file changed, 41 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index c6e2582..e699b38 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2388,6 +2388,46 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>>  	return 0;
>>  }
>>  
>> +static int i915_guc_info(struct seq_file *m, void *data)
>> +{
>> +	struct drm_info_node *node = m->private;
>> +	struct drm_device *dev = node->minor->dev;
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>> +	struct intel_guc guc;
>> +	struct i915_guc_client client = { .client_obj = 0 };
>> +
>> +	if (!HAS_GUC_SCHED(dev_priv->dev))
>> +		return 0;
>> +
>> +	/* Take a local copy of the GuC data, so we can dump it at leisure */
>> +	spin_lock(&dev_priv->guc.host2guc_lock);
>> +	guc = dev_priv->guc;
>> +	if (guc.execbuf_client) {
>> +		spin_lock(&guc.execbuf_client->wq_lock);
>> +		client = *guc.execbuf_client;
>> +		spin_unlock(&guc.execbuf_client->wq_lock);
>> +	}
>> +	spin_unlock(&dev_priv->guc.host2guc_lock);
>> +
>> +	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
>> +	seq_printf(m, "GuC last action command: 0x%x\n", guc.action_cmd);
>> +	seq_printf(m, "GuC last action status: 0x%x\n", guc.action_status);
>> +
>> +	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
>> +	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
> 
> If these had been a struct you could have minimised that copy.

We needed to copy some other parts of the "struct intel_guc" anyway, in
particular the execbuf_client pointer. And anything else we might choose
to print in future, such as the ctx or doorbell bitmaps.

> Again, it would have been best if the debug interface had been added all
> at once, so we could take the extra infrastructure or leave it out
> altogether.

Well, no. You argued for stuff to be added to the structs in the header
files incrementally, so debugfs dumping has to be added in parallel. So
there are /two/ debugfs interfaces, each added in a separate patch. The
first relates only to the loading process; the second to /use/ of the
GuC. You can still leave both out if you choose.

>> +	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
>> +	seq_printf(m, "\tTotal submissions: %llu\n", client.submissions);
>> +	seq_printf(m, "\tFailed to queue: %u\n", client.q_fail);
>> +	seq_printf(m, "\tFailed doorbell: %u\n", client.b_fail);
>> +	seq_printf(m, "\tLast submission result: %d\n", client.retcode);
>> +
>> +	/* Add more as required ... */
>> +	seq_puts(m, "\n");
> 
> Trailing newline, why?
> -Chris

Looks prettier when I cat i915_guc* in the debugfs directory. Also so
it's ready for "adding more as required" :) But I've taken it away again
for now ...

.Dave.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c6e2582..e699b38 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2388,6 +2388,46 @@  static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int i915_guc_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_guc guc;
+	struct i915_guc_client client = { .client_obj = 0 };
+
+	if (!HAS_GUC_SCHED(dev_priv->dev))
+		return 0;
+
+	/* Take a local copy of the GuC data, so we can dump it at leisure */
+	spin_lock(&dev_priv->guc.host2guc_lock);
+	guc = dev_priv->guc;
+	if (guc.execbuf_client) {
+		spin_lock(&guc.execbuf_client->wq_lock);
+		client = *guc.execbuf_client;
+		spin_unlock(&guc.execbuf_client->wq_lock);
+	}
+	spin_unlock(&dev_priv->guc.host2guc_lock);
+
+	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
+	seq_printf(m, "GuC last action command: 0x%x\n", guc.action_cmd);
+	seq_printf(m, "GuC last action status: 0x%x\n", guc.action_status);
+
+	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
+	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
+
+	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
+	seq_printf(m, "\tTotal submissions: %llu\n", client.submissions);
+	seq_printf(m, "\tFailed to queue: %u\n", client.q_fail);
+	seq_printf(m, "\tFailed doorbell: %u\n", client.b_fail);
+	seq_printf(m, "\tLast submission result: %d\n", client.retcode);
+
+	/* Add more as required ... */
+	seq_puts(m, "\n");
+
+	return 0;
+}
+
 static int i915_guc_log_dump(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = m->private;
@@ -5110,6 +5150,7 @@  static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS},
 	{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
 	{"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
+	{"i915_guc_info", i915_guc_info, 0},
 	{"i915_guc_load_status", i915_guc_load_status_info, 0},
 	{"i915_guc_log_dump", i915_guc_log_dump, 0},
 	{"i915_frequency_info", i915_frequency_info, 0},