Message ID | 1434393394-21002-15-git-send-email-david.s.gordon@intel.com (mailing list archive) |
---|---|
State | New, archived |
On Mon, Jun 15, 2015 at 07:36:32PM +0100, Dave Gordon wrote:
> This provides a means of reading status and counts relating
> to GuC actions and submissions.

Anything that tends to ease debugging also tends to ease
postmortem error analysis...

>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Signed-off-by: Alex Dai <yu.dai@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 41 +++++++++++++++++++++++++++++++++++
>  1 file changed, 41 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index c6e2582..e699b38 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2388,6 +2388,46 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>  	return 0;
>  }
>  
> +static int i915_guc_info(struct seq_file *m, void *data)
> +{
> +	struct drm_info_node *node = m->private;
> +	struct drm_device *dev = node->minor->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_guc guc;
> +	struct i915_guc_client client = { .client_obj = 0 };
> +
> +	if (!HAS_GUC_SCHED(dev_priv->dev))
> +		return 0;
> +
> +	/* Take a local copy of the GuC data, so we can dump it at leisure */
> +	spin_lock(&dev_priv->guc.host2guc_lock);
> +	guc = dev_priv->guc;
> +	if (guc.execbuf_client) {
> +		spin_lock(&guc.execbuf_client->wq_lock);
> +		client = *guc.execbuf_client;
> +		spin_unlock(&guc.execbuf_client->wq_lock);
> +	}
> +	spin_unlock(&dev_priv->guc.host2guc_lock);
> +
> +	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
> +	seq_printf(m, "GuC last action command: 0x%x\n", guc.action_cmd);
> +	seq_printf(m, "GuC last action status: 0x%x\n", guc.action_status);
> +
> +	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
> +	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);

If these had been a struct you could have minimised that copy.

Again, it would have been best if the debug interface had been added all
at once, so we could take the extra infrastructure or leave it out
altogether.

> +	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
> +	seq_printf(m, "\tTotal submissions: %llu\n", client.submissions);
> +	seq_printf(m, "\tFailed to queue: %u\n", client.q_fail);
> +	seq_printf(m, "\tFailed doorbell: %u\n", client.b_fail);
> +	seq_printf(m, "\tLast submission result: %d\n", client.retcode);
> +
> +	/* Add more as required ... */
> +	seq_puts(m, "\n");

Trailing newline, why?
-Chris
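The struct-grouping suggestion above would look roughly like the following. This is a hypothetical sketch only: the `intel_guc_stats` type, the `stats` member of `struct intel_guc`, and the `guc_dump_stats()` helper are invented here for illustration and are not part of the patch or the i915 headers.

```c
/*
 * Hypothetical sketch: keep the action statistics in their own
 * sub-struct so the debugfs dump can snapshot just that block under
 * the lock, instead of copying the whole struct intel_guc.
 */
struct intel_guc_stats {
	u64 action_count;	/* total host2guc actions issued */
	u32 action_cmd;		/* last action command */
	u32 action_status;	/* last action status */
	u32 action_fail;	/* number of failed actions */
	int action_err;		/* last error code returned */
};

static void guc_dump_stats(struct seq_file *m,
			   struct drm_i915_private *dev_priv)
{
	struct intel_guc_stats stats;

	/* Copy only the statistics block, not the whole struct intel_guc */
	spin_lock(&dev_priv->guc.host2guc_lock);
	stats = dev_priv->guc.stats;
	spin_unlock(&dev_priv->guc.host2guc_lock);

	seq_printf(m, "GuC total action count: %llu\n", stats.action_count);
	seq_printf(m, "GuC last action command: 0x%x\n", stats.action_cmd);
	seq_printf(m, "GuC last action status: 0x%x\n", stats.action_status);
	seq_printf(m, "GuC action failure count: %u\n", stats.action_fail);
	seq_printf(m, "GuC last action error code: %d\n", stats.action_err);
}
```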
On 16/06/15 10:28, Chris Wilson wrote:
> On Mon, Jun 15, 2015 at 07:36:32PM +0100, Dave Gordon wrote:
>> This provides a means of reading status and counts relating
>> to GuC actions and submissions.
>
> Anything that tends to ease debugging also tends to ease
> postmortem error analysis...

So maybe someday we'll add GuC info to an error dump, though I haven't
yet seen any cases where it would have helped. We'll file this under
"future enhancements". The GuC debugfs files remain accessible even when
the GPU is hung, so one can already capture the GuC statistics alongside
the error dump.

>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> Signed-off-by: Alex Dai <yu.dai@intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_debugfs.c | 41 +++++++++++++++++++++++++++++++++++
>>  1 file changed, 41 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index c6e2582..e699b38 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2388,6 +2388,46 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>>  	return 0;
>>  }
>>  
>> +static int i915_guc_info(struct seq_file *m, void *data)
>> +{
>> +	struct drm_info_node *node = m->private;
>> +	struct drm_device *dev = node->minor->dev;
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>> +	struct intel_guc guc;
>> +	struct i915_guc_client client = { .client_obj = 0 };
>> +
>> +	if (!HAS_GUC_SCHED(dev_priv->dev))
>> +		return 0;
>> +
>> +	/* Take a local copy of the GuC data, so we can dump it at leisure */
>> +	spin_lock(&dev_priv->guc.host2guc_lock);
>> +	guc = dev_priv->guc;
>> +	if (guc.execbuf_client) {
>> +		spin_lock(&guc.execbuf_client->wq_lock);
>> +		client = *guc.execbuf_client;
>> +		spin_unlock(&guc.execbuf_client->wq_lock);
>> +	}
>> +	spin_unlock(&dev_priv->guc.host2guc_lock);
>> +
>> +	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
>> +	seq_printf(m, "GuC last action command: 0x%x\n", guc.action_cmd);
>> +	seq_printf(m, "GuC last action status: 0x%x\n", guc.action_status);
>> +
>> +	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
>> +	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
>
> If these had been a struct you could have minimised that copy.

We needed to copy some other parts of the "struct intel_guc" anyway, in
particular the execbuf_client pointer. And anything else we might choose
to print in future, such as the ctx or doorbell bitmaps.

> Again, it would have been best if the debug interface had been added all
> at once, so we could take the extra infrastructure or leave it out
> altogether.

Well, no. You argued for stuff to be added to the structs in the header
files incrementally, so debugfs dumping has to be added in parallel. So
there are /two/ debugfs interfaces, each added in a separate patch. The
first relates only to the loading process; the second to /use/ of the
GuC. You can still leave both out if you choose.

>> +	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
>> +	seq_printf(m, "\tTotal submissions: %llu\n", client.submissions);
>> +	seq_printf(m, "\tFailed to queue: %u\n", client.q_fail);
>> +	seq_printf(m, "\tFailed doorbell: %u\n", client.b_fail);
>> +	seq_printf(m, "\tLast submission result: %d\n", client.retcode);
>> +
>> +	/* Add more as required ... */
>> +	seq_puts(m, "\n");
>
> Trailing newline, why?
> -Chris

Looks prettier when I cat i915_guc* in the debugfs directory.
Also so it's ready for "adding more as required" :)
But I've taken it away again for now ...

.Dave.
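For context, the "future enhancement" discussed above (folding GuC info into the error dump) might look something like the sketch below. It is entirely hypothetical: the `guc_action_*` error-state fields and the `i915_capture_guc_state()` helper do not exist as of this patch; only `struct drm_i915_error_state`, `HAS_GUC_SCHED()`, and the GuC counters printed by the patch below are real at this point.

```c
/*
 * Hypothetical sketch: snapshot the GuC action counters into the error
 * state so they appear in postmortem dumps alongside the GPU state.
 */
static void i915_capture_guc_state(struct drm_i915_private *dev_priv,
				   struct drm_i915_error_state *error)
{
	if (!HAS_GUC_SCHED(dev_priv->dev))
		return;

	/* Same locking as the debugfs dump: snapshot under host2guc_lock */
	spin_lock(&dev_priv->guc.host2guc_lock);
	error->guc_action_count = dev_priv->guc.action_count;
	error->guc_action_cmd = dev_priv->guc.action_cmd;
	error->guc_action_status = dev_priv->guc.action_status;
	spin_unlock(&dev_priv->guc.host2guc_lock);
}
```

The error-state printer would then presumably emit these fields next to the existing per-ring state.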
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index c6e2582..e699b38 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2388,6 +2388,46 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int i915_guc_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_guc guc;
+	struct i915_guc_client client = { .client_obj = 0 };
+
+	if (!HAS_GUC_SCHED(dev_priv->dev))
+		return 0;
+
+	/* Take a local copy of the GuC data, so we can dump it at leisure */
+	spin_lock(&dev_priv->guc.host2guc_lock);
+	guc = dev_priv->guc;
+	if (guc.execbuf_client) {
+		spin_lock(&guc.execbuf_client->wq_lock);
+		client = *guc.execbuf_client;
+		spin_unlock(&guc.execbuf_client->wq_lock);
+	}
+	spin_unlock(&dev_priv->guc.host2guc_lock);
+
+	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
+	seq_printf(m, "GuC last action command: 0x%x\n", guc.action_cmd);
+	seq_printf(m, "GuC last action status: 0x%x\n", guc.action_status);
+
+	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
+	seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
+
+	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
+	seq_printf(m, "\tTotal submissions: %llu\n", client.submissions);
+	seq_printf(m, "\tFailed to queue: %u\n", client.q_fail);
+	seq_printf(m, "\tFailed doorbell: %u\n", client.b_fail);
+	seq_printf(m, "\tLast submission result: %d\n", client.retcode);
+
+	/* Add more as required ... */
+	seq_puts(m, "\n");
+
+	return 0;
+}
+
 static int i915_guc_log_dump(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = m->private;
@@ -5110,6 +5150,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_gem_hws_bsd", i915_hws_info, 0, (void *)VCS},
 	{"i915_gem_hws_vebox", i915_hws_info, 0, (void *)VECS},
 	{"i915_gem_batch_pool", i915_gem_batch_pool_info, 0},
+	{"i915_guc_info", i915_guc_info, 0},
 	{"i915_guc_load_status", i915_guc_load_status_info, 0},
 	{"i915_guc_log_dump", i915_guc_log_dump, 0},
 	{"i915_frequency_info", i915_frequency_info, 0},