diff mbox series

[v2,3/3] drm/i915: Include timeline seqno in error capture

Message ID 20230217022420.2664116-4-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show
Series More error capture improvements | expand

Commit Message

John Harrison Feb. 17, 2023, 2:24 a.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

The seqno value actually written out to memory is no longer in the
regular HWSP. Instead, it is now in its own private timeline buffer.
Thus, it is no longer visible in an error capture. So, explicitly read
the value and include that in the capture.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++
 drivers/gpu/drm/i915/i915_gpu_error.h | 1 +
 2 files changed, 4 insertions(+)

Comments

Alan Previn March 8, 2023, 10:02 p.m. UTC | #1
On Thu, 2023-02-16 at 18:24 -0800, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
> 
> The seqno value actually written out to memory is no longer in the
> regular HWSP. Instead, it is now in its own private timeline buffer.
> Thus, it is no longer visible in an error capture. So, explicitly read
> the value and include that in the capture.
> 
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
alan: snip.

simple one ... LGTM
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Alan Previn March 8, 2023, 10:07 p.m. UTC | #2
On Wed, 2023-03-08 at 14:02 -0800, Teres Alexis, Alan Previn wrote:
> On Thu, 2023-02-16 at 18:24 -0800, John.C.Harrison@Intel.com wrote:
> > From: John Harrison <John.C.Harrison@Intel.com>
> > 
> > The seqno value actually written out to memory is no longer in the
> > regular HWSP. Instead, it is now in its own private timeline buffer.
> > Thus, it is no longer visible in an error capture. So, explicitly read
> > the value and include that in the capture.
> > 
> > Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> alan: snip.
> 
> simple one ... LGTM
> Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>

alan: I just realized I missed something. In the following hunk,
the seqno printout should use a %u format specifier: hwsp_seqno is a u32 and its uppermost bit may be set, in which case %d would print it as a negative number:
Please consider the above a conditional RB (contingent on this fix) - sorry about that.

@@ -505,6 +505,7 @@  static void error_print_context(struct drm_i915_error_state_buf *m,
 		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
 		   ctx->guilty, ctx->active,
 		   ctx->total_runtime, ctx->avg_runtime);
+	err_printf(m, "  context timeline seqno %d\n", ctx->hwsp_seqno);
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 904f21e1380cd..036a65c9cbf67 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -505,6 +505,7 @@  static void error_print_context(struct drm_i915_error_state_buf *m,
 		   header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
 		   ctx->guilty, ctx->active,
 		   ctx->total_runtime, ctx->avg_runtime);
+	err_printf(m, "  context timeline seqno %d\n", ctx->hwsp_seqno);
 }
 
 static struct i915_vma_coredump *
@@ -1395,6 +1396,8 @@  static bool record_context(struct i915_gem_context_coredump *e,
 	e->sched_attr = ctx->sched;
 	e->guilty = atomic_read(&ctx->guilty_count);
 	e->active = atomic_read(&ctx->active_count);
+	e->hwsp_seqno = (ce->timeline && ce->timeline->hwsp_seqno) ?
+				*ce->timeline->hwsp_seqno : ~0U;
 
 	e->total_runtime = intel_context_get_total_runtime_ns(ce);
 	e->avg_runtime = intel_context_get_avg_runtime_ns(ce);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 56027ffbce51f..a91932cc65317 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -107,6 +107,7 @@  struct intel_engine_coredump {
 		int active;
 		int guilty;
 		struct i915_sched_attr sched_attr;
+		u32 hwsp_seqno;
 	} context;
 
 	struct i915_vma_coredump *vma;