diff mbox

[35/43] drm/i915/bdw: Make sure error capture keeps working with Execlists

Message ID 1406217891-8912-36-git-send-email-thomas.daniel@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Thomas Daniel July 24, 2014, 4:04 p.m. UTC
From: Oscar Mateo <oscar.mateo@intel.com>

Since the ringbuffer does not belong per engine anymore, we have to
make sure that we are always recording the correct ringbuffer.

TODO: This is only a small fix to keep basic error capture working, but
we need to add more information for it to be useful (e.g. dump the
context being executed).

v2: Reorder how the ringbuffer is chosen to clarify the change and
rename the variable, both changes suggested by Chris Wilson. Also,
add the TODO comment to the code, as suggested by Daniel.

Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 drivers/gpu/drm/i915/i915_gpu_error.c |   22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

Comments

Daniel Vetter Aug. 15, 2014, 12:14 p.m. UTC | #1
On Thu, Jul 24, 2014 at 05:04:43PM +0100, Thomas Daniel wrote:
> From: Oscar Mateo <oscar.mateo@intel.com>
> 
> Since the ringbuffer does not belong per engine anymore, we have to
> make sure that we are always recording the correct ringbuffer.
> 
> TODO: This is only a small fix to keep basic error capture working, but
> we need to add more information for it to be useful (e.g. dump the
> context being executed).
> 
> v2: Reorder how the ringbuffer is chosen to clarify the change and
> rename the variable, both changes suggested by Chris Wilson. Also,
> add the TODO comment to the code, as suggested by Daniel.
> 
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>

There's a bit too much stuff in-flight to fix up error capture for ppgtt.
I think it's better to stall this patch here until that work is completed.
Please coordinate with Mika here.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_gpu_error.c |   22 ++++++++++++++++++----
>  1 file changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 45b6191..1e38576 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -874,9 +874,6 @@ static void i915_record_ring_state(struct drm_device *dev,
>  		ering->hws = I915_READ(mmio);
>  	}
>  
> -	ering->cpu_ring_head = ring->buffer->head;
> -	ering->cpu_ring_tail = ring->buffer->tail;
> -
>  	ering->hangcheck_score = ring->hangcheck.score;
>  	ering->hangcheck_action = ring->hangcheck.action;
>  
> @@ -936,6 +933,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  
>  	for (i = 0; i < I915_NUM_RINGS; i++) {
>  		struct intel_engine_cs *ring = &dev_priv->ring[i];
> +		struct intel_ringbuffer *rbuf;
>  
>  		error->ring[i].pid = -1;
>  
> @@ -979,8 +977,24 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  			}
>  		}
>  
> +		if (i915.enable_execlists) {
> +			/* TODO: This is only a small fix to keep basic error
> +			 * capture working, but we need to add more information
> +			 * for it to be useful (e.g. dump the context being
> +			 * executed).
> +			 */
> +			if (request)
> +				rbuf = request->ctx->engine[ring->id].ringbuf;
> +			else
> +				rbuf = ring->default_context->engine[ring->id].ringbuf;
> +		} else
> +			rbuf = ring->buffer;
> +
> +		error->ring[i].cpu_ring_head = rbuf->head;
> +		error->ring[i].cpu_ring_tail = rbuf->tail;
> +
>  		error->ring[i].ringbuffer =
> -			i915_error_ggtt_object_create(dev_priv, ring->buffer->obj);
> +			i915_error_ggtt_object_create(dev_priv, rbuf->obj);
>  
>  		if (ring->status_page.obj)
>  			error->ring[i].hws_page =
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Thomas Daniel Aug. 21, 2014, 10:57 a.m. UTC | #2
> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> Vetter
> Sent: Friday, August 15, 2014 1:14 PM
> To: Daniel, Thomas
> Cc: intel-gfx@lists.freedesktop.org; Mika Kuoppala
> Subject: Re: [Intel-gfx] [PATCH 35/43] drm/i915/bdw: Make sure error
> capture keeps working with Execlists
> 
> On Thu, Jul 24, 2014 at 05:04:43PM +0100, Thomas Daniel wrote:
> > From: Oscar Mateo <oscar.mateo@intel.com>
> >
> > Since the ringbuffer does not belong per engine anymore, we have to
> > make sure that we are always recording the correct ringbuffer.
> >
> > TODO: This is only a small fix to keep basic error capture working,
> > but we need to add more information for it to be useful (e.g. dump the
> > context being executed).
> >
> > v2: Reorder how the ringbuffer is chosen to clarify the change and
> > rename the variable, both changes suggested by Chris Wilson. Also, add
> > the TODO comment to the code, as suggested by Daniel.
> >
> > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> 
> There's a bit too much stuff in-flight to fix up error capture for ppgtt.
> I think it's better to stall this patch here until that work is completed.
> Please coordinate with Mika here.
> -Daniel
Mika has now closed the Jira issue.  This patch still applies and looks
correct.  Is it OK to be merged as-is?

Thomas.

> 
> > ---
> >  drivers/gpu/drm/i915/i915_gpu_error.c |   22 ++++++++++++++++++----
> >  1 file changed, 18 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c
> > b/drivers/gpu/drm/i915/i915_gpu_error.c
> > index 45b6191..1e38576 100644
> > --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> > @@ -874,9 +874,6 @@ static void i915_record_ring_state(struct
> drm_device *dev,
> >  		ering->hws = I915_READ(mmio);
> >  	}
> >
> > -	ering->cpu_ring_head = ring->buffer->head;
> > -	ering->cpu_ring_tail = ring->buffer->tail;
> > -
> >  	ering->hangcheck_score = ring->hangcheck.score;
> >  	ering->hangcheck_action = ring->hangcheck.action;
> >
> > @@ -936,6 +933,7 @@ static void i915_gem_record_rings(struct
> > drm_device *dev,
> >
> >  	for (i = 0; i < I915_NUM_RINGS; i++) {
> >  		struct intel_engine_cs *ring = &dev_priv->ring[i];
> > +		struct intel_ringbuffer *rbuf;
> >
> >  		error->ring[i].pid = -1;
> >
> > @@ -979,8 +977,24 @@ static void i915_gem_record_rings(struct
> drm_device *dev,
> >  			}
> >  		}
> >
> > +		if (i915.enable_execlists) {
> > +			/* TODO: This is only a small fix to keep basic error
> > +			 * capture working, but we need to add more
> information
> > +			 * for it to be useful (e.g. dump the context being
> > +			 * executed).
> > +			 */
> > +			if (request)
> > +				rbuf = request->ctx->engine[ring-
> >id].ringbuf;
> > +			else
> > +				rbuf = ring->default_context->engine[ring-
> >id].ringbuf;
> > +		} else
> > +			rbuf = ring->buffer;
> > +
> > +		error->ring[i].cpu_ring_head = rbuf->head;
> > +		error->ring[i].cpu_ring_tail = rbuf->tail;
> > +
> >  		error->ring[i].ringbuffer =
> > -			i915_error_ggtt_object_create(dev_priv, ring-
> >buffer->obj);
> > +			i915_error_ggtt_object_create(dev_priv, rbuf->obj);
> >
> >  		if (ring->status_page.obj)
> >  			error->ring[i].hws_page =
> > --
> > 1.7.9.5
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
Daniel Vetter Aug. 25, 2014, 9 p.m. UTC | #3
On Thu, Aug 21, 2014 at 10:57:09AM +0000, Daniel, Thomas wrote:
> 
> 
> > -----Original Message-----
> > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> > Vetter
> > Sent: Friday, August 15, 2014 1:14 PM
> > To: Daniel, Thomas
> > Cc: intel-gfx@lists.freedesktop.org; Mika Kuoppala
> > Subject: Re: [Intel-gfx] [PATCH 35/43] drm/i915/bdw: Make sure error
> > capture keeps working with Execlists
> > 
> > On Thu, Jul 24, 2014 at 05:04:43PM +0100, Thomas Daniel wrote:
> > > From: Oscar Mateo <oscar.mateo@intel.com>
> > >
> > > Since the ringbuffer does not belong per engine anymore, we have to
> > > make sure that we are always recording the correct ringbuffer.
> > >
> > > TODO: This is only a small fix to keep basic error capture working,
> > > but we need to add more information for it to be useful (e.g. dump the
> > > context being executed).
> > >
> > > v2: Reorder how the ringbuffer is chosen to clarify the change and
> > > rename the variable, both changes suggested by Chris Wilson. Also, add
> > > the TODO comment to the code, as suggested by Daniel.
> > >
> > > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> > 
> > There's a bit too much stuff in-flight to fix up error capture for ppgtt.
> > I think it's better to stall this patch here until that work is completed.
> > Please coordinate with Mika here.
> > -Daniel
> Mika has now closed the Jira issue.  This patch still applies and looks
> correct.  Is it OK to be merged as-is?

Apparently Mika closed the Jira before I've merged all the patches. I'd
like to do that first, then come back to this patch.
-Daniel
Daniel Vetter Aug. 25, 2014, 9:29 p.m. UTC | #4
On Thu, Aug 21, 2014 at 10:57:09AM +0000, Daniel, Thomas wrote:
> 
> 
> > -----Original Message-----
> > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel
> > Vetter
> > Sent: Friday, August 15, 2014 1:14 PM
> > To: Daniel, Thomas
> > Cc: intel-gfx@lists.freedesktop.org; Mika Kuoppala
> > Subject: Re: [Intel-gfx] [PATCH 35/43] drm/i915/bdw: Make sure error
> > capture keeps working with Execlists
> > 
> > On Thu, Jul 24, 2014 at 05:04:43PM +0100, Thomas Daniel wrote:
> > > From: Oscar Mateo <oscar.mateo@intel.com>
> > >
> > > Since the ringbuffer does not belong per engine anymore, we have to
> > > make sure that we are always recording the correct ringbuffer.
> > >
> > > TODO: This is only a small fix to keep basic error capture working,
> > > but we need to add more information for it to be useful (e.g. dump the
> > > context being executed).
> > >
> > > v2: Reorder how the ringbuffer is chosen to clarify the change and
> > > rename the variable, both changes suggested by Chris Wilson. Also, add
> > > the TODO comment to the code, as suggested by Daniel.
> > >
> > > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> > 
> > There's a bit too much stuff in-flight to fix up error capture for ppgtt.
> > I think it's better to stall this patch here until that work is completed.
> > Please coordinate with Mika here.
> > -Daniel
> Mika has now closed the Jira issue.  This patch still applies and looks
> correct.  Is it OK to be merged as-is?

Queued for -next, thanks for the patch.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 45b6191..1e38576 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -874,9 +874,6 @@  static void i915_record_ring_state(struct drm_device *dev,
 		ering->hws = I915_READ(mmio);
 	}
 
-	ering->cpu_ring_head = ring->buffer->head;
-	ering->cpu_ring_tail = ring->buffer->tail;
-
 	ering->hangcheck_score = ring->hangcheck.score;
 	ering->hangcheck_action = ring->hangcheck.action;
 
@@ -936,6 +933,7 @@  static void i915_gem_record_rings(struct drm_device *dev,
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
 		struct intel_engine_cs *ring = &dev_priv->ring[i];
+		struct intel_ringbuffer *rbuf;
 
 		error->ring[i].pid = -1;
 
@@ -979,8 +977,24 @@  static void i915_gem_record_rings(struct drm_device *dev,
 			}
 		}
 
+		if (i915.enable_execlists) {
+			/* TODO: This is only a small fix to keep basic error
+			 * capture working, but we need to add more information
+			 * for it to be useful (e.g. dump the context being
+			 * executed).
+			 */
+			if (request)
+				rbuf = request->ctx->engine[ring->id].ringbuf;
+			else
+				rbuf = ring->default_context->engine[ring->id].ringbuf;
+		} else
+			rbuf = ring->buffer;
+
+		error->ring[i].cpu_ring_head = rbuf->head;
+		error->ring[i].cpu_ring_tail = rbuf->tail;
+
 		error->ring[i].ringbuffer =
-			i915_error_ggtt_object_create(dev_priv, ring->buffer->obj);
+			i915_error_ggtt_object_create(dev_priv, rbuf->obj);
 
 		if (ring->status_page.obj)
 			error->ring[i].hws_page =