diff mbox

[5/5] drm/i915/error: Capture WA ctx batch in error state

Message ID 1454007684-16777-6-git-send-email-arun.siluvery@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

arun.siluvery@linux.intel.com Jan. 28, 2016, 7:01 p.m. UTC
From Gen8 onwards we apply ctx workarounds using special batch buffers that
execute during save/restore, good to have them in error state.

Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h       |  2 +-
 drivers/gpu/drm/i915/i915_gpu_error.c | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

Comments

Mika Kuoppala Jan. 29, 2016, 7:52 a.m. UTC | #1
Arun Siluvery <arun.siluvery@linux.intel.com> writes:

> From Gen8 onwards we apply ctx workarounds using special batch buffers that
> execute during save/restore, good to have them in error state.
>
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h       |  2 +-
>  drivers/gpu/drm/i915/i915_gpu_error.c | 25 +++++++++++++++++++++++++
>  2 files changed, 26 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4b199a4..8440c35 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -567,7 +567,7 @@ struct drm_i915_error_state {
>  			int page_count;
>  			u64 gtt_offset;
>  			u32 *pages[0];
> -		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
> +		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page, *wa_ctx;
>  
>  		struct drm_i915_error_request {
>  			u64 ctx_desc;
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index 8b1a1c0..e2c32d4 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -561,6 +561,24 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>  			}
>  		}
>  
> +		if ((obj = error->ring[i].wa_ctx)) {
> +			u64 wa_ctx_offset = obj->gtt_offset;
> +			u32 *wa_ctx_page = &obj->pages[0][0];
> +
> +			err_printf(m, "\n%s --- WA Ctx batch buffer = 0x%08llx\n",
> +				   dev_priv->ring[i].name, wa_ctx_offset);
> +			offset = 0;
> +			for (elt = 0; elt < PAGE_SIZE/32; elt += 4) {

PAGE_SIZE/16 ?

Also we have wa_ctx->size. Is there a reason to output past that?

Assumption is that after wa_ctx->size and BB_END, there should
be zeros only. If it is a concern that something has corrupted
that space, you could print only nonzero ones after ctx_size?

Thanks,
-Mika


> +				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
> +					   offset,
> +					   wa_ctx_page[elt],
> +					   wa_ctx_page[elt+1],
> +					   wa_ctx_page[elt+2],
> +					   wa_ctx_page[elt+3]);
> +				offset += 16;
> +			}
> +		}
> +
>  		if ((obj = error->ring[i].ctx)) {
>  			err_printf(m, "%s --- HW Context = 0x%08x\n",
>  				   dev_priv->ring[i].name,
> @@ -654,6 +672,8 @@ static void i915_error_state_free(struct kref *error_ref)
>  		i915_error_object_free(error->ring[i].hws_page);
>  		i915_error_object_free(error->ring[i].ctx);
>  		kfree(error->ring[i].requests);
> +		if (i == RCS)
> +			i915_error_object_free(error->ring[i].wa_ctx);
>  	}
>  
>  	i915_error_object_free(error->semaphore_obj);
> @@ -1165,6 +1185,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
>  		error->ring[i].hws_page =
>  			i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
>  
> +		if (INTEL_INFO(dev)->gen >= 8 && ring->id == RCS) {
> +			error->ring[i].wa_ctx =
> +				i915_error_ggtt_object_create(dev_priv, ring->wa_ctx.obj);
> +		}
> +
>  		i915_gem_record_active_context(ring, error, &error->ring[i]);
>  
>  		count = 0;
> -- 
> 1.9.1
arun.siluvery@linux.intel.com Jan. 29, 2016, 10:09 a.m. UTC | #2
On 29/01/2016 07:52, Mika Kuoppala wrote:
> Arun Siluvery <arun.siluvery@linux.intel.com> writes:
>
>>  From Gen8 onwards we apply ctx workarounds using special batch buffers that
>> execute during save/restore, good to have them in error state.
>>
>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h       |  2 +-
>>   drivers/gpu/drm/i915/i915_gpu_error.c | 25 +++++++++++++++++++++++++
>>   2 files changed, 26 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 4b199a4..8440c35 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -567,7 +567,7 @@ struct drm_i915_error_state {
>>   			int page_count;
>>   			u64 gtt_offset;
>>   			u32 *pages[0];
>> -		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
>> +		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page, *wa_ctx;
>>
>>   		struct drm_i915_error_request {
>>   			u64 ctx_desc;
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index 8b1a1c0..e2c32d4 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -561,6 +561,24 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
>>   			}
>>   		}
>>
>> +		if ((obj = error->ring[i].wa_ctx)) {
>> +			u64 wa_ctx_offset = obj->gtt_offset;
>> +			u32 *wa_ctx_page = &obj->pages[0][0];
>> +
>> +			err_printf(m, "\n%s --- WA Ctx batch buffer = 0x%08llx\n",
>> +				   dev_priv->ring[i].name, wa_ctx_offset);
>> +			offset = 0;
>> +			for (elt = 0; elt < PAGE_SIZE/32; elt += 4) {
>
> PAGE_SIZE/16 ?
>
> Also we have wa_ctx->size. Is there a reason to output past that?

No reason. However, wa_ctx->size is not the total size; it is the size of one WA
batch, although we can easily get the total size by combining all (two) of them.
>
> Assumption is that after wa_ctx->size and BB_END, there should
> be zeros only. If it is a concern that something has corrupted
> that space, you could print only nonzero ones after ctx_size?
There is no concern that it gets corrupted; I will update the patch to use
wa_ctx->size and print only that many values.

regards
Arun

>
> Thanks,
> -Mika
>
>
>> +				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
>> +					   offset,
>> +					   wa_ctx_page[elt],
>> +					   wa_ctx_page[elt+1],
>> +					   wa_ctx_page[elt+2],
>> +					   wa_ctx_page[elt+3]);
>> +				offset += 16;
>> +			}
>> +		}
>> +
>>   		if ((obj = error->ring[i].ctx)) {
>>   			err_printf(m, "%s --- HW Context = 0x%08x\n",
>>   				   dev_priv->ring[i].name,
>> @@ -654,6 +672,8 @@ static void i915_error_state_free(struct kref *error_ref)
>>   		i915_error_object_free(error->ring[i].hws_page);
>>   		i915_error_object_free(error->ring[i].ctx);
>>   		kfree(error->ring[i].requests);
>> +		if (i == RCS)
>> +			i915_error_object_free(error->ring[i].wa_ctx);
>>   	}
>>
>>   	i915_error_object_free(error->semaphore_obj);
>> @@ -1165,6 +1185,11 @@ static void i915_gem_record_rings(struct drm_device *dev,
>>   		error->ring[i].hws_page =
>>   			i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
>>
>> +		if (INTEL_INFO(dev)->gen >= 8 && ring->id == RCS) {
>> +			error->ring[i].wa_ctx =
>> +				i915_error_ggtt_object_create(dev_priv, ring->wa_ctx.obj);
>> +		}
>> +
>>   		i915_gem_record_active_context(ring, error, &error->ring[i]);
>>
>>   		count = 0;
>> --
>> 1.9.1
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4b199a4..8440c35 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -567,7 +567,7 @@  struct drm_i915_error_state {
 			int page_count;
 			u64 gtt_offset;
 			u32 *pages[0];
-		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
+		} *req_ringbuffer, *hw_ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page, *wa_ctx;
 
 		struct drm_i915_error_request {
 			u64 ctx_desc;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8b1a1c0..e2c32d4 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -561,6 +561,24 @@  int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			}
 		}
 
+		if ((obj = error->ring[i].wa_ctx)) {
+			u64 wa_ctx_offset = obj->gtt_offset;
+			u32 *wa_ctx_page = &obj->pages[0][0];
+
+			err_printf(m, "\n%s --- WA Ctx batch buffer = 0x%08llx\n",
+				   dev_priv->ring[i].name, wa_ctx_offset);
+			offset = 0;
+			for (elt = 0; elt < PAGE_SIZE/32; elt += 4) {
+				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
+					   offset,
+					   wa_ctx_page[elt],
+					   wa_ctx_page[elt+1],
+					   wa_ctx_page[elt+2],
+					   wa_ctx_page[elt+3]);
+				offset += 16;
+			}
+		}
+
 		if ((obj = error->ring[i].ctx)) {
 			err_printf(m, "%s --- HW Context = 0x%08x\n",
 				   dev_priv->ring[i].name,
@@ -654,6 +672,8 @@  static void i915_error_state_free(struct kref *error_ref)
 		i915_error_object_free(error->ring[i].hws_page);
 		i915_error_object_free(error->ring[i].ctx);
 		kfree(error->ring[i].requests);
+		if (i == RCS)
+			i915_error_object_free(error->ring[i].wa_ctx);
 	}
 
 	i915_error_object_free(error->semaphore_obj);
@@ -1165,6 +1185,11 @@  static void i915_gem_record_rings(struct drm_device *dev,
 		error->ring[i].hws_page =
 			i915_error_ggtt_object_create(dev_priv, ring->status_page.obj);
 
+		if (INTEL_INFO(dev)->gen >= 8 && ring->id == RCS) {
+			error->ring[i].wa_ctx =
+				i915_error_ggtt_object_create(dev_priv, ring->wa_ctx.obj);
+		}
+
 		i915_gem_record_active_context(ring, error, &error->ring[i]);
 
 		count = 0;