diff mbox

[v2,16/16] drm/i915: add i915_gem_context_get_reset_status_ioctl

Message ID 1363276337-12509-17-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala March 14, 2013, 3:52 p.m. UTC
This ioctl returns context reset status for specified context.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
CC: idr@freedesktop.org
---
 drivers/gpu/drm/i915/i915_dma.c |    1 +
 drivers/gpu/drm/i915/i915_drv.c |   61 +++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h |    2 ++
 include/uapi/drm/i915_drm.h     |   28 ++++++++++++++++++
 4 files changed, 92 insertions(+)

Comments

Chris Wilson March 15, 2013, 10:01 a.m. UTC | #1
On Thu, Mar 14, 2013 at 05:52:17PM +0200, Mika Kuoppala wrote:
> This ioctl returns context reset status for specified context.
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> CC: idr@freedesktop.org
> ---
>  drivers/gpu/drm/i915/i915_dma.c |    1 +
>  drivers/gpu/drm/i915/i915_drv.c |   61 +++++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h |    2 ++
>  include/uapi/drm/i915_drm.h     |   28 ++++++++++++++++++
>  4 files changed, 92 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 7902d97..c919832 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1903,6 +1903,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
> +	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATUS, i915_gem_context_get_reset_status_ioctl, DRM_UNLOCKED),
>  };
>  
>  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 69c9856..a4d06f2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1267,3 +1267,64 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>  
>  	return 0;
>  }
> +
> +int i915_gem_context_get_reset_status_ioctl(struct drm_device *dev,
> +					    void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_ring_buffer *ring;
> +	struct drm_i915_reset_status *args = data;
> +	struct ctx_reset_state *rs = NULL;
> +	unsigned long reset_cnt;
> +	u32 reset_status = I915_RESET_UNKNOWN;
> +	int ret;
> +
> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
> +	if (ret)
> +		return ret;
> +
> +	ring = &dev_priv->ring[RCS];
> +
> +	ret = i915_gem_context_get_reset_state(ring,
> +					       file,
> +					       args->ctx_id,
> +					       &rs);
> +	if (ret)
> +		goto out;
See earlier comments.

> +	BUG_ON(!rs);
> +
> +	reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter);
> +
> +	if (reset_cnt & I915_RESET_IN_PROGRESS_FLAG ||
> +	    reset_cnt == I915_WEDGED) {
> +		goto out;
I915_WEDGED & I915_RESET_IN_PROGRESS_FLAG is defined as true.

> +	}
> +
> +	/* Set guilty/innocent status if only one reset was
> +	 * observed and if only one guilty was found
> +	 */
> +	if ((rs->reset_cnt + 2) == reset_cnt &&
> +	    (rs->guilty_cnt + 1) == dev_priv->gpu_error.guilty_cnt) {
> +		reset_status = 0;
> +
> +		if (rs->guilty)
> +			reset_status |= I915_RESET_BATCH_ACTIVE;
> +
> +		if (rs->innocent)
> +			reset_status |= I915_RESET_BATCH_PENDING;
> +
> +		if (reset_status == 0)
> +			reset_status = I915_RESET_UNKNOWN;
> +	} else if (rs->reset_cnt == reset_cnt) {
> +		reset_status = I915_RESET_NO_ERROR;
> +	}
This looks very fragile and time dependent. It is not an interface I can
use...
-Chris
Ian Romanick March 18, 2013, 8:26 p.m. UTC | #2
On 03/14/2013 08:52 AM, Mika Kuoppala wrote:
> This ioctl returns context reset status for specified context.
>
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> CC: idr@freedesktop.org
> ---
>   drivers/gpu/drm/i915/i915_dma.c |    1 +
>   drivers/gpu/drm/i915/i915_drv.c |   61 +++++++++++++++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_drv.h |    2 ++
>   include/uapi/drm/i915_drm.h     |   28 ++++++++++++++++++
>   4 files changed, 92 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 7902d97..c919832 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1903,6 +1903,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
>   	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
>   	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
>   	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
> +	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATUS, i915_gem_context_get_reset_status_ioctl, DRM_UNLOCKED),
>   };
>
>   int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 69c9856..a4d06f2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1267,3 +1267,64 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>
>   	return 0;
>   }
> +
> +int i915_gem_context_get_reset_status_ioctl(struct drm_device *dev,
> +					    void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_ring_buffer *ring;
> +	struct drm_i915_reset_status *args = data;
> +	struct ctx_reset_state *rs = NULL;
> +	unsigned long reset_cnt;
> +	u32 reset_status = I915_RESET_UNKNOWN;
> +	int ret;
> +
> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
> +	if (ret)
> +		return ret;
> +
> +	ring = &dev_priv->ring[RCS];
> +
> +	ret = i915_gem_context_get_reset_state(ring,
> +					       file,
> +					       args->ctx_id,
> +					       &rs);
> +	if (ret)
> +		goto out;
> +
> +	BUG_ON(!rs);
> +
> +	reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter);
> +
> +	if (reset_cnt & I915_RESET_IN_PROGRESS_FLAG ||

In this case, I believe we're supposed to return the reset state to the 
application.  The ARB_robustness spec says:

     "If a reset status other than NO_ERROR is returned and subsequent
     calls return NO_ERROR, the context reset was encountered and
     completed. If a reset status is repeatedly returned, the context may
     be in the process of resetting."

If the reset takes a long time, it seems that even a well-behaved app 
could run afoul of the 'banned' logic.

> +	    reset_cnt == I915_WEDGED) {
> +		goto out;
> +	}
> +
> +	/* Set guilty/innocent status if only one reset was
> +	 * observed and if only one guilty was found
> +	 */
> +	if ((rs->reset_cnt + 2) == reset_cnt &&
> +	    (rs->guilty_cnt + 1) == dev_priv->gpu_error.guilty_cnt) {

This logic seems... wrong, or at least weird.  "rs->reset_cnt + 2" is 
confusing next to "if only one reset was observed".

dev_priv->gpu_error.reset_counter is the global GPU reset count since 
start-up, and rs->reset_cnt is the global GPU count since start-up when 
the context was created.  Right?

If that's the case, this will cause a context that was completely idle 
(i.e., didn't actually lose anything) to get a reset notification. 
That's an absolute deal breaker.

If that's not the case, then this architecture needs a lot more 
documentation so that people new to it can understand what's happening.

> +		reset_status = 0;
> +
> +		if (rs->guilty)
> +			reset_status |= I915_RESET_BATCH_ACTIVE;
> +
> +		if (rs->innocent)
> +			reset_status |= I915_RESET_BATCH_PENDING;
> +
> +		if (reset_status == 0)
> +			reset_status = I915_RESET_UNKNOWN;
> +	} else if (rs->reset_cnt == reset_cnt) {
> +		reset_status = I915_RESET_NO_ERROR;
> +	}
> +
> +out:
> +	if (!ret)
> +		args->reset_status = reset_status;
> +
> +	mutex_unlock(&dev->struct_mutex);
> +
> +	return ret ? -EINVAL : 0;
> +}
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3e11acf..2e5e8e7 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1712,6 +1712,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>   				  struct drm_file *file);
>   int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>   				   struct drm_file *file);
> +int i915_gem_context_get_reset_status_ioctl(struct drm_device *dev,
> +					    void *data, struct drm_file *file);
>
>   /* i915_gem_gtt.c */
>   void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 07d5941..a195e0e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
>   #define DRM_I915_GEM_SET_CACHING	0x2f
>   #define DRM_I915_GEM_GET_CACHING	0x30
>   #define DRM_I915_REG_READ		0x31
> +#define DRM_I915_GET_RESET_STATUS	0x32
>
>   #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>   #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -247,6 +248,7 @@ typedef struct _drm_i915_sarea {
>   #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
>   #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>   #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> +#define DRM_IOCTL_I915_GET_RESET_STATUS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATUS, struct drm_i915_reset_status)
>
>   /* Allow drivers to submit batchbuffers directly to hardware, relying
>    * on the security mechanisms provided by hardware.
> @@ -980,4 +982,30 @@ struct drm_i915_reg_read {
>   	__u64 offset;
>   	__u64 val; /* Return value */
>   };
> +
> +/* No reset observed */
> +#define I915_RESET_NO_ERROR      0
> +
> +/* Context had batch processing active while
> +   gpu hung and batch was guilty of gpu hang */
> +#define I915_RESET_BATCH_ACTIVE  (1 << 0)
> +
> +/* Context had batch queued for processing while
> +   reset occurred and guilty batch was found:
> +   I915_RESET_BATCH_ACTIVE was set for this or
> +   some other context */
> +#define I915_RESET_BATCH_PENDING (1 << 1)
> +
> +/* Context observed gpu hung and reset but guilty context
> +   was not found: I915_RESET_BATCH_ACTIVE and
> +   I915_RESET_BATCH_PENDING were not set for any context */
> +#define I915_RESET_UNKNOWN       (1 << 2)
> +
> +struct drm_i915_reset_status {
> +	__u32 ctx_id;
> +	__u32 flags;
> +	__u32 reset_status;
> +	__u32 pad;
> +};
> +
>   #endif /* _UAPI_I915_DRM_H_ */
>
Mika Kuoppala March 19, 2013, 12:58 p.m. UTC | #3
Ian Romanick <idr@freedesktop.org> writes:

> On 03/14/2013 08:52 AM, Mika Kuoppala wrote:
>> This ioctl returns context reset status for specified context.
>>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
>> CC: idr@freedesktop.org
>> ---
>>   drivers/gpu/drm/i915/i915_dma.c |    1 +
>>   drivers/gpu/drm/i915/i915_drv.c |   61 +++++++++++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_drv.h |    2 ++
>>   include/uapi/drm/i915_drm.h     |   28 ++++++++++++++++++
>>   4 files changed, 92 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
>> index 7902d97..c919832 100644
>> --- a/drivers/gpu/drm/i915/i915_dma.c
>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>> @@ -1903,6 +1903,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
>>   	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
>>   	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
>>   	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
>> +	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATUS, i915_gem_context_get_reset_status_ioctl, DRM_UNLOCKED),
>>   };
>>
>>   int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>> index 69c9856..a4d06f2 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -1267,3 +1267,64 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>>
>>   	return 0;
>>   }
>> +
>> +int i915_gem_context_get_reset_status_ioctl(struct drm_device *dev,
>> +					    void *data, struct drm_file *file)
>> +{
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>> +	struct intel_ring_buffer *ring;
>> +	struct drm_i915_reset_status *args = data;
>> +	struct ctx_reset_state *rs = NULL;
>> +	unsigned long reset_cnt;
>> +	u32 reset_status = I915_RESET_UNKNOWN;
>> +	int ret;
>> +
>> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
>> +	if (ret)
>> +		return ret;
>> +
>> +	ring = &dev_priv->ring[RCS];
>> +
>> +	ret = i915_gem_context_get_reset_state(ring,
>> +					       file,
>> +					       args->ctx_id,
>> +					       &rs);
>> +	if (ret)
>> +		goto out;
>> +
>> +	BUG_ON(!rs);
>> +
>> +	reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter);
>> +
>> +	if (reset_cnt & I915_RESET_IN_PROGRESS_FLAG ||
>
> In this case, I believe we're supposed to return the reset state to the 
> application.  The ARB_robustness spec says:
>
>      "If a reset status other than NO_ERROR is returned and subsequent
>      calls return NO_ERROR, the context reset was encountered and
>      completed. If a reset status is repeatedly returned, the context may
>      be in the process of resetting."
>
> If the reset takes a long time, it seems that even a well-behaved app 
> could run afoul of the 'banned' logic.

As the reset status is initialized to I915_RESET_UNKNOWN,
we return it if the reset is in progress or the gpu is wedged.

>> +	    reset_cnt == I915_WEDGED) {
>> +		goto out;
>> +	}
>> +
>> +	/* Set guilty/innocent status if only one reset was
>> +	 * observed and if only one guilty was found
>> +	 */
>> +	if ((rs->reset_cnt + 2) == reset_cnt &&
>> +	    (rs->guilty_cnt + 1) == dev_priv->gpu_error.guilty_cnt) {
>
> This logic seems... wrong, or at least weird.  "rs->reset_cnt + 2" is 
> confusing next to "if only one reset was observed".
>
> dev_priv->gpu_error.reset_counter is the global GPU reset count since 
> start-up, and rs->reset_cnt is the global GPU count since start-up when 
> the context was created.  Right?

Right. The confusing part here is
dev_priv->gpu_error.reset_counter. If it is odd, a reset is in progress;
if it is even, the reset has been handled and all is well. That is why +2.

> If that's the case, this will cause a context that was completely idle 
> (i.e., didn't actually lose anything) to get a reset notification. 
> That's an absolute deal breaker.

I misunderstood this, then. I will make it so that if you have
no batches submitted, you won't observe a reset.

> If that's not the case, then this architecture needs a lot more 
> documentation so that people new to it can understand what's happening.

Agreed. If we don't need to care about the contexts where there
were no batches submitted, the logic will be simpler, though.

-Mika

>> +		reset_status = 0;
>> +
>> +		if (rs->guilty)
>> +			reset_status |= I915_RESET_BATCH_ACTIVE;
>> +
>> +		if (rs->innocent)
>> +			reset_status |= I915_RESET_BATCH_PENDING;
>> +
>> +		if (reset_status == 0)
>> +			reset_status = I915_RESET_UNKNOWN;
>> +	} else if (rs->reset_cnt == reset_cnt) {
>> +		reset_status = I915_RESET_NO_ERROR;
>> +	}
>> +
>> +out:
>> +	if (!ret)
>> +		args->reset_status = reset_status;
>> +
>> +	mutex_unlock(&dev->struct_mutex);
>> +
>> +	return ret ? -EINVAL : 0;
>> +}
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 3e11acf..2e5e8e7 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1712,6 +1712,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>>   				  struct drm_file *file);
>>   int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>>   				   struct drm_file *file);
>> +int i915_gem_context_get_reset_status_ioctl(struct drm_device *dev,
>> +					    void *data, struct drm_file *file);
>>
>>   /* i915_gem_gtt.c */
>>   void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 07d5941..a195e0e 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_I915_GEM_SET_CACHING	0x2f
>>   #define DRM_I915_GEM_GET_CACHING	0x30
>>   #define DRM_I915_REG_READ		0x31
>> +#define DRM_I915_GET_RESET_STATUS	0x32
>>
>>   #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>>   #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
>> @@ -247,6 +248,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
>>   #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>>   #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>> +#define DRM_IOCTL_I915_GET_RESET_STATUS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATUS, struct drm_i915_reset_status)
>>
>>   /* Allow drivers to submit batchbuffers directly to hardware, relying
>>    * on the security mechanisms provided by hardware.
>> @@ -980,4 +982,30 @@ struct drm_i915_reg_read {
>>   	__u64 offset;
>>   	__u64 val; /* Return value */
>>   };
>> +
>> +/* No reset observed */
>> +#define I915_RESET_NO_ERROR      0
>> +
>> +/* Context had batch processing active while
>> +   gpu hung and batch was guilty of gpu hang */
>> +#define I915_RESET_BATCH_ACTIVE  (1 << 0)
>> +
>> +/* Context had batch queued for processing while
>> +   reset occurred and guilty batch was found:
>> +   I915_RESET_BATCH_ACTIVE was set for this or
>> +   some other context */
>> +#define I915_RESET_BATCH_PENDING (1 << 1)
>> +
>> +/* Context observed gpu hung and reset but guilty context
>> +   was not found: I915_RESET_BATCH_ACTIVE and
>> +   I915_RESET_BATCH_PENDING were not set for any context */
>> +#define I915_RESET_UNKNOWN       (1 << 2)
>> +
>> +struct drm_i915_reset_status {
>> +	__u32 ctx_id;
>> +	__u32 flags;
>> +	__u32 reset_status;
>> +	__u32 pad;
>> +};
>> +
>>   #endif /* _UAPI_I915_DRM_H_ */
>>
Ian Romanick March 19, 2013, 7:02 p.m. UTC | #4
On 03/19/2013 05:58 AM, Mika Kuoppala wrote:
> Ian Romanick <idr@freedesktop.org> writes:
>
>> On 03/14/2013 08:52 AM, Mika Kuoppala wrote:
>>> This ioctl returns context reset status for specified context.
>>>
>>> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
>>> CC: idr@freedesktop.org
>>> ---
>>>    drivers/gpu/drm/i915/i915_dma.c |    1 +
>>>    drivers/gpu/drm/i915/i915_drv.c |   61 +++++++++++++++++++++++++++++++++++++++
>>>    drivers/gpu/drm/i915/i915_drv.h |    2 ++
>>>    include/uapi/drm/i915_drm.h     |   28 ++++++++++++++++++
>>>    4 files changed, 92 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
>>> index 7902d97..c919832 100644
>>> --- a/drivers/gpu/drm/i915/i915_dma.c
>>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>>> @@ -1903,6 +1903,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
>>>    	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
>>>    	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
>>>    	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
>>> +	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATUS, i915_gem_context_get_reset_status_ioctl, DRM_UNLOCKED),
>>>    };
>>>
>>>    int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
>>> index 69c9856..a4d06f2 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.c
>>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>>> @@ -1267,3 +1267,64 @@ int i915_reg_read_ioctl(struct drm_device *dev,
>>>
>>>    	return 0;
>>>    }
>>> +
>>> +int i915_gem_context_get_reset_status_ioctl(struct drm_device *dev,
>>> +					    void *data, struct drm_file *file)
>>> +{
>>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>>> +	struct intel_ring_buffer *ring;
>>> +	struct drm_i915_reset_status *args = data;
>>> +	struct ctx_reset_state *rs = NULL;
>>> +	unsigned long reset_cnt;
>>> +	u32 reset_status = I915_RESET_UNKNOWN;
>>> +	int ret;
>>> +
>>> +	ret = mutex_lock_interruptible(&dev->struct_mutex);
>>> +	if (ret)
>>> +		return ret;
>>> +
>>> +	ring = &dev_priv->ring[RCS];
>>> +
>>> +	ret = i915_gem_context_get_reset_state(ring,
>>> +					       file,
>>> +					       args->ctx_id,
>>> +					       &rs);
>>> +	if (ret)
>>> +		goto out;
>>> +
>>> +	BUG_ON(!rs);
>>> +
>>> +	reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter);
>>> +
>>> +	if (reset_cnt & I915_RESET_IN_PROGRESS_FLAG ||
>>
>> In this case, I believe we're supposed to return the reset state to the
>> application.  The ARB_robustness spec says:
>>
>>       "If a reset status other than NO_ERROR is returned and subsequent
>>       calls return NO_ERROR, the context reset was encountered and
>>       completed. If a reset status is repeatedly returned, the context may
>>       be in the process of resetting."
>>
>> If the reset takes a long time, it seems that even a well-behaved app
>> could run afoul of the 'banned' logic.
>
> As there reset status is initialized to I915_RESET_UNKNOWN,
> we return it if the reset is in progress or gpu is wedged.

Hmm... so user space will see I915_RESET_UNKNOWN until the reset is 
done, then it will (usually) see either I915_RESET_BATCH_ACTIVE or 
I915_RESET_BATCH_PENDING.  I think that should be okay.

>>> +	    reset_cnt == I915_WEDGED) {
>>> +		goto out;
>>> +	}
>>> +
>>> +	/* Set guilty/innocent status if only one reset was
>>> +	 * observed and if only one guilty was found
>>> +	 */
>>> +	if ((rs->reset_cnt + 2) == reset_cnt &&
>>> +	    (rs->guilty_cnt + 1) == dev_priv->gpu_error.guilty_cnt) {
>>
>> This logic seems... wrong, or at least weird.  "rs->reset_cnt + 2" is
>> confusing next to "if only one reset was observed".
>>
>> dev_priv->gpu_error.reset_counter is the global GPU reset count since
>> start-up, and rs->reset_cnt is the global GPU count since start-up when
>> the context was created.  Right?
>
> Right. The confusing part in here is the
> dev_priv->gpu_error.reset_counter. If it is odd, reset is in progress,
> if it is even, the reset has been handled and all is well. That is why +2

That's a clever hack, I'm assuming, to use atomic operations instead of 
locks.   Dear God that's awful to understand... it's a tiny bit more 
clear looking back at the 'reset_cnt & I915_RESET_IN_PROGRESS_FLAG'. 
Perhaps we could get some wrapper macros RESET_IN_PROGRESS() and 
RESET_ACTUAL_COUNT() or something?

>> If that's the case, this will cause a context that was completely idle
>> (i.e., didn't actually lose anything) to get a reset notification.
>> That's an absolute deal breaker.
>
> This was then misunderstood by me. I will make it so that if you have
> no batches submitted, you wont observe a reset.
>
>> If that's not the case, then this architecture needs a lot more
>> documentation so that people new to it can understand what's happening.
>
> Agreed. If we don't need to care about the contexts where there
> were no batches submitted, the logic will be simpler tho.
>
> -Mika
>
>>> +		reset_status = 0;
>>> +
>>> +		if (rs->guilty)
>>> +			reset_status |= I915_RESET_BATCH_ACTIVE;
>>> +
>>> +		if (rs->innocent)
>>> +			reset_status |= I915_RESET_BATCH_PENDING;
>>> +
>>> +		if (reset_status == 0)
>>> +			reset_status = I915_RESET_UNKNOWN;
>>> +	} else if (rs->reset_cnt == reset_cnt) {
>>> +		reset_status = I915_RESET_NO_ERROR;
>>> +	}
>>> +
>>> +out:
>>> +	if (!ret)
>>> +		args->reset_status = reset_status;
>>> +
>>> +	mutex_unlock(&dev->struct_mutex);
>>> +
>>> +	return ret ? -EINVAL : 0;
>>> +}
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>> index 3e11acf..2e5e8e7 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -1712,6 +1712,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>>>    				  struct drm_file *file);
>>>    int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>>>    				   struct drm_file *file);
>>> +int i915_gem_context_get_reset_status_ioctl(struct drm_device *dev,
>>> +					    void *data, struct drm_file *file);
>>>
>>>    /* i915_gem_gtt.c */
>>>    void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 07d5941..a195e0e 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
>>>    #define DRM_I915_GEM_SET_CACHING	0x2f
>>>    #define DRM_I915_GEM_GET_CACHING	0x30
>>>    #define DRM_I915_REG_READ		0x31
>>> +#define DRM_I915_GET_RESET_STATUS	0x32
>>>
>>>    #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>>>    #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
>>> @@ -247,6 +248,7 @@ typedef struct _drm_i915_sarea {
>>>    #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
>>>    #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>>>    #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>>> +#define DRM_IOCTL_I915_GET_RESET_STATUS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATUS, struct drm_i915_reset_status)
>>>
>>>    /* Allow drivers to submit batchbuffers directly to hardware, relying
>>>     * on the security mechanisms provided by hardware.
>>> @@ -980,4 +982,30 @@ struct drm_i915_reg_read {
>>>    	__u64 offset;
>>>    	__u64 val; /* Return value */
>>>    };
>>> +
>>> +/* No reset observed */
>>> +#define I915_RESET_NO_ERROR      0
>>> +
>>> +/* Context had batch processing active while
>>> +   gpu hung and batch was guilty of gpu hang */
>>> +#define I915_RESET_BATCH_ACTIVE  (1 << 0)
>>> +
>>> +/* Context had batch queued for processing while
>>> +   reset occurred and guilty batch was found:
>>> +   I915_RESET_BATCH_ACTIVE was set for this or
>>> +   some other context */
>>> +#define I915_RESET_BATCH_PENDING (1 << 1)
>>> +
>>> +/* Context observed gpu hung and reset but guilty context
>>> +   was not found: I915_RESET_BATCH_ACTIVE and
>>> +   I915_RESET_BATCH_PENDING were not set for any context */
>>> +#define I915_RESET_UNKNOWN       (1 << 2)
>>> +
>>> +struct drm_i915_reset_status {
>>> +	__u32 ctx_id;
>>> +	__u32 flags;
>>> +	__u32 reset_status;
>>> +	__u32 pad;
>>> +};
>>> +
>>>    #endif /* _UAPI_I915_DRM_H_ */
>>>
Daniel Vetter March 19, 2013, 7:21 p.m. UTC | #5
On Tue, Mar 19, 2013 at 12:02:48PM -0700, Ian Romanick wrote:
> On 03/19/2013 05:58 AM, Mika Kuoppala wrote:
> >Ian Romanick <idr@freedesktop.org> writes:
> >
> >>On 03/14/2013 08:52 AM, Mika Kuoppala wrote:
> >>>This ioctl returns context reset status for specified context.
> >>>
> >>>Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> >>>CC: idr@freedesktop.org
> >>>---
> >>>   drivers/gpu/drm/i915/i915_dma.c |    1 +
> >>>   drivers/gpu/drm/i915/i915_drv.c |   61 +++++++++++++++++++++++++++++++++++++++
> >>>   drivers/gpu/drm/i915/i915_drv.h |    2 ++
> >>>   include/uapi/drm/i915_drm.h     |   28 ++++++++++++++++++
> >>>   4 files changed, 92 insertions(+)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> >>>index 7902d97..c919832 100644
> >>>--- a/drivers/gpu/drm/i915/i915_dma.c
> >>>+++ b/drivers/gpu/drm/i915/i915_dma.c
> >>>@@ -1903,6 +1903,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
> >>>   	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
> >>>   	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
> >>>   	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
> >>>+	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATUS, i915_gem_context_get_reset_status_ioctl, DRM_UNLOCKED),
> >>>   };
> >>>
> >>>   int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> >>>diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> >>>index 69c9856..a4d06f2 100644
> >>>--- a/drivers/gpu/drm/i915/i915_drv.c
> >>>+++ b/drivers/gpu/drm/i915/i915_drv.c
> >>>@@ -1267,3 +1267,64 @@ int i915_reg_read_ioctl(struct drm_device *dev,
> >>>
> >>>   	return 0;
> >>>   }
> >>>+
> >>>+int i915_gem_context_get_reset_status_ioctl(struct drm_device *dev,
> >>>+					    void *data, struct drm_file *file)
> >>>+{
> >>>+	struct drm_i915_private *dev_priv = dev->dev_private;
> >>>+	struct intel_ring_buffer *ring;
> >>>+	struct drm_i915_reset_status *args = data;
> >>>+	struct ctx_reset_state *rs = NULL;
> >>>+	unsigned long reset_cnt;
> >>>+	u32 reset_status = I915_RESET_UNKNOWN;
> >>>+	int ret;
> >>>+
> >>>+	ret = mutex_lock_interruptible(&dev->struct_mutex);
> >>>+	if (ret)
> >>>+		return ret;
> >>>+
> >>>+	ring = &dev_priv->ring[RCS];
> >>>+
> >>>+	ret = i915_gem_context_get_reset_state(ring,
> >>>+					       file,
> >>>+					       args->ctx_id,
> >>>+					       &rs);
> >>>+	if (ret)
> >>>+		goto out;
> >>>+
> >>>+	BUG_ON(!rs);
> >>>+
> >>>+	reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter);
> >>>+
> >>>+	if (reset_cnt & I915_RESET_IN_PROGRESS_FLAG ||
> >>
> >>In this case, I believe we're supposed to return the reset state to the
> >>application.  The ARB_robustness spec says:
> >>
> >>      "If a reset status other than NO_ERROR is returned and subsequent
> >>      calls return NO_ERROR, the context reset was encountered and
> >>      completed. If a reset status is repeatedly returned, the context may
> >>      be in the process of resetting."
> >>
> >>If the reset takes a long time, it seems that even a well-behaved app
> >>could run afoul of the 'banned' logic.
> >
> >As there reset status is initialized to I915_RESET_UNKNOWN,
> >we return it if the reset is in progress or gpu is wedged.
> 
> Hmm... so user space will see I915_RESET_UNKNOWN until the reset is
> done, then it will (usually) see either I915_RESET_BATCH_ACTIVE or
> I915_RESET_BATCH_PENDING.  I think that should be okay.
> 
> >>>+	    reset_cnt == I915_WEDGED) {
> >>>+		goto out;
> >>>+	}
> >>>+
> >>>+	/* Set guilty/innocent status if only one reset was
> >>>+	 * observed and if only one guilty was found
> >>>+	 */
> >>>+	if ((rs->reset_cnt + 2) == reset_cnt &&
> >>>+	    (rs->guilty_cnt + 1) == dev_priv->gpu_error.guilty_cnt) {
> >>
> >>This logic seems... wrong, or at least weird.  "rs->reset_cnt + 2" is
> >>confusing next to "if only one reset was observed".
> >>
> >>dev_priv->gpu_error.reset_counter is the global GPU reset count since
> >>start-up, and rs->reset_cnt is the global GPU count since start-up when
> >>the context was created.  Right?
> >
> >Right. The confusing part in here is the
> >dev_priv->gpu_error.reset_counter. If it is odd, reset is in progress,
> >if it is even, the reset has been handled and all is well. That is why +2
> 
> That's a clever hack, I'm assuming, to use atomic operations instead
> of locks.   Dear God that's awful to understand... it's a tiny bit
> more clear looking back at the 'reset_cnt &
> I915_RESET_IN_PROGRESS_FLAG'. Perhaps we could get some wrapper
> macros RESET_IN_PROGRESS() and RESET_ACTUAL_COUNT() or something?

Those exist and are called i915_reset_in_progress and
i915_terminally_wedged (the latter for the case where the gpu reset failed
and things are terminally hosed). Since the kernel thus far only cared
whether the reset state changed (either from good -> reset_in_progress,
back or to the terminal state) without ever missing a state transition, it
only compares the counter and doesn't care about the actual reset count
one bit. Hence why I didn't add another helper.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 7902d97..c919832 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1903,6 +1903,7 @@  struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATUS, i915_gem_context_get_reset_status_ioctl, DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 69c9856..a4d06f2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1267,3 +1267,64 @@  int i915_reg_read_ioctl(struct drm_device *dev,
 
 	return 0;
 }
+
+int i915_gem_context_get_reset_status_ioctl(struct drm_device *dev,
+					    void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring;
+	struct drm_i915_reset_status *args = data;
+	struct ctx_reset_state *rs = NULL;
+	unsigned long reset_cnt;
+	u32 reset_status = I915_RESET_UNKNOWN;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	ring = &dev_priv->ring[RCS];
+
+	ret = i915_gem_context_get_reset_state(ring,
+					       file,
+					       args->ctx_id,
+					       &rs);
+	if (ret)
+		goto out;
+
+	BUG_ON(!rs);
+
+	reset_cnt = atomic_read(&dev_priv->gpu_error.reset_counter);
+
+	if (reset_cnt & I915_RESET_IN_PROGRESS_FLAG ||
+	    reset_cnt == I915_WEDGED) {
+		goto out; /* status stays I915_RESET_UNKNOWN */
+	}
+
+	/* Report guilty/innocent only if the counter advanced by exactly
+	 * 2 (one reset) and exactly one guilty verdict was recorded
+	 */
+	if ((rs->reset_cnt + 2) == reset_cnt &&
+	    (rs->guilty_cnt + 1) == dev_priv->gpu_error.guilty_cnt) {
+		reset_status = 0;
+
+		if (rs->guilty)
+			reset_status |= I915_RESET_BATCH_ACTIVE;
+
+		if (rs->innocent)
+			reset_status |= I915_RESET_BATCH_PENDING;
+
+		if (reset_status == 0)
+			reset_status = I915_RESET_UNKNOWN;
+	} else if (rs->reset_cnt == reset_cnt) {
+		reset_status = I915_RESET_NO_ERROR;
+	}
+
+out:
+	if (!ret)
+		args->reset_status = reset_status;
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret ? -EINVAL : 0; /* any lookup error becomes -EINVAL */
+}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3e11acf..2e5e8e7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1712,6 +1712,8 @@  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file);
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file);
+int i915_gem_context_get_reset_status_ioctl(struct drm_device *dev,
+					    void *data, struct drm_file *file);
 
 /* i915_gem_gtt.c */
 void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 07d5941..a195e0e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -198,6 +198,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SET_CACHING	0x2f
 #define DRM_I915_GEM_GET_CACHING	0x30
 #define DRM_I915_REG_READ		0x31
+#define DRM_I915_GET_RESET_STATUS	0x32
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -247,6 +248,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GET_RESET_STATUS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATUS, struct drm_i915_reset_status)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -980,4 +982,30 @@  struct drm_i915_reg_read {
 	__u64 offset;
 	__u64 val; /* Return value */
 };
+
+/* No reset observed */
+#define I915_RESET_NO_ERROR      0
+
+/* Context had batch processing active while
+   gpu hung and batch was guilty of gpu hang */
+#define I915_RESET_BATCH_ACTIVE  (1 << 0)
+
+/* Context had batch queued for processing while
+   reset occurred and guilty batch was found:
+   I915_RESET_BATCH_ACTIVE was set for this or
+   some other context */
+#define I915_RESET_BATCH_PENDING (1 << 1)
+
+/* Context observed gpu hung and reset but guilty context
+   was not found: I915_RESET_BATCH_ACTIVE and
+   I915_RESET_BATCH_PENDING were not set for any context */
+#define I915_RESET_UNKNOWN       (1 << 2)
+
+struct drm_i915_reset_status {
+	__u32 ctx_id;		/* in: id of the context to query */
+	__u32 flags;		/* not read by the ioctl in this patch */
+	__u32 reset_status;	/* out: I915_RESET_* value(s) */
+	__u32 pad;		/* not read by the ioctl in this patch */
+};
+
 #endif /* _UAPI_I915_DRM_H_ */