diff mbox

drm/msm/gpu: Add submit queue queries

Message ID 20180430212053.7438-1-jcrouse@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Jordan Crouse April 30, 2018, 9:20 p.m. UTC
Add the capability to query information from a submit queue.
The first available parameter is for querying the number of
GPU faults (hangs) that can be attributed to the queue.

This is useful for implementing context robustness. A UMD
context can regularly query the number of faults to see
if it is responsible for any. If so it can invalidate itself.

This is also helpful for testing by confirming to the user mode
driver if a particular command stream caused a fault (or not as
the case may be).

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
---
 drivers/gpu/drm/msm/msm_drv.c         | 12 +++++++++++-
 drivers/gpu/drm/msm/msm_drv.h         |  2 ++
 drivers/gpu/drm/msm/msm_gpu.c         |  3 +++
 drivers/gpu/drm/msm/msm_submitqueue.c | 21 +++++++++++++++++++++
 include/uapi/drm/msm_drm.h            | 12 ++++++++++++
 5 files changed, 49 insertions(+), 1 deletion(-)

Comments

Jordan Crouse May 7, 2018, 4:06 p.m. UTC | #1
On Mon, Apr 30, 2018 at 03:20:53PM -0600, Jordan Crouse wrote:
> Add the capability to query information from a submit queue.
> The first available parameter is for querying the number of
> GPU faults (hangs) that can be attributed to the queue.
> 
> This is useful for implementing context robustness. A UMD
> context can regularly query the number of faults to see
> if it is responsible for any. If so it can invalidate itself.
> 
> This is also helpful for testing by confirming to the user mode
> driver if a particular command stream caused a fault (or not as
> the case may be).
> 
> Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
> ---
>  drivers/gpu/drm/msm/msm_drv.c         | 12 +++++++++++-
>  drivers/gpu/drm/msm/msm_drv.h         |  2 ++
>  drivers/gpu/drm/msm/msm_gpu.c         |  3 +++
>  drivers/gpu/drm/msm/msm_submitqueue.c | 21 +++++++++++++++++++++
>  include/uapi/drm/msm_drm.h            | 12 ++++++++++++
>  5 files changed, 49 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 30cd514d8f7c..d01fb101d5ff 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -32,9 +32,10 @@
>   * - 1.3.0 - adds GMEM_BASE + NR_RINGS params, SUBMITQUEUE_NEW +
>   *           SUBMITQUEUE_CLOSE ioctls, and MSM_INFO_IOVA flag for
>   *           MSM_GEM_INFO ioctl.
> + * - 1.4.0 - Add SUBMITQUERY_QUERY ioctl.
>   */
>  #define MSM_VERSION_MAJOR	1
> -#define MSM_VERSION_MINOR	3
> +#define MSM_VERSION_MINOR	4
>  #define MSM_VERSION_PATCHLEVEL	0
>  
>  static const struct drm_mode_config_funcs mode_config_funcs = {
> @@ -803,6 +804,14 @@ static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data,
>  		args->flags, &args->id);
>  }
>  
> +static int msm_ioctl_submitqueue_query(struct drm_device *dev, void *data,
> +		struct drm_file *file)
> +{
> +	struct drm_msm_submitqueue_query *args = data;
> +
> +	return msm_submitqueue_query(dev, file->driver_priv, args->id,
> +		args->param, args->data, args->len);
> +}
>  
>  static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data,
>  		struct drm_file *file)
> @@ -823,6 +832,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE,  msm_ioctl_gem_madvise,  DRM_AUTH|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW,   msm_ioctl_submitqueue_new,   DRM_AUTH|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_AUTH|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_AUTH|DRM_RENDER_ALLOW),
>  };
>  
>  static const struct vm_operations_struct vm_ops = {
> diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
> index 48ed5b9a8580..56c666f25aa1 100644
> --- a/drivers/gpu/drm/msm/msm_drv.h
> +++ b/drivers/gpu/drm/msm/msm_drv.h
> @@ -320,6 +320,8 @@ struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
>  		u32 id);
>  int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
>  		u32 prio, u32 flags, u32 *id);
> +int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
> +		u32 id, u32 param, u64 data, u32 len);
>  int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
>  void msm_submitqueue_close(struct msm_file_private *ctx);
>  
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index 1c09acfb4028..925532197584 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -324,6 +324,9 @@ static void recover_worker(struct work_struct *work)
>  	if (submit) {
>  		struct task_struct *task;
>  
> +		/* Increment the submitqueue fault count */
> +		submit->queue->faults++;
> +
>  		rcu_read_lock();
>  		task = pid_task(submit->pid, PIDTYPE_PID);
>  		if (task) {
> diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
> index 5115f75b5b7f..e12f2bd0347d 100644
> --- a/drivers/gpu/drm/msm/msm_submitqueue.c
> +++ b/drivers/gpu/drm/msm/msm_submitqueue.c
> @@ -120,6 +120,27 @@ int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx)
>  	return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL);
>  }
>  
> +int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
> +		u32 id,	u32 param, u64 data, u32 len)
> +{
> +	struct msm_gpu_submitqueue *queue = msm_submitqueue_get(ctx, id);
> +	int ret = -EINVAL;
> +
> +	if (!queue)
> +		return -ENOENT;
> +
> +	if (param == MSM_SUBMITQUEUE_PARAM_FAULTS) {
> +		size_t size = min_t(size_t, len, sizeof(queue->faults));
> +
> +		if (copy_to_user(u64_to_user_ptr(data), &queue->faults, size))
> +			ret = -EFAULT;
> +	}

I'll push a new patch to fix it, but pointing out for posterity that we get here
with ret == -EINVAL on success.  There should be ret = 0 before the bracket.

Jordan
diff mbox

Patch

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 30cd514d8f7c..d01fb101d5ff 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -32,9 +32,10 @@ 
  * - 1.3.0 - adds GMEM_BASE + NR_RINGS params, SUBMITQUEUE_NEW +
  *           SUBMITQUEUE_CLOSE ioctls, and MSM_INFO_IOVA flag for
  *           MSM_GEM_INFO ioctl.
+ * - 1.4.0 - Add SUBMITQUEUE_QUERY ioctl.
  */
 #define MSM_VERSION_MAJOR	1
-#define MSM_VERSION_MINOR	3
+#define MSM_VERSION_MINOR	4
 #define MSM_VERSION_PATCHLEVEL	0
 
 static const struct drm_mode_config_funcs mode_config_funcs = {
@@ -803,6 +804,14 @@  static int msm_ioctl_submitqueue_new(struct drm_device *dev, void *data,
 		args->flags, &args->id);
 }
 
+static int msm_ioctl_submitqueue_query(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_msm_submitqueue_query *args = data;	/* ioctl payload */
+
+	return msm_submitqueue_query(dev, file->driver_priv, args->id,
+		args->param, args->data, args->len);
+}
 
 static int msm_ioctl_submitqueue_close(struct drm_device *dev, void *data,
 		struct drm_file *file)
@@ -823,6 +832,7 @@  static const struct drm_ioctl_desc msm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE,  msm_ioctl_gem_madvise,  DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_NEW,   msm_ioctl_submitqueue_new,   DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_CLOSE, msm_ioctl_submitqueue_close, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(MSM_SUBMITQUEUE_QUERY, msm_ioctl_submitqueue_query, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 
 static const struct vm_operations_struct vm_ops = {
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 48ed5b9a8580..56c666f25aa1 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -320,6 +320,8 @@  struct msm_gpu_submitqueue *msm_submitqueue_get(struct msm_file_private *ctx,
 		u32 id);
 int msm_submitqueue_create(struct drm_device *drm, struct msm_file_private *ctx,
 		u32 prio, u32 flags, u32 *id);
+int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
+		u32 id, u32 param, u64 data, u32 len);
 int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id);
 void msm_submitqueue_close(struct msm_file_private *ctx);
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 1c09acfb4028..925532197584 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -324,6 +324,9 @@  static void recover_worker(struct work_struct *work)
 	if (submit) {
 		struct task_struct *task;
 
+		/* Increment the submitqueue fault count */
+		submit->queue->faults++;
+
 		rcu_read_lock();
 		task = pid_task(submit->pid, PIDTYPE_PID);
 		if (task) {
diff --git a/drivers/gpu/drm/msm/msm_submitqueue.c b/drivers/gpu/drm/msm/msm_submitqueue.c
index 5115f75b5b7f..e12f2bd0347d 100644
--- a/drivers/gpu/drm/msm/msm_submitqueue.c
+++ b/drivers/gpu/drm/msm/msm_submitqueue.c
@@ -120,6 +120,27 @@  int msm_submitqueue_init(struct drm_device *drm, struct msm_file_private *ctx)
 	return msm_submitqueue_create(drm, ctx, default_prio, 0, NULL);
 }
 
+/* Copy @param of queue @id to the user buffer @data; returns 0 or -errno */
+int msm_submitqueue_query(struct drm_device *drm, struct msm_file_private *ctx,
+		u32 id, u32 param, u64 data, u32 len)
+{
+	struct msm_gpu_submitqueue *queue = msm_submitqueue_get(ctx, id);
+	int ret = -EINVAL;	/* result for an unrecognized @param */
+
+	if (!queue)
+		return -ENOENT;
+
+	if (param == MSM_SUBMITQUEUE_PARAM_FAULTS) {
+		size_t size = min_t(size_t, len, sizeof(queue->faults));
+
+		ret = copy_to_user(u64_to_user_ptr(data), &queue->faults, size)
+			? -EFAULT : 0;
+	}
+
+	msm_submitqueue_put(queue);
+	return ret;
+}
+
 int msm_submitqueue_remove(struct msm_file_private *ctx, u32 id)
 {
 	struct msm_gpu_submitqueue *entry;
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index c06d0a5bdd80..0f01ba60fcf7 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -273,6 +273,16 @@  struct drm_msm_submitqueue {
 	__u32 id;      /* out, identifier */
 };
 
+#define MSM_SUBMITQUEUE_PARAM_FAULTS   0
+
+struct drm_msm_submitqueue_query {
+	__u64 data;    /* in, user pointer the queried value is copied to */
+	__u32 id;      /* in, submitqueue id to query */
+	__u32 param;   /* in, MSM_SUBMITQUEUE_PARAM_x */
+	__u32 len;     /* in, max number of bytes to copy to data */
+	__u32 pad;
+};
+
 #define DRM_MSM_GET_PARAM              0x00
 /* placeholder:
 #define DRM_MSM_SET_PARAM              0x01
@@ -289,6 +299,7 @@  struct drm_msm_submitqueue {
  */
 #define DRM_MSM_SUBMITQUEUE_NEW        0x0A
 #define DRM_MSM_SUBMITQUEUE_CLOSE      0x0B
+#define DRM_MSM_SUBMITQUEUE_QUERY      0x0C
 
 #define DRM_IOCTL_MSM_GET_PARAM        DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
 #define DRM_IOCTL_MSM_GEM_NEW          DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
@@ -300,6 +311,7 @@  struct drm_msm_submitqueue {
 #define DRM_IOCTL_MSM_GEM_MADVISE      DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_MADVISE, struct drm_msm_gem_madvise)
 #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW    DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
 #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE  DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
+#define DRM_IOCTL_MSM_SUBMITQUEUE_QUERY  DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_QUERY, struct drm_msm_submitqueue_query)
 
 #if defined(__cplusplus)
 }