diff mbox series

[v2] drm/v3d: clean caches at the end of render jobs on request from user space

Message ID 20190918091521.9765-1-itoral@igalia.com (mailing list archive)
State New, archived
Headers show
Series [v2] drm/v3d: clean caches at the end of render jobs on request from user space | expand

Commit Message

Iago Toral Sept. 18, 2019, 9:15 a.m. UTC
Extend the user-space ioctl for CL submissions so that it can include a
request to flush the cache once the CL execution has completed. This fixes
the memory write violation messages reported by the kernel in workloads
involving shader memory writes (SSBOs, shader images, scratch, etc.), which
sometimes also lead to GPU resets during Piglit and CTS runs.

v2: If v3d_job_init() fails, kfree() the job instead of calling
    v3d_job_put() on it (Eric Anholt).

Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20190912083516.13797-1-itoral@igalia.com
---
 drivers/gpu/drm/v3d/v3d_gem.c | 54 +++++++++++++++++++++++++++++------
 include/uapi/drm/v3d_drm.h    |  7 +++--
 2 files changed, 50 insertions(+), 11 deletions(-)

Comments

Iago Toral Sept. 18, 2019, 9:30 a.m. UTC | #1
Eric, could you push this for me? I don't have push rights to DRM yet.

Iago

On Wed, 2019-09-18 at 11:15 +0200, Iago Toral Quiroga wrote:
> Extends the user space ioctl for CL submissions so it can include a
> request
> to flush the cache once the CL execution has completed. Fixes memory
> write violation messages reported by the kernel in workloads
> involving
> shader memory writes (SSBOs, shader images, scratch, etc) which
> sometimes
> also lead to GPU resets during Piglit and CTS workloads.
> 
> v2: if v3d_job_init() fails we need to kfree() the job instead of
>     v3d_job_put() it (Eric Anholt).
> 
> Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
> Reviewed-by: Eric Anholt <eric@anholt.net>
> Link: 
> https://patchwork.freedesktop.org/patch/msgid/20190912083516.13797-1-itoral@igalia.com
> ---
>  drivers/gpu/drm/v3d/v3d_gem.c | 54 +++++++++++++++++++++++++++++--
> ----
>  include/uapi/drm/v3d_drm.h    |  7 +++--
>  2 files changed, 50 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_gem.c
> b/drivers/gpu/drm/v3d/v3d_gem.c
> index 5d80507b539b..d46d91346d09 100644
> --- a/drivers/gpu/drm/v3d/v3d_gem.c
> +++ b/drivers/gpu/drm/v3d/v3d_gem.c
> @@ -530,13 +530,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> void *data,
>  	struct drm_v3d_submit_cl *args = data;
>  	struct v3d_bin_job *bin = NULL;
>  	struct v3d_render_job *render;
> +	struct v3d_job *clean_job = NULL;
> +	struct v3d_job *last_job;
>  	struct ww_acquire_ctx acquire_ctx;
>  	int ret = 0;
>  
>  	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args-
> >rcl_end);
>  
> -	if (args->pad != 0) {
> -		DRM_INFO("pad must be zero: %d\n", args->pad);
> +	if (args->flags != 0 &&
> +	    args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
> +		DRM_INFO("invalid flags: %d\n", args->flags);
>  		return -EINVAL;
>  	}
>  
> @@ -575,12 +578,31 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> void *data,
>  		bin->render = render;
>  	}
>  
> -	ret = v3d_lookup_bos(dev, file_priv, &render->base,
> +	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
> +		clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
> +		if (!clean_job) {
> +			ret = -ENOMEM;
> +			goto fail;
> +		}
> +
> +		ret = v3d_job_init(v3d, file_priv, clean_job,
> v3d_job_free, 0);
> +		if (ret) {
> +			kfree(clean_job);
> +			clean_job = NULL;
> +			goto fail;
> +		}
> +
> +		last_job = clean_job;
> +	} else {
> +		last_job = &render->base;
> +	}
> +
> +	ret = v3d_lookup_bos(dev, file_priv, last_job,
>  			     args->bo_handles, args->bo_handle_count);
>  	if (ret)
>  		goto fail;
>  
> -	ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
> +	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
>  	if (ret)
>  		goto fail;
>  
> @@ -599,28 +621,44 @@ v3d_submit_cl_ioctl(struct drm_device *dev,
> void *data,
>  	ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
>  	if (ret)
>  		goto fail_unreserve;
> +
> +	if (clean_job) {
> +		struct dma_fence *render_fence =
> +			dma_fence_get(render->base.done_fence);
> +		ret = drm_gem_fence_array_add(&clean_job->deps,
> render_fence);
> +		if (ret)
> +			goto fail_unreserve;
> +		ret = v3d_push_job(v3d_priv, clean_job,
> V3D_CACHE_CLEAN);
> +		if (ret)
> +			goto fail_unreserve;
> +	}
> +
>  	mutex_unlock(&v3d->sched_lock);
>  
>  	v3d_attach_fences_and_unlock_reservation(file_priv,
> -						 &render->base,
> +						 last_job,
>  						 &acquire_ctx,
>  						 args->out_sync,
> -						 render-
> >base.done_fence);
> +						 last_job->done_fence);
>  
>  	if (bin)
>  		v3d_job_put(&bin->base);
>  	v3d_job_put(&render->base);
> +	if (clean_job)
> +		v3d_job_put(clean_job);
>  
>  	return 0;
>  
>  fail_unreserve:
>  	mutex_unlock(&v3d->sched_lock);
> -	drm_gem_unlock_reservations(render->base.bo,
> -				    render->base.bo_count,
> &acquire_ctx);
> +	drm_gem_unlock_reservations(last_job->bo,
> +				    last_job->bo_count, &acquire_ctx);
>  fail:
>  	if (bin)
>  		v3d_job_put(&bin->base);
>  	v3d_job_put(&render->base);
> +	if (clean_job)
> +		v3d_job_put(clean_job);
>  
>  	return ret;
>  }
> diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
> index 58fbe48c91e9..58d2040ea48c 100644
> --- a/include/uapi/drm/v3d_drm.h
> +++ b/include/uapi/drm/v3d_drm.h
> @@ -48,6 +48,8 @@ extern "C" {
>  #define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE +
> DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
>  #define DRM_IOCTL_V3D_SUBMIT_CSD          DRM_IOW(DRM_COMMAND_BASE +
> DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
>  
> +#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG        0x01
> +
>  /**
>   * struct drm_v3d_submit_cl - ioctl argument for submitting commands
> to the 3D
>   * engine.
> @@ -61,7 +63,7 @@ extern "C" {
>   * flushed by the time the render done IRQ happens, which is the
>   * trigger for out_sync.  Any dirtying of cachelines by the job
> (only
>   * possible using TMU writes) must be flushed by the caller using
> the
> - * CL's cache flush commands.
> + * DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG flag.
>   */
>  struct drm_v3d_submit_cl {
>  	/* Pointer to the binner command list.
> @@ -124,8 +126,7 @@ struct drm_v3d_submit_cl {
>  	/* Number of BO handles passed in (size is that times 4). */
>  	__u32 bo_handle_count;
>  
> -	/* Pad, must be zero-filled. */
> -	__u32 pad;
> +	__u32 flags;
>  };
>  
>  /**
Eric Anholt Sept. 18, 2019, 5:28 p.m. UTC | #2
Iago Toral Quiroga <itoral@igalia.com> writes:

> Extends the user space ioctl for CL submissions so it can include a request
> to flush the cache once the CL execution has completed. Fixes memory
> write violation messages reported by the kernel in workloads involving
> shader memory writes (SSBOs, shader images, scratch, etc) which sometimes
> also lead to GPU resets during Piglit and CTS workloads.
>
> v2: if v3d_job_init() fails we need to kfree() the job instead of
>     v3d_job_put() it (Eric Anholt).
>
> Signed-off-by: Iago Toral Quiroga <itoral@igalia.com>
> Reviewed-by: Eric Anholt <eric@anholt.net>
> Link: https://patchwork.freedesktop.org/patch/msgid/20190912083516.13797-1-itoral@igalia.com

> diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
> index 58fbe48c91e9..58d2040ea48c 100644
> --- a/include/uapi/drm/v3d_drm.h
> +++ b/include/uapi/drm/v3d_drm.h
> @@ -48,6 +48,8 @@ extern "C" {
>  #define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
>  #define DRM_IOCTL_V3D_SUBMIT_CSD          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
>  
> +#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG        0x01
> +

Hmm.  vc4, msm, panfrost, etnaviv, and i915 all name their submit flags
without "_FLAG" in the name.  Can we drop it here, too?

Also, I just noticed: the patch doesn't add a new param to indicate the
availability of the new flag.  You're going to need one (unless you've
got some other clever plan?) so that a new Mesa can detect an old kernel
and not expose the GLES 3.1 features that require the flag.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 5d80507b539b..d46d91346d09 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -530,13 +530,16 @@  v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct drm_v3d_submit_cl *args = data;
 	struct v3d_bin_job *bin = NULL;
 	struct v3d_render_job *render;
+	struct v3d_job *clean_job = NULL;
+	struct v3d_job *last_job;
 	struct ww_acquire_ctx acquire_ctx;
 	int ret = 0;
 
 	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
 
-	if (args->pad != 0) {
-		DRM_INFO("pad must be zero: %d\n", args->pad);
+	if (args->flags != 0 &&
+	    args->flags != DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
+		DRM_INFO("invalid flags: %d\n", args->flags);
 		return -EINVAL;
 	}
 
@@ -575,12 +578,31 @@  v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 		bin->render = render;
 	}
 
-	ret = v3d_lookup_bos(dev, file_priv, &render->base,
+	if (args->flags & DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG) {
+		clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
+		if (!clean_job) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0);
+		if (ret) {
+			kfree(clean_job);
+			clean_job = NULL;
+			goto fail;
+		}
+
+		last_job = clean_job;
+	} else {
+		last_job = &render->base;
+	}
+
+	ret = v3d_lookup_bos(dev, file_priv, last_job,
 			     args->bo_handles, args->bo_handle_count);
 	if (ret)
 		goto fail;
 
-	ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
+	ret = v3d_lock_bo_reservations(last_job, &acquire_ctx);
 	if (ret)
 		goto fail;
 
@@ -599,28 +621,44 @@  v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
 	if (ret)
 		goto fail_unreserve;
+
+	if (clean_job) {
+		struct dma_fence *render_fence =
+			dma_fence_get(render->base.done_fence);
+		ret = drm_gem_fence_array_add(&clean_job->deps, render_fence);
+		if (ret)
+			goto fail_unreserve;
+		ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
+		if (ret)
+			goto fail_unreserve;
+	}
+
 	mutex_unlock(&v3d->sched_lock);
 
 	v3d_attach_fences_and_unlock_reservation(file_priv,
-						 &render->base,
+						 last_job,
 						 &acquire_ctx,
 						 args->out_sync,
-						 render->base.done_fence);
+						 last_job->done_fence);
 
 	if (bin)
 		v3d_job_put(&bin->base);
 	v3d_job_put(&render->base);
+	if (clean_job)
+		v3d_job_put(clean_job);
 
 	return 0;
 
 fail_unreserve:
 	mutex_unlock(&v3d->sched_lock);
-	drm_gem_unlock_reservations(render->base.bo,
-				    render->base.bo_count, &acquire_ctx);
+	drm_gem_unlock_reservations(last_job->bo,
+				    last_job->bo_count, &acquire_ctx);
 fail:
 	if (bin)
 		v3d_job_put(&bin->base);
 	v3d_job_put(&render->base);
+	if (clean_job)
+		v3d_job_put(clean_job);
 
 	return ret;
 }
diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h
index 58fbe48c91e9..58d2040ea48c 100644
--- a/include/uapi/drm/v3d_drm.h
+++ b/include/uapi/drm/v3d_drm.h
@@ -48,6 +48,8 @@  extern "C" {
 #define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
 #define DRM_IOCTL_V3D_SUBMIT_CSD          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, struct drm_v3d_submit_csd)
 
+#define DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG        0x01
+
 /**
  * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
  * engine.
@@ -61,7 +63,7 @@  extern "C" {
  * flushed by the time the render done IRQ happens, which is the
  * trigger for out_sync.  Any dirtying of cachelines by the job (only
  * possible using TMU writes) must be flushed by the caller using the
- * CL's cache flush commands.
+ * DRM_V3D_SUBMIT_CL_FLUSH_CACHE_FLAG flag.
  */
 struct drm_v3d_submit_cl {
 	/* Pointer to the binner command list.
@@ -124,8 +126,7 @@  struct drm_v3d_submit_cl {
 	/* Number of BO handles passed in (size is that times 4). */
 	__u32 bo_handle_count;
 
-	/* Pad, must be zero-filled. */
-	__u32 pad;
+	__u32 flags;
 };
 
 /**