diff mbox

[3/4] drm/vc4: Export fence through syncobj

Message ID 20180421225022.7592-4-stschake@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Stefan Schake April 21, 2018, 10:50 p.m. UTC
Allow specifying a syncobj on render job submission where we store the
fence for the job. This gives userland flexible access to the fence.

Signed-off-by: Stefan Schake <stschake@gmail.com>
---
 drivers/gpu/drm/vc4/vc4_gem.c | 38 +++++++++++++++++++++++++++++++++++---
 include/uapi/drm/vc4_drm.h    | 13 +++++++++++++
 2 files changed, 48 insertions(+), 3 deletions(-)

Comments

Eric Anholt April 23, 2018, 7:01 p.m. UTC | #1
Stefan Schake <stschake@gmail.com> writes:

> Allow specifying a syncobj on render job submission where we store the
> fence for the job. This gives userland flexible access to the fence.
>
> Signed-off-by: Stefan Schake <stschake@gmail.com>
> ---
>  drivers/gpu/drm/vc4/vc4_gem.c | 38 +++++++++++++++++++++++++++++++++++---
>  include/uapi/drm/vc4_drm.h    | 13 +++++++++++++
>  2 files changed, 48 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index 232363488125..b39515a4ddcb 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -656,7 +656,8 @@ vc4_lock_bo_reservations(struct drm_device *dev,
>   */
>  static int
>  vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
> -		 struct ww_acquire_ctx *acquire_ctx)
> +		 struct ww_acquire_ctx *acquire_ctx,
> +		 struct drm_syncobj *out_sync)
>  {
>  	struct vc4_dev *vc4 = to_vc4_dev(dev);
>  	struct vc4_exec_info *renderjob;
> @@ -679,6 +680,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
>  	fence->seqno = exec->seqno;
>  	exec->fence = &fence->base;
>  
> +	if (out_sync)
> +		drm_syncobj_replace_fence(out_sync, exec->fence);
> +
>  	vc4_update_bo_seqnos(exec, seqno);
>  
>  	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
> @@ -1114,6 +1118,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>  	struct vc4_dev *vc4 = to_vc4_dev(dev);
>  	struct vc4_file *vc4file = file_priv->driver_priv;
>  	struct drm_vc4_submit_cl *args = data;
> +	struct drm_syncobj *out_sync = NULL;
>  	struct vc4_exec_info *exec;
>  	struct ww_acquire_ctx acquire_ctx;
>  	struct dma_fence *in_fence;
> @@ -1123,11 +1128,17 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>  			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
>  			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
>  			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y |
> -			     VC4_SUBMIT_CL_IMPORT_SYNCOBJ)) != 0) {
> +			     VC4_SUBMIT_CL_IMPORT_SYNCOBJ |
> +			     VC4_SUBMIT_CL_EXPORT_SYNCOBJ)) != 0) {
>  		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
>  		return -EINVAL;
>  	}
>  
> +	if (args->pad2 != 0) {
> +		DRM_DEBUG("->pad2 must be set to zero\n");
> +		return -EINVAL;
> +	}
> +
>  	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
>  	if (!exec) {
>  		DRM_ERROR("malloc failure on exec struct\n");
> @@ -1202,12 +1213,33 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>  	if (ret)
>  		goto fail;
>  
> +	if (args->flags & VC4_SUBMIT_CL_EXPORT_SYNCOBJ) {
> +		out_sync = drm_syncobj_find(file_priv, args->out_sync);
> +		if (!out_sync) {
> +			ret = -EINVAL;
> +			goto fail;
> +		}
> +
> +		/* We replace the fence in out_sync in vc4_queue_submit since
> +		 * the render job could execute immediately after that call.
> +		 * If it finishes before our ioctl processing resumes, the
> +		 * render job fence could already have been freed.
> +		 */
> +	}

Same comment about not needing the exec flag.

> +
>  	/* Clear this out of the struct we'll be putting in the queue,
>  	 * since it's part of our stack.
>  	 */
>  	exec->args = NULL;
>  
> -	ret = vc4_queue_submit(dev, exec, &acquire_ctx);
> +	ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);
> +
> +	/* The syncobj isn't part of the exec data and we need to free our
> +	 * reference even if job submission failed.
> +	 */
> +	if (out_sync)
> +		drm_syncobj_put(out_sync);
> +
>  	if (ret)
>  		goto fail;
>  
> diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
> index 389f21931c25..3a2ef9b5b60b 100644
> --- a/include/uapi/drm/vc4_drm.h
> +++ b/include/uapi/drm/vc4_drm.h
> @@ -174,6 +174,7 @@ struct drm_vc4_submit_cl {
>  #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X		(1 << 2)
>  #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y		(1 << 3)
>  #define VC4_SUBMIT_CL_IMPORT_SYNCOBJ			(1 << 4)
> +#define VC4_SUBMIT_CL_EXPORT_SYNCOBJ			(1 << 5)
>  	__u32 flags;
>  
>  	/* Returned value of the seqno of this render job (for the
> @@ -189,6 +190,18 @@ struct drm_vc4_submit_cl {
>  	 * syncobj is signalled.
>  	 */
>  	__u32 in_sync;
> +
> +	/* Syncobj handle to export fence to. Set together with EXPORT_SYNCOBJ
> +	 * flag. If set, the fence in the syncobj will be replaced with a fence
> +	 * that signals upon completion of this render job.
> +	 */
> +	__u32 out_sync;
> +
> +	/* Unused field to align this struct on 64 bits. Must be set to 0.
> +	 * If one ever needs to add a u32 field to this struct, this field
> +	 * can be used.
> +	 */
> +	__u32 pad2;

As far as I know, there's no need to align the struct to 64 bits.
diff mbox

Patch

diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 232363488125..b39515a4ddcb 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -656,7 +656,8 @@  vc4_lock_bo_reservations(struct drm_device *dev,
  */
 static int
 vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
-		 struct ww_acquire_ctx *acquire_ctx)
+		 struct ww_acquire_ctx *acquire_ctx,
+		 struct drm_syncobj *out_sync)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_exec_info *renderjob;
@@ -679,6 +680,9 @@  vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
 	fence->seqno = exec->seqno;
 	exec->fence = &fence->base;
 
+	if (out_sync)
+		drm_syncobj_replace_fence(out_sync, exec->fence);
+
 	vc4_update_bo_seqnos(exec, seqno);
 
 	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
@@ -1114,6 +1118,7 @@  vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_file *vc4file = file_priv->driver_priv;
 	struct drm_vc4_submit_cl *args = data;
+	struct drm_syncobj *out_sync = NULL;
 	struct vc4_exec_info *exec;
 	struct ww_acquire_ctx acquire_ctx;
 	struct dma_fence *in_fence;
@@ -1123,11 +1128,17 @@  vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
 			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
 			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y |
-			     VC4_SUBMIT_CL_IMPORT_SYNCOBJ)) != 0) {
+			     VC4_SUBMIT_CL_IMPORT_SYNCOBJ |
+			     VC4_SUBMIT_CL_EXPORT_SYNCOBJ)) != 0) {
 		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
 		return -EINVAL;
 	}
 
+	if (args->pad2 != 0) {
+		DRM_DEBUG("->pad2 must be set to zero\n");
+		return -EINVAL;
+	}
+
 	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
 	if (!exec) {
 		DRM_ERROR("malloc failure on exec struct\n");
@@ -1202,12 +1213,33 @@  vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto fail;
 
+	if (args->flags & VC4_SUBMIT_CL_EXPORT_SYNCOBJ) {
+		out_sync = drm_syncobj_find(file_priv, args->out_sync);
+		if (!out_sync) {
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		/* We replace the fence in out_sync in vc4_queue_submit since
+		 * the render job could execute immediately after that call.
+		 * If it finishes before our ioctl processing resumes, the
+		 * render job fence could already have been freed.
+		 */
+	}
+
 	/* Clear this out of the struct we'll be putting in the queue,
 	 * since it's part of our stack.
 	 */
 	exec->args = NULL;
 
-	ret = vc4_queue_submit(dev, exec, &acquire_ctx);
+	ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);
+
+	/* The syncobj isn't part of the exec data and we need to free our
+	 * reference even if job submission failed.
+	 */
+	if (out_sync)
+		drm_syncobj_put(out_sync);
+
 	if (ret)
 		goto fail;
 
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index 389f21931c25..3a2ef9b5b60b 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -174,6 +174,7 @@  struct drm_vc4_submit_cl {
 #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X		(1 << 2)
 #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y		(1 << 3)
 #define VC4_SUBMIT_CL_IMPORT_SYNCOBJ			(1 << 4)
+#define VC4_SUBMIT_CL_EXPORT_SYNCOBJ			(1 << 5)
 	__u32 flags;
 
 	/* Returned value of the seqno of this render job (for the
@@ -189,6 +190,18 @@  struct drm_vc4_submit_cl {
 	 * syncobj is signalled.
 	 */
 	__u32 in_sync;
+
+	/* Syncobj handle to export fence to. Set together with EXPORT_SYNCOBJ
+	 * flag. If set, the fence in the syncobj will be replaced with a fence
+	 * that signals upon completion of this render job.
+	 */
+	__u32 out_sync;
+
+	/* Unused field to align this struct on 64 bits. Must be set to 0.
+	 * If one ever needs to add a u32 field to this struct, this field
+	 * can be used.
+	 */
+	__u32 pad2;
 };
 
 /**