Message ID | 20180421225022.7592-4-stschake@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Stefan Schake <stschake@gmail.com> writes: > Allow specifying a syncobj on render job submission where we store the > fence for the job. This gives userland flexible access to the fence. > > Signed-off-by: Stefan Schake <stschake@gmail.com> > --- > drivers/gpu/drm/vc4/vc4_gem.c | 38 +++++++++++++++++++++++++++++++++++--- > include/uapi/drm/vc4_drm.h | 13 +++++++++++++ > 2 files changed, 48 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c > index 232363488125..b39515a4ddcb 100644 > --- a/drivers/gpu/drm/vc4/vc4_gem.c > +++ b/drivers/gpu/drm/vc4/vc4_gem.c > @@ -656,7 +656,8 @@ vc4_lock_bo_reservations(struct drm_device *dev, > */ > static int > vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, > - struct ww_acquire_ctx *acquire_ctx) > + struct ww_acquire_ctx *acquire_ctx, > + struct drm_syncobj *out_sync) > { > struct vc4_dev *vc4 = to_vc4_dev(dev); > struct vc4_exec_info *renderjob; > @@ -679,6 +680,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, > fence->seqno = exec->seqno; > exec->fence = &fence->base; > > + if (out_sync) > + drm_syncobj_replace_fence(out_sync, exec->fence); > + > vc4_update_bo_seqnos(exec, seqno); > > vc4_unlock_bo_reservations(dev, exec, acquire_ctx); > @@ -1114,6 +1118,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > struct vc4_dev *vc4 = to_vc4_dev(dev); > struct vc4_file *vc4file = file_priv->driver_priv; > struct drm_vc4_submit_cl *args = data; > + struct drm_syncobj *out_sync = NULL; > struct vc4_exec_info *exec; > struct ww_acquire_ctx acquire_ctx; > struct dma_fence *in_fence; > @@ -1123,11 +1128,17 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > VC4_SUBMIT_CL_FIXED_RCL_ORDER | > VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | > VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y | > - VC4_SUBMIT_CL_IMPORT_SYNCOBJ)) != 0) { > + VC4_SUBMIT_CL_IMPORT_SYNCOBJ | > + VC4_SUBMIT_CL_EXPORT_SYNCOBJ)) != 0) { > DRM_DEBUG("Unknown flags: 
0x%02x\n", args->flags); > return -EINVAL; > } > > + if (args->pad2 != 0) { > + DRM_DEBUG("->pad2 must be set to zero\n"); > + return -EINVAL; > + } > + > exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); > if (!exec) { > DRM_ERROR("malloc failure on exec struct\n"); > @@ -1202,12 +1213,33 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > if (ret) > goto fail; > > + if (args->flags & VC4_SUBMIT_CL_EXPORT_SYNCOBJ) { > + out_sync = drm_syncobj_find(file_priv, args->out_sync); > + if (!out_sync) { > + ret = -EINVAL; > + goto fail; > + } > + > + /* We replace the fence in out_sync in vc4_queue_submit since > + * the render job could execute immediately after that call. > + * If it finishes before our ioctl processing resumes the > + * render job fence could already have been freed. > + */ > + } Same comment about not needing the exec flag. > + > /* Clear this out of the struct we'll be putting in the queue, > * since it's part of our stack. > */ > exec->args = NULL; > > - ret = vc4_queue_submit(dev, exec, &acquire_ctx); > + ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); > + > + /* The syncobj isn't part of the exec data and we need to free our > + * reference even if job submission failed. > + */ > + if (out_sync) > + drm_syncobj_put(out_sync); > + > if (ret) > goto fail; > > diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h > index 389f21931c25..3a2ef9b5b60b 100644 > --- a/include/uapi/drm/vc4_drm.h > +++ b/include/uapi/drm/vc4_drm.h > @@ -174,6 +174,7 @@ struct drm_vc4_submit_cl { > #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2) > #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3) > #define VC4_SUBMIT_CL_IMPORT_SYNCOBJ (1 << 4) > +#define VC4_SUBMIT_CL_EXPORT_SYNCOBJ (1 << 5) > __u32 flags; > > /* Returned value of the seqno of this render job (for the > @@ -189,6 +190,18 @@ struct drm_vc4_submit_cl { > * syncobj is signalled. > */ > __u32 in_sync; > + > + /* Syncobj handle to export fence to. 
Set together with EXPORT_SYNCOBJ
> + * flag. If set, the fence in the syncobj will be replaced with a fence
> + * that signals upon completion of this render job.
> + */
> + __u32 out_sync;
> +
> + /* Unused field to align this struct on 64 bits. Must be set to 0.
> + * If one ever needs to add an u32 field to this struct, this field
> + * can be used.
> + */
> + __u32 pad2;

As far as I know, there's no need to align the struct to 64 bits.
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 232363488125..b39515a4ddcb 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -656,7 +656,8 @@ vc4_lock_bo_reservations(struct drm_device *dev, */ static int vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, - struct ww_acquire_ctx *acquire_ctx) + struct ww_acquire_ctx *acquire_ctx, + struct drm_syncobj *out_sync) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *renderjob; @@ -679,6 +680,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, fence->seqno = exec->seqno; exec->fence = &fence->base; + if (out_sync) + drm_syncobj_replace_fence(out_sync, exec->fence); + vc4_update_bo_seqnos(exec, seqno); vc4_unlock_bo_reservations(dev, exec, acquire_ctx); @@ -1114,6 +1118,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file = file_priv->driver_priv; struct drm_vc4_submit_cl *args = data; + struct drm_syncobj *out_sync = NULL; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; struct dma_fence *in_fence; @@ -1123,11 +1128,17 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, VC4_SUBMIT_CL_FIXED_RCL_ORDER | VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y | - VC4_SUBMIT_CL_IMPORT_SYNCOBJ)) != 0) { + VC4_SUBMIT_CL_IMPORT_SYNCOBJ | + VC4_SUBMIT_CL_EXPORT_SYNCOBJ)) != 0) { DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags); return -EINVAL; } + if (args->pad2 != 0) { + DRM_DEBUG("->pad2 must be set to zero\n"); + return -EINVAL; + } + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); if (!exec) { DRM_ERROR("malloc failure on exec struct\n"); @@ -1202,12 +1213,33 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->flags & VC4_SUBMIT_CL_EXPORT_SYNCOBJ) { + out_sync = drm_syncobj_find(file_priv, args->out_sync); + if (!out_sync) { + ret = -EINVAL; + goto fail; + } + + 
/* We replace the fence in out_sync in vc4_queue_submit since + * the render job could execute immediately after that call. + * If it finishes before our ioctl processing resumes the + * render job fence could already have been freed. + */ + } + /* Clear this out of the struct we'll be putting in the queue, * since it's part of our stack. */ exec->args = NULL; - ret = vc4_queue_submit(dev, exec, &acquire_ctx); + ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); + + /* The syncobj isn't part of the exec data and we need to free our + * reference even if job submission failed. + */ + if (out_sync) + drm_syncobj_put(out_sync); + if (ret) goto fail; diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index 389f21931c25..3a2ef9b5b60b 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -174,6 +174,7 @@ struct drm_vc4_submit_cl { #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2) #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3) #define VC4_SUBMIT_CL_IMPORT_SYNCOBJ (1 << 4) +#define VC4_SUBMIT_CL_EXPORT_SYNCOBJ (1 << 5) __u32 flags; /* Returned value of the seqno of this render job (for the @@ -189,6 +190,18 @@ struct drm_vc4_submit_cl { * syncobj is signalled. */ __u32 in_sync; + + /* Syncobj handle to export fence to. Set together with EXPORT_SYNCOBJ + * flag. If set, the fence in the syncobj will be replaced with a fence + * that signals upon completion of this render job. + */ + __u32 out_sync; + + /* Unused field to align this struct on 64 bits. Must be set to 0. + * If one ever needs to add an u32 field to this struct, this field + * can be used. + */ + __u32 pad2; }; /**
Allow specifying a syncobj on render job submission where we store the
fence for the job. This gives userland flexible access to the fence.

Signed-off-by: Stefan Schake <stschake@gmail.com>
---
 drivers/gpu/drm/vc4/vc4_gem.c | 38 +++++++++++++++++++++++++++++++++++---
 include/uapi/drm/vc4_drm.h | 13 +++++++++++++
 2 files changed, 48 insertions(+), 3 deletions(-)