Message ID | 20180421225022.7592-3-stschake@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Stefan Schake <stschake@gmail.com> writes: > Allow userland to specify a syncobj that is waited on before a render job > starts processing. > > Signed-off-by: Stefan Schake <stschake@gmail.com> > --- > drivers/gpu/drm/vc4/vc4_drv.h | 2 ++ > drivers/gpu/drm/vc4/vc4_gem.c | 33 +++++++++++++++++++++++++++------ > include/uapi/drm/vc4_drm.h | 9 +++++---- > 3 files changed, 34 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h > index 4288615b66a2..3105df99cb12 100644 > --- a/drivers/gpu/drm/vc4/vc4_drv.h > +++ b/drivers/gpu/drm/vc4/vc4_drv.h > @@ -10,6 +10,8 @@ > #include <drm/drmP.h> > #include <drm/drm_encoder.h> > #include <drm/drm_gem_cma_helper.h> > +#include <drm/drm_syncobj.h> > + Drop the extra \n? > #include "uapi/drm/vc4_drm.h" > > diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c > index 2107b0daf8ef..232363488125 100644 > --- a/drivers/gpu/drm/vc4/vc4_gem.c > +++ b/drivers/gpu/drm/vc4/vc4_gem.c > @@ -27,6 +27,7 @@ > #include <linux/device.h> > #include <linux/io.h> > #include <linux/sched/signal.h> > +#include <linux/dma-fence-array.h> > > #include "uapi/drm/vc4_drm.h" > #include "vc4_drv.h" > @@ -1115,21 +1116,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, > struct drm_vc4_submit_cl *args = data; > struct vc4_exec_info *exec; > struct ww_acquire_ctx acquire_ctx; > + struct dma_fence *in_fence; > int ret = 0; > > if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | > VC4_SUBMIT_CL_FIXED_RCL_ORDER | > VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | > - VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) { > + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y | > + VC4_SUBMIT_CL_IMPORT_SYNCOBJ)) != 0) { > DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags); > return -EINVAL; > } > > - if (args->pad2 != 0) { > - DRM_DEBUG("->pad2 must be set to zero\n"); > - return -EINVAL; > - } > - > exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); > if (!exec) { > DRM_ERROR("malloc failure on exec struct\n"); 
> @@ -1164,6 +1162,29 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
> 		}
> 	}
>
> +	if (args->flags & VC4_SUBMIT_CL_IMPORT_SYNCOBJ) {
> +		ret = drm_syncobj_find_fence(file_priv, args->in_sync,
> +					     &in_fence);
> +		if (ret)
> +			goto fail;
> +
> +		/* When the fence (or fence array) is exclusively from our
> +		 * context we can skip the wait since jobs are executed in
> +		 * order of their submission through this ioctl and this can
> +		 * only have fences from a prior job.
> +		 */
> +		if (!dma_fence_match_context(in_fence,
> +					     vc4->dma_fence_context)) {
> +			ret = dma_fence_wait(in_fence, true);
> +			if (ret) {
> +				dma_fence_put(in_fence);
> +				goto fail;
> +			}
> +		}
> +
> +		dma_fence_put(in_fence);
> +	}

I don't think we need an extra flag here. 0 is an invalid syncobj handle, so that could be the indication that there's no input syncobj.

Long term, we should probably only block once we're ready to exec the job, not at submit time. However, I think we can wait on fixing that until we start using the GPU scheduler.
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 4288615b66a2..3105df99cb12 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -10,6 +10,8 @@ #include <drm/drmP.h> #include <drm/drm_encoder.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_syncobj.h> + #include "uapi/drm/vc4_drm.h" diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 2107b0daf8ef..232363488125 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -27,6 +27,7 @@ #include <linux/device.h> #include <linux/io.h> #include <linux/sched/signal.h> +#include <linux/dma-fence-array.h> #include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" @@ -1115,21 +1116,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct drm_vc4_submit_cl *args = data; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; + struct dma_fence *in_fence; int ret = 0; if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | VC4_SUBMIT_CL_FIXED_RCL_ORDER | VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X | - VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) { + VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y | + VC4_SUBMIT_CL_IMPORT_SYNCOBJ)) != 0) { DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags); return -EINVAL; } - if (args->pad2 != 0) { - DRM_DEBUG("->pad2 must be set to zero\n"); - return -EINVAL; - } - exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); if (!exec) { DRM_ERROR("malloc failure on exec struct\n"); @@ -1164,6 +1162,29 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } } + if (args->flags & VC4_SUBMIT_CL_IMPORT_SYNCOBJ) { + ret = drm_syncobj_find_fence(file_priv, args->in_sync, + &in_fence); + if (ret) + goto fail; + + /* When the fence (or fence array) is exclusively from our + * context we can skip the wait since jobs are executed in + * order of their submission through this ioctl and this can + * only have fences from a prior job. 
+ */ + if (!dma_fence_match_context(in_fence, + vc4->dma_fence_context)) { + ret = dma_fence_wait(in_fence, true); + if (ret) { + dma_fence_put(in_fence); + goto fail; + } + } + + dma_fence_put(in_fence); + } + if (exec->args->bin_cl_size != 0) { ret = vc4_get_bcl(dev, exec); if (ret) diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h index b95a0e11cb07..389f21931c25 100644 --- a/include/uapi/drm/vc4_drm.h +++ b/include/uapi/drm/vc4_drm.h @@ -173,6 +173,7 @@ struct drm_vc4_submit_cl { #define VC4_SUBMIT_CL_FIXED_RCL_ORDER (1 << 1) #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X (1 << 2) #define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y (1 << 3) +#define VC4_SUBMIT_CL_IMPORT_SYNCOBJ (1 << 4) __u32 flags; /* Returned value of the seqno of this render job (for the @@ -183,11 +184,11 @@ struct drm_vc4_submit_cl { /* ID of the perfmon to attach to this job. 0 means no perfmon. */ __u32 perfmonid; - /* Unused field to align this struct on 64 bits. Must be set to 0. - * If one ever needs to add an u32 field to this struct, this field - * can be used. + /* Syncobj handle to wait on. Set together with IMPORT_SYNCOBJ flag. + * If set, processing of this render job will not start until the + * syncobj is signalled. */ - __u32 pad2; + __u32 in_sync; }; /**
Allow userland to specify a syncobj that is waited on before a render job
starts processing.

Signed-off-by: Stefan Schake <stschake@gmail.com>
---
 drivers/gpu/drm/vc4/vc4_drv.h |  2 ++
 drivers/gpu/drm/vc4/vc4_gem.c | 33 +++++++++++++++++++++++++++------
 include/uapi/drm/vc4_drm.h    |  9 +++++----
 3 files changed, 34 insertions(+), 10 deletions(-)