Message ID | 20210705082950.3573841-6-boris.brezillon@collabora.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/panfrost: drm/panfrost: Add a new submit ioctl | expand |
On Mon, Jul 05, 2021 at 10:29:48AM +0200, Boris Brezillon wrote: > This should help limit the number of ioctls when submitting multiple > jobs. The new ioctl also supports syncobj timelines and BO access flags. > > v4: > * Implement panfrost_ioctl_submit() as a wrapper around > panfrost_submit_job() > * Replace stride fields by a version field which is mapped to > a <job_stride,bo_ref_stride,syncobj_ref_stride> tuple internally > > v3: > * Re-use panfrost_get_job_bos() and panfrost_get_job_in_syncs() in the > old submit path > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > --- > drivers/gpu/drm/panfrost/panfrost_drv.c | 562 ++++++++++++++++-------- > drivers/gpu/drm/panfrost/panfrost_job.c | 3 + > include/uapi/drm/panfrost_drm.h | 92 ++++ > 3 files changed, 479 insertions(+), 178 deletions(-) > > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c > index 8e28ef30310b..a624e4f86aff 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c > @@ -138,184 +138,6 @@ panfrost_get_job_mappings(struct drm_file *file_priv, struct panfrost_job *job) > return 0; > } > > -/** > - * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects > - * referenced by the job. > - * @dev: DRM device > - * @file_priv: DRM file for this fd > - * @args: IOCTL args > - * @job: job being set up > - * > - * Resolve handles from userspace to BOs and attach them to job. > - * > - * Note that this function doesn't need to unreference the BOs on > - * failure, because that will happen at panfrost_job_cleanup() time. > - */ > -static int > -panfrost_lookup_bos(struct drm_device *dev, > - struct drm_file *file_priv, > - struct drm_panfrost_submit *args, > - struct panfrost_job *job) > -{ > - unsigned int i; > - int ret; > - > - job->bo_count = args->bo_handle_count; > - > - if (!job->bo_count) > - return 0; > - > - job->bo_flags = kvmalloc_array(job->bo_count, > - sizeof(*job->bo_flags), > - GFP_KERNEL | __GFP_ZERO); > - if (!job->bo_flags) > - return -ENOMEM; > - > - for (i = 0; i < job->bo_count; i++) > - job->bo_flags[i] = PANFROST_BO_REF_EXCLUSIVE; > - > - ret = drm_gem_objects_lookup(file_priv, > - (void __user *)(uintptr_t)args->bo_handles, > - job->bo_count, &job->bos); > - if (ret) > - return ret; > - > - return panfrost_get_job_mappings(file_priv, job); > -} > - > -/** > - * panfrost_copy_in_sync() - Sets up job->deps with the sync objects > - * referenced by the job. > - * @dev: DRM device > - * @file_priv: DRM file for this fd > - * @args: IOCTL args > - * @job: job being set up > - * > - * Resolve syncobjs from userspace to fences and attach them to job. > - * > - * Note that this function doesn't need to unreference the fences on > - * failure, because that will happen at panfrost_job_cleanup() time. > - */ > -static int > -panfrost_copy_in_sync(struct drm_device *dev, > - struct drm_file *file_priv, > - struct drm_panfrost_submit *args, > - struct panfrost_job *job) > -{ > - u32 *handles; > - int ret = 0; > - int i, in_fence_count; > - > - in_fence_count = args->in_sync_count; > - > - if (!in_fence_count) > - return 0; > - > - handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL); > - if (!handles) { > - ret = -ENOMEM; > - DRM_DEBUG("Failed to allocate incoming syncobj handles\n"); > - goto fail; > - } > - > - if (copy_from_user(handles, > - (void __user *)(uintptr_t)args->in_syncs, > - in_fence_count * sizeof(u32))) { > - ret = -EFAULT; > - DRM_DEBUG("Failed to copy in syncobj handles\n"); > - goto fail; > - } > - > - for (i = 0; i < in_fence_count; i++) { > - struct dma_fence *fence; > - > - ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0, > - &fence); > - if (ret) > - goto fail; > - > - ret = drm_gem_fence_array_add(&job->deps, fence); > - > - if (ret) > - goto fail; > - } > - > -fail: > - kvfree(handles); > - return ret; > -} > - > -static int panfrost_ioctl_submit(struct drm_device *dev, void *data, > - struct drm_file *file) > -{ > - struct panfrost_device *pfdev = dev->dev_private; > - struct drm_panfrost_submit *args = data; > - struct drm_syncobj *sync_out = NULL; > - struct panfrost_submitqueue *queue; > - struct panfrost_job *job; > - int ret = 0; > - > - if (!args->jc) > - return -EINVAL; > - > - if (args->requirements && args->requirements != PANFROST_JD_REQ_FS) > - return -EINVAL; > - > - queue = panfrost_submitqueue_get(file->driver_priv, 0); > - if (IS_ERR(queue)) > - return PTR_ERR(queue); > - > - if (args->out_sync > 0) { > - sync_out = drm_syncobj_find(file, args->out_sync); > - if (!sync_out) { > - ret = -ENODEV; > - goto fail_put_queue; > - } > - } > - > - job = kzalloc(sizeof(*job), GFP_KERNEL); > - if (!job) { > - ret = -ENOMEM; > - goto fail_out_sync; > - } > - > - kref_init(&job->refcount); > - > - xa_init_flags(&job->deps, XA_FLAGS_ALLOC); > - > - job->pfdev = pfdev; > - job->jc = args->jc; > - job->requirements = args->requirements; > - job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); > - job->file_priv = file->driver_priv; > - > - ret = panfrost_copy_in_sync(dev, file, args, job); > - if (ret) > - goto fail_job; > - > - ret = panfrost_lookup_bos(dev, file, args, job); > - if (ret) > - goto fail_job; > - > - ret = panfrost_job_push(queue, job); > - if (ret) > - goto fail_job; > - > - /* Update the return sync object for the job */ > - if (sync_out) > - drm_syncobj_replace_fence(sync_out, job->render_done_fence); > - > -fail_job: > - panfrost_job_put(job); > -fail_out_sync: > - if (sync_out) > - drm_syncobj_put(sync_out); > -fail_put_queue: > - panfrost_submitqueue_put(queue); > - > - return ret; > -} > - > static int > panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, > struct drm_file *file_priv) > @@ -491,6 +313,389 @@ panfrost_ioctl_destroy_submitqueue(struct drm_device *dev, void *data, > return panfrost_submitqueue_destroy(priv, id); > } > > +#define PANFROST_BO_REF_ALLOWED_FLAGS \ > + (PANFROST_BO_REF_EXCLUSIVE | PANFROST_BO_REF_NO_IMPLICIT_DEP) > + > +static int > +panfrost_get_job_bos(struct drm_file *file_priv, > + u64 refs, u32 ref_stride, u32 count, > + struct panfrost_job *job) > +{ > + void __user *in = u64_to_user_ptr(refs); > + unsigned int i; > + > + job->bo_count = count; > + > + if (!count) > + return 0; > + > + job->bos = kvmalloc_array(job->bo_count, sizeof(*job->bos), > + GFP_KERNEL | __GFP_ZERO); > + job->bo_flags = kvmalloc_array(job->bo_count, > + sizeof(*job->bo_flags), > + GFP_KERNEL | __GFP_ZERO); > + if (!job->bos || !job->bo_flags) > + return -ENOMEM; > + > + for (i = 0; i < count; i++) { > + struct drm_panfrost_bo_ref ref = { }; > + int ret; > + > + ret = copy_struct_from_user(&ref, sizeof(ref), > + in + (i * ref_stride), > + ref_stride); > + if (ret) > + return ret; > + > + /* Prior to the BATCH_SUBMIT ioctl all accessed BOs were > + * treated as exclusive. > + */ > + if (ref_stride == sizeof(u32)) > + ref.flags = PANFROST_BO_REF_EXCLUSIVE; > + > + if ((ref.flags & ~PANFROST_BO_REF_ALLOWED_FLAGS)) > + return -EINVAL; > + > + job->bos[i] = drm_gem_object_lookup(file_priv, ref.handle); > + if (!job->bos[i]) > + return -EINVAL; > + > + job->bo_flags[i] = ref.flags; > + } > + > + return 0; > +} > + > +static int > +panfrost_get_job_in_syncs(struct drm_file *file_priv, > + u64 refs, u32 ref_stride, > + u32 count, struct panfrost_job *job) > +{ > + const void __user *in = u64_to_user_ptr(refs); > + unsigned int i; > + int ret; > + > + if (!count) > + return 0; > + > + for (i = 0; i < count; i++) { > + struct drm_panfrost_syncobj_ref ref = { }; > + struct dma_fence *fence; > + > + ret = copy_struct_from_user(&ref, sizeof(ref), > + in + (i * ref_stride), > + ref_stride); > + if (ret) > + return ret; > + > + if (ref.pad) > + return -EINVAL; > + > + ret = drm_syncobj_find_fence(file_priv, ref.handle, ref.point, > + 0, &fence); > + if (ret) > + return ret; > + > + ret = drm_gem_fence_array_add(&job->deps, fence); > + if (ret) > + return ret; > + } > + > + return 0; > +} > + > +struct panfrost_job_out_sync { > + struct drm_syncobj *syncobj; > + struct dma_fence_chain *chain; > + u64 point; > +}; > + > +static void > +panfrost_put_job_out_syncs(struct panfrost_job_out_sync *out_syncs, u32 count) > +{ > + unsigned int i; > + > + for (i = 0; i < count; i++) { > + if (!out_syncs[i].syncobj) > + break; > + > + drm_syncobj_put(out_syncs[i].syncobj); > + kvfree(out_syncs[i].chain); > + } > + > + kvfree(out_syncs); > +} > + > +static struct panfrost_job_out_sync * > +panfrost_get_job_out_syncs(struct drm_file *file_priv, > + u64 refs, u32 ref_stride, > + u32 count) > +{ > + void __user *in = u64_to_user_ptr(refs); > + struct panfrost_job_out_sync *out_syncs; > + unsigned int i; > + int ret; > + > + if (!count) > + return NULL; > + > + /* If the syncobj ref_stride == sizeof(u32) we are called from the > + * old submit ioctl() which only accepted one out syncobj. In that > + * case the syncobj handle is passed directly through the > + * ->out_syncs field, so let's make sure the refs fits in a u32. > + */ > + if (ref_stride == sizeof(u32) && > + (count != 1 || refs > UINT_MAX)) > + return ERR_PTR(-EINVAL); > + > + out_syncs = kvmalloc_array(count, sizeof(*out_syncs), > + GFP_KERNEL | __GFP_ZERO); > + if (!out_syncs) > + return ERR_PTR(-ENOMEM); > + > + for (i = 0; i < count; i++) { > + struct drm_panfrost_syncobj_ref ref = { }; > + > + if (ref_stride == sizeof(u32)) { > + /* Special case for the old submit wrapper: in that > + * case there's only one out_sync, and the syncobj > + * handle is passed directly in the out_syncs field. > + */ > + ref.handle = refs; > + } else { > + ret = copy_struct_from_user(&ref, sizeof(ref), > + in + (i * ref_stride), > + ref_stride); > + if (ret) > + goto err_free_out_syncs; > + } > + > + if (ref.pad) { > + ret = -EINVAL; > + goto err_free_out_syncs; > + } > + > + out_syncs[i].syncobj = drm_syncobj_find(file_priv, ref.handle); > + if (!out_syncs[i].syncobj) { > + ret = -ENODEV; > + goto err_free_out_syncs; > + } > + > + out_syncs[i].point = ref.point; > + if (!out_syncs[i].point) > + continue; > + > + out_syncs[i].chain = kmalloc(sizeof(*out_syncs[i].chain), > + GFP_KERNEL); > + if (!out_syncs[i].chain) { > + ret = -ENOMEM; > + goto err_free_out_syncs; > + } > + } > + > + return out_syncs; > + > +err_free_out_syncs: > + panfrost_put_job_out_syncs(out_syncs, count); > + return ERR_PTR(ret); > +} > + > +static void > +panfrost_set_job_out_fence(struct panfrost_job_out_sync *out_syncs, > + unsigned int count, struct dma_fence *fence) > +{ > + unsigned int i; > + > + for (i = 0; i < count; i++) { > + if (out_syncs[i].chain) { > + drm_syncobj_add_point(out_syncs[i].syncobj, > + out_syncs[i].chain, > + fence, out_syncs[i].point); > + out_syncs[i].chain = NULL; > + } else { > + drm_syncobj_replace_fence(out_syncs[i].syncobj, > + fence); > + } > + } > +} > + > +struct panfrost_submit_ioctl_version_info { > + u32 job_stride; > + u32 bo_ref_stride; > + u32 syncobj_ref_stride; > +}; > + > +static const struct panfrost_submit_ioctl_version_info submit_versions[] = { > + /* SUBMIT */ > + [0] = { 0, 4, 4 }, > + > + /* BATCH_SUBMIT v1 */ > + [1] = { 48, 8, 16 }, > +}; > + > +#define PANFROST_JD_ALLOWED_REQS PANFROST_JD_REQ_FS > + > +static int > +panfrost_submit_job(struct drm_device *dev, struct drm_file *file_priv, > + struct panfrost_submitqueue *queue, > + const struct drm_panfrost_job *args, > + u32 version) > +{ > + struct panfrost_device *pfdev = dev->dev_private; > + struct panfrost_job_out_sync *out_syncs; > + u32 bo_stride, syncobj_stride; > + struct panfrost_job *job; > + int ret; > + > + if (!args->head) > + return -EINVAL; > + > + if (args->requirements & ~PANFROST_JD_ALLOWED_REQS) > + return -EINVAL; > + > + bo_stride = submit_versions[version].bo_ref_stride; > + syncobj_stride = submit_versions[version].syncobj_ref_stride; > + > + job = kzalloc(sizeof(*job), GFP_KERNEL); > + if (!job) > + return -ENOMEM; > + > + kref_init(&job->refcount); > + > + job->pfdev = pfdev; > + job->jc = args->head; > + job->requirements = args->requirements; > + job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); > + job->file_priv = file_priv->driver_priv; > + xa_init_flags(&job->deps, XA_FLAGS_ALLOC); > + > + ret = panfrost_get_job_in_syncs(file_priv, > + args->in_syncs, > + syncobj_stride, > + args->in_sync_count, > + job); > + if (ret) > + goto err_put_job; > + > + out_syncs = panfrost_get_job_out_syncs(file_priv, > + args->out_syncs, > + syncobj_stride, > + args->out_sync_count); > + if (IS_ERR(out_syncs)) { > + ret = PTR_ERR(out_syncs); > + goto err_put_job; > + } > + > + ret = panfrost_get_job_bos(file_priv, args->bos, bo_stride, > + args->bo_count, job); > + if (ret) > + goto err_put_job; > + > + ret = panfrost_get_job_mappings(file_priv, job); > + if (ret) > + goto err_put_job; > + > + ret = panfrost_job_push(queue, job); > + if (ret) { > + panfrost_put_job_out_syncs(out_syncs, args->out_sync_count); > + goto err_put_job; > + } > + > + panfrost_set_job_out_fence(out_syncs, args->out_sync_count, > + job->render_done_fence); > + panfrost_put_job_out_syncs(out_syncs, args->out_sync_count); > + return 0; > + > +err_put_job: > + panfrost_job_put(job); > + return ret; > +} > + > +static int > +panfrost_ioctl_submit(struct drm_device *dev, void *data, > + struct drm_file *file) > +{ > + struct drm_panfrost_submit *args = data; > + struct drm_panfrost_job job_args = { > + .head = args->jc, > + .bos = args->bo_handles, > + .in_syncs = args->in_syncs, > + > + /* We are abusing .out_syncs and passing the handle directly > + * instead of a pointer to a user u32 array, but > + * panfrost_job_submit() knows about it, so it's fine. > + */ > + .out_syncs = args->out_sync, > + .in_sync_count = args->in_sync_count, > + .out_sync_count = args->out_sync > 0 ? 1 : 0, > + .bo_count = args->bo_handle_count, > + .requirements = args->requirements > + }; > + struct panfrost_submitqueue *queue; > + int ret; > + > + queue = panfrost_submitqueue_get(file->driver_priv, 0); > + if (IS_ERR(queue)) > + return PTR_ERR(queue); > + > + ret = panfrost_submit_job(dev, file, queue, &job_args, 0); > + panfrost_submitqueue_put(queue); > + > + return ret; > +} > + > +static int > +panfrost_ioctl_batch_submit(struct drm_device *dev, void *data, > + struct drm_file *file_priv) > +{ > + struct drm_panfrost_batch_submit *args = data; > + void __user *jobs_args = u64_to_user_ptr(args->jobs); > + struct panfrost_submitqueue *queue; > + u32 version = args->version; > + u32 job_stride; > + unsigned int i; > + int ret; > + > + /* Version 0 doesn't exists (it's reserved for the SUBMIT ioctl) */ > + if (!version) > + return -EINVAL; > + > + /* If the version specified is bigger than what we currently support, > + * pick the last supported version and let copy_struct_from_user() > + * check that any extra job, bo_ref and syncobj_ref fields are zeroed. > + */ > + if (version >= ARRAY_SIZE(submit_versions)) > + version = ARRAY_SIZE(submit_versions) - 1; > + > + queue = panfrost_submitqueue_get(file_priv->driver_priv, args->queue); > + if (IS_ERR(queue)) > + return PTR_ERR(queue); > + > + job_stride = submit_versions[version].job_stride; > + for (i = 0; i < args->job_count; i++) { > + struct drm_panfrost_job job_args = { }; > + > + ret = copy_struct_from_user(&job_args, sizeof(job_args), > + jobs_args + (i * job_stride), > + job_stride); > + if (ret) { > + args->fail_idx = i; > + goto out_put_queue; > + } > + > + ret = panfrost_submit_job(dev, file_priv, queue, &job_args, > + version); > + if (ret) { > + args->fail_idx = i; > + goto out_put_queue; > + } > + } > + > +out_put_queue: > + panfrost_submitqueue_put(queue); > + return 0; > +} > + > int panfrost_unstable_ioctl_check(void) > { > if (!unstable_ioctls) > @@ -572,6 +777,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { > PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW), > PANFROST_IOCTL(CREATE_SUBMITQUEUE, create_submitqueue, DRM_RENDER_ALLOW), > PANFROST_IOCTL(DESTROY_SUBMITQUEUE, destroy_submitqueue, DRM_RENDER_ALLOW), > + PANFROST_IOCTL(BATCH_SUBMIT, batch_submit, DRM_RENDER_ALLOW), > }; > > DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops); > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c > index 56ae89272e19..4e1540bce865 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_job.c > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c > @@ -254,6 +254,9 @@ static int panfrost_acquire_object_fences(struct panfrost_job *job) > return ret; > } > > + if (job->bo_flags[i] & PANFROST_BO_REF_NO_IMPLICIT_DEP) > + continue; This breaks dma_resv rules. I'll send out patch set fixing this pattern in other drivers, I'll ping you on that for what you need to change. Should go out today or so. Also cc: Christian König. -Daniel > + > ret = drm_gem_fence_array_add_implicit(&job->deps, job->bos[i], > exclusive); > if (ret) > diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h > index e31a22c176d9..5d534e61c28e 100644 > --- a/include/uapi/drm/panfrost_drm.h > +++ b/include/uapi/drm/panfrost_drm.h > @@ -23,6 +23,7 @@ extern "C" { > #define DRM_PANFROST_MADVISE 0x08 > #define DRM_PANFROST_CREATE_SUBMITQUEUE 0x09 > #define DRM_PANFROST_DESTROY_SUBMITQUEUE 0x0a > +#define DRM_PANFROST_BATCH_SUBMIT 0x0b > > #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) > #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) > @@ -33,6 +34,7 @@ extern "C" { > #define DRM_IOCTL_PANFROST_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MADVISE, struct drm_panfrost_madvise) > #define DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_SUBMITQUEUE, struct drm_panfrost_create_submitqueue) > #define DRM_IOCTL_PANFROST_DESTROY_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_DESTROY_SUBMITQUEUE, __u32) > +#define DRM_IOCTL_PANFROST_BATCH_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_BATCH_SUBMIT, struct drm_panfrost_batch_submit) > > /* > * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module > @@ -241,9 +243,99 @@ struct drm_panfrost_create_submitqueue { > __u32 id; /* out, identifier */ > }; > > +/* Syncobj reference passed at job submission time to encode explicit > + * input/output fences. > + */ > +struct drm_panfrost_syncobj_ref { > + /** Syncobj handle */ > + __u32 handle; > + > + /** Padding field, must be set to 0 */ > + __u32 pad; > + > + /** > + * For timeline syncobjs, the point on the timeline the reference > + * points to. 0 for the last point. > + * Must be set to 0 for non-timeline syncobjs > + */ > + __u64 point; > +}; > + > /* Exclusive (AKA write) access to the BO */ > #define PANFROST_BO_REF_EXCLUSIVE 0x1 > > +/* Disable the implicit depency on the BO fence */ > +#define PANFROST_BO_REF_NO_IMPLICIT_DEP 0x2 > + > +/* Describes a BO referenced by a job and the type of access. */ > +struct drm_panfrost_bo_ref { > + /** A GEM handle */ > + __u32 handle; > + > + /** A combination of PANFROST_BO_REF_x flags */ > + __u32 flags; > +}; > + > +/* Describes a GPU job and the resources attached to it. */ > +struct drm_panfrost_job { > + /** GPU pointer to the head of the job chain. */ > + __u64 head; > + > + /** > + * Array of drm_panfrost_bo_ref objects describing the BOs referenced > + * by this job. > + */ > + __u64 bos; > + > + /** > + * Arrays of drm_panfrost_syncobj_ref objects describing the input > + * and output fences. > + */ > + __u64 in_syncs; > + __u64 out_syncs; > + > + /** Syncobj reference array sizes. */ > + __u32 in_sync_count; > + __u32 out_sync_count; > + > + /** BO reference array size. */ > + __u32 bo_count; > + > + /** Combination of PANFROST_JD_REQ_* flags. */ > + __u32 requirements; > +}; > + > +#define PANFROST_SUBMIT_BATCH_VERSION 1 > + > +/* Used to submit multiple jobs in one call */ > +struct drm_panfrost_batch_submit { > + /** > + * Always set to PANFROST_SUBMIT_BATCH_VERSION. This is used to let the > + * kernel know about the size of the various structs passed to the > + * BATCH_SUBMIT ioctl. > + */ > + __u32 version; > + > + /** Number of jobs to submit. */ > + __u32 job_count; > + > + /* Pointer to a job array. */ > + __u64 jobs; > + > + /** > + * ID of the queue to submit those jobs to. 0 is the default > + * submit queue and should always exists. If you need a dedicated > + * queue, create it with DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE. > + */ > + __u32 queue; > + > + /** > + * If the submission fails, this encodes the index of the job > + * failed. > + */ > + __u32 fail_idx; > +}; > + > #if defined(__cplusplus) > } > #endif > -- > 2.31.1 >
On 05/07/2021 09:29, Boris Brezillon wrote: > This should help limit the number of ioctls when submitting multiple > jobs. The new ioctl also supports syncobj timelines and BO access flags. > > v4: > * Implement panfrost_ioctl_submit() as a wrapper around > panfrost_submit_job() > * Replace stride fields by a version field which is mapped to > a <job_stride,bo_ref_stride,syncobj_ref_stride> tuple internally > > v3: > * Re-use panfrost_get_job_bos() and panfrost_get_job_in_syncs() in the > old submit path > > Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> > --- > drivers/gpu/drm/panfrost/panfrost_drv.c | 562 ++++++++++++++++-------- > drivers/gpu/drm/panfrost/panfrost_job.c | 3 + > include/uapi/drm/panfrost_drm.h | 92 ++++ > 3 files changed, 479 insertions(+), 178 deletions(-) > > diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c > index 8e28ef30310b..a624e4f86aff 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_drv.c > +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c > @@ -138,184 +138,6 @@ panfrost_get_job_mappings(struct drm_file *file_priv, struct panfrost_job *job) > return 0; > } > > -/** > - * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects > - * referenced by the job. > - * @dev: DRM device > - * @file_priv: DRM file for this fd > - * @args: IOCTL args > - * @job: job being set up > - * > - * Resolve handles from userspace to BOs and attach them to job. > - * > - * Note that this function doesn't need to unreference the BOs on > - * failure, because that will happen at panfrost_job_cleanup() time. > - */ > -static int > -panfrost_lookup_bos(struct drm_device *dev, > - struct drm_file *file_priv, > - struct drm_panfrost_submit *args, > - struct panfrost_job *job) > -{ > - unsigned int i; > - int ret; > - > - job->bo_count = args->bo_handle_count; > - > - if (!job->bo_count) > - return 0; > - > - job->bo_flags = kvmalloc_array(job->bo_count, > - sizeof(*job->bo_flags), > - GFP_KERNEL | __GFP_ZERO); > - if (!job->bo_flags) > - return -ENOMEM; > - > - for (i = 0; i < job->bo_count; i++) > - job->bo_flags[i] = PANFROST_BO_REF_EXCLUSIVE; > - > - ret = drm_gem_objects_lookup(file_priv, > - (void __user *)(uintptr_t)args->bo_handles, > - job->bo_count, &job->bos); > - if (ret) > - return ret; > - > - return panfrost_get_job_mappings(file_priv, job); > -} > - > -/** > - * panfrost_copy_in_sync() - Sets up job->deps with the sync objects > - * referenced by the job. > - * @dev: DRM device > - * @file_priv: DRM file for this fd > - * @args: IOCTL args > - * @job: job being set up > - * > - * Resolve syncobjs from userspace to fences and attach them to job. > - * > - * Note that this function doesn't need to unreference the fences on > - * failure, because that will happen at panfrost_job_cleanup() time. > - */ > -static int > -panfrost_copy_in_sync(struct drm_device *dev, > - struct drm_file *file_priv, > - struct drm_panfrost_submit *args, > - struct panfrost_job *job) > -{ > - u32 *handles; > - int ret = 0; > - int i, in_fence_count; > - > - in_fence_count = args->in_sync_count; > - > - if (!in_fence_count) > - return 0; > - > - handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL); > - if (!handles) { > - ret = -ENOMEM; > - DRM_DEBUG("Failed to allocate incoming syncobj handles\n"); > - goto fail; > - } > - > - if (copy_from_user(handles, > - (void __user *)(uintptr_t)args->in_syncs, > - in_fence_count * sizeof(u32))) { > - ret = -EFAULT; > - DRM_DEBUG("Failed to copy in syncobj handles\n"); > - goto fail; > - } > - > - for (i = 0; i < in_fence_count; i++) { > - struct dma_fence *fence; > - > - ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0, > - &fence); > - if (ret) > - goto fail; > - > - ret = drm_gem_fence_array_add(&job->deps, fence); > - > - if (ret) > - goto fail; > - } > - > -fail: > - kvfree(handles); > - return ret; > -} > - > -static int panfrost_ioctl_submit(struct drm_device *dev, void *data, > - struct drm_file *file) > -{ > - struct panfrost_device *pfdev = dev->dev_private; > - struct drm_panfrost_submit *args = data; > - struct drm_syncobj *sync_out = NULL; > - struct panfrost_submitqueue *queue; > - struct panfrost_job *job; > - int ret = 0; > - > - if (!args->jc) > - return -EINVAL; > - > - if (args->requirements && args->requirements != PANFROST_JD_REQ_FS) > - return -EINVAL; > - > - queue = panfrost_submitqueue_get(file->driver_priv, 0); > - if (IS_ERR(queue)) > - return PTR_ERR(queue); > - > - if (args->out_sync > 0) { > - sync_out = drm_syncobj_find(file, args->out_sync); > - if (!sync_out) { > - ret = -ENODEV; > - goto fail_put_queue; > - } > - } > - > - job = kzalloc(sizeof(*job), GFP_KERNEL); > - if (!job) { > - ret = -ENOMEM; > - goto fail_out_sync; > - } > - > - kref_init(&job->refcount); > - > - xa_init_flags(&job->deps, XA_FLAGS_ALLOC); > - > - job->pfdev = pfdev; > - job->jc = args->jc; > - job->requirements = args->requirements; > - job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); > - job->file_priv = file->driver_priv; > - > - ret = panfrost_copy_in_sync(dev, file, args, job); > - if (ret) > - goto fail_job; > - > - ret = panfrost_lookup_bos(dev, file, args, job); > - if (ret) > - goto fail_job; > - > - ret = panfrost_job_push(queue, job); > - if (ret) > - goto fail_job; > - > - /* Update the return sync object for the job */ > - if (sync_out) > - drm_syncobj_replace_fence(sync_out, job->render_done_fence); > - > -fail_job: > - panfrost_job_put(job); > -fail_out_sync: > - if (sync_out) > - drm_syncobj_put(sync_out); > -fail_put_queue: > - panfrost_submitqueue_put(queue); > - > - return ret; > -} > - > static int > panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, > struct drm_file *file_priv) > @@ -491,6 +313,389 @@ panfrost_ioctl_destroy_submitqueue(struct drm_device *dev, void *data, > return panfrost_submitqueue_destroy(priv, id); > } > > +#define PANFROST_BO_REF_ALLOWED_FLAGS \ > + (PANFROST_BO_REF_EXCLUSIVE | PANFROST_BO_REF_NO_IMPLICIT_DEP) > + > +static int > +panfrost_get_job_bos(struct drm_file *file_priv, > + u64 refs, u32 ref_stride, u32 count, > + struct panfrost_job *job) > +{ > + void __user *in = u64_to_user_ptr(refs); > + unsigned int i; > + > + job->bo_count = count; > + > + if (!count) > + return 0; > + > + job->bos = kvmalloc_array(job->bo_count, sizeof(*job->bos), > + GFP_KERNEL | __GFP_ZERO); > + job->bo_flags = kvmalloc_array(job->bo_count, > + sizeof(*job->bo_flags), > + GFP_KERNEL | __GFP_ZERO); > + if (!job->bos || !job->bo_flags) > + return -ENOMEM; > + > + for (i = 0; i < count; i++) { > + struct drm_panfrost_bo_ref ref = { }; > + int ret; > + > + ret = copy_struct_from_user(&ref, sizeof(ref), > + in + (i * ref_stride), > + ref_stride); > + if (ret) > + return ret; > + > + /* Prior to the BATCH_SUBMIT ioctl all accessed BOs were > + * treated as exclusive. > + */ > + if (ref_stride == sizeof(u32)) > + ref.flags = PANFROST_BO_REF_EXCLUSIVE; > + > + if ((ref.flags & ~PANFROST_BO_REF_ALLOWED_FLAGS)) > + return -EINVAL; > + > + job->bos[i] = drm_gem_object_lookup(file_priv, ref.handle); > + if (!job->bos[i]) > + return -EINVAL; > + > + job->bo_flags[i] = ref.flags; > + } > + > + return 0; > +} > + > +static int > +panfrost_get_job_in_syncs(struct drm_file *file_priv, > + u64 refs, u32 ref_stride, > + u32 count, struct panfrost_job *job) > +{ > + const void __user *in = u64_to_user_ptr(refs); > + unsigned int i; > + int ret; > + > + if (!count) > + return 0; > + > + for (i = 0; i < count; i++) { > + struct drm_panfrost_syncobj_ref ref = { }; > + struct dma_fence *fence; > + > + ret = copy_struct_from_user(&ref, sizeof(ref), > + in + (i * ref_stride), > + ref_stride); > + if (ret) > + return ret; > + > + if (ref.pad) > + return -EINVAL; > + > + ret = drm_syncobj_find_fence(file_priv, ref.handle, ref.point, > + 0, &fence); > + if (ret) > + return ret; > + > + ret = drm_gem_fence_array_add(&job->deps, fence); > + if (ret) > + return ret; > + } > + > + return 0; > +} > + > +struct panfrost_job_out_sync { > + struct drm_syncobj *syncobj; > + struct dma_fence_chain *chain; > + u64 point; > +}; > + > +static void > +panfrost_put_job_out_syncs(struct panfrost_job_out_sync *out_syncs, u32 count) > +{ > + unsigned int i; > + > + for (i = 0; i < count; i++) { > + if (!out_syncs[i].syncobj) > + break; > + > + drm_syncobj_put(out_syncs[i].syncobj); > + kvfree(out_syncs[i].chain); > + } > + > + kvfree(out_syncs); > +} > + > +static struct panfrost_job_out_sync * > +panfrost_get_job_out_syncs(struct drm_file *file_priv, > + u64 refs, u32 ref_stride, > + u32 count) > +{ > + void __user *in = u64_to_user_ptr(refs); > + struct panfrost_job_out_sync *out_syncs; > + unsigned int i; > + int ret; > + > + if (!count) > + return NULL; > + > + /* If the syncobj ref_stride == sizeof(u32) we are called from the > + * old submit ioctl() which only accepted one out syncobj. In that > + * case the syncobj handle is passed directly through the > + * ->out_syncs field, so let's make sure the refs fits in a u32. > + */ > + if (ref_stride == sizeof(u32) && > + (count != 1 || refs > UINT_MAX)) > + return ERR_PTR(-EINVAL); > + > + out_syncs = kvmalloc_array(count, sizeof(*out_syncs), > + GFP_KERNEL | __GFP_ZERO); > + if (!out_syncs) > + return ERR_PTR(-ENOMEM); > + > + for (i = 0; i < count; i++) { > + struct drm_panfrost_syncobj_ref ref = { }; > + > + if (ref_stride == sizeof(u32)) { > + /* Special case for the old submit wrapper: in that > + * case there's only one out_sync, and the syncobj > + * handle is passed directly in the out_syncs field. > + */ > + ref.handle = refs; > + } else { > + ret = copy_struct_from_user(&ref, sizeof(ref), > + in + (i * ref_stride), > + ref_stride); > + if (ret) > + goto err_free_out_syncs; > + } > + > + if (ref.pad) { > + ret = -EINVAL; > + goto err_free_out_syncs; > + } > + > + out_syncs[i].syncobj = drm_syncobj_find(file_priv, ref.handle); > + if (!out_syncs[i].syncobj) { > + ret = -ENODEV; > + goto err_free_out_syncs; > + } > + > + out_syncs[i].point = ref.point; > + if (!out_syncs[i].point) > + continue; > + > + out_syncs[i].chain = kmalloc(sizeof(*out_syncs[i].chain), > + GFP_KERNEL); > + if (!out_syncs[i].chain) { > + ret = -ENOMEM; > + goto err_free_out_syncs; > + } > + } > + > + return out_syncs; > + > +err_free_out_syncs: > + panfrost_put_job_out_syncs(out_syncs, count); > + return ERR_PTR(ret); > +} > + > +static void > +panfrost_set_job_out_fence(struct panfrost_job_out_sync *out_syncs, > + unsigned int count, struct dma_fence *fence) > +{ > + unsigned int i; > + > + for (i = 0; i < count; i++) { > + if (out_syncs[i].chain) { > + drm_syncobj_add_point(out_syncs[i].syncobj, > + out_syncs[i].chain, > + fence, out_syncs[i].point); > + out_syncs[i].chain = NULL; > + } else { > + drm_syncobj_replace_fence(out_syncs[i].syncobj, > + fence); > + } > + } > +} > + > +struct panfrost_submit_ioctl_version_info { > + u32 job_stride; > + u32 bo_ref_stride; > + u32 syncobj_ref_stride; > +}; > + > +static const struct panfrost_submit_ioctl_version_info submit_versions[] = { > + /* SUBMIT */ > + [0] = { 0, 4, 4 }, > + > + /* BATCH_SUBMIT v1 */ > + [1] = { 48, 8, 16 }, > +}; It might be worth putting something like this in: BUILD_BUG_ON(sizeof(struct drm_panfrost_job) != submit_versions[PANFROST_SUBMIT_BATCH_VERSION].job_stride); BUILD_BUG_ON(sizeof(struct drm_panfrost_bo_ref) != submit_versions[PANFROST_SUBMIT_BATCH_VERSION].bo_ref_stride); BUILD_BUG_ON(sizeof(struct drm_panfrost_syncobj_ref) != submit_versions[PANFROST_SUBMIT_BATCH_VERSION].syncobj_ref_stride); That way we won't accidentally let it get out of sync. > + > +#define PANFROST_JD_ALLOWED_REQS PANFROST_JD_REQ_FS > + > +static int > +panfrost_submit_job(struct drm_device *dev, struct drm_file *file_priv, > + struct panfrost_submitqueue *queue, > + const struct drm_panfrost_job *args, > + u32 version) > +{ > + struct panfrost_device *pfdev = dev->dev_private; > + struct panfrost_job_out_sync *out_syncs; > + u32 bo_stride, syncobj_stride; > + struct panfrost_job *job; > + int ret; > + > + if (!args->head) > + return -EINVAL; > + > + if (args->requirements & ~PANFROST_JD_ALLOWED_REQS) > + return -EINVAL; > + > + bo_stride = submit_versions[version].bo_ref_stride; > + syncobj_stride = submit_versions[version].syncobj_ref_stride; > + > + job = kzalloc(sizeof(*job), GFP_KERNEL); > + if (!job) > + return -ENOMEM; > + > + kref_init(&job->refcount); > + > + job->pfdev = pfdev; > + job->jc = args->head; > + job->requirements = args->requirements; > + job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); > + job->file_priv = file_priv->driver_priv; > + xa_init_flags(&job->deps, XA_FLAGS_ALLOC); > + > + ret = panfrost_get_job_in_syncs(file_priv, > + args->in_syncs, > + syncobj_stride, > + args->in_sync_count, > + job); > + if (ret) > + goto err_put_job; > + > + out_syncs = panfrost_get_job_out_syncs(file_priv, > + args->out_syncs, > + syncobj_stride, > + args->out_sync_count); > + if (IS_ERR(out_syncs)) { > + ret = PTR_ERR(out_syncs); > + goto err_put_job; > + } > + > + ret = panfrost_get_job_bos(file_priv, args->bos, bo_stride, > + args->bo_count, job); > + if (ret) > + goto err_put_job; > + > + ret = panfrost_get_job_mappings(file_priv, job); > + if (ret) > + goto err_put_job; > + > + ret = panfrost_job_push(queue, job); > + if (ret) { > + panfrost_put_job_out_syncs(out_syncs, args->out_sync_count); > + goto err_put_job; > + } > + > + panfrost_set_job_out_fence(out_syncs, args->out_sync_count, > + job->render_done_fence); > + panfrost_put_job_out_syncs(out_syncs, args->out_sync_count); > + return 0; > + > +err_put_job: > + panfrost_job_put(job); > + return ret; > +} > + > +static int > +panfrost_ioctl_submit(struct drm_device *dev, void *data, > + struct drm_file *file) > +{ > + struct drm_panfrost_submit *args = data; > + struct drm_panfrost_job job_args = { > + .head = args->jc, > + .bos = args->bo_handles, > + .in_syncs = args->in_syncs, > + > + /* We are abusing .out_syncs and passing the handle directly > + * instead of a pointer to a user u32 array, but > + * panfrost_job_submit() knows about it, so it's fine. > + */ > + .out_syncs = args->out_sync, > + .in_sync_count = args->in_sync_count, > + .out_sync_count = args->out_sync > 0 ? 1 : 0, > + .bo_count = args->bo_handle_count, > + .requirements = args->requirements > + }; > + struct panfrost_submitqueue *queue; > + int ret; > + > + queue = panfrost_submitqueue_get(file->driver_priv, 0); > + if (IS_ERR(queue)) > + return PTR_ERR(queue); > + > + ret = panfrost_submit_job(dev, file, queue, &job_args, 0); > + panfrost_submitqueue_put(queue); > + > + return ret; > +} > + > +static int > +panfrost_ioctl_batch_submit(struct drm_device *dev, void *data, > + struct drm_file *file_priv) > +{ > + struct drm_panfrost_batch_submit *args = data; > + void __user *jobs_args = u64_to_user_ptr(args->jobs); > + struct panfrost_submitqueue *queue; > + u32 version = args->version; > + u32 job_stride; > + unsigned int i; > + int ret; > + > + /* Version 0 doesn't exists (it's reserved for the SUBMIT ioctl) */ > + if (!version) > + return -EINVAL; > + > + /* If the version specified is bigger than what we currently support, > + * pick the last supported version and let copy_struct_from_user() > + * check that any extra job, bo_ref and syncobj_ref fields are zeroed. > + */ > + if (version >= ARRAY_SIZE(submit_versions)) > + version = ARRAY_SIZE(submit_versions) - 1; > + > + queue = panfrost_submitqueue_get(file_priv->driver_priv, args->queue); > + if (IS_ERR(queue)) > + return PTR_ERR(queue); > + > + job_stride = submit_versions[version].job_stride; > + for (i = 0; i < args->job_count; i++) { > + struct drm_panfrost_job job_args = { }; > + > + ret = copy_struct_from_user(&job_args, sizeof(job_args), > + jobs_args + (i * job_stride), > + job_stride); > + if (ret) { > + args->fail_idx = i; > + goto out_put_queue; > + } > + > + ret = panfrost_submit_job(dev, file_priv, queue, &job_args, > + version); > + if (ret) { > + args->fail_idx = i; > + goto out_put_queue; > + } NIT: You could avoid the repeated code with something like: ret = copy_struct_from_user(...) if (!ret) ret = panfrost_submit_job(...) if (ret){ args->fail_idx = i; goto out_out_queue; } > + } > + > +out_put_queue: > + panfrost_submitqueue_put(queue); > + return 0; Shouldn't this be "return ret" so that it returns an error if any job fails? (You will also need to init ret for the case where job_count==0). Otherwise user space would need to set fail_idx to something invalid and check if it's been modified to know about the failure. > +} > + > int panfrost_unstable_ioctl_check(void) > { > if (!unstable_ioctls) > @@ -572,6 +777,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { > PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW), > PANFROST_IOCTL(CREATE_SUBMITQUEUE, create_submitqueue, DRM_RENDER_ALLOW), > PANFROST_IOCTL(DESTROY_SUBMITQUEUE, destroy_submitqueue, DRM_RENDER_ALLOW), > + PANFROST_IOCTL(BATCH_SUBMIT, batch_submit, DRM_RENDER_ALLOW), > }; > > DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops); > diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c > index 56ae89272e19..4e1540bce865 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_job.c > +++ b/drivers/gpu/drm/panfrost/panfrost_job.c > @@ -254,6 +254,9 @@ static int panfrost_acquire_object_fences(struct panfrost_job *job) > return ret; > } > > + if (job->bo_flags[i] & PANFROST_BO_REF_NO_IMPLICIT_DEP) > + continue; > + > ret = drm_gem_fence_array_add_implicit(&job->deps, job->bos[i], > exclusive); > if (ret) > diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h > index e31a22c176d9..5d534e61c28e 100644 > --- a/include/uapi/drm/panfrost_drm.h > +++ b/include/uapi/drm/panfrost_drm.h > @@ -23,6 +23,7 @@ extern "C" { > #define DRM_PANFROST_MADVISE 0x08 > #define DRM_PANFROST_CREATE_SUBMITQUEUE 0x09 > #define DRM_PANFROST_DESTROY_SUBMITQUEUE 0x0a > +#define DRM_PANFROST_BATCH_SUBMIT 0x0b > > #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) > #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) > @@ -33,6 +34,7 @@ extern "C" { > #define DRM_IOCTL_PANFROST_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MADVISE, struct drm_panfrost_madvise) > #define DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_SUBMITQUEUE, struct drm_panfrost_create_submitqueue) > #define DRM_IOCTL_PANFROST_DESTROY_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_DESTROY_SUBMITQUEUE, __u32) > +#define DRM_IOCTL_PANFROST_BATCH_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_BATCH_SUBMIT, struct drm_panfrost_batch_submit) > > /* > * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module > @@ -241,9 +243,99 @@ struct drm_panfrost_create_submitqueue { > __u32 id; /* out, identifier */ > }; > > +/* Syncobj reference passed at job submission time to encode explicit > + * input/output fences. > + */ > +struct drm_panfrost_syncobj_ref { > + /** Syncobj handle */ > + __u32 handle; > + > + /** Padding field, must be set to 0 */ > + __u32 pad; > + > + /** > + * For timeline syncobjs, the point on the timeline the reference > + * points to. 0 for the last point. > + * Must be set to 0 for non-timeline syncobjs > + */ > + __u64 point; > +}; > + > /* Exclusive (AKA write) access to the BO */ > #define PANFROST_BO_REF_EXCLUSIVE 0x1 > > +/* Disable the implicit depency on the BO fence */ NIT: s/depency/dependency/ > +#define PANFROST_BO_REF_NO_IMPLICIT_DEP 0x2 > + > +/* Describes a BO referenced by a job and the type of access. */ > +struct drm_panfrost_bo_ref { > + /** A GEM handle */ > + __u32 handle; > + > + /** A combination of PANFROST_BO_REF_x flags */ > + __u32 flags; > +}; > + > +/* Describes a GPU job and the resources attached to it. */ > +struct drm_panfrost_job { > + /** GPU pointer to the head of the job chain. */ > + __u64 head; > + > + /** > + * Array of drm_panfrost_bo_ref objects describing the BOs referenced > + * by this job. > + */ > + __u64 bos; > + > + /** > + * Arrays of drm_panfrost_syncobj_ref objects describing the input > + * and output fences. > + */ > + __u64 in_syncs; > + __u64 out_syncs; > + > + /** Syncobj reference array sizes. */ > + __u32 in_sync_count; > + __u32 out_sync_count; > + > + /** BO reference array size. */ > + __u32 bo_count; > + > + /** Combination of PANFROST_JD_REQ_* flags. */ > + __u32 requirements; > +}; > + > +#define PANFROST_SUBMIT_BATCH_VERSION 1 > + > +/* Used to submit multiple jobs in one call */ > +struct drm_panfrost_batch_submit { > + /** > + * Always set to PANFROST_SUBMIT_BATCH_VERSION. This is used to let the > + * kernel know about the size of the various structs passed to the > + * BATCH_SUBMIT ioctl. > + */ > + __u32 version; > + > + /** Number of jobs to submit. */ > + __u32 job_count; > + > + /* Pointer to a job array. */ > + __u64 jobs; > + > + /** > + * ID of the queue to submit those jobs to. 0 is the default > + * submit queue and should always exists. If you need a dedicated > + * queue, create it with DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE. > + */ > + __u32 queue; > + > + /** > + * If the submission fails, this encodes the index of the job > + * failed. NIT: "the index of the first job that failed" Steve > + */ > + __u32 fail_idx; > +}; > + > #if defined(__cplusplus) > } > #endif >
Am 05.07.21 um 11:32 schrieb Daniel Vetter: > On Mon, Jul 05, 2021 at 10:29:48AM +0200, Boris Brezillon wrote: >> This should help limit the number of ioctls when submitting multiple >> jobs. The new ioctl also supports syncobj timelines and BO access flags. >> >> v4: >> * Implement panfrost_ioctl_submit() as a wrapper around >> panfrost_submit_job() >> * Replace stride fields by a version field which is mapped to >> a <job_stride,bo_ref_stride,syncobj_ref_stride> tuple internally >> >> v3: >> * Re-use panfrost_get_job_bos() and panfrost_get_job_in_syncs() in the >> old submit path >> >> Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> >> --- >> drivers/gpu/drm/panfrost/panfrost_drv.c | 562 ++++++++++++++++-------- >> drivers/gpu/drm/panfrost/panfrost_job.c | 3 + >> include/uapi/drm/panfrost_drm.h | 92 ++++ >> 3 files changed, 479 insertions(+), 178 deletions(-) >> >> diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c >> index 8e28ef30310b..a624e4f86aff 100644 >> --- a/drivers/gpu/drm/panfrost/panfrost_drv.c >> +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c >> @@ -138,184 +138,6 @@ panfrost_get_job_mappings(struct drm_file *file_priv, struct panfrost_job *job) >> return 0; >> } >> >> -/** >> - * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects >> - * referenced by the job. >> - * @dev: DRM device >> - * @file_priv: DRM file for this fd >> - * @args: IOCTL args >> - * @job: job being set up >> - * >> - * Resolve handles from userspace to BOs and attach them to job. >> - * >> - * Note that this function doesn't need to unreference the BOs on >> - * failure, because that will happen at panfrost_job_cleanup() time. >> - */ >> -static int >> -panfrost_lookup_bos(struct drm_device *dev, >> - struct drm_file *file_priv, >> - struct drm_panfrost_submit *args, >> - struct panfrost_job *job) >> -{ >> - unsigned int i; >> - int ret; >> - >> - job->bo_count = args->bo_handle_count; >> - >> - if (!job->bo_count) >> - return 0; >> - >> - job->bo_flags = kvmalloc_array(job->bo_count, >> - sizeof(*job->bo_flags), >> - GFP_KERNEL | __GFP_ZERO); >> - if (!job->bo_flags) >> - return -ENOMEM; >> - >> - for (i = 0; i < job->bo_count; i++) >> - job->bo_flags[i] = PANFROST_BO_REF_EXCLUSIVE; >> - >> - ret = drm_gem_objects_lookup(file_priv, >> - (void __user *)(uintptr_t)args->bo_handles, >> - job->bo_count, &job->bos); >> - if (ret) >> - return ret; >> - >> - return panfrost_get_job_mappings(file_priv, job); >> -} >> - >> -/** >> - * panfrost_copy_in_sync() - Sets up job->deps with the sync objects >> - * referenced by the job. >> - * @dev: DRM device >> - * @file_priv: DRM file for this fd >> - * @args: IOCTL args >> - * @job: job being set up >> - * >> - * Resolve syncobjs from userspace to fences and attach them to job. >> - * >> - * Note that this function doesn't need to unreference the fences on >> - * failure, because that will happen at panfrost_job_cleanup() time. >> - */ >> -static int >> -panfrost_copy_in_sync(struct drm_device *dev, >> - struct drm_file *file_priv, >> - struct drm_panfrost_submit *args, >> - struct panfrost_job *job) >> -{ >> - u32 *handles; >> - int ret = 0; >> - int i, in_fence_count; >> - >> - in_fence_count = args->in_sync_count; >> - >> - if (!in_fence_count) >> - return 0; >> - >> - handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL); >> - if (!handles) { >> - ret = -ENOMEM; >> - DRM_DEBUG("Failed to allocate incoming syncobj handles\n"); >> - goto fail; >> - } >> - >> - if (copy_from_user(handles, >> - (void __user *)(uintptr_t)args->in_syncs, >> - in_fence_count * sizeof(u32))) { >> - ret = -EFAULT; >> - DRM_DEBUG("Failed to copy in syncobj handles\n"); >> - goto fail; >> - } >> - >> - for (i = 0; i < in_fence_count; i++) { >> - struct dma_fence *fence; >> - >> - ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0, >> - &fence); >> - if (ret) >> - goto fail; >> - >> - ret = drm_gem_fence_array_add(&job->deps, fence); >> - >> - if (ret) >> - goto fail; >> - } >> - >> -fail: >> - kvfree(handles); >> - return ret; >> -} >> - >> -static int panfrost_ioctl_submit(struct drm_device *dev, void *data, >> - struct drm_file *file) >> -{ >> - struct panfrost_device *pfdev = dev->dev_private; >> - struct drm_panfrost_submit *args = data; >> - struct drm_syncobj *sync_out = NULL; >> - struct panfrost_submitqueue *queue; >> - struct panfrost_job *job; >> - int ret = 0; >> - >> - if (!args->jc) >> - return -EINVAL; >> - >> - if (args->requirements && args->requirements != PANFROST_JD_REQ_FS) >> - return -EINVAL; >> - >> - queue = panfrost_submitqueue_get(file->driver_priv, 0); >> - if (IS_ERR(queue)) >> - return PTR_ERR(queue); >> - >> - if (args->out_sync > 0) { >> - sync_out = drm_syncobj_find(file, args->out_sync); >> - if (!sync_out) { >> - ret = -ENODEV; >> - goto fail_put_queue; >> - } >> - } >> - >> - job = kzalloc(sizeof(*job), GFP_KERNEL); >> - if (!job) { >> - ret = -ENOMEM; >> - goto fail_out_sync; >> - } >> - >> - kref_init(&job->refcount); >> - >> - xa_init_flags(&job->deps, XA_FLAGS_ALLOC); >> - >> - job->pfdev = pfdev; >> - job->jc = args->jc; >> - job->requirements = args->requirements; >> - job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); >> - job->file_priv = file->driver_priv; >> - >> - ret = panfrost_copy_in_sync(dev, file, args, job); >> - if (ret) >> - goto fail_job; >> - >> - ret = panfrost_lookup_bos(dev, file, args, job); >> - if (ret) >> - goto fail_job; >> - >> - ret = panfrost_job_push(queue, job); >> - if (ret) >> - goto fail_job; >> - >> - /* Update the return sync object for the job */ >> - if (sync_out) >> - drm_syncobj_replace_fence(sync_out, job->render_done_fence); >> - >> -fail_job: >> - panfrost_job_put(job); >> -fail_out_sync: >> - if (sync_out) >> - drm_syncobj_put(sync_out); >> -fail_put_queue: >> - panfrost_submitqueue_put(queue); >> - >> - return ret; >> -} >> - >> static int >> panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, >> struct drm_file *file_priv) >> @@ -491,6 +313,389 @@ panfrost_ioctl_destroy_submitqueue(struct drm_device *dev, void *data, >> return panfrost_submitqueue_destroy(priv, id); >> } >> >> +#define PANFROST_BO_REF_ALLOWED_FLAGS \ >> + (PANFROST_BO_REF_EXCLUSIVE | PANFROST_BO_REF_NO_IMPLICIT_DEP) >> + >> +static int >> +panfrost_get_job_bos(struct drm_file *file_priv, >> + u64 refs, u32 ref_stride, u32 count, >> + struct panfrost_job *job) >> +{ >> + void __user *in = u64_to_user_ptr(refs); >> + unsigned int i; >> + >> + job->bo_count = count; >> + >> + if (!count) >> + return 0; >> + >> + job->bos = kvmalloc_array(job->bo_count, sizeof(*job->bos), >> + GFP_KERNEL | __GFP_ZERO); >> + job->bo_flags = kvmalloc_array(job->bo_count, >> + sizeof(*job->bo_flags), >> + GFP_KERNEL | __GFP_ZERO); >> + if (!job->bos || !job->bo_flags) >> + return -ENOMEM; >> + >> + for (i = 0; i < count; i++) { >> + struct drm_panfrost_bo_ref ref = { }; >> + int ret; >> + >> + ret = copy_struct_from_user(&ref, sizeof(ref), >> + in + (i * ref_stride), >> + ref_stride); >> + if (ret) >> + return ret; >> + >> + /* Prior to the BATCH_SUBMIT ioctl all accessed BOs were >> + * treated as exclusive. >> + */ >> + if (ref_stride == sizeof(u32)) >> + ref.flags = PANFROST_BO_REF_EXCLUSIVE; >> + >> + if ((ref.flags & ~PANFROST_BO_REF_ALLOWED_FLAGS)) >> + return -EINVAL; >> + >> + job->bos[i] = drm_gem_object_lookup(file_priv, ref.handle); >> + if (!job->bos[i]) >> + return -EINVAL; >> + >> + job->bo_flags[i] = ref.flags; >> + } >> + >> + return 0; >> +} >> + >> +static int >> +panfrost_get_job_in_syncs(struct drm_file *file_priv, >> + u64 refs, u32 ref_stride, >> + u32 count, struct panfrost_job *job) >> +{ >> + const void __user *in = u64_to_user_ptr(refs); >> + unsigned int i; >> + int ret; >> + >> + if (!count) >> + return 0; >> + >> + for (i = 0; i < count; i++) { >> + struct drm_panfrost_syncobj_ref ref = { }; >> + struct dma_fence *fence; >> + >> + ret = copy_struct_from_user(&ref, sizeof(ref), >> + in + (i * ref_stride), >> + ref_stride); >> + if (ret) >> + return ret; >> + >> + if (ref.pad) >> + return -EINVAL; >> + >> + ret = drm_syncobj_find_fence(file_priv, ref.handle, ref.point, >> + 0, &fence); >> + if (ret) >> + return ret; >> + >> + ret = drm_gem_fence_array_add(&job->deps, fence); >> + if (ret) >> + return ret; >> + } >> + >> + return 0; >> +} >> + >> +struct panfrost_job_out_sync { >> + struct drm_syncobj *syncobj; >> + struct dma_fence_chain *chain; >> + u64 point; >> +}; >> + >> +static void >> +panfrost_put_job_out_syncs(struct panfrost_job_out_sync *out_syncs, u32 count) >> +{ >> + unsigned int i; >> + >> + for (i = 0; i < count; i++) { >> + if (!out_syncs[i].syncobj) >> + break; >> + >> + drm_syncobj_put(out_syncs[i].syncobj); >> + kvfree(out_syncs[i].chain); >> + } >> + >> + kvfree(out_syncs); >> +} >> + >> +static struct panfrost_job_out_sync * >> +panfrost_get_job_out_syncs(struct drm_file *file_priv, >> + u64 refs, u32 ref_stride, >> + u32 count) >> +{ >> + void __user *in = u64_to_user_ptr(refs); >> + struct panfrost_job_out_sync *out_syncs; >> + unsigned int i; >> + int ret; >> + >> + if (!count) >> + return NULL; >> + >> + /* If the syncobj ref_stride == sizeof(u32) we are called from the >> + * old submit ioctl() which only accepted one out syncobj. In that >> + * case the syncobj handle is passed directly through the >> + * ->out_syncs field, so let's make sure the refs fits in a u32. >> + */ >> + if (ref_stride == sizeof(u32) && >> + (count != 1 || refs > UINT_MAX)) >> + return ERR_PTR(-EINVAL); >> + >> + out_syncs = kvmalloc_array(count, sizeof(*out_syncs), >> + GFP_KERNEL | __GFP_ZERO); >> + if (!out_syncs) >> + return ERR_PTR(-ENOMEM); >> + >> + for (i = 0; i < count; i++) { >> + struct drm_panfrost_syncobj_ref ref = { }; >> + >> + if (ref_stride == sizeof(u32)) { >> + /* Special case for the old submit wrapper: in that >> + * case there's only one out_sync, and the syncobj >> + * handle is passed directly in the out_syncs field. >> + */ >> + ref.handle = refs; >> + } else { >> + ret = copy_struct_from_user(&ref, sizeof(ref), >> + in + (i * ref_stride), >> + ref_stride); >> + if (ret) >> + goto err_free_out_syncs; >> + } >> + >> + if (ref.pad) { >> + ret = -EINVAL; >> + goto err_free_out_syncs; >> + } >> + >> + out_syncs[i].syncobj = drm_syncobj_find(file_priv, ref.handle); >> + if (!out_syncs[i].syncobj) { >> + ret = -ENODEV; >> + goto err_free_out_syncs; >> + } >> + >> + out_syncs[i].point = ref.point; >> + if (!out_syncs[i].point) >> + continue; >> + >> + out_syncs[i].chain = kmalloc(sizeof(*out_syncs[i].chain), >> + GFP_KERNEL); >> + if (!out_syncs[i].chain) { >> + ret = -ENOMEM; >> + goto err_free_out_syncs; >> + } >> + } >> + >> + return out_syncs; >> + >> +err_free_out_syncs: >> + panfrost_put_job_out_syncs(out_syncs, count); >> + return ERR_PTR(ret); >> +} >> + >> +static void >> +panfrost_set_job_out_fence(struct panfrost_job_out_sync *out_syncs, >> + unsigned int count, struct dma_fence *fence) >> +{ >> + unsigned int i; >> + >> + for (i = 0; i < count; i++) { >> + if (out_syncs[i].chain) { >> + drm_syncobj_add_point(out_syncs[i].syncobj, >> + out_syncs[i].chain, >> + fence, out_syncs[i].point); >> + out_syncs[i].chain = NULL; >> + } else { >> + drm_syncobj_replace_fence(out_syncs[i].syncobj, >> + fence); >> + } >> + } >> +} >> + >> +struct panfrost_submit_ioctl_version_info { >> + u32 job_stride; >> + u32 bo_ref_stride; >> + u32 syncobj_ref_stride; >> +}; >> + >> +static const struct panfrost_submit_ioctl_version_info submit_versions[] = { >> + /* SUBMIT */ >> + [0] = { 0, 4, 4 }, >> + >> + /* BATCH_SUBMIT v1 */ >> + [1] = { 48, 8, 16 }, >> +}; >> + >> +#define PANFROST_JD_ALLOWED_REQS PANFROST_JD_REQ_FS >> + >> +static int >> +panfrost_submit_job(struct drm_device *dev, struct drm_file *file_priv, >> + struct panfrost_submitqueue *queue, >> + const struct drm_panfrost_job *args, >> + u32 version) >> +{ >> + struct panfrost_device *pfdev = dev->dev_private; >> + struct panfrost_job_out_sync *out_syncs; >> + u32 bo_stride, syncobj_stride; >> + struct panfrost_job *job; >> + int ret; >> + >> + if (!args->head) >> + return -EINVAL; >> + >> + if (args->requirements & ~PANFROST_JD_ALLOWED_REQS) >> + return -EINVAL; >> + >> + bo_stride = submit_versions[version].bo_ref_stride; >> + syncobj_stride = submit_versions[version].syncobj_ref_stride; >> + >> + job = kzalloc(sizeof(*job), GFP_KERNEL); >> + if (!job) >> + return -ENOMEM; >> + >> + kref_init(&job->refcount); >> + >> + job->pfdev = pfdev; >> + job->jc = args->head; >> + job->requirements = args->requirements; >> + job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); >> + job->file_priv = file_priv->driver_priv; >> + xa_init_flags(&job->deps, XA_FLAGS_ALLOC); >> + >> + ret = panfrost_get_job_in_syncs(file_priv, >> + args->in_syncs, >> + syncobj_stride, >> + args->in_sync_count, >> + job); >> + if (ret) >> + goto err_put_job; >> + >> + out_syncs = panfrost_get_job_out_syncs(file_priv, >> + args->out_syncs, >> + syncobj_stride, >> + args->out_sync_count); >> + if (IS_ERR(out_syncs)) { >> + ret = PTR_ERR(out_syncs); >> + goto err_put_job; >> + } >> + >> + ret = panfrost_get_job_bos(file_priv, args->bos, bo_stride, >> + args->bo_count, job); >> + if (ret) >> + goto err_put_job; >> + >> + ret = panfrost_get_job_mappings(file_priv, job); >> + if (ret) >> + goto err_put_job; >> + >> + ret = panfrost_job_push(queue, job); >> + if (ret) { >> + panfrost_put_job_out_syncs(out_syncs, args->out_sync_count); >> + goto err_put_job; >> + } >> + >> + panfrost_set_job_out_fence(out_syncs, args->out_sync_count, >> + job->render_done_fence); >> + panfrost_put_job_out_syncs(out_syncs, args->out_sync_count); >> + return 0; >> + >> +err_put_job: >> + panfrost_job_put(job); >> + return ret; >> +} >> + >> +static int >> +panfrost_ioctl_submit(struct drm_device *dev, void *data, >> + struct drm_file *file) >> +{ >> + struct drm_panfrost_submit *args = data; >> + struct drm_panfrost_job job_args = { >> + .head = args->jc, >> + .bos = args->bo_handles, >> + .in_syncs = args->in_syncs, >> + >> + /* We are abusing .out_syncs and passing the handle directly >> + * instead of a pointer to a user u32 array, but >> + * panfrost_job_submit() knows about it, so it's fine. >> + */ >> + .out_syncs = args->out_sync, >> + .in_sync_count = args->in_sync_count, >> + .out_sync_count = args->out_sync > 0 ? 1 : 0, >> + .bo_count = args->bo_handle_count, >> + .requirements = args->requirements >> + }; >> + struct panfrost_submitqueue *queue; >> + int ret; >> + >> + queue = panfrost_submitqueue_get(file->driver_priv, 0); >> + if (IS_ERR(queue)) >> + return PTR_ERR(queue); >> + >> + ret = panfrost_submit_job(dev, file, queue, &job_args, 0); >> + panfrost_submitqueue_put(queue); >> + >> + return ret; >> +} >> + >> +static int >> +panfrost_ioctl_batch_submit(struct drm_device *dev, void *data, >> + struct drm_file *file_priv) >> +{ >> + struct drm_panfrost_batch_submit *args = data; >> + void __user *jobs_args = u64_to_user_ptr(args->jobs); >> + struct panfrost_submitqueue *queue; >> + u32 version = args->version; >> + u32 job_stride; >> + unsigned int i; >> + int ret; >> + >> + /* Version 0 doesn't exists (it's reserved for the SUBMIT ioctl) */ >> + if (!version) >> + return -EINVAL; >> + >> + /* If the version specified is bigger than what we currently support, >> + * pick the last supported version and let copy_struct_from_user() >> + * check that any extra job, bo_ref and syncobj_ref fields are zeroed. >> + */ >> + if (version >= ARRAY_SIZE(submit_versions)) >> + version = ARRAY_SIZE(submit_versions) - 1; >> + >> + queue = panfrost_submitqueue_get(file_priv->driver_priv, args->queue); >> + if (IS_ERR(queue)) >> + return PTR_ERR(queue); >> + >> + job_stride = submit_versions[version].job_stride; >> + for (i = 0; i < args->job_count; i++) { >> + struct drm_panfrost_job job_args = { }; >> + >> + ret = copy_struct_from_user(&job_args, sizeof(job_args), >> + jobs_args + (i * job_stride), >> + job_stride); >> + if (ret) { >> + args->fail_idx = i; >> + goto out_put_queue; >> + } >> + >> + ret = panfrost_submit_job(dev, file_priv, queue, &job_args, >> + version); >> + if (ret) { >> + args->fail_idx = i; >> + goto out_put_queue; >> + } >> + } >> + >> +out_put_queue: >> + panfrost_submitqueue_put(queue); >> + return 0; >> +} >> + >> int panfrost_unstable_ioctl_check(void) >> { >> if (!unstable_ioctls) >> @@ -572,6 +777,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { >> PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW), >> PANFROST_IOCTL(CREATE_SUBMITQUEUE, create_submitqueue, DRM_RENDER_ALLOW), >> PANFROST_IOCTL(DESTROY_SUBMITQUEUE, destroy_submitqueue, DRM_RENDER_ALLOW), >> + PANFROST_IOCTL(BATCH_SUBMIT, batch_submit, DRM_RENDER_ALLOW), >> }; >> >> DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops); >> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c >> index 56ae89272e19..4e1540bce865 100644 >> --- a/drivers/gpu/drm/panfrost/panfrost_job.c >> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c >> @@ -254,6 +254,9 @@ static int panfrost_acquire_object_fences(struct panfrost_job *job) >> return ret; >> } >> >> + if (job->bo_flags[i] & PANFROST_BO_REF_NO_IMPLICIT_DEP) >> + continue; > This breaks dma_resv rules. I'll send out patch set fixing this pattern in > other drivers, I'll ping you on that for what you need to change. Should > go out today or so. I'm really wondering if the behavior that the exclusive fences replaces all the shared fences was such a good idea. It just allows drivers to mess up things in a way which can be easily used to compromise the system. Christian. > > Also cc: Christian König. > -Daniel > >> + >> ret = drm_gem_fence_array_add_implicit(&job->deps, job->bos[i], >> exclusive); >> if (ret) >> diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h >> index e31a22c176d9..5d534e61c28e 100644 >> --- a/include/uapi/drm/panfrost_drm.h >> +++ b/include/uapi/drm/panfrost_drm.h >> @@ -23,6 +23,7 @@ extern "C" { >> #define DRM_PANFROST_MADVISE 0x08 >> #define DRM_PANFROST_CREATE_SUBMITQUEUE 0x09 >> #define DRM_PANFROST_DESTROY_SUBMITQUEUE 0x0a >> +#define DRM_PANFROST_BATCH_SUBMIT 0x0b >> >> #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) >> #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) >> @@ -33,6 +34,7 @@ extern "C" { >> #define DRM_IOCTL_PANFROST_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MADVISE, struct drm_panfrost_madvise) >> #define DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_SUBMITQUEUE, struct drm_panfrost_create_submitqueue) >> #define DRM_IOCTL_PANFROST_DESTROY_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_DESTROY_SUBMITQUEUE, __u32) >> +#define DRM_IOCTL_PANFROST_BATCH_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_BATCH_SUBMIT, struct drm_panfrost_batch_submit) >> >> /* >> * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module >> @@ -241,9 +243,99 @@ struct drm_panfrost_create_submitqueue { >> __u32 id; /* out, identifier */ >> }; >> >> +/* Syncobj reference passed at job submission time to encode explicit >> + * input/output fences. >> + */ >> +struct drm_panfrost_syncobj_ref { >> + /** Syncobj handle */ >> + __u32 handle; >> + >> + /** Padding field, must be set to 0 */ >> + __u32 pad; >> + >> + /** >> + * For timeline syncobjs, the point on the timeline the reference >> + * points to. 0 for the last point. >> + * Must be set to 0 for non-timeline syncobjs >> + */ >> + __u64 point; >> +}; >> + >> /* Exclusive (AKA write) access to the BO */ >> #define PANFROST_BO_REF_EXCLUSIVE 0x1 >> >> +/* Disable the implicit depency on the BO fence */ >> +#define PANFROST_BO_REF_NO_IMPLICIT_DEP 0x2 >> + >> +/* Describes a BO referenced by a job and the type of access. */ >> +struct drm_panfrost_bo_ref { >> + /** A GEM handle */ >> + __u32 handle; >> + >> + /** A combination of PANFROST_BO_REF_x flags */ >> + __u32 flags; >> +}; >> + >> +/* Describes a GPU job and the resources attached to it. */ >> +struct drm_panfrost_job { >> + /** GPU pointer to the head of the job chain. */ >> + __u64 head; >> + >> + /** >> + * Array of drm_panfrost_bo_ref objects describing the BOs referenced >> + * by this job. >> + */ >> + __u64 bos; >> + >> + /** >> + * Arrays of drm_panfrost_syncobj_ref objects describing the input >> + * and output fences. >> + */ >> + __u64 in_syncs; >> + __u64 out_syncs; >> + >> + /** Syncobj reference array sizes. */ >> + __u32 in_sync_count; >> + __u32 out_sync_count; >> + >> + /** BO reference array size. */ >> + __u32 bo_count; >> + >> + /** Combination of PANFROST_JD_REQ_* flags. */ >> + __u32 requirements; >> +}; >> + >> +#define PANFROST_SUBMIT_BATCH_VERSION 1 >> + >> +/* Used to submit multiple jobs in one call */ >> +struct drm_panfrost_batch_submit { >> + /** >> + * Always set to PANFROST_SUBMIT_BATCH_VERSION. This is used to let the >> + * kernel know about the size of the various structs passed to the >> + * BATCH_SUBMIT ioctl. >> + */ >> + __u32 version; >> + >> + /** Number of jobs to submit. */ >> + __u32 job_count; >> + >> + /* Pointer to a job array. */ >> + __u64 jobs; >> + >> + /** >> + * ID of the queue to submit those jobs to. 0 is the default >> + * submit queue and should always exists. If you need a dedicated >> + * queue, create it with DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE. >> + */ >> + __u32 queue; >> + >> + /** >> + * If the submission fails, this encodes the index of the job >> + * failed. >> + */ >> + __u32 fail_idx; >> +}; >> + >> #if defined(__cplusplus) >> } >> #endif >> -- >> 2.31.1 >>
On Thu, 8 Jul 2021 14:10:45 +0200 Christian König <ckoenig.leichtzumerken@gmail.com> wrote: > >> --- a/drivers/gpu/drm/panfrost/panfrost_job.c > >> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c > >> @@ -254,6 +254,9 @@ static int panfrost_acquire_object_fences(struct panfrost_job *job) > >> return ret; > >> } > >> > >> + if (job->bo_flags[i] & PANFROST_BO_REF_NO_IMPLICIT_DEP) > >> + continue; > > This breaks dma_resv rules. I'll send out patch set fixing this pattern in > > other drivers, I'll ping you on that for what you need to change. Should > > go out today or so. I guess you're talking about [1]. TBH, I don't quite see the point of exposing a 'no-implicit' flag if we end up forcing this implicit dep anyway, but I'm probably missing something. > > I'm really wondering if the behavior that the exclusive fences replaces > all the shared fences was such a good idea. Is that what's done in [1], or are you talking about a different patchset/approach? > > It just allows drivers to mess up things in a way which can be easily > used to compromise the system. I must admit I'm a bit lost, so I'm tempted to drop that flag for now :-). [1]https://patchwork.freedesktop.org/patch/443711/?series=92334&rev=3
On Mon, Jul 26, 2021 at 12:27:06PM +0200, Boris Brezillon wrote: > On Thu, 8 Jul 2021 14:10:45 +0200 > Christian König <ckoenig.leichtzumerken@gmail.com> wrote: > > > >> --- a/drivers/gpu/drm/panfrost/panfrost_job.c > > >> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c > > >> @@ -254,6 +254,9 @@ static int panfrost_acquire_object_fences(struct panfrost_job *job) > > >> return ret; > > >> } > > >> > > >> + if (job->bo_flags[i] & PANFROST_BO_REF_NO_IMPLICIT_DEP) > > >> + continue; > > > This breaks dma_resv rules. I'll send out patch set fixing this pattern in > > > other drivers, I'll ping you on that for what you need to change. Should > > > go out today or so. > > I guess you're talking about [1]. TBH, I don't quite see the point of > exposing a 'no-implicit' flag if we end up forcing this implicit dep > anyway, but I'm probably missing something. Yeah that's the patch set. Note that there's better ways to do this, it's just that these better ways take more typing and need some actual testing (and ideally igts and everything). The NO_IMPLICIT flag is still useful even with the hacky solution, as long as you don't set a write fence too. For that it might be better to look into the dma_fence import patch from Jason: https://lore.kernel.org/dri-devel/20210610210925.642582-7-jason@jlekstrand.net/ > > I'm really wondering if the behavior that the exclusive fences replaces > > all the shared fences was such a good idea. > > Is that what's done in [1], or are you talking about a different > patchset/approach? That's just how dma_resv works for the exclusive slot. -Daniel > > > > > It just allows drivers to mess up things in a way which can be easily > > used to compromise the system. > > I must admit I'm a bit lost, so I'm tempted to drop that flag for now > :-). > > [1]https://patchwork.freedesktop.org/patch/443711/?series=92334&rev=3
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 8e28ef30310b..a624e4f86aff 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -138,184 +138,6 @@ panfrost_get_job_mappings(struct drm_file *file_priv, struct panfrost_job *job) return 0; } -/** - * panfrost_lookup_bos() - Sets up job->bo[] with the GEM objects - * referenced by the job. - * @dev: DRM device - * @file_priv: DRM file for this fd - * @args: IOCTL args - * @job: job being set up - * - * Resolve handles from userspace to BOs and attach them to job. - * - * Note that this function doesn't need to unreference the BOs on - * failure, because that will happen at panfrost_job_cleanup() time. - */ -static int -panfrost_lookup_bos(struct drm_device *dev, - struct drm_file *file_priv, - struct drm_panfrost_submit *args, - struct panfrost_job *job) -{ - unsigned int i; - int ret; - - job->bo_count = args->bo_handle_count; - - if (!job->bo_count) - return 0; - - job->bo_flags = kvmalloc_array(job->bo_count, - sizeof(*job->bo_flags), - GFP_KERNEL | __GFP_ZERO); - if (!job->bo_flags) - return -ENOMEM; - - for (i = 0; i < job->bo_count; i++) - job->bo_flags[i] = PANFROST_BO_REF_EXCLUSIVE; - - ret = drm_gem_objects_lookup(file_priv, - (void __user *)(uintptr_t)args->bo_handles, - job->bo_count, &job->bos); - if (ret) - return ret; - - return panfrost_get_job_mappings(file_priv, job); -} - -/** - * panfrost_copy_in_sync() - Sets up job->deps with the sync objects - * referenced by the job. - * @dev: DRM device - * @file_priv: DRM file for this fd - * @args: IOCTL args - * @job: job being set up - * - * Resolve syncobjs from userspace to fences and attach them to job. - * - * Note that this function doesn't need to unreference the fences on - * failure, because that will happen at panfrost_job_cleanup() time. - */ -static int -panfrost_copy_in_sync(struct drm_device *dev, - struct drm_file *file_priv, - struct drm_panfrost_submit *args, - struct panfrost_job *job) -{ - u32 *handles; - int ret = 0; - int i, in_fence_count; - - in_fence_count = args->in_sync_count; - - if (!in_fence_count) - return 0; - - handles = kvmalloc_array(in_fence_count, sizeof(u32), GFP_KERNEL); - if (!handles) { - ret = -ENOMEM; - DRM_DEBUG("Failed to allocate incoming syncobj handles\n"); - goto fail; - } - - if (copy_from_user(handles, - (void __user *)(uintptr_t)args->in_syncs, - in_fence_count * sizeof(u32))) { - ret = -EFAULT; - DRM_DEBUG("Failed to copy in syncobj handles\n"); - goto fail; - } - - for (i = 0; i < in_fence_count; i++) { - struct dma_fence *fence; - - ret = drm_syncobj_find_fence(file_priv, handles[i], 0, 0, - &fence); - if (ret) - goto fail; - - ret = drm_gem_fence_array_add(&job->deps, fence); - - if (ret) - goto fail; - } - -fail: - kvfree(handles); - return ret; -} - -static int panfrost_ioctl_submit(struct drm_device *dev, void *data, - struct drm_file *file) -{ - struct panfrost_device *pfdev = dev->dev_private; - struct drm_panfrost_submit *args = data; - struct drm_syncobj *sync_out = NULL; - struct panfrost_submitqueue *queue; - struct panfrost_job *job; - int ret = 0; - - if (!args->jc) - return -EINVAL; - - if (args->requirements && args->requirements != PANFROST_JD_REQ_FS) - return -EINVAL; - - queue = panfrost_submitqueue_get(file->driver_priv, 0); - if (IS_ERR(queue)) - return PTR_ERR(queue); - - if (args->out_sync > 0) { - sync_out = drm_syncobj_find(file, args->out_sync); - if (!sync_out) { - ret = -ENODEV; - goto fail_put_queue; - } - } - - job = kzalloc(sizeof(*job), GFP_KERNEL); - if (!job) { - ret = -ENOMEM; - goto fail_out_sync; - } - - kref_init(&job->refcount); - - xa_init_flags(&job->deps, XA_FLAGS_ALLOC); - - job->pfdev = pfdev; - job->jc = args->jc; - job->requirements = args->requirements; - job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); - job->file_priv = file->driver_priv; - - ret = panfrost_copy_in_sync(dev, file, args, job); - if (ret) - goto fail_job; - - ret = panfrost_lookup_bos(dev, file, args, job); - if (ret) - goto fail_job; - - ret = panfrost_job_push(queue, job); - if (ret) - goto fail_job; - - /* Update the return sync object for the job */ - if (sync_out) - drm_syncobj_replace_fence(sync_out, job->render_done_fence); - -fail_job: - panfrost_job_put(job); -fail_out_sync: - if (sync_out) - drm_syncobj_put(sync_out); -fail_put_queue: - panfrost_submitqueue_put(queue); - - return ret; -} - static int panfrost_ioctl_wait_bo(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -491,6 +313,389 @@ panfrost_ioctl_destroy_submitqueue(struct drm_device *dev, void *data, return panfrost_submitqueue_destroy(priv, id); } +#define PANFROST_BO_REF_ALLOWED_FLAGS \ + (PANFROST_BO_REF_EXCLUSIVE | PANFROST_BO_REF_NO_IMPLICIT_DEP) + +static int +panfrost_get_job_bos(struct drm_file *file_priv, + u64 refs, u32 ref_stride, u32 count, + struct panfrost_job *job) +{ + void __user *in = u64_to_user_ptr(refs); + unsigned int i; + + job->bo_count = count; + + if (!count) + return 0; + + job->bos = kvmalloc_array(job->bo_count, sizeof(*job->bos), + GFP_KERNEL | __GFP_ZERO); + job->bo_flags = kvmalloc_array(job->bo_count, + sizeof(*job->bo_flags), + GFP_KERNEL | __GFP_ZERO); + if (!job->bos || !job->bo_flags) + return -ENOMEM; + + for (i = 0; i < count; i++) { + struct drm_panfrost_bo_ref ref = { }; + int ret; + + ret = copy_struct_from_user(&ref, sizeof(ref), + in + (i * ref_stride), + ref_stride); + if (ret) + return ret; + + /* Prior to the BATCH_SUBMIT ioctl all accessed BOs were + * treated as exclusive. + */ + if (ref_stride == sizeof(u32)) + ref.flags = PANFROST_BO_REF_EXCLUSIVE; + + if ((ref.flags & ~PANFROST_BO_REF_ALLOWED_FLAGS)) + return -EINVAL; + + job->bos[i] = drm_gem_object_lookup(file_priv, ref.handle); + if (!job->bos[i]) + return -EINVAL; + + job->bo_flags[i] = ref.flags; + } + + return 0; +} + +static int +panfrost_get_job_in_syncs(struct drm_file *file_priv, + u64 refs, u32 ref_stride, + u32 count, struct panfrost_job *job) +{ + const void __user *in = u64_to_user_ptr(refs); + unsigned int i; + int ret; + + if (!count) + return 0; + + for (i = 0; i < count; i++) { + struct drm_panfrost_syncobj_ref ref = { }; + struct dma_fence *fence; + + ret = copy_struct_from_user(&ref, sizeof(ref), + in + (i * ref_stride), + ref_stride); + if (ret) + return ret; + + if (ref.pad) + return -EINVAL; + + ret = drm_syncobj_find_fence(file_priv, ref.handle, ref.point, + 0, &fence); + if (ret) + return ret; + + ret = drm_gem_fence_array_add(&job->deps, fence); + if (ret) + return ret; + } + + return 0; +} + +struct panfrost_job_out_sync { + struct drm_syncobj *syncobj; + struct dma_fence_chain *chain; + u64 point; +}; + +static void +panfrost_put_job_out_syncs(struct panfrost_job_out_sync *out_syncs, u32 count) +{ + unsigned int i; + + for (i = 0; i < count; i++) { + if (!out_syncs[i].syncobj) + break; + + drm_syncobj_put(out_syncs[i].syncobj); + kvfree(out_syncs[i].chain); + } + + kvfree(out_syncs); +} + +static struct panfrost_job_out_sync * +panfrost_get_job_out_syncs(struct drm_file *file_priv, + u64 refs, u32 ref_stride, + u32 count) +{ + void __user *in = u64_to_user_ptr(refs); + struct panfrost_job_out_sync *out_syncs; + unsigned int i; + int ret; + + if (!count) + return NULL; + + /* If the syncobj ref_stride == sizeof(u32) we are called from the + * old submit ioctl() which only accepted one out syncobj. In that + * case the syncobj handle is passed directly through the + * ->out_syncs field, so let's make sure the refs fits in a u32. + */ + if (ref_stride == sizeof(u32) && + (count != 1 || refs > UINT_MAX)) + return ERR_PTR(-EINVAL); + + out_syncs = kvmalloc_array(count, sizeof(*out_syncs), + GFP_KERNEL | __GFP_ZERO); + if (!out_syncs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + struct drm_panfrost_syncobj_ref ref = { }; + + if (ref_stride == sizeof(u32)) { + /* Special case for the old submit wrapper: in that + * case there's only one out_sync, and the syncobj + * handle is passed directly in the out_syncs field. + */ + ref.handle = refs; + } else { + ret = copy_struct_from_user(&ref, sizeof(ref), + in + (i * ref_stride), + ref_stride); + if (ret) + goto err_free_out_syncs; + } + + if (ref.pad) { + ret = -EINVAL; + goto err_free_out_syncs; + } + + out_syncs[i].syncobj = drm_syncobj_find(file_priv, ref.handle); + if (!out_syncs[i].syncobj) { + ret = -ENODEV; + goto err_free_out_syncs; + } + + out_syncs[i].point = ref.point; + if (!out_syncs[i].point) + continue; + + out_syncs[i].chain = kmalloc(sizeof(*out_syncs[i].chain), + GFP_KERNEL); + if (!out_syncs[i].chain) { + ret = -ENOMEM; + goto err_free_out_syncs; + } + } + + return out_syncs; + +err_free_out_syncs: + panfrost_put_job_out_syncs(out_syncs, count); + return ERR_PTR(ret); +} + +static void +panfrost_set_job_out_fence(struct panfrost_job_out_sync *out_syncs, + unsigned int count, struct dma_fence *fence) +{ + unsigned int i; + + for (i = 0; i < count; i++) { + if (out_syncs[i].chain) { + drm_syncobj_add_point(out_syncs[i].syncobj, + out_syncs[i].chain, + fence, out_syncs[i].point); + out_syncs[i].chain = NULL; + } else { + drm_syncobj_replace_fence(out_syncs[i].syncobj, + fence); + } + } +} + +struct panfrost_submit_ioctl_version_info { + u32 job_stride; + u32 bo_ref_stride; + u32 syncobj_ref_stride; +}; + +static const struct panfrost_submit_ioctl_version_info submit_versions[] = { + /* SUBMIT */ + [0] = { 0, 4, 4 }, + + /* BATCH_SUBMIT v1 */ + [1] = { 48, 8, 16 }, +}; + +#define PANFROST_JD_ALLOWED_REQS PANFROST_JD_REQ_FS + +static int +panfrost_submit_job(struct drm_device *dev, struct drm_file *file_priv, + struct panfrost_submitqueue *queue, + const struct drm_panfrost_job *args, + u32 version) +{ + struct panfrost_device *pfdev = dev->dev_private; + struct panfrost_job_out_sync *out_syncs; + u32 bo_stride, syncobj_stride; + struct panfrost_job *job; + int ret; + + if (!args->head) + return -EINVAL; + + if (args->requirements & ~PANFROST_JD_ALLOWED_REQS) + return -EINVAL; + + bo_stride = submit_versions[version].bo_ref_stride; + syncobj_stride = submit_versions[version].syncobj_ref_stride; + + job = kzalloc(sizeof(*job), GFP_KERNEL); + if (!job) + return -ENOMEM; + + kref_init(&job->refcount); + + job->pfdev = pfdev; + job->jc = args->head; + job->requirements = args->requirements; + job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev); + job->file_priv = file_priv->driver_priv; + xa_init_flags(&job->deps, XA_FLAGS_ALLOC); + + ret = panfrost_get_job_in_syncs(file_priv, + args->in_syncs, + syncobj_stride, + args->in_sync_count, + job); + if (ret) + goto err_put_job; + + out_syncs = panfrost_get_job_out_syncs(file_priv, + args->out_syncs, + syncobj_stride, + args->out_sync_count); + if (IS_ERR(out_syncs)) { + ret = PTR_ERR(out_syncs); + goto err_put_job; + } + + ret = panfrost_get_job_bos(file_priv, args->bos, bo_stride, + args->bo_count, job); + if (ret) + goto err_put_job; + + ret = panfrost_get_job_mappings(file_priv, job); + if (ret) + goto err_put_job; + + ret = panfrost_job_push(queue, job); + if (ret) { + panfrost_put_job_out_syncs(out_syncs, args->out_sync_count); + goto err_put_job; + } + + panfrost_set_job_out_fence(out_syncs, args->out_sync_count, + job->render_done_fence); + panfrost_put_job_out_syncs(out_syncs, args->out_sync_count); + return 0; + +err_put_job: + panfrost_job_put(job); + return ret; +} + +static int +panfrost_ioctl_submit(struct drm_device *dev, void *data, + struct drm_file *file) +{ + struct drm_panfrost_submit *args = data; + struct drm_panfrost_job job_args = { + .head = args->jc, + .bos = args->bo_handles, + .in_syncs = args->in_syncs, + + /* We are abusing .out_syncs and passing the handle directly + * instead of a pointer to a user u32 array, but + * panfrost_job_submit() knows about it, so it's fine. + */ + .out_syncs = args->out_sync, + .in_sync_count = args->in_sync_count, + .out_sync_count = args->out_sync > 0 ? 1 : 0, + .bo_count = args->bo_handle_count, + .requirements = args->requirements + }; + struct panfrost_submitqueue *queue; + int ret; + + queue = panfrost_submitqueue_get(file->driver_priv, 0); + if (IS_ERR(queue)) + return PTR_ERR(queue); + + ret = panfrost_submit_job(dev, file, queue, &job_args, 0); + panfrost_submitqueue_put(queue); + + return ret; +} + +static int +panfrost_ioctl_batch_submit(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_panfrost_batch_submit *args = data; + void __user *jobs_args = u64_to_user_ptr(args->jobs); + struct panfrost_submitqueue *queue; + u32 version = args->version; + u32 job_stride; + unsigned int i; + int ret; + + /* Version 0 doesn't exists (it's reserved for the SUBMIT ioctl) */ + if (!version) + return -EINVAL; + + /* If the version specified is bigger than what we currently support, + * pick the last supported version and let copy_struct_from_user() + * check that any extra job, bo_ref and syncobj_ref fields are zeroed. + */ + if (version >= ARRAY_SIZE(submit_versions)) + version = ARRAY_SIZE(submit_versions) - 1; + + queue = panfrost_submitqueue_get(file_priv->driver_priv, args->queue); + if (IS_ERR(queue)) + return PTR_ERR(queue); + + job_stride = submit_versions[version].job_stride; + for (i = 0; i < args->job_count; i++) { + struct drm_panfrost_job job_args = { }; + + ret = copy_struct_from_user(&job_args, sizeof(job_args), + jobs_args + (i * job_stride), + job_stride); + if (ret) { + args->fail_idx = i; + goto out_put_queue; + } + + ret = panfrost_submit_job(dev, file_priv, queue, &job_args, + version); + if (ret) { + args->fail_idx = i; + goto out_put_queue; + } + } + +out_put_queue: + panfrost_submitqueue_put(queue); + return 0; +} + int panfrost_unstable_ioctl_check(void) { if (!unstable_ioctls) @@ -572,6 +777,7 @@ static const struct drm_ioctl_desc panfrost_drm_driver_ioctls[] = { PANFROST_IOCTL(MADVISE, madvise, DRM_RENDER_ALLOW), PANFROST_IOCTL(CREATE_SUBMITQUEUE, create_submitqueue, DRM_RENDER_ALLOW), PANFROST_IOCTL(DESTROY_SUBMITQUEUE, destroy_submitqueue, DRM_RENDER_ALLOW), + PANFROST_IOCTL(BATCH_SUBMIT, batch_submit, DRM_RENDER_ALLOW), }; DEFINE_DRM_GEM_FOPS(panfrost_drm_driver_fops); diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 56ae89272e19..4e1540bce865 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -254,6 +254,9 @@ static int panfrost_acquire_object_fences(struct panfrost_job *job) return ret; } + if (job->bo_flags[i] & PANFROST_BO_REF_NO_IMPLICIT_DEP) + continue; + ret = drm_gem_fence_array_add_implicit(&job->deps, job->bos[i], exclusive); if (ret) diff --git a/include/uapi/drm/panfrost_drm.h b/include/uapi/drm/panfrost_drm.h index e31a22c176d9..5d534e61c28e 100644 --- a/include/uapi/drm/panfrost_drm.h +++ b/include/uapi/drm/panfrost_drm.h @@ -23,6 +23,7 @@ extern "C" { #define DRM_PANFROST_MADVISE 0x08 #define DRM_PANFROST_CREATE_SUBMITQUEUE 0x09 #define DRM_PANFROST_DESTROY_SUBMITQUEUE 0x0a +#define DRM_PANFROST_BATCH_SUBMIT 0x0b #define DRM_IOCTL_PANFROST_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_SUBMIT, struct drm_panfrost_submit) #define DRM_IOCTL_PANFROST_WAIT_BO DRM_IOW(DRM_COMMAND_BASE + DRM_PANFROST_WAIT_BO, struct drm_panfrost_wait_bo) @@ -33,6 +34,7 @@ extern "C" { #define DRM_IOCTL_PANFROST_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_MADVISE, struct drm_panfrost_madvise) #define DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_CREATE_SUBMITQUEUE, struct drm_panfrost_create_submitqueue) #define DRM_IOCTL_PANFROST_DESTROY_SUBMITQUEUE DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_DESTROY_SUBMITQUEUE, __u32) +#define DRM_IOCTL_PANFROST_BATCH_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_PANFROST_BATCH_SUBMIT, struct drm_panfrost_batch_submit) /* * Unstable ioctl(s): only exposed when the unsafe unstable_ioctls module @@ -241,9 +243,99 @@ struct drm_panfrost_create_submitqueue { __u32 id; /* out, identifier */ }; +/* Syncobj reference passed at job submission time to encode explicit + * input/output fences. + */ +struct drm_panfrost_syncobj_ref { + /** Syncobj handle */ + __u32 handle; + + /** Padding field, must be set to 0 */ + __u32 pad; + + /** + * For timeline syncobjs, the point on the timeline the reference + * points to. 0 for the last point. + * Must be set to 0 for non-timeline syncobjs + */ + __u64 point; +}; + /* Exclusive (AKA write) access to the BO */ #define PANFROST_BO_REF_EXCLUSIVE 0x1 +/* Disable the implicit depency on the BO fence */ +#define PANFROST_BO_REF_NO_IMPLICIT_DEP 0x2 + +/* Describes a BO referenced by a job and the type of access. */ +struct drm_panfrost_bo_ref { + /** A GEM handle */ + __u32 handle; + + /** A combination of PANFROST_BO_REF_x flags */ + __u32 flags; +}; + +/* Describes a GPU job and the resources attached to it. */ +struct drm_panfrost_job { + /** GPU pointer to the head of the job chain. */ + __u64 head; + + /** + * Array of drm_panfrost_bo_ref objects describing the BOs referenced + * by this job. + */ + __u64 bos; + + /** + * Arrays of drm_panfrost_syncobj_ref objects describing the input + * and output fences. + */ + __u64 in_syncs; + __u64 out_syncs; + + /** Syncobj reference array sizes. */ + __u32 in_sync_count; + __u32 out_sync_count; + + /** BO reference array size. */ + __u32 bo_count; + + /** Combination of PANFROST_JD_REQ_* flags. */ + __u32 requirements; +}; + +#define PANFROST_SUBMIT_BATCH_VERSION 1 + +/* Used to submit multiple jobs in one call */ +struct drm_panfrost_batch_submit { + /** + * Always set to PANFROST_SUBMIT_BATCH_VERSION. This is used to let the + * kernel know about the size of the various structs passed to the + * BATCH_SUBMIT ioctl. + */ + __u32 version; + + /** Number of jobs to submit. */ + __u32 job_count; + + /* Pointer to a job array. */ + __u64 jobs; + + /** + * ID of the queue to submit those jobs to. 0 is the default + * submit queue and should always exists. If you need a dedicated + * queue, create it with DRM_IOCTL_PANFROST_CREATE_SUBMITQUEUE. + */ + __u32 queue; + + /** + * If the submission fails, this encodes the index of the job + * failed. + */ + __u32 fail_idx; +}; + #if defined(__cplusplus) } #endif
This should help limit the number of ioctls when submitting multiple jobs. The new ioctl also supports syncobj timelines and BO access flags. v4: * Implement panfrost_ioctl_submit() as a wrapper around panfrost_submit_job() * Replace stride fields by a version field which is mapped to a <job_stride,bo_ref_stride,syncobj_ref_stride> tuple internally v3: * Re-use panfrost_get_job_bos() and panfrost_get_job_in_syncs() in the old submit path Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com> --- drivers/gpu/drm/panfrost/panfrost_drv.c | 562 ++++++++++++++++-------- drivers/gpu/drm/panfrost/panfrost_job.c | 3 + include/uapi/drm/panfrost_drm.h | 92 ++++ 3 files changed, 479 insertions(+), 178 deletions(-)