Message ID | 20190315120959.25489-3-david1.zhou@amd.com (mailing list archive) |
---|---|
State | New, archived |
Series | [1/9] dma-buf: add new dma_fence_chain container v5 |
On 15/03/2019 12:09, Chunming Zhou wrote: > points array is one-to-one match with syncobjs array. > v2: > add seperate ioctl for timeline point wait, otherwise break uapi. > v3: > userspace can specify two kinds waits:: > a. Wait for time point to be completed. > b. and wait for time point to become available > v4: > rebase > v5: > add comment for xxx_WAIT_AVAILABLE > v6: rebase and rework on new container > v7: drop _WAIT_COMPLETED, it is the default anyway > v8: correctly handle garbage collected fences > > Signed-off-by: Chunming Zhou <david1.zhou@amd.com> > Signed-off-by: Christian König <christian.koenig@amd.com> > Cc: Daniel Rakos <Daniel.Rakos@amd.com> > Cc: Jason Ekstrand <jason@jlekstrand.net> > Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> > Cc: Dave Airlie <airlied@redhat.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/drm_internal.h | 2 + > drivers/gpu/drm/drm_ioctl.c | 2 + > drivers/gpu/drm/drm_syncobj.c | 153 ++++++++++++++++++++++++++------- > include/uapi/drm/drm.h | 15 ++++ > 4 files changed, 143 insertions(+), 29 deletions(-) > > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h > index 251d67e04c2d..331ac6225b58 100644 > --- a/drivers/gpu/drm/drm_internal.h > +++ b/drivers/gpu/drm/drm_internal.h > @@ -182,6 +182,8 @@ int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private); > int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private); > +int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private); > int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private); > int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, > diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c > index 687943df58e1..c984654646fa 100644 > --- a/drivers/gpu/drm/drm_ioctl.c > +++ b/drivers/gpu/drm/drm_ioctl.c > @@ -688,6 +688,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { > DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, > DRM_UNLOCKED|DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl, > + DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl, > DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 19a9ce638119..eae51978cda4 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -61,6 +61,7 @@ struct syncobj_wait_entry { > struct task_struct *task; > struct dma_fence *fence; > struct dma_fence_cb fence_cb; > + u64 point; > }; > > static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, > @@ -95,6 +96,8 @@ EXPORT_SYMBOL(drm_syncobj_find); > static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj, > struct syncobj_wait_entry *wait) > { > + struct dma_fence *fence; > + > if (wait->fence) > return; > > @@ -103,11 +106,15 @@ static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj, > * have the lock, try one more time just to be sure we don't add a > * callback when a fence has already been set. 
> */ > - if (syncobj->fence) > - wait->fence = dma_fence_get( > - rcu_dereference_protected(syncobj->fence, 1)); > - else > + fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1)); > + if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) { > + dma_fence_put(fence); > list_add_tail(&wait->node, &syncobj->cb_list); > + } else if (!fence) { > + wait->fence = dma_fence_get_stub(); > + } else { > + wait->fence = fence; > + } > spin_unlock(&syncobj->lock); > } > > @@ -149,10 +156,8 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj, > dma_fence_chain_init(chain, prev, fence, point); > rcu_assign_pointer(syncobj->fence, &chain->base); > > - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { > - list_del_init(&cur->node); > + list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) > syncobj_wait_syncobj_func(syncobj, cur); > - } > spin_unlock(&syncobj->lock); > > /* Walk the chain once to trigger garbage collection */ > @@ -184,10 +189,8 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, > rcu_assign_pointer(syncobj->fence, fence); > > if (fence != old_fence) { > - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { > - list_del_init(&cur->node); > + list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) > syncobj_wait_syncobj_func(syncobj, cur); > - } > } > > spin_unlock(&syncobj->lock); > @@ -644,13 +647,27 @@ static void syncobj_wait_fence_func(struct dma_fence *fence, > static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, > struct syncobj_wait_entry *wait) > { > + struct dma_fence *fence; > + > /* This happens inside the syncobj lock */ > - wait->fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, > - lockdep_is_held(&syncobj->lock))); > + fence = rcu_dereference_protected(syncobj->fence, > + lockdep_is_held(&syncobj->lock)); > + dma_fence_get(fence); > + if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) { > + dma_fence_put(fence); > + return; > + } else if (!fence) { > + wait->fence = dma_fence_get_stub(); > + } else { > + wait->fence = fence; > + } > + > wake_up_process(wait->task); > + list_del_init(&wait->node); > } > > static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, > + void __user *user_points, > uint32_t count, > uint32_t flags, > signed long timeout, > @@ -658,12 +675,27 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, > { > struct syncobj_wait_entry *entries; > struct dma_fence *fence; > + uint64_t *points; > uint32_t signaled_count, i; > > - entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); > - if (!entries) > + points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); > + if (points == NULL) > return -ENOMEM; > > + if (!user_points) { > + memset(points, 0, count * sizeof(uint64_t)); > + > + } else if (copy_from_user(points, user_points, > + sizeof(uint64_t) * count)) { > + timeout = -EFAULT; > + goto err_free_points; > + } > + > + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); > + if (!entries) { > + timeout = -ENOMEM; > + goto err_free_points; > + } > /* Walk the list of sync objects and initialize entries. 
We do > * this up-front so that we can properly return -EINVAL if there is > * a syncobj with a missing fence and then never have the chance of > @@ -671,9 +703,13 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, > */ > signaled_count = 0; > for (i = 0; i < count; ++i) { > + struct dma_fence *fence; > + > entries[i].task = current; > - entries[i].fence = drm_syncobj_fence_get(syncobjs[i]); > - if (!entries[i].fence) { > + entries[i].point = points[i]; > + fence = drm_syncobj_fence_get(syncobjs[i]); > + if (!fence || dma_fence_chain_find_seqno(&fence, points[i])) { > + dma_fence_put(fence); > if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { > continue; > } else { > @@ -682,7 +718,13 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, > } > } > > - if (dma_fence_is_signaled(entries[i].fence)) { > + if (fence) > + entries[i].fence = fence; > + else > + entries[i].fence = dma_fence_get_stub(); > + > + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) || Hey David, Could you remind me what DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE is used for? I didn't have a use for it in userspace, Thanks, -Lionel > + dma_fence_is_signaled(entries[i].fence)) { > if (signaled_count == 0 && idx) > *idx = i; > signaled_count++; > @@ -715,7 +757,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, > if (!fence) > continue; > > - if (dma_fence_is_signaled(fence) || > + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) || > + dma_fence_is_signaled(fence) || > (!entries[i].fence_cb.func && > dma_fence_add_callback(fence, > &entries[i].fence_cb, > @@ -760,6 +803,9 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, > } > kfree(entries); > > +err_free_points: > + kfree(points); > + > return timeout; > } > > @@ -799,19 +845,33 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies); > static int drm_syncobj_array_wait(struct drm_device *dev, > struct drm_file *file_private, > struct drm_syncobj_wait *wait, > - struct drm_syncobj **syncobjs) > + struct drm_syncobj_timeline_wait *timeline_wait, > + struct drm_syncobj **syncobjs, bool timeline) > { > - signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); > + signed long timeout = 0; > uint32_t first = ~0; > > - timeout = drm_syncobj_array_wait_timeout(syncobjs, > - wait->count_handles, > - wait->flags, > - timeout, &first); > - if (timeout < 0) > - return timeout; > - > - wait->first_signaled = first; > + if (!timeline) { > + timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); > + timeout = drm_syncobj_array_wait_timeout(syncobjs, > + NULL, > + wait->count_handles, > + wait->flags, > + timeout, &first); > + if (timeout < 0) > + return timeout; > + wait->first_signaled = first; > + } else { > + timeout = drm_timeout_abs_to_jiffies(timeline_wait->timeout_nsec); > + timeout = drm_syncobj_array_wait_timeout(syncobjs, > + u64_to_user_ptr(timeline_wait->points), > + timeline_wait->count_handles, > + timeline_wait->flags, > + timeout, &first); > + if (timeout < 0) > + return timeout; > + timeline_wait->first_signaled = first; > + } > return 0; > } > > @@ -897,13 +957,48 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, > return ret; > > ret = drm_syncobj_array_wait(dev, file_private, > - args, syncobjs); > + args, NULL, syncobjs, false); > > drm_syncobj_array_free(syncobjs, args->count_handles); > > return ret; > } > > +int > +drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, > + struct drm_file 
*file_private) > +{ > + struct drm_syncobj_timeline_wait *args = data; > + struct drm_syncobj **syncobjs; > + int ret = 0; > + > + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) > + return -ENODEV; > + > + if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | > + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | > + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) > + return -EINVAL; > + > + if (args->count_handles == 0) > + return -EINVAL; > + > + ret = drm_syncobj_array_find(file_private, > + u64_to_user_ptr(args->handles), > + args->count_handles, > + &syncobjs); > + if (ret < 0) > + return ret; > + > + ret = drm_syncobj_array_wait(dev, file_private, > + NULL, args, syncobjs, true); > + > + drm_syncobj_array_free(syncobjs, args->count_handles); > + > + return ret; > +} > + > + > int > drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private) > diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h > index 300f336633f2..0092111d002c 100644 > --- a/include/uapi/drm/drm.h > +++ b/include/uapi/drm/drm.h > @@ -737,6 +737,7 @@ struct drm_syncobj_handle { > > #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) > #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) > +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) > struct drm_syncobj_wait { > __u64 handles; > /* absolute timeout */ > @@ -747,6 +748,19 @@ struct drm_syncobj_wait { > __u32 pad; > }; > > +struct drm_syncobj_timeline_wait { > + __u64 handles; > + /* wait on specific timeline point for every handles*/ > + __u64 points; > + /* absolute timeout */ > + __s64 timeout_nsec; > + __u32 count_handles; > + __u32 flags; > + __u32 first_signaled; /* only valid when not waiting all */ > + __u32 pad; > +}; > + > + > struct drm_syncobj_array { > __u64 handles; > __u32 count_handles; > @@ -909,6 +923,7 @@ extern "C" { > #define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) > #define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) > > +#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) > /** > * Device specific ioctls should only be in their respective headers > * The device specific ioctl range is from 0x40 to 0x9f.
Am 18.03.19 um 17:59 schrieb Lionel Landwerlin: > On 15/03/2019 12:09, Chunming Zhou wrote: >> points array is one-to-one match with syncobjs array. >> v2: >> add seperate ioctl for timeline point wait, otherwise break uapi. >> v3: >> userspace can specify two kinds waits:: >> a. Wait for time point to be completed. >> b. and wait for time point to become available >> v4: >> rebase >> v5: >> add comment for xxx_WAIT_AVAILABLE >> v6: rebase and rework on new container >> v7: drop _WAIT_COMPLETED, it is the default anyway >> v8: correctly handle garbage collected fences >> >> Signed-off-by: Chunming Zhou <david1.zhou@amd.com> >> Signed-off-by: Christian König <christian.koenig@amd.com> >> Cc: Daniel Rakos <Daniel.Rakos@amd.com> >> Cc: Jason Ekstrand <jason@jlekstrand.net> >> Cc: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> >> Cc: Dave Airlie <airlied@redhat.com> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> --- >> drivers/gpu/drm/drm_internal.h | 2 + >> drivers/gpu/drm/drm_ioctl.c | 2 + >> drivers/gpu/drm/drm_syncobj.c | 153 ++++++++++++++++++++++++++------- >> include/uapi/drm/drm.h | 15 ++++ >> 4 files changed, 143 insertions(+), 29 deletions(-) >> >> diff --git a/drivers/gpu/drm/drm_internal.h >> b/drivers/gpu/drm/drm_internal.h >> index 251d67e04c2d..331ac6225b58 100644 >> --- a/drivers/gpu/drm/drm_internal.h >> +++ b/drivers/gpu/drm/drm_internal.h >> @@ -182,6 +182,8 @@ int drm_syncobj_fd_to_handle_ioctl(struct >> drm_device *dev, void *data, >> struct drm_file *file_private); >> int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file_private); >> +int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, >> + struct drm_file *file_private); >> int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file_private); >> int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, >> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c >> index 687943df58e1..c984654646fa 100644 >> --- a/drivers/gpu/drm/drm_ioctl.c >> +++ b/drivers/gpu/drm/drm_ioctl.c >> @@ -688,6 +688,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { >> DRM_UNLOCKED|DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, >> DRM_UNLOCKED|DRM_RENDER_ALLOW), >> + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, >> drm_syncobj_timeline_wait_ioctl, >> + DRM_UNLOCKED|DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl, >> DRM_UNLOCKED|DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, >> diff --git a/drivers/gpu/drm/drm_syncobj.c >> b/drivers/gpu/drm/drm_syncobj.c >> index 19a9ce638119..eae51978cda4 100644 >> --- a/drivers/gpu/drm/drm_syncobj.c >> +++ b/drivers/gpu/drm/drm_syncobj.c >> @@ -61,6 +61,7 @@ struct syncobj_wait_entry { >> struct task_struct *task; >> struct dma_fence *fence; >> struct dma_fence_cb fence_cb; >> + u64 point; >> }; >> static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, >> @@ -95,6 +96,8 @@ EXPORT_SYMBOL(drm_syncobj_find); >> static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj, >> struct syncobj_wait_entry *wait) >> { >> + struct dma_fence *fence; >> + >> if (wait->fence) >> return; >> @@ -103,11 +106,15 @@ static void drm_syncobj_fence_add_wait(struct >> drm_syncobj *syncobj, >> * have the lock, try one more time just to be sure we don't add a >> * callback when a fence has already been set. 
>> */ >> - if (syncobj->fence) >> - wait->fence = dma_fence_get( >> - rcu_dereference_protected(syncobj->fence, 1)); >> - else >> + fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, >> 1)); >> + if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) { >> + dma_fence_put(fence); >> list_add_tail(&wait->node, &syncobj->cb_list); >> + } else if (!fence) { >> + wait->fence = dma_fence_get_stub(); >> + } else { >> + wait->fence = fence; >> + } >> spin_unlock(&syncobj->lock); >> } >> @@ -149,10 +156,8 @@ void drm_syncobj_add_point(struct drm_syncobj >> *syncobj, >> dma_fence_chain_init(chain, prev, fence, point); >> rcu_assign_pointer(syncobj->fence, &chain->base); >> - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { >> - list_del_init(&cur->node); >> + list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) >> syncobj_wait_syncobj_func(syncobj, cur); >> - } >> spin_unlock(&syncobj->lock); >> /* Walk the chain once to trigger garbage collection */ >> @@ -184,10 +189,8 @@ void drm_syncobj_replace_fence(struct >> drm_syncobj *syncobj, >> rcu_assign_pointer(syncobj->fence, fence); >> if (fence != old_fence) { >> - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { >> - list_del_init(&cur->node); >> + list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) >> syncobj_wait_syncobj_func(syncobj, cur); >> - } >> } >> spin_unlock(&syncobj->lock); >> @@ -644,13 +647,27 @@ static void syncobj_wait_fence_func(struct >> dma_fence *fence, >> static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, >> struct syncobj_wait_entry *wait) >> { >> + struct dma_fence *fence; >> + >> /* This happens inside the syncobj lock */ >> - wait->fence = >> dma_fence_get(rcu_dereference_protected(syncobj->fence, >> - lockdep_is_held(&syncobj->lock))); >> + fence = rcu_dereference_protected(syncobj->fence, >> + lockdep_is_held(&syncobj->lock)); >> + dma_fence_get(fence); >> + if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) { >> + dma_fence_put(fence); >> + return; >> + } else if (!fence) { >> + wait->fence = dma_fence_get_stub(); >> + } else { >> + wait->fence = fence; >> + } >> + >> wake_up_process(wait->task); >> + list_del_init(&wait->node); >> } >> static signed long drm_syncobj_array_wait_timeout(struct >> drm_syncobj **syncobjs, >> + void __user *user_points, >> uint32_t count, >> uint32_t flags, >> signed long timeout, >> @@ -658,12 +675,27 @@ static signed long >> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, >> { >> struct syncobj_wait_entry *entries; >> struct dma_fence *fence; >> + uint64_t *points; >> uint32_t signaled_count, i; >> - entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); >> - if (!entries) >> + points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); >> + if (points == NULL) >> return -ENOMEM; >> + if (!user_points) { >> + memset(points, 0, count * sizeof(uint64_t)); >> + >> + } else if (copy_from_user(points, user_points, >> + sizeof(uint64_t) * count)) { >> + timeout = -EFAULT; >> + goto err_free_points; >> + } >> + >> + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); >> + if (!entries) { >> + timeout = -ENOMEM; >> + goto err_free_points; >> + } >> /* Walk the list of sync objects and initialize entries. 
We do >> * this up-front so that we can properly return -EINVAL if >> there is >> * a syncobj with a missing fence and then never have the >> chance of >> @@ -671,9 +703,13 @@ static signed long >> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, >> */ >> signaled_count = 0; >> for (i = 0; i < count; ++i) { >> + struct dma_fence *fence; >> + >> entries[i].task = current; >> - entries[i].fence = drm_syncobj_fence_get(syncobjs[i]); >> - if (!entries[i].fence) { >> + entries[i].point = points[i]; >> + fence = drm_syncobj_fence_get(syncobjs[i]); >> + if (!fence || dma_fence_chain_find_seqno(&fence, points[i])) { >> + dma_fence_put(fence); >> if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { >> continue; >> } else { >> @@ -682,7 +718,13 @@ static signed long >> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, >> } >> } >> - if (dma_fence_is_signaled(entries[i].fence)) { >> + if (fence) >> + entries[i].fence = fence; >> + else >> + entries[i].fence = dma_fence_get_stub(); >> + >> + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) || > > > Hey David, > > Could you remind me what DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE is used > for? > > I didn't have a use for it in userspace, The flag is used to wait for fences for a sequence number to show up. Christian. > > Thanks, > > -Lionel > > >> + dma_fence_is_signaled(entries[i].fence)) { >> if (signaled_count == 0 && idx) >> *idx = i; >> signaled_count++; >> @@ -715,7 +757,8 @@ static signed long >> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, >> if (!fence) >> continue; >> - if (dma_fence_is_signaled(fence) || >> + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) || >> + dma_fence_is_signaled(fence) || >> (!entries[i].fence_cb.func && >> dma_fence_add_callback(fence, >> &entries[i].fence_cb, >> @@ -760,6 +803,9 @@ static signed long >> drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, >> } >> kfree(entries); >> +err_free_points: >> + kfree(points); >> + >> return timeout; >> } >> @@ -799,19 +845,33 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies); >> static int drm_syncobj_array_wait(struct drm_device *dev, >> struct drm_file *file_private, >> struct drm_syncobj_wait *wait, >> - struct drm_syncobj **syncobjs) >> + struct drm_syncobj_timeline_wait *timeline_wait, >> + struct drm_syncobj **syncobjs, bool timeline) >> { >> - signed long timeout = >> drm_timeout_abs_to_jiffies(wait->timeout_nsec); >> + signed long timeout = 0; >> uint32_t first = ~0; >> - timeout = drm_syncobj_array_wait_timeout(syncobjs, >> - wait->count_handles, >> - wait->flags, >> - timeout, &first); >> - if (timeout < 0) >> - return timeout; >> - >> - wait->first_signaled = first; >> + if (!timeline) { >> + timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); >> + timeout = drm_syncobj_array_wait_timeout(syncobjs, >> + NULL, >> + wait->count_handles, >> + wait->flags, >> + timeout, &first); >> + if (timeout < 0) >> + return timeout; >> + wait->first_signaled = first; >> + } else { >> + timeout = >> drm_timeout_abs_to_jiffies(timeline_wait->timeout_nsec); >> + timeout = drm_syncobj_array_wait_timeout(syncobjs, >> + u64_to_user_ptr(timeline_wait->points), >> + timeline_wait->count_handles, >> + timeline_wait->flags, >> + timeout, &first); >> + if (timeout < 0) >> + return timeout; >> + timeline_wait->first_signaled = first; >> + } >> return 0; >> } >> @@ -897,13 +957,48 @@ drm_syncobj_wait_ioctl(struct drm_device >> *dev, void *data, >> return ret; >> ret = drm_syncobj_array_wait(dev, file_private, >> - args, syncobjs); >> 
+ args, NULL, syncobjs, false); >> drm_syncobj_array_free(syncobjs, args->count_handles); >> return ret; >> } >> +int >> +drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, >> + struct drm_file *file_private) >> +{ >> + struct drm_syncobj_timeline_wait *args = data; >> + struct drm_syncobj **syncobjs; >> + int ret = 0; >> + >> + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) >> + return -ENODEV; >> + >> + if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | >> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | >> + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) >> + return -EINVAL; >> + >> + if (args->count_handles == 0) >> + return -EINVAL; >> + >> + ret = drm_syncobj_array_find(file_private, >> + u64_to_user_ptr(args->handles), >> + args->count_handles, >> + &syncobjs); >> + if (ret < 0) >> + return ret; >> + >> + ret = drm_syncobj_array_wait(dev, file_private, >> + NULL, args, syncobjs, true); >> + >> + drm_syncobj_array_free(syncobjs, args->count_handles); >> + >> + return ret; >> +} >> + >> + >> int >> drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file_private) >> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h >> index 300f336633f2..0092111d002c 100644 >> --- a/include/uapi/drm/drm.h >> +++ b/include/uapi/drm/drm.h >> @@ -737,6 +737,7 @@ struct drm_syncobj_handle { >> #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) >> #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) >> +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) >> struct drm_syncobj_wait { >> __u64 handles; >> /* absolute timeout */ >> @@ -747,6 +748,19 @@ struct drm_syncobj_wait { >> __u32 pad; >> }; >> +struct drm_syncobj_timeline_wait { >> + __u64 handles; >> + /* wait on specific timeline point for every handles*/ >> + __u64 points; >> + /* absolute timeout */ >> + __s64 timeout_nsec; >> + __u32 count_handles; >> + __u32 flags; >> + __u32 first_signaled; /* only valid when not waiting all */ >> + __u32 pad; >> +}; >> + >> + >> struct drm_syncobj_array { >> __u64 handles; >> __u32 count_handles; >> @@ -909,6 +923,7 @@ extern "C" { >> #define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct >> drm_mode_get_lease) >> #define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct >> drm_mode_revoke_lease) >> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct >> drm_syncobj_timeline_wait) >> /** >> * Device specific ioctls should only be in their respective headers >> * The device specific ioctl range is from 0x40 to 0x9f. > >
On 18/03/2019 17:20, Koenig, Christian wrote:
>>> -        if (dma_fence_is_signaled(entries[i].fence)) {
>>> +        if (fence)
>>> +            entries[i].fence = fence;
>>> +        else
>>> +            entries[i].fence = dma_fence_get_stub();
>>> +
>>> +        if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) ||
>> Hey David,
>>
>> Could you remind me what DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE is used
>> for?
>>
>> I didn't have a use for it in userspace,
> The flag is used to wait for fences for a sequence number to show up.
>
> Christian.

Thanks Christian.

I guess I missed the additive nature of WAIT_FOR_SUBMIT.

It feels like WAIT_AVAILABLE almost deserves its own commit as it affects both timelines & binary syncobjs.

-Lionel
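For readers following the exchange above, here is a minimal userspace sketch of what DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE means in practice, using the uAPI added by this patch: with the flag set, the ioctl returns as soon as a fence has been attached for the requested timeline point (i.e. the point has been submitted), whereas the default behaviour additionally waits for that fence to signal. The wrapper function, fd handling, and header path are assumptions for illustration only, not part of the series:

```c
/* Illustrative helper (not from the patch): block until timeline point
 * `point` on syncobj `handle` is *available* (a fence for it has been
 * submitted), without waiting for it to signal. Assumes the uapi header
 * in use already carries the definitions added by this series.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

static int wait_point_available(int drm_fd, uint32_t handle, uint64_t point,
                                int64_t abs_timeout_nsec)
{
	struct drm_syncobj_timeline_wait wait;

	memset(&wait, 0, sizeof(wait));
	wait.handles = (uint64_t)(uintptr_t)&handle;	/* array of one handle */
	wait.points = (uint64_t)(uintptr_t)&point;	/* matching point */
	wait.timeout_nsec = abs_timeout_nsec;		/* absolute, in ns */
	wait.count_handles = 1;
	wait.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE;

	return ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &wait);
}
```

In real code this would typically go through libdrm's drmIoctl() so interrupted waits are restarted.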
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 251d67e04c2d..331ac6225b58 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -182,6 +182,8 @@ int drm_syncobj_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 687943df58e1..c984654646fa 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -688,6 +688,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_WAIT, drm_syncobj_wait_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, drm_syncobj_timeline_wait_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_RESET, drm_syncobj_reset_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 19a9ce638119..eae51978cda4 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -61,6 +61,7 @@ struct syncobj_wait_entry { struct task_struct *task; struct dma_fence *fence; struct dma_fence_cb fence_cb; + u64 point; }; static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, @@ -95,6 +96,8 @@ EXPORT_SYMBOL(drm_syncobj_find); static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj, struct syncobj_wait_entry *wait) { + struct dma_fence *fence; + if (wait->fence) return; @@ -103,11 +106,15 @@ static void drm_syncobj_fence_add_wait(struct drm_syncobj *syncobj, * have the lock, try one more time just to be sure we don't add a * callback when a fence has already been set. 
*/ - if (syncobj->fence) - wait->fence = dma_fence_get( - rcu_dereference_protected(syncobj->fence, 1)); - else + fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, 1)); + if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) { + dma_fence_put(fence); list_add_tail(&wait->node, &syncobj->cb_list); + } else if (!fence) { + wait->fence = dma_fence_get_stub(); + } else { + wait->fence = fence; + } spin_unlock(&syncobj->lock); } @@ -149,10 +156,8 @@ void drm_syncobj_add_point(struct drm_syncobj *syncobj, dma_fence_chain_init(chain, prev, fence, point); rcu_assign_pointer(syncobj->fence, &chain->base); - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { - list_del_init(&cur->node); + list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) syncobj_wait_syncobj_func(syncobj, cur); - } spin_unlock(&syncobj->lock); /* Walk the chain once to trigger garbage collection */ @@ -184,10 +189,8 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, rcu_assign_pointer(syncobj->fence, fence); if (fence != old_fence) { - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { - list_del_init(&cur->node); + list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) syncobj_wait_syncobj_func(syncobj, cur); - } } spin_unlock(&syncobj->lock); @@ -644,13 +647,27 @@ static void syncobj_wait_fence_func(struct dma_fence *fence, static void syncobj_wait_syncobj_func(struct drm_syncobj *syncobj, struct syncobj_wait_entry *wait) { + struct dma_fence *fence; + /* This happens inside the syncobj lock */ - wait->fence = dma_fence_get(rcu_dereference_protected(syncobj->fence, - lockdep_is_held(&syncobj->lock))); + fence = rcu_dereference_protected(syncobj->fence, + lockdep_is_held(&syncobj->lock)); + dma_fence_get(fence); + if (!fence || dma_fence_chain_find_seqno(&fence, wait->point)) { + dma_fence_put(fence); + return; + } else if (!fence) { + wait->fence = dma_fence_get_stub(); + } else { + wait->fence = fence; + } + wake_up_process(wait->task); + list_del_init(&wait->node); } static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, + void __user *user_points, uint32_t count, uint32_t flags, signed long timeout, @@ -658,12 +675,27 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, { struct syncobj_wait_entry *entries; struct dma_fence *fence; + uint64_t *points; uint32_t signaled_count, i; - entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); - if (!entries) + points = kmalloc_array(count, sizeof(*points), GFP_KERNEL); + if (points == NULL) return -ENOMEM; + if (!user_points) { + memset(points, 0, count * sizeof(uint64_t)); + + } else if (copy_from_user(points, user_points, + sizeof(uint64_t) * count)) { + timeout = -EFAULT; + goto err_free_points; + } + + entries = kcalloc(count, sizeof(*entries), GFP_KERNEL); + if (!entries) { + timeout = -ENOMEM; + goto err_free_points; + } /* Walk the list of sync objects and initialize entries. 
We do * this up-front so that we can properly return -EINVAL if there is * a syncobj with a missing fence and then never have the chance of @@ -671,9 +703,13 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, */ signaled_count = 0; for (i = 0; i < count; ++i) { + struct dma_fence *fence; + entries[i].task = current; - entries[i].fence = drm_syncobj_fence_get(syncobjs[i]); - if (!entries[i].fence) { + entries[i].point = points[i]; + fence = drm_syncobj_fence_get(syncobjs[i]); + if (!fence || dma_fence_chain_find_seqno(&fence, points[i])) { + dma_fence_put(fence); if (flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT) { continue; } else { @@ -682,7 +718,13 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, } } - if (dma_fence_is_signaled(entries[i].fence)) { + if (fence) + entries[i].fence = fence; + else + entries[i].fence = dma_fence_get_stub(); + + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) || + dma_fence_is_signaled(entries[i].fence)) { if (signaled_count == 0 && idx) *idx = i; signaled_count++; @@ -715,7 +757,8 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, if (!fence) continue; - if (dma_fence_is_signaled(fence) || + if ((flags & DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE) || + dma_fence_is_signaled(fence) || (!entries[i].fence_cb.func && dma_fence_add_callback(fence, &entries[i].fence_cb, @@ -760,6 +803,9 @@ static signed long drm_syncobj_array_wait_timeout(struct drm_syncobj **syncobjs, } kfree(entries); +err_free_points: + kfree(points); + return timeout; } @@ -799,19 +845,33 @@ EXPORT_SYMBOL(drm_timeout_abs_to_jiffies); static int drm_syncobj_array_wait(struct drm_device *dev, struct drm_file *file_private, struct drm_syncobj_wait *wait, - struct drm_syncobj **syncobjs) + struct drm_syncobj_timeline_wait *timeline_wait, + struct drm_syncobj **syncobjs, bool timeline) { - signed long timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); + signed long timeout = 0; uint32_t first = ~0; - timeout = drm_syncobj_array_wait_timeout(syncobjs, - wait->count_handles, - wait->flags, - timeout, &first); - if (timeout < 0) - return timeout; - - wait->first_signaled = first; + if (!timeline) { + timeout = drm_timeout_abs_to_jiffies(wait->timeout_nsec); + timeout = drm_syncobj_array_wait_timeout(syncobjs, + NULL, + wait->count_handles, + wait->flags, + timeout, &first); + if (timeout < 0) + return timeout; + wait->first_signaled = first; + } else { + timeout = drm_timeout_abs_to_jiffies(timeline_wait->timeout_nsec); + timeout = drm_syncobj_array_wait_timeout(syncobjs, + u64_to_user_ptr(timeline_wait->points), + timeline_wait->count_handles, + timeline_wait->flags, + timeout, &first); + if (timeout < 0) + return timeout; + timeline_wait->first_signaled = first; + } return 0; } @@ -897,13 +957,48 @@ drm_syncobj_wait_ioctl(struct drm_device *dev, void *data, return ret; ret = drm_syncobj_array_wait(dev, file_private, - args, syncobjs); + args, NULL, syncobjs, false); drm_syncobj_array_free(syncobjs, args->count_handles); return ret; } +int +drm_syncobj_timeline_wait_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_timeline_wait *args = data; + struct drm_syncobj **syncobjs; + int ret = 0; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -ENODEV; + + if (args->flags & ~(DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | + DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE)) + return -EINVAL; + + if (args->count_handles 
== 0) + return -EINVAL; + + ret = drm_syncobj_array_find(file_private, + u64_to_user_ptr(args->handles), + args->count_handles, + &syncobjs); + if (ret < 0) + return ret; + + ret = drm_syncobj_array_wait(dev, file_private, + NULL, args, syncobjs, true); + + drm_syncobj_array_free(syncobjs, args->count_handles); + + return ret; +} + + int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private) diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index 300f336633f2..0092111d002c 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -737,6 +737,7 @@ struct drm_syncobj_handle { #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE (1 << 2) struct drm_syncobj_wait { __u64 handles; /* absolute timeout */ @@ -747,6 +748,19 @@ struct drm_syncobj_wait { __u32 pad; }; +struct drm_syncobj_timeline_wait { + __u64 handles; + /* wait on specific timeline point for every handles*/ + __u64 points; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + + struct drm_syncobj_array { __u64 handles; __u32 count_handles; @@ -909,6 +923,7 @@ extern "C" { #define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease) #define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease) +#define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) /** * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f.
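As a companion to the raw diff above, a hedged sketch of the common case: waiting for several timeline points to actually signal (the default, now that _WAIT_COMPLETED was dropped in v7), with one point per handle as the commit message describes. The wrapper, its arguments, and the choice of WAIT_ALL are illustrative only:

```c
/* Illustrative only: wait for all `count` (handle, point) pairs to signal
 * before `abs_timeout_nsec`. Mirrors the one-to-one handles/points layout
 * of struct drm_syncobj_timeline_wait added by this patch.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

static int wait_timeline_points(int drm_fd, const uint32_t *handles,
                                const uint64_t *points, uint32_t count,
                                int64_t abs_timeout_nsec)
{
	struct drm_syncobj_timeline_wait wait;

	memset(&wait, 0, sizeof(wait));
	wait.handles = (uint64_t)(uintptr_t)handles;
	wait.points = (uint64_t)(uintptr_t)points;	/* points[i] pairs with handles[i] */
	wait.timeout_nsec = abs_timeout_nsec;
	wait.count_handles = count;
	wait.flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;

	return ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT, &wait);
}
```

Without DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, first_signaled reports the index of the first satisfied entry, matching the existing drm_syncobj_wait behaviour.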