Message ID | 20190325083224.2983-8-david1.zhou@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/9] dma-buf: add new dma_fence_chain container v7 | expand |
On 25/03/2019 08:32, Chunming Zhou wrote: > v2: individually allocate chain array, since chain node is free independently. > v3: all existing points must be already signaled before cpu perform signal operation, > so add check condition for that. > v4: remove v3 change and add checking to prevent out-of-order > v5: unify binary and timeline > > Signed-off-by: Chunming Zhou <david1.zhou@amd.com> > Cc: Tobias Hector <Tobias.Hector@amd.com> > Cc: Jason Ekstrand <jason@jlekstrand.net> > Cc: Dave Airlie <airlied@redhat.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > drivers/gpu/drm/drm_internal.h | 2 + > drivers/gpu/drm/drm_ioctl.c | 2 + > drivers/gpu/drm/drm_syncobj.c | 73 ++++++++++++++++++++++++++++++++++ > include/uapi/drm/drm.h | 1 + > 4 files changed, 78 insertions(+) > > diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h > index dd11ae5f1eef..d9a483a5fce0 100644 > --- a/drivers/gpu/drm/drm_internal.h > +++ b/drivers/gpu/drm/drm_internal.h > @@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private); > int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private); > +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private); > int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private); > > diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c > index 92b3b7b2fd81..d337f161909c 100644 > --- a/drivers/gpu/drm/drm_ioctl.c > +++ b/drivers/gpu/drm/drm_ioctl.c > @@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { > DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, > DRM_UNLOCKED|DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl, > + DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl, > DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index ee2d66e047e7..099596190845 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -1183,6 +1183,79 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, > return ret; > } > > +int > +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file_private) > +{ > + struct drm_syncobj_timeline_array *args = data; > + struct drm_syncobj **syncobjs; > + struct dma_fence_chain **chains; > + uint64_t *points; > + uint32_t i, j; > + int ret; > + > + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) > + return -EOPNOTSUPP; > + > + if (args->pad != 0) > + return -EINVAL; > + > + if (args->count_handles == 0) > + return -EINVAL; > + > + ret = drm_syncobj_array_find(file_private, > + u64_to_user_ptr(args->handles), > + args->count_handles, > + &syncobjs); > + if (ret < 0) > + return ret; > + > + points = kmalloc_array(args->count_handles, sizeof(*points), > + GFP_KERNEL); > + if (!points) { > + ret = -ENOMEM; > + goto out; > + } > + if (!u64_to_user_ptr(args->points)) { > + memset(points, 0, args->count_handles * sizeof(uint64_t)); > + } else if (copy_from_user(points, u64_to_user_ptr(args->points), > + sizeof(uint64_t) * args->count_handles)) { > + ret = -EFAULT; > + goto err_points; > + } > + > + chains = kmalloc_array(args->count_handles, sizeof(void *), GFP_KERNEL); > + if (!chains) { > + ret = -ENOMEM; > + goto err_points; > + } > + for (i = 0; i < args->count_handles; i++) { > + chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL); > + if (!chains[i]) { > + for (j = 0; j < i; j++) > + kfree(chains[j]); > + ret = -ENOMEM; > + goto err_chains; > + } > + } > + > + for (i = 0; i < args->count_handles; i++) { > + struct dma_fence *fence = dma_fence_get_stub(); > + > + drm_syncobj_add_point(syncobjs[i], chains[i], > + fence, points[i]); > + dma_fence_put(fence); > + } I think I found an issue where the implementation doesn't match what the spec requires on host signaling. Consider the following : Timeline semaphore has a value of 4. A device submits a commands to signal point 5. The host signals point 6. The value of the timeline will remain 4 until the device commands complete, at which point it will change to 6. But the specification seems to say that the timeline value should be 6 once the host signaling completes. The only option that I see working would be to add point 6 in the timeline and wait on that point to complete. Christian, David, what do you think? Thanks, -Lionel > +err_chains: > + kfree(chains); > +err_points: > + kfree(points); > +out: > + drm_syncobj_array_free(syncobjs, args->count_handles); > + > + return ret; > +} > + > int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, > struct drm_file *file_private) > { > diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h > index e8d0d6b51875..236b01a1fabf 100644 > --- a/include/uapi/drm/drm.h > +++ b/include/uapi/drm/drm.h > @@ -943,6 +943,7 @@ extern "C" { > #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) > #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct drm_syncobj_timeline_array) > #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) > +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) > > /** > * Device specific ioctls should only be in their respective headers
在 2019/3/28 20:53, Lionel Landwerlin 写道: > On 25/03/2019 08:32, Chunming Zhou wrote: >> v2: individually allocate chain array, since chain node is free >> independently. >> v3: all existing points must be already signaled before cpu perform >> signal operation, >> so add check condition for that. >> v4: remove v3 change and add checking to prevent out-of-order >> v5: unify binary and timeline >> >> Signed-off-by: Chunming Zhou <david1.zhou@amd.com> >> Cc: Tobias Hector <Tobias.Hector@amd.com> >> Cc: Jason Ekstrand <jason@jlekstrand.net> >> Cc: Dave Airlie <airlied@redhat.com> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> --- >> drivers/gpu/drm/drm_internal.h | 2 + >> drivers/gpu/drm/drm_ioctl.c | 2 + >> drivers/gpu/drm/drm_syncobj.c | 73 ++++++++++++++++++++++++++++++++++ >> include/uapi/drm/drm.h | 1 + >> 4 files changed, 78 insertions(+) >> >> diff --git a/drivers/gpu/drm/drm_internal.h >> b/drivers/gpu/drm/drm_internal.h >> index dd11ae5f1eef..d9a483a5fce0 100644 >> --- a/drivers/gpu/drm/drm_internal.h >> +++ b/drivers/gpu/drm/drm_internal.h >> @@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device >> *dev, void *data, >> struct drm_file *file_private); >> int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file_private); >> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void >> *data, >> + struct drm_file *file_private); >> int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file_private); >> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c >> index 92b3b7b2fd81..d337f161909c 100644 >> --- a/drivers/gpu/drm/drm_ioctl.c >> +++ b/drivers/gpu/drm/drm_ioctl.c >> @@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { >> DRM_UNLOCKED|DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, >> DRM_UNLOCKED|DRM_RENDER_ALLOW), >> + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, >> drm_syncobj_timeline_signal_ioctl, >> + DRM_UNLOCKED|DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl, >> DRM_UNLOCKED|DRM_RENDER_ALLOW), >> DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, >> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), >> diff --git a/drivers/gpu/drm/drm_syncobj.c >> b/drivers/gpu/drm/drm_syncobj.c >> index ee2d66e047e7..099596190845 100644 >> --- a/drivers/gpu/drm/drm_syncobj.c >> +++ b/drivers/gpu/drm/drm_syncobj.c >> @@ -1183,6 +1183,79 @@ drm_syncobj_signal_ioctl(struct drm_device >> *dev, void *data, >> return ret; >> } >> +int >> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, >> + struct drm_file *file_private) >> +{ >> + struct drm_syncobj_timeline_array *args = data; >> + struct drm_syncobj **syncobjs; >> + struct dma_fence_chain **chains; >> + uint64_t *points; >> + uint32_t i, j; >> + int ret; >> + >> + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) >> + return -EOPNOTSUPP; >> + >> + if (args->pad != 0) >> + return -EINVAL; >> + >> + if (args->count_handles == 0) >> + return -EINVAL; >> + >> + ret = drm_syncobj_array_find(file_private, >> + u64_to_user_ptr(args->handles), >> + args->count_handles, >> + &syncobjs); >> + if (ret < 0) >> + return ret; >> + >> + points = kmalloc_array(args->count_handles, sizeof(*points), >> + GFP_KERNEL); >> + if (!points) { >> + ret = -ENOMEM; >> + goto out; >> + } >> + if (!u64_to_user_ptr(args->points)) { >> + memset(points, 0, args->count_handles * sizeof(uint64_t)); >> + } else if (copy_from_user(points, u64_to_user_ptr(args->points), >> + sizeof(uint64_t) * args->count_handles)) { >> + ret = -EFAULT; >> + goto err_points; >> + } >> + >> + chains = kmalloc_array(args->count_handles, sizeof(void *), >> GFP_KERNEL); >> + if (!chains) { >> + ret = -ENOMEM; >> + goto err_points; >> + } >> + for (i = 0; i < args->count_handles; i++) { >> + chains[i] = kzalloc(sizeof(struct dma_fence_chain), >> GFP_KERNEL); >> + if (!chains[i]) { >> + for (j = 0; j < i; j++) >> + kfree(chains[j]); >> + ret = -ENOMEM; >> + goto err_chains; >> + } >> + } >> + >> + for (i = 0; i < args->count_handles; i++) { >> + struct dma_fence *fence = dma_fence_get_stub(); >> + >> + drm_syncobj_add_point(syncobjs[i], chains[i], >> + fence, points[i]); >> + dma_fence_put(fence); >> + } > > > I think I found an issue where the implementation doesn't match what > the spec requires on host signaling. > > Consider the following : > > > Timeline semaphore has a value of 4. > > A device submits a commands to signal point 5. > > The host signals point 6. > > > The value of the timeline will remain 4 until the device commands > complete, at which point it will change to 6. > > But the specification seems to say that the timeline value should be 6 > once the host signaling completes. > > > The only option that I see working would be to add point 6 in the > timeline and wait on that point to complete. > > > Christian, David, what do you think? This is exactly what I said before, see V3 comment in the patch head. But Jeson said this should be guranteed by user and application. Btw, if we wait all previous points completion this ioctl could be blocked. -David > > > Thanks, > > > -Lionel > > >> +err_chains: >> + kfree(chains); >> +err_points: >> + kfree(points); >> +out: >> + drm_syncobj_array_free(syncobjs, args->count_handles); >> + >> + return ret; >> +} >> + >> int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file_private) >> { >> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h >> index e8d0d6b51875..236b01a1fabf 100644 >> --- a/include/uapi/drm/drm.h >> +++ b/include/uapi/drm/drm.h >> @@ -943,6 +943,7 @@ extern "C" { >> #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct >> drm_syncobj_timeline_wait) >> #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct >> drm_syncobj_timeline_array) >> #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct >> drm_syncobj_transfer) >> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct >> drm_syncobj_timeline_array) >> /** >> * Device specific ioctls should only be in their respective headers > >
On 28/03/2019 13:08, Chunming Zhou wrote: > 在 2019/3/28 20:53, Lionel Landwerlin 写道: >> On 25/03/2019 08:32, Chunming Zhou wrote: >>> v2: individually allocate chain array, since chain node is free >>> independently. >>> v3: all existing points must be already signaled before cpu perform >>> signal operation, >>> so add check condition for that. >>> v4: remove v3 change and add checking to prevent out-of-order >>> v5: unify binary and timeline >>> >>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com> >>> Cc: Tobias Hector <Tobias.Hector@amd.com> >>> Cc: Jason Ekstrand <jason@jlekstrand.net> >>> Cc: Dave Airlie <airlied@redhat.com> >>> Cc: Chris Wilson <chris@chris-wilson.co.uk> >>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >>> --- >>> drivers/gpu/drm/drm_internal.h | 2 + >>> drivers/gpu/drm/drm_ioctl.c | 2 + >>> drivers/gpu/drm/drm_syncobj.c | 73 ++++++++++++++++++++++++++++++++++ >>> include/uapi/drm/drm.h | 1 + >>> 4 files changed, 78 insertions(+) >>> >>> diff --git a/drivers/gpu/drm/drm_internal.h >>> b/drivers/gpu/drm/drm_internal.h >>> index dd11ae5f1eef..d9a483a5fce0 100644 >>> --- a/drivers/gpu/drm/drm_internal.h >>> +++ b/drivers/gpu/drm/drm_internal.h >>> @@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device >>> *dev, void *data, >>> struct drm_file *file_private); >>> int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, >>> struct drm_file *file_private); >>> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void >>> *data, >>> + struct drm_file *file_private); >>> int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, >>> struct drm_file *file_private); >>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c >>> index 92b3b7b2fd81..d337f161909c 100644 >>> --- a/drivers/gpu/drm/drm_ioctl.c >>> +++ b/drivers/gpu/drm/drm_ioctl.c >>> @@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { >>> DRM_UNLOCKED|DRM_RENDER_ALLOW), >>> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, >>> DRM_UNLOCKED|DRM_RENDER_ALLOW), >>> + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, >>> drm_syncobj_timeline_signal_ioctl, >>> + DRM_UNLOCKED|DRM_RENDER_ALLOW), >>> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl, >>> DRM_UNLOCKED|DRM_RENDER_ALLOW), >>> DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, >>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), >>> diff --git a/drivers/gpu/drm/drm_syncobj.c >>> b/drivers/gpu/drm/drm_syncobj.c >>> index ee2d66e047e7..099596190845 100644 >>> --- a/drivers/gpu/drm/drm_syncobj.c >>> +++ b/drivers/gpu/drm/drm_syncobj.c >>> @@ -1183,6 +1183,79 @@ drm_syncobj_signal_ioctl(struct drm_device >>> *dev, void *data, >>> return ret; >>> } >>> +int >>> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, >>> + struct drm_file *file_private) >>> +{ >>> + struct drm_syncobj_timeline_array *args = data; >>> + struct drm_syncobj **syncobjs; >>> + struct dma_fence_chain **chains; >>> + uint64_t *points; >>> + uint32_t i, j; >>> + int ret; >>> + >>> + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) >>> + return -EOPNOTSUPP; >>> + >>> + if (args->pad != 0) >>> + return -EINVAL; >>> + >>> + if (args->count_handles == 0) >>> + return -EINVAL; >>> + >>> + ret = drm_syncobj_array_find(file_private, >>> + u64_to_user_ptr(args->handles), >>> + args->count_handles, >>> + &syncobjs); >>> + if (ret < 0) >>> + return ret; >>> + >>> + points = kmalloc_array(args->count_handles, sizeof(*points), >>> + GFP_KERNEL); >>> + if (!points) { >>> + ret = -ENOMEM; >>> + goto out; >>> + } >>> + if (!u64_to_user_ptr(args->points)) { >>> + memset(points, 0, args->count_handles * sizeof(uint64_t)); >>> + } else if (copy_from_user(points, u64_to_user_ptr(args->points), >>> + sizeof(uint64_t) * args->count_handles)) { >>> + ret = -EFAULT; >>> + goto err_points; >>> + } >>> + >>> + chains = kmalloc_array(args->count_handles, sizeof(void *), >>> GFP_KERNEL); >>> + if (!chains) { >>> + ret = -ENOMEM; >>> + goto err_points; >>> + } >>> + for (i = 0; i < args->count_handles; i++) { >>> + chains[i] = kzalloc(sizeof(struct dma_fence_chain), >>> GFP_KERNEL); >>> + if (!chains[i]) { >>> + for (j = 0; j < i; j++) >>> + kfree(chains[j]); >>> + ret = -ENOMEM; >>> + goto err_chains; >>> + } >>> + } >>> + >>> + for (i = 0; i < args->count_handles; i++) { >>> + struct dma_fence *fence = dma_fence_get_stub(); >>> + >>> + drm_syncobj_add_point(syncobjs[i], chains[i], >>> + fence, points[i]); >>> + dma_fence_put(fence); >>> + } >> >> I think I found an issue where the implementation doesn't match what >> the spec requires on host signaling. >> >> Consider the following : >> >> >> Timeline semaphore has a value of 4. >> >> A device submits a commands to signal point 5. >> >> The host signals point 6. >> >> >> The value of the timeline will remain 4 until the device commands >> complete, at which point it will change to 6. >> >> But the specification seems to say that the timeline value should be 6 >> once the host signaling completes. >> >> >> The only option that I see working would be to add point 6 in the >> timeline and wait on that point to complete. >> >> >> Christian, David, what do you think? > This is exactly what I said before, see V3 comment in the patch head. > > But Jeson said this should be guranteed by user and application. > > Btw, if we wait all previous points completion this ioctl could be blocked. > > > -David I haven't seen anything that explicitly forbids that in the spec. Trying to get a clarification. -Lionel > > >> >> Thanks, >> >> >> -Lionel >> >> >>> +err_chains: >>> + kfree(chains); >>> +err_points: >>> + kfree(points); >>> +out: >>> + drm_syncobj_array_free(syncobjs, args->count_handles); >>> + >>> + return ret; >>> +} >>> + >>> int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, >>> struct drm_file *file_private) >>> { >>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h >>> index e8d0d6b51875..236b01a1fabf 100644 >>> --- a/include/uapi/drm/drm.h >>> +++ b/include/uapi/drm/drm.h >>> @@ -943,6 +943,7 @@ extern "C" { >>> #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct >>> drm_syncobj_timeline_wait) >>> #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct >>> drm_syncobj_timeline_array) >>> #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct >>> drm_syncobj_transfer) >>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct >>> drm_syncobj_timeline_array) >>> /** >>> * Device specific ioctls should only be in their respective headers >>
On 28/03/2019 13:33, Lionel Landwerlin wrote: > On 28/03/2019 13:08, Chunming Zhou wrote: >> 在 2019/3/28 20:53, Lionel Landwerlin 写道: >>> On 25/03/2019 08:32, Chunming Zhou wrote: >>>> v2: individually allocate chain array, since chain node is free >>>> independently. >>>> v3: all existing points must be already signaled before cpu perform >>>> signal operation, >>>> so add check condition for that. >>>> v4: remove v3 change and add checking to prevent out-of-order >>>> v5: unify binary and timeline >>>> >>>> Signed-off-by: Chunming Zhou <david1.zhou@amd.com> >>>> Cc: Tobias Hector <Tobias.Hector@amd.com> >>>> Cc: Jason Ekstrand <jason@jlekstrand.net> >>>> Cc: Dave Airlie <airlied@redhat.com> >>>> Cc: Chris Wilson <chris@chris-wilson.co.uk> >>>> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >>>> --- >>>> drivers/gpu/drm/drm_internal.h | 2 + >>>> drivers/gpu/drm/drm_ioctl.c | 2 + >>>> drivers/gpu/drm/drm_syncobj.c | 73 >>>> ++++++++++++++++++++++++++++++++++ >>>> include/uapi/drm/drm.h | 1 + >>>> 4 files changed, 78 insertions(+) >>>> >>>> diff --git a/drivers/gpu/drm/drm_internal.h >>>> b/drivers/gpu/drm/drm_internal.h >>>> index dd11ae5f1eef..d9a483a5fce0 100644 >>>> --- a/drivers/gpu/drm/drm_internal.h >>>> +++ b/drivers/gpu/drm/drm_internal.h >>>> @@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device >>>> *dev, void *data, >>>> struct drm_file *file_private); >>>> int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, >>>> struct drm_file *file_private); >>>> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void >>>> *data, >>>> + struct drm_file *file_private); >>>> int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, >>>> struct drm_file *file_private); >>>> diff --git a/drivers/gpu/drm/drm_ioctl.c >>>> b/drivers/gpu/drm/drm_ioctl.c >>>> index 92b3b7b2fd81..d337f161909c 100644 >>>> --- a/drivers/gpu/drm/drm_ioctl.c >>>> +++ b/drivers/gpu/drm/drm_ioctl.c >>>> @@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] >>>> = { >>>> DRM_UNLOCKED|DRM_RENDER_ALLOW), >>>> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, >>>> drm_syncobj_signal_ioctl, >>>> DRM_UNLOCKED|DRM_RENDER_ALLOW), >>>> + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, >>>> drm_syncobj_timeline_signal_ioctl, >>>> + DRM_UNLOCKED|DRM_RENDER_ALLOW), >>>> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl, >>>> DRM_UNLOCKED|DRM_RENDER_ALLOW), >>>> DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, >>>> drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), >>>> diff --git a/drivers/gpu/drm/drm_syncobj.c >>>> b/drivers/gpu/drm/drm_syncobj.c >>>> index ee2d66e047e7..099596190845 100644 >>>> --- a/drivers/gpu/drm/drm_syncobj.c >>>> +++ b/drivers/gpu/drm/drm_syncobj.c >>>> @@ -1183,6 +1183,79 @@ drm_syncobj_signal_ioctl(struct drm_device >>>> *dev, void *data, >>>> return ret; >>>> } >>>> +int >>>> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, >>>> + struct drm_file *file_private) >>>> +{ >>>> + struct drm_syncobj_timeline_array *args = data; >>>> + struct drm_syncobj **syncobjs; >>>> + struct dma_fence_chain **chains; >>>> + uint64_t *points; >>>> + uint32_t i, j; >>>> + int ret; >>>> + >>>> + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) >>>> + return -EOPNOTSUPP; >>>> + >>>> + if (args->pad != 0) >>>> + return -EINVAL; >>>> + >>>> + if (args->count_handles == 0) >>>> + return -EINVAL; >>>> + >>>> + ret = drm_syncobj_array_find(file_private, >>>> + u64_to_user_ptr(args->handles), >>>> + args->count_handles, >>>> + &syncobjs); >>>> + if (ret < 0) >>>> + return ret; >>>> + >>>> + points = kmalloc_array(args->count_handles, sizeof(*points), >>>> + GFP_KERNEL); >>>> + if (!points) { >>>> + ret = -ENOMEM; >>>> + goto out; >>>> + } >>>> + if (!u64_to_user_ptr(args->points)) { >>>> + memset(points, 0, args->count_handles * sizeof(uint64_t)); >>>> + } else if (copy_from_user(points, u64_to_user_ptr(args->points), >>>> + sizeof(uint64_t) * args->count_handles)) { >>>> + ret = -EFAULT; >>>> + goto err_points; >>>> + } >>>> + >>>> + chains = kmalloc_array(args->count_handles, sizeof(void *), >>>> GFP_KERNEL); >>>> + if (!chains) { >>>> + ret = -ENOMEM; >>>> + goto err_points; >>>> + } >>>> + for (i = 0; i < args->count_handles; i++) { >>>> + chains[i] = kzalloc(sizeof(struct dma_fence_chain), >>>> GFP_KERNEL); >>>> + if (!chains[i]) { >>>> + for (j = 0; j < i; j++) >>>> + kfree(chains[j]); >>>> + ret = -ENOMEM; >>>> + goto err_chains; >>>> + } >>>> + } >>>> + >>>> + for (i = 0; i < args->count_handles; i++) { >>>> + struct dma_fence *fence = dma_fence_get_stub(); >>>> + >>>> + drm_syncobj_add_point(syncobjs[i], chains[i], >>>> + fence, points[i]); >>>> + dma_fence_put(fence); >>>> + } >>> >>> I think I found an issue where the implementation doesn't match what >>> the spec requires on host signaling. >>> >>> Consider the following : >>> >>> >>> Timeline semaphore has a value of 4. >>> >>> A device submits a commands to signal point 5. >>> >>> The host signals point 6. >>> >>> >>> The value of the timeline will remain 4 until the device commands >>> complete, at which point it will change to 6. >>> >>> But the specification seems to say that the timeline value should be 6 >>> once the host signaling completes. >>> >>> >>> The only option that I see working would be to add point 6 in the >>> timeline and wait on that point to complete. >>> >>> >>> Christian, David, what do you think? >> This is exactly what I said before, see V3 comment in the patch head. >> >> But Jeson said this should be guranteed by user and application. >> >> Btw, if we wait all previous points completion this ioctl could be >> blocked. >> >> >> -David > > > I haven't seen anything that explicitly forbids that in the spec. > > Trying to get a clarification. > > > -Lionel So it looks like this won't be allowed by the vulkan spec. Sorry, I genuinely thought this was allowed because we can end up in such situation on the device side :( Not quite sure how useful it would be to have some checks here as most of the error handling can only be done within the add_point() function and requires taking the syncobj lock. As much as I could like to see more error checking, I guess this isn't doable without more locking. -Lionel > > >> >> >>> >>> Thanks, >>> >>> >>> -Lionel >>> >>> >>>> +err_chains: >>>> + kfree(chains); >>>> +err_points: >>>> + kfree(points); >>>> +out: >>>> + drm_syncobj_array_free(syncobjs, args->count_handles); >>>> + >>>> + return ret; >>>> +} >>>> + >>>> int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, >>>> struct drm_file *file_private) >>>> { >>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h >>>> index e8d0d6b51875..236b01a1fabf 100644 >>>> --- a/include/uapi/drm/drm.h >>>> +++ b/include/uapi/drm/drm.h >>>> @@ -943,6 +943,7 @@ extern "C" { >>>> #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct >>>> drm_syncobj_timeline_wait) >>>> #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct >>>> drm_syncobj_timeline_array) >>>> #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct >>>> drm_syncobj_transfer) >>>> +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct >>>> drm_syncobj_timeline_array) >>>> /** >>>> * Device specific ioctls should only be in their respective >>>> headers >>> > > _______________________________________________ > dri-devel mailing list > dri-devel@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index dd11ae5f1eef..d9a483a5fce0 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private); int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 92b3b7b2fd81..d337f161909c 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl, + DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index ee2d66e047e7..099596190845 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -1183,6 +1183,79 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data, return ret; } +int +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_private) +{ + struct drm_syncobj_timeline_array *args = data; + struct drm_syncobj **syncobjs; + struct dma_fence_chain **chains; + uint64_t *points; + uint32_t i, j; + int ret; + + if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ)) + return -EOPNOTSUPP; + + if (args->pad != 0) + return -EINVAL; + + if (args->count_handles == 0) + return -EINVAL; + + ret = drm_syncobj_array_find(file_private, + u64_to_user_ptr(args->handles), + args->count_handles, + &syncobjs); + if (ret < 0) + return ret; + + points = kmalloc_array(args->count_handles, sizeof(*points), + GFP_KERNEL); + if (!points) { + ret = -ENOMEM; + goto out; + } + if (!u64_to_user_ptr(args->points)) { + memset(points, 0, args->count_handles * sizeof(uint64_t)); + } else if (copy_from_user(points, u64_to_user_ptr(args->points), + sizeof(uint64_t) * args->count_handles)) { + ret = -EFAULT; + goto err_points; + } + + chains = kmalloc_array(args->count_handles, sizeof(void *), GFP_KERNEL); + if (!chains) { + ret = -ENOMEM; + goto err_points; + } + for (i = 0; i < args->count_handles; i++) { + chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL); + if (!chains[i]) { + for (j = 0; j < i; j++) + kfree(chains[j]); + ret = -ENOMEM; + goto err_chains; + } + } + + for (i = 0; i < args->count_handles; i++) { + struct dma_fence *fence = dma_fence_get_stub(); + + drm_syncobj_add_point(syncobjs[i], chains[i], + fence, points[i]); + dma_fence_put(fence); + } +err_chains: + kfree(chains); +err_points: + kfree(points); +out: + drm_syncobj_array_free(syncobjs, args->count_handles); + + return ret; +} + int drm_syncobj_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file_private) { diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h index e8d0d6b51875..236b01a1fabf 100644 --- a/include/uapi/drm/drm.h +++ b/include/uapi/drm/drm.h @@ -943,6 +943,7 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait) #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct drm_syncobj_timeline_array) #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) +#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) /** * Device specific ioctls should only be in their respective headers
v2: individually allocate chain array, since chain node is free independently. v3: all existing points must be already signaled before cpu perform signal operation, so add check condition for that. v4: remove v3 change and add checking to prevent out-of-order v5: unify binary and timeline Signed-off-by: Chunming Zhou <david1.zhou@amd.com> Cc: Tobias Hector <Tobias.Hector@amd.com> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@redhat.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- drivers/gpu/drm/drm_internal.h | 2 + drivers/gpu/drm/drm_ioctl.c | 2 + drivers/gpu/drm/drm_syncobj.c | 73 ++++++++++++++++++++++++++++++++++ include/uapi/drm/drm.h | 1 + 4 files changed, 78 insertions(+)