Message ID | 20190315120959.25489-8-david1.zhou@amd.com (mailing list archive)
---|---
State | New, archived
Series | [1/9] dma-buf: add new dma_fence_chain container v5
On 15/03/2019 12:09, Chunming Zhou wrote:
> v2: individually allocate chain array, since chain node is free independently.
> v3: all existing points must be already signaled before cpu perform signal operation,
>     so add check condition for that.
>
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>  drivers/gpu/drm/drm_internal.h |   2 +
>  drivers/gpu/drm/drm_ioctl.c    |   2 +
>  drivers/gpu/drm/drm_syncobj.c  | 103 +++++++++++++++++++++++++++++++++
>  include/uapi/drm/drm.h         |   1 +
>  4 files changed, 108 insertions(+)
>
> [...]
>
> +	for (i = 0; i < args->count_handles; i++) {
> +		struct dma_fence_chain *chain;
> +		struct dma_fence *fence;
> +
> +		fence = drm_syncobj_fence_get(syncobjs[i]);
> +		chain = to_dma_fence_chain(fence);
> +		if (chain) {
> +			struct dma_fence *iter;
> +
> +			dma_fence_chain_for_each(iter, fence) {
> +				if (!iter)
> +					break;
> +				if (!dma_fence_is_signaled(iter)) {
> +					dma_fence_put(iter);
> +					DRM_ERROR("Client must guarantee all existing timeline points signaled before performing host signal operation!");
> +					ret = -EPERM;
> +					goto out;

Sorry if I'm failing to remember whether we discussed this before.

Signaling a point from the host should be fine even if the previous
points in the timeline are not signaled.

After all this can happen on the device side as well (out of order
signaling).

I thought the thing we didn't want is out of order submission.

Just checking the last chain node seqno against the host signal point
should be enough.

What about simply returning -EPERM, we can warn the application from
userspace?

> [...]
>
> +	for (i = 0, j = 0; i < args->count_handles; i++) {
> +		if (points[i]) {
> +			struct dma_fence *fence = dma_fence_get_stub();
> +
> +			drm_syncobj_add_point(syncobjs[i], chains[j++],

Isn't this breaking with j++? It should be part of the for() loop
expression: for (i = 0, j = 0; i < args->count_handles; i++, j++)

Otherwise j will not increment at the same pace as i.

> +					      fence, points[i]);
> +			dma_fence_put(fence);
> +		} else
> +			drm_syncobj_assign_null_handle(syncobjs[i]);
> +	}
> [...]
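
As a rough illustration of the alternative Lionel suggests (comparing only the newest chain node's seqno against the requested host signal point), a check could look like the sketch below. This is not code from the posted patch; it is a hypothetical fragment in the drm_syncobj.c context that reuses helpers the patch already calls (drm_syncobj_fence_get(), to_dma_fence_chain(), dma_fence_put()) and assumes the top chain fence's seqno carries the last submitted point.

/*
 * Sketch only, not part of the posted patch: reject host signals that
 * would go backwards relative to the newest point on the timeline,
 * instead of requiring every existing point to already be signaled.
 */
static int drm_syncobj_check_host_point(struct drm_syncobj *syncobj,
					u64 point)
{
	struct dma_fence *fence = drm_syncobj_fence_get(syncobj);
	struct dma_fence_chain *chain = to_dma_fence_chain(fence);
	int ret = 0;

	/* For a chain fence, seqno is the last point added to the timeline. */
	if (chain && point <= fence->seqno)
		ret = -EPERM;	/* out-of-order host signal */

	dma_fence_put(fence);
	return ret;
}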
On 2019年03月19日 19:54, Lionel Landwerlin wrote:
> On 15/03/2019 12:09, Chunming Zhou wrote:
> [...]
>> +				if (!dma_fence_is_signaled(iter)) {
>> +					dma_fence_put(iter);
>> +					DRM_ERROR("Client must guarantee all existing timeline points signaled before performing host signal operation!");
>> +					ret = -EPERM;
>> +					goto out;
>
> Sorry if I'm failing to remember whether we discussed this before.
>
> Signaling a point from the host should be fine even if the previous
> points in the timeline are not signaled.

ok, will remove that checking.

> After all this can happen on the device side as well (out of order
> signaling).
>
> I thought the thing we didn't want is out of order submission.
>
> Just checking the last chain node seqno against the host signal point
> should be enough.
>
> What about simply returning -EPERM, we can warn the application from
> userspace?

OK, will add that.

> [...]
>> +	for (i = 0, j = 0; i < args->count_handles; i++) {
>> +		if (points[i]) {
>> +			struct dma_fence *fence = dma_fence_get_stub();
>> +
>> +			drm_syncobj_add_point(syncobjs[i], chains[j++],
>
> Isn't this breaking with j++? It should be part of the for() loop
> expression: for (i = 0, j = 0; i < args->count_handles; i++, j++)
>
> Otherwise j will not increment at the same pace as i.

I think that is intentional, because syncobj array could contain both
binary and timeline. j is for timeline_count in this context.

-David

> [...]
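
To make the pacing David describes concrete, here is the loop from the patch again with illustrative comments (the comments and the worked example are not from the patch): j indexes only the timeline_count preallocated chain nodes, while i walks every handle.

/*
 * Example: points[] = { 0, 5, 0, 7 } with count_handles = 4 gives
 * timeline_count = 2; chains[0] goes to handle 1 (point 5) and
 * chains[1] to handle 3 (point 7), while handles 0 and 2 take the
 * binary path, so j deliberately lags behind i.
 */
for (i = 0, j = 0; i < args->count_handles; i++) {
	if (points[i]) {
		struct dma_fence *fence = dma_fence_get_stub();

		/* consume the next spare chain node: j advances only here */
		drm_syncobj_add_point(syncobjs[i], chains[j++],
				      fence, points[i]);
		dma_fence_put(fence);
	} else {
		/* point 0 means binary semantics: just signal the syncobj */
		drm_syncobj_assign_null_handle(syncobjs[i]);
	}
}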
On 20/03/2019 03:53, zhoucm1 wrote:
> On 2019年03月19日 19:54, Lionel Landwerlin wrote:
> [...]
>> Isn't this breaking with j++? It should be part of the for() loop
>> expression: for (i = 0, j = 0; i < args->count_handles; i++, j++)
>>
>> Otherwise j will not increment at the same pace as i.
>
> I think that is intentional, because syncobj array could contain both
> binary and timeline. j is for timeline_count in this context.
>
> -David

To be fair, I think it would be easier for applications to supply 0 for
binary syncobjs. It also copies count_handles points from userspace, so
it would also be more consistent.

Sorry for not spotting this earlier.

-Lionel

> [...]
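
For illustration, a userspace caller mixing binary and timeline syncobjs under that convention might look like the hypothetical sketch below (not from the patch or from any real application). It assumes libdrm's drmIoctl() and the struct drm_syncobj_timeline_array layout used by the patch, with a point of 0 selecting the binary path; the header names and the availability of the new define depend on the installed uapi headers.

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>	/* drmIoctl(); pulls in the drm.h uapi definitions */

/*
 * Hypothetical example: signal one binary syncobj and one timeline point
 * with a single DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL call. A point of 0
 * means "treat this handle with binary semantics".
 */
static int signal_mixed(int fd, uint32_t binary_handle,
			uint32_t timeline_handle, uint64_t point)
{
	uint32_t handles[2] = { binary_handle, timeline_handle };
	uint64_t points[2] = { 0, point };
	struct drm_syncobj_timeline_array args;

	memset(&args, 0, sizeof(args));		/* keeps args.pad == 0 */
	args.handles = (uintptr_t)handles;
	args.points = (uintptr_t)points;
	args.count_handles = 2;

	return drmIoctl(fd, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args);
}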
On 20/03/2019 03:53, zhoucm1 wrote:
> On 2019年03月19日 19:54, Lionel Landwerlin wrote:
> [...]
>> Sorry if I'm failing to remember whether we discussed this before.
>>
>> Signaling a point from the host should be fine even if the previous
>> points in the timeline are not signaled.
>
> ok, will remove that checking.
>
>> After all this can happen on the device side as well (out of order
>> signaling).
>>
>> I thought the thing we didn't want is out of order submission.
>>
>> Just checking the last chain node seqno against the host signal point
>> should be enough.
>>
>> What about simply returning -EPERM, we can warn the application from
>> userspace?
>
> OK, will add that.

Sorry for coming back to this again, but we probably ought to drop this
check completely.

This is allowed on the device signaling side, so I'm not quite sure what
that protects us from.

Also we do the check at this point in the signal_timeline_ioctl()
function but there is nothing that says that when we later call the
add_point() function this condition still holds.

-Lionel
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dd11ae5f1eef..d9a483a5fce0 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_private);
+int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+				      struct drm_file *file_private);
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 92b3b7b2fd81..d337f161909c 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 306c7b7e2770..eaeb038f97d7 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1183,6 +1183,109 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+int
+drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_array *args = data;
+	struct drm_syncobj **syncobjs;
+	struct dma_fence_chain **chains;
+	uint64_t *points;
+	uint32_t i, j, timeline_count = 0;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -EOPNOTSUPP;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < args->count_handles; i++) {
+		struct dma_fence_chain *chain;
+		struct dma_fence *fence;
+
+		fence = drm_syncobj_fence_get(syncobjs[i]);
+		chain = to_dma_fence_chain(fence);
+		if (chain) {
+			struct dma_fence *iter;
+
+			dma_fence_chain_for_each(iter, fence) {
+				if (!iter)
+					break;
+				if (!dma_fence_is_signaled(iter)) {
+					dma_fence_put(iter);
+					DRM_ERROR("Client must guarantee all existing timeline points signaled before performing host signal operation!");
+					ret = -EPERM;
+					goto out;
+				}
+			}
+		}
+	}
+
+	points = kmalloc_array(args->count_handles, sizeof(*points),
+			       GFP_KERNEL);
+	if (!points) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (!u64_to_user_ptr(args->points)) {
+		memset(points, 0, args->count_handles * sizeof(uint64_t));
+	} else if (copy_from_user(points, u64_to_user_ptr(args->points),
+				  sizeof(uint64_t) * args->count_handles)) {
+		ret = -EFAULT;
+		goto err_points;
+	}
+
+
+	for (i = 0; i < args->count_handles; i++) {
+		if (points[i])
+			timeline_count++;
+	}
+	chains = kmalloc_array(timeline_count, sizeof(void *), GFP_KERNEL);
+	if (!chains) {
+		ret = -ENOMEM;
+		goto err_points;
+	}
+	for (i = 0; i < timeline_count; i++) {
+		chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+		if (!chains[i]) {
+			for (j = 0; j < i; j++)
+				kfree(chains[j]);
+			ret = -ENOMEM;
+			goto err_chains;
+		}
+	}
+
+	for (i = 0, j = 0; i < args->count_handles; i++) {
+		if (points[i]) {
+			struct dma_fence *fence = dma_fence_get_stub();
+
+			drm_syncobj_add_point(syncobjs[i], chains[j++],
+					      fence, points[i]);
+			dma_fence_put(fence);
+		} else
+			drm_syncobj_assign_null_handle(syncobjs[i]);
+	}
+err_chains:
+	kfree(chains);
+err_points:
+	kfree(points);
+out:
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
+
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private)
 {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 4c1e2e6579fa..fe00b74268eb 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -943,6 +943,7 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
 #define DRM_IOCTL_SYNCOBJ_QUERY	DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
 #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
 
 /**
  * Device specific ioctls should only be in their respective headers
v2: individually allocate chain array, since chain node is free independently.
v3: all existing points must be already signaled before cpu perform signal operation,
    so add check condition for that.

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/drm_internal.h |   2 +
 drivers/gpu/drm/drm_ioctl.c    |   2 +
 drivers/gpu/drm/drm_syncobj.c  | 103 +++++++++++++++++++++++++++++++++
 include/uapi/drm/drm.h         |   1 +
 4 files changed, 108 insertions(+)