[8/9] drm/syncobj: add timeline signal ioctl for syncobj v3

Message ID: 20190315120959.25489-8-david1.zhou@amd.com (mailing list archive)
State: New, archived
Series: [1/9] dma-buf: add new dma_fence_chain container v5

Commit Message

Chunming Zhou March 15, 2019, 12:09 p.m. UTC
v2: individually allocate the chain array, since chain nodes are freed independently.
v3: all existing points must already be signaled before the CPU performs a signal operation,
    so add a check for that.

Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
---
 drivers/gpu/drm/drm_internal.h |   2 +
 drivers/gpu/drm/drm_ioctl.c    |   2 +
 drivers/gpu/drm/drm_syncobj.c  | 103 +++++++++++++++++++++++++++++++++
 include/uapi/drm/drm.h         |   1 +
 4 files changed, 108 insertions(+)
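
For context, here is a minimal userspace sketch (editor's illustration, not part of the patch) of how the new ioctl is driven: fill a struct drm_syncobj_timeline_array with one point per handle and call DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL; a point value of 0 requests plain binary-syncobj signaling. The drmIoctl() wrapper, the already-open render-node fd and the handle values are assumptions.

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>	/* assumed libdrm drmIoctl() wrapper; pulls in the drm.h uapi */

/* Signal the given points on the given syncobj handles from the CPU. */
static int signal_timeline_points(int fd, const uint32_t *handles,
				  const uint64_t *points, uint32_t count)
{
	struct drm_syncobj_timeline_array args;

	memset(&args, 0, sizeof(args));		/* args.pad must stay 0 or the ioctl returns -EINVAL */
	args.handles = (uintptr_t)handles;	/* u64 pointer to the handle array */
	args.points = (uintptr_t)points;	/* a 0 entry means binary semantics for that handle */
	args.count_handles = count;

	return drmIoctl(fd, DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, &args);
}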

Comments

Lionel Landwerlin March 19, 2019, 11:54 a.m. UTC | #1
On 15/03/2019 12:09, Chunming Zhou wrote:
> v2: individually allocate chain array, since chain node is free independently.
> v3: all existing points must be already signaled before cpu perform signal operation,
>      so add check condition for that.
>
> Signed-off-by: Chunming Zhou <david1.zhou@amd.com>
> ---
>   drivers/gpu/drm/drm_internal.h |   2 +
>   drivers/gpu/drm/drm_ioctl.c    |   2 +
>   drivers/gpu/drm/drm_syncobj.c  | 103 +++++++++++++++++++++++++++++++++
>   include/uapi/drm/drm.h         |   1 +
>   4 files changed, 108 insertions(+)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index dd11ae5f1eef..d9a483a5fce0 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -190,6 +190,8 @@ int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private);
>   int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   			     struct drm_file *file_private);
> +int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
> +				      struct drm_file *file_private);
>   int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private);
>   
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index 92b3b7b2fd81..d337f161909c 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -696,6 +696,8 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl,
> +		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>   		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index 306c7b7e2770..eaeb038f97d7 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -1183,6 +1183,109 @@ drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
>   	return ret;
>   }
>   
> +int
> +drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
> +				  struct drm_file *file_private)
> +{
> +	struct drm_syncobj_timeline_array *args = data;
> +	struct drm_syncobj **syncobjs;
> +	struct dma_fence_chain **chains;
> +	uint64_t *points;
> +	uint32_t i, j, timeline_count = 0;
> +	int ret;
> +
> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
> +		return -EOPNOTSUPP;
> +
> +	if (args->pad != 0)
> +		return -EINVAL;
> +
> +	if (args->count_handles == 0)
> +		return -EINVAL;
> +
> +	ret = drm_syncobj_array_find(file_private,
> +				     u64_to_user_ptr(args->handles),
> +				     args->count_handles,
> +				     &syncobjs);
> +	if (ret < 0)
> +		return ret;
> +
> +	for (i = 0; i < args->count_handles; i++) {
> +		struct dma_fence_chain *chain;
> +                struct dma_fence *fence;
> +
> +                fence = drm_syncobj_fence_get(syncobjs[i]);
> +                chain = to_dma_fence_chain(fence);
> +                if (chain) {
> +                        struct dma_fence *iter;
> +
> +                        dma_fence_chain_for_each(iter, fence) {
> +                                if (!iter)
> +                                        break;
> +				if (!dma_fence_is_signaled(iter)) {
> +					dma_fence_put(iter);
> +					DRM_ERROR("Client must guarantee all existing timeline points signaled before performing host signal operation!");
> +					ret = -EPERM;
> +					goto out;


Sorry if I'm failing to remember whether we discussed this before.


Signaling a point from the host should be fine even if the previous 
points in the timeline are not signaled.

After all, this can happen on the device side as well (out-of-order 
signaling).


I thought the thing we didn't want was out-of-order submission.

Just checking the last chain node seqno against the host signal point 
should be enough.


What about simply returning -EPERM? Then we can warn the application from 
userspace.
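
For illustration, a rough sketch (editor's addition, not code from the thread) of the check being suggested here: compare the requested host point against the seqno of the last chain node and reject out-of-order submission with -EPERM so userspace can warn the application. It assumes points[] has already been copied from userspace, and, as noted later in this thread, any such check is only advisory since other submitters can add points concurrently.

	for (i = 0; i < args->count_handles; i++) {
		struct dma_fence *fence = drm_syncobj_fence_get(syncobjs[i]);

		/* Only timeline syncobjs carry a dma_fence_chain as their fence. */
		if (fence && to_dma_fence_chain(fence) &&
		    points[i] <= fence->seqno) {
			dma_fence_put(fence);
			ret = -EPERM;	/* out-of-order submission */
			goto out;
		}
		dma_fence_put(fence);	/* dma_fence_put(NULL) is a no-op */
	}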


> +				}
> +                        }
> +                }
> +        }
> +
> +	points = kmalloc_array(args->count_handles, sizeof(*points),
> +			       GFP_KERNEL);
> +	if (!points) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +	if (!u64_to_user_ptr(args->points)) {
> +		memset(points, 0, args->count_handles * sizeof(uint64_t));
> +	} else if (copy_from_user(points, u64_to_user_ptr(args->points),
> +				  sizeof(uint64_t) * args->count_handles)) {
> +		ret = -EFAULT;
> +		goto err_points;
> +	}
> +
> +
> +	for (i = 0; i < args->count_handles; i++) {
> +		if (points[i])
> +			timeline_count++;
> +	}
> +	chains = kmalloc_array(timeline_count, sizeof(void *), GFP_KERNEL);
> +	if (!chains) {
> +		ret = -ENOMEM;
> +		goto err_points;
> +	}
> +	for (i = 0; i < timeline_count; i++) {
> +		chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
> +		if (!chains[i]) {
> +			for (j = 0; j < i; j++)
> +				kfree(chains[j]);
> +			ret = -ENOMEM;
> +			goto err_chains;
> +		}
> +	}
> +
> +	for (i = 0, j = 0; i < args->count_handles; i++) {
> +		if (points[i]) {
> +			struct dma_fence *fence = dma_fence_get_stub();
> +
> +			drm_syncobj_add_point(syncobjs[i], chains[j++],


Isn't this breaking with j++? It should be part of the for() loop 
expression: for (i = 0, j = 0; i < args->count_handles; i++, j++)


Otherwise j will not increment at the same pace as i.


[...]
Chunming Zhou March 20, 2019, 3:53 a.m. UTC | #2
On March 19, 2019 19:54, Lionel Landwerlin wrote:
> On 15/03/2019 12:09, Chunming Zhou wrote:
[...]
>> +    for (i = 0; i < args->count_handles; i++) {
>> +        struct dma_fence_chain *chain;
>> +                struct dma_fence *fence;
>> +
>> +                fence = drm_syncobj_fence_get(syncobjs[i]);
>> +                chain = to_dma_fence_chain(fence);
>> +                if (chain) {
>> +                        struct dma_fence *iter;
>> +
>> +                        dma_fence_chain_for_each(iter, fence) {
>> +                                if (!iter)
>> +                                        break;
>> +                if (!dma_fence_is_signaled(iter)) {
>> +                    dma_fence_put(iter);
>> +                    DRM_ERROR("Client must guarantee all existing 
>> timeline points signaled before performing host signal operation!");
>> +                    ret = -EPERM;
>> +                    goto out;
>
>
> Sorry if I'm failing to remember whether we discussed this before.
>
>
> Signaling a point from the host should be fine even if the previous 
> points in the timeline are not signaled.
OK, will remove that check.

>
> After all this can happen on the device side as well (out of order 
> signaling).
>
>
> I thought the thing we didn't want is out of order submission.
>
> Just checking the last chain node seqno against the host signal point 
> should be enough.
>
>
> What about simply returning -EPERM, we can warn the application from 
> userspace?
OK, will add that.

[...]
>> +    for (i = 0, j = 0; i < args->count_handles; i++) {
>> +        if (points[i]) {
>> +            struct dma_fence *fence = dma_fence_get_stub();
>> +
>> +            drm_syncobj_add_point(syncobjs[i], chains[j++],
>
>
> Isn't this breaking with j++ ?, it should be part of the for() loop 
> expression : for (i = 0, j = 0; i < arrags->count_handles; i++, j++)
>
>
> Otherwise j will not increment at the same pace as i.
I think that is intentional, because the syncobj array could contain both 
binary and timeline syncobjs. j is the index into the timeline_count chains 
in this context.

-David
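
A condensed version of that loop with the indexing annotated (editor's illustration; same logic as the patch):

	for (i = 0, j = 0; i < args->count_handles; i++) {
		if (points[i]) {
			/* timeline handle: consume the next preallocated chain node */
			struct dma_fence *fence = dma_fence_get_stub();

			drm_syncobj_add_point(syncobjs[i], chains[j++],
					      fence, points[i]);
			dma_fence_put(fence);
		} else {
			/* binary handle: no chain node is needed, j does not advance */
			drm_syncobj_assign_null_handle(syncobjs[i]);
		}
	}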
[...]
Lionel Landwerlin March 20, 2019, 11:58 a.m. UTC | #3
On 20/03/2019 03:53, zhoucm1 wrote:
>
>
> On March 19, 2019 19:54, Lionel Landwerlin wrote:
[...]
>>> +    for (i = 0, j = 0; i < args->count_handles; i++) {
>>> +        if (points[i]) {
>>> +            struct dma_fence *fence = dma_fence_get_stub();
>>> +
>>> +            drm_syncobj_add_point(syncobjs[i], chains[j++],
>>
>>
>> Isn't this breaking with j++ ?, it should be part of the for() loop 
>> expression : for (i = 0, j = 0; i < arrags->count_handles; i++, j++)
>>
>>
>> Otherwise j will not increment at the same pace as i.
> I think that is intentional, because syncobj array could contain both 
> binary and timeline. j is for timelien_count in this context.
>
> -David


To be fair, I think it would be easier for applications to just supply 0 for 
binary syncobjs.

The ioctl already copies count_handles points from userspace, so that would 
also be more consistent.


Sorry for not spotting this earlier.


-Lionel


[...]
Lionel Landwerlin March 20, 2019, 1:23 p.m. UTC | #4
On 20/03/2019 03:53, zhoucm1 wrote:
>
>
> On March 19, 2019 19:54, Lionel Landwerlin wrote:
>> On 15/03/2019 12:09, Chunming Zhou wrote:
[...]
>>> +    for (i = 0; i < args->count_handles; i++) {
>>> +        struct dma_fence_chain *chain;
>>> +                struct dma_fence *fence;
>>> +
>>> +                fence = drm_syncobj_fence_get(syncobjs[i]);
>>> +                chain = to_dma_fence_chain(fence);
>>> +                if (chain) {
>>> +                        struct dma_fence *iter;
>>> +
>>> +                        dma_fence_chain_for_each(iter, fence) {
>>> +                                if (!iter)
>>> +                                        break;
>>> +                if (!dma_fence_is_signaled(iter)) {
>>> +                    dma_fence_put(iter);
>>> +                    DRM_ERROR("Client must guarantee all existing 
>>> timeline points signaled before performing host signal operation!");
>>> +                    ret = -EPERM;
>>> +                    goto out;
>>
>>
>> Sorry if I'm failing to remember whether we discussed this before.
>>
>>
>> Signaling a point from the host should be fine even if the previous 
>> points in the timeline are not signaled.
> ok, will remove that checking.
>
>>
>> After all this can happen on the device side as well (out of order 
>> signaling).
>>
>>
>> I thought the thing we didn't want is out of order submission.
>>
>> Just checking the last chain node seqno against the host signal point 
>> should be enough.
>>
>>
>> What about simply returning -EPERM, we can warn the application from 
>> userspace?
> OK, will add that. 


Sorry for coming back to this again, but we probably ought to drop this 
check completely.

This is allowed on the device signaling side, so I'm not quite sure what 
that protects us from.


Also, we do the check at this point in drm_syncobj_timeline_signal_ioctl(), 
but there is nothing that guarantees the condition still holds when we later 
call drm_syncobj_add_point().


-Lionel

Patch

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index dd11ae5f1eef..d9a483a5fce0 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -190,6 +190,8 @@  int drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 int drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_private);
+int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+				      struct drm_file *file_private);
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 92b3b7b2fd81..d337f161909c 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -696,6 +696,8 @@  static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SIGNAL, drm_syncobj_signal_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL, drm_syncobj_timeline_signal_ioctl,
+		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
 		      DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index 306c7b7e2770..eaeb038f97d7 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1183,6 +1183,109 @@  drm_syncobj_signal_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+int
+drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_array *args = data;
+	struct drm_syncobj **syncobjs;
+	struct dma_fence_chain **chains;
+	uint64_t *points;
+	uint32_t i, j, timeline_count = 0;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ))
+		return -EOPNOTSUPP;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < args->count_handles; i++) {
+		struct dma_fence_chain *chain;
+                struct dma_fence *fence;
+
+                fence = drm_syncobj_fence_get(syncobjs[i]);
+                chain = to_dma_fence_chain(fence);
+                if (chain) {
+                        struct dma_fence *iter;
+
+                        dma_fence_chain_for_each(iter, fence) {
+                                if (!iter)
+                                        break;
+				if (!dma_fence_is_signaled(iter)) {
+					dma_fence_put(iter);
+					DRM_ERROR("Client must guarantee all existing timeline points signaled before performing host signal operation!");
+					ret = -EPERM;
+					goto out;
+				}
+                        }
+                }
+        }
+
+	points = kmalloc_array(args->count_handles, sizeof(*points),
+			       GFP_KERNEL);
+	if (!points) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (!u64_to_user_ptr(args->points)) {
+		memset(points, 0, args->count_handles * sizeof(uint64_t));
+	} else if (copy_from_user(points, u64_to_user_ptr(args->points),
+				  sizeof(uint64_t) * args->count_handles)) {
+		ret = -EFAULT;
+		goto err_points;
+	}
+
+
+	for (i = 0; i < args->count_handles; i++) {
+		if (points[i])
+			timeline_count++;
+	}
+	chains = kmalloc_array(timeline_count, sizeof(void *), GFP_KERNEL);
+	if (!chains) {
+		ret = -ENOMEM;
+		goto err_points;
+	}
+	for (i = 0; i < timeline_count; i++) {
+		chains[i] = kzalloc(sizeof(struct dma_fence_chain), GFP_KERNEL);
+		if (!chains[i]) {
+			for (j = 0; j < i; j++)
+				kfree(chains[j]);
+			ret = -ENOMEM;
+			goto err_chains;
+		}
+	}
+
+	for (i = 0, j = 0; i < args->count_handles; i++) {
+		if (points[i]) {
+			struct dma_fence *fence = dma_fence_get_stub();
+
+			drm_syncobj_add_point(syncobjs[i], chains[j++],
+					      fence, points[i]);
+			dma_fence_put(fence);
+		} else
+			drm_syncobj_assign_null_handle(syncobjs[i]);
+	}
+err_chains:
+	kfree(chains);
+err_points:
+	kfree(points);
+out:
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
+
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private)
 {
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 4c1e2e6579fa..fe00b74268eb 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -943,6 +943,7 @@  extern "C" {
 #define DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT	DRM_IOWR(0xCA, struct drm_syncobj_timeline_wait)
 #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
 #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
+#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
 
 /**
  * Device specific ioctls should only be in their respective headers