diff mbox series

[v3,1/1] drm/syncobj: add sideband payload

Message ID 20190808131402.14519-2-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/syncobj: add syncobj sideband payload for threaded submission | expand

Commit Message

Lionel Landwerlin Aug. 8, 2019, 1:14 p.m. UTC
The Vulkan timeline semaphores allow signaling to happen on the point
of the timeline without all of the its dependencies to be created.

The current 2 implementations (AMD/Intel) of the Vulkan spec on top of
the Linux kernel are using a thread to wait on the dependencies of a
given point to materialize and delay actual submission to the kernel
driver until the wait completes.

If a binary semaphore is submitted for signaling along the side of a
timeline semaphore waiting for completion that means that the drm
syncobj associated with that binary semaphore will not have a DMA
fence associated with it by the time vkQueueSubmit() returns. This and
the fact that a binary semaphore can be signaled and unsignaled as
before its DMA fences materialize mean that we cannot just rely on the
fence within the syncobj but we also need a sideband payload verifying
that the fence in the syncobj matches the last submission from the
Vulkan API point of view.

This change adds a sideband payload that is incremented with signaled
syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
waiting on a the syncobj will read the sideband payload and wait for a
fence chain element with a seqno superior or equal to the sideband
payload value to be added into the fence chain and use that fence to
trigger the submission on the kernel driver.

v2: Use a separate ioctl to get/set the sideband value (Christian)

v3: Use 2 ioctls for get/set (Christian)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Christian Koenig <Christian.Koenig@amd.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
---
 drivers/gpu/drm/drm_internal.h |  4 ++
 drivers/gpu/drm/drm_ioctl.c    |  5 +++
 drivers/gpu/drm/drm_syncobj.c  | 79 +++++++++++++++++++++++++++++++++-
 include/drm/drm_syncobj.h      |  9 ++++
 include/uapi/drm/drm.h         |  2 +
 5 files changed, 98 insertions(+), 1 deletion(-)

Comments

Chunming Zhou Aug. 8, 2019, 1:23 p.m. UTC | #1
The propursal is fine with me.

one question:

how to wait sideband payload? Following patch will show that?

-David

在 2019/8/8 21:14, Lionel Landwerlin 写道:
> The Vulkan timeline semaphores allow signaling to happen on the point
> of the timeline without all of the its dependencies to be created.
>
> The current 2 implementations (AMD/Intel) of the Vulkan spec on top of
> the Linux kernel are using a thread to wait on the dependencies of a
> given point to materialize and delay actual submission to the kernel
> driver until the wait completes.
>
> If a binary semaphore is submitted for signaling along the side of a
> timeline semaphore waiting for completion that means that the drm
> syncobj associated with that binary semaphore will not have a DMA
> fence associated with it by the time vkQueueSubmit() returns. This and
> the fact that a binary semaphore can be signaled and unsignaled as
> before its DMA fences materialize mean that we cannot just rely on the
> fence within the syncobj but we also need a sideband payload verifying
> that the fence in the syncobj matches the last submission from the
> Vulkan API point of view.
>
> This change adds a sideband payload that is incremented with signaled
> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
> waiting on a the syncobj will read the sideband payload and wait for a
> fence chain element with a seqno superior or equal to the sideband
> payload value to be added into the fence chain and use that fence to
> trigger the submission on the kernel driver.
>
> v2: Use a separate ioctl to get/set the sideband value (Christian)
>
> v3: Use 2 ioctls for get/set (Christian)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Cc: Christian Koenig <Christian.Koenig@amd.com>
> Cc: Jason Ekstrand <jason@jlekstrand.net>
> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
> ---
>   drivers/gpu/drm/drm_internal.h |  4 ++
>   drivers/gpu/drm/drm_ioctl.c    |  5 +++
>   drivers/gpu/drm/drm_syncobj.c  | 79 +++++++++++++++++++++++++++++++++-
>   include/drm/drm_syncobj.h      |  9 ++++
>   include/uapi/drm/drm.h         |  2 +
>   5 files changed, 98 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
> index 51a2055c8f18..0c9736199df0 100644
> --- a/drivers/gpu/drm/drm_internal.h
> +++ b/drivers/gpu/drm/drm_internal.h
> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>   				      struct drm_file *file_private);
>   int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   			    struct drm_file *file_private);
> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void *data,
> +				   struct drm_file *file_private);
> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void *data,
> +				   struct drm_file *file_private);
>   
>   /* drm_framebuffer.c */
>   void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
> index f675a3bb2c88..48500bf62801 100644
> --- a/drivers/gpu/drm/drm_ioctl.c
> +++ b/drivers/gpu/drm/drm_ioctl.c
> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>   		      DRM_RENDER_ALLOW),
>   	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>   		      DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND, drm_syncobj_get_sideband_ioctl,
> +		      DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND, drm_syncobj_set_sideband_ioctl,
> +		      DRM_RENDER_ALLOW),
> +
>   	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 0),
>   	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, 0),
>   	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER),
> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
> index b927e482e554..c90ef20b9242 100644
> --- a/drivers/gpu/drm/drm_syncobj.c
> +++ b/drivers/gpu/drm/drm_syncobj.c
> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>   	if (ret < 0)
>   		return ret;
>   
> -	for (i = 0; i < args->count_handles; i++)
> +	for (i = 0; i < args->count_handles; i++) {
>   		drm_syncobj_replace_fence(syncobjs[i], NULL);
> +		syncobjs[i]->sideband_payload = 0;
> +	}
>   
>   	drm_syncobj_array_free(syncobjs, args->count_handles);
>   
> @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>   		if (ret)
>   			break;
>   	}
> +
> +	drm_syncobj_array_free(syncobjs, args->count_handles);
> +
> +	return ret;
> +}
> +
> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void *data,
> +				   struct drm_file *file_private)
> +{
> +	struct drm_syncobj_timeline_array *args = data;
> +	struct drm_syncobj **syncobjs;
> +	uint64_t __user *points = u64_to_user_ptr(args->points);
> +	uint32_t i;
> +	int ret;
> +
> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
> +		return -EOPNOTSUPP;
> +
> +	if (args->pad != 0)
> +		return -EINVAL;
> +
> +	if (args->count_handles == 0)
> +		return -EINVAL;
> +
> +	ret = drm_syncobj_array_find(file_private,
> +				     u64_to_user_ptr(args->handles),
> +				     args->count_handles,
> +				     &syncobjs);
> +	if (ret < 0)
> +		return ret;
> +
> +	for (i = 0; i < args->count_handles; i++) {
> +		copy_to_user(&points[i], &syncobjs[i]->sideband_payload, sizeof(uint64_t));
> +		ret = ret ? -EFAULT : 0;
> +		if (ret)
> +			break;
> +	}
> +
> +	drm_syncobj_array_free(syncobjs, args->count_handles);
> +
> +	return ret;
> +}
> +
> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void *data,
> +				   struct drm_file *file_private)
> +{
> +	struct drm_syncobj_timeline_array *args = data;
> +	struct drm_syncobj **syncobjs;
> +	uint64_t __user *points = u64_to_user_ptr(args->points);
> +	uint32_t i;
> +	int ret;
> +
> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
> +		return -EOPNOTSUPP;
> +
> +	if (args->pad != 0)
> +		return -EINVAL;
> +
> +	if (args->count_handles == 0)
> +		return -EINVAL;
> +
> +	ret = drm_syncobj_array_find(file_private,
> +				     u64_to_user_ptr(args->handles),
> +				     args->count_handles,
> +				     &syncobjs);
> +	if (ret < 0)
> +		return ret;
> +
> +	for (i = 0; i < args->count_handles; i++) {
> +		copy_from_user(&syncobjs[i]->sideband_payload, &points[i], sizeof(uint64_t));
> +		ret = ret ? -EFAULT : 0;
> +		if (ret)
> +			break;
> +	}
> +
>   	drm_syncobj_array_free(syncobjs, args->count_handles);
>   
>   	return ret;
> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
> index 6cf7243a1dc5..b587b8e07547 100644
> --- a/include/drm/drm_syncobj.h
> +++ b/include/drm/drm_syncobj.h
> @@ -61,6 +61,15 @@ struct drm_syncobj {
>   	 * @file: A file backing for this syncobj.
>   	 */
>   	struct file *file;
> +	/**
> +	 * @sideband_payload: A 64bit side band payload.
> +	 *
> +	 * We use the sideband payload value to wait on binary syncobj fences
> +	 * to materialize. It is a reservation mechanism for the signaler to
> +	 * express that at some point in the future a dma fence with the same
> +	 * seqno will be put into the syncobj.
> +	 */
> +	u64 sideband_payload;
>   };
>   
>   void drm_syncobj_free(struct kref *kref);
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 8a5b2f8f8eb9..cffdc6c9772c 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -946,6 +946,8 @@ extern "C" {
>   #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>   #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
>   #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND	DRM_IOR(0xCE, struct drm_syncobj_timeline_array)
> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND	DRM_IOWR(0xCF, struct drm_syncobj_timeline_array)
>   
>   /**
>    * Device specific ioctls should only be in their respective headers
Lionel Landwerlin Aug. 8, 2019, 1:38 p.m. UTC | #2
Interesting question :)

I didn't see any usecase for that.
This sideband payload value is used for a wait so waiting on the wait 
value for another wait is bit meta :)

Do you have a use case for that?

-Lionel

On 08/08/2019 16:23, Chunming Zhou wrote:
> The propursal is fine with me.
>
> one question:
>
> how to wait sideband payload? Following patch will show that?
>
> -David
>
> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>> The Vulkan timeline semaphores allow signaling to happen on the point
>> of the timeline without all of the its dependencies to be created.
>>
>> The current 2 implementations (AMD/Intel) of the Vulkan spec on top of
>> the Linux kernel are using a thread to wait on the dependencies of a
>> given point to materialize and delay actual submission to the kernel
>> driver until the wait completes.
>>
>> If a binary semaphore is submitted for signaling along the side of a
>> timeline semaphore waiting for completion that means that the drm
>> syncobj associated with that binary semaphore will not have a DMA
>> fence associated with it by the time vkQueueSubmit() returns. This and
>> the fact that a binary semaphore can be signaled and unsignaled as
>> before its DMA fences materialize mean that we cannot just rely on the
>> fence within the syncobj but we also need a sideband payload verifying
>> that the fence in the syncobj matches the last submission from the
>> Vulkan API point of view.
>>
>> This change adds a sideband payload that is incremented with signaled
>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>> waiting on a the syncobj will read the sideband payload and wait for a
>> fence chain element with a seqno superior or equal to the sideband
>> payload value to be added into the fence chain and use that fence to
>> trigger the submission on the kernel driver.
>>
>> v2: Use a separate ioctl to get/set the sideband value (Christian)
>>
>> v3: Use 2 ioctls for get/set (Christian)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>> ---
>>    drivers/gpu/drm/drm_internal.h |  4 ++
>>    drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>    drivers/gpu/drm/drm_syncobj.c  | 79 +++++++++++++++++++++++++++++++++-
>>    include/drm/drm_syncobj.h      |  9 ++++
>>    include/uapi/drm/drm.h         |  2 +
>>    5 files changed, 98 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
>> index 51a2055c8f18..0c9736199df0 100644
>> --- a/drivers/gpu/drm/drm_internal.h
>> +++ b/drivers/gpu/drm/drm_internal.h
>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
>>    				      struct drm_file *file_private);
>>    int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>    			    struct drm_file *file_private);
>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void *data,
>> +				   struct drm_file *file_private);
>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void *data,
>> +				   struct drm_file *file_private);
>>    
>>    /* drm_framebuffer.c */
>>    void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>> index f675a3bb2c88..48500bf62801 100644
>> --- a/drivers/gpu/drm/drm_ioctl.c
>> +++ b/drivers/gpu/drm/drm_ioctl.c
>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
>>    		      DRM_RENDER_ALLOW),
>>    	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>    		      DRM_RENDER_ALLOW),
>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND, drm_syncobj_get_sideband_ioctl,
>> +		      DRM_RENDER_ALLOW),
>> +	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND, drm_syncobj_set_sideband_ioctl,
>> +		      DRM_RENDER_ALLOW),
>> +
>>    	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 0),
>>    	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, 0),
>>    	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER),
>> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
>> index b927e482e554..c90ef20b9242 100644
>> --- a/drivers/gpu/drm/drm_syncobj.c
>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
>>    	if (ret < 0)
>>    		return ret;
>>    
>> -	for (i = 0; i < args->count_handles; i++)
>> +	for (i = 0; i < args->count_handles; i++) {
>>    		drm_syncobj_replace_fence(syncobjs[i], NULL);
>> +		syncobjs[i]->sideband_payload = 0;
>> +	}
>>    
>>    	drm_syncobj_array_free(syncobjs, args->count_handles);
>>    
>> @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>    		if (ret)
>>    			break;
>>    	}
>> +
>> +	drm_syncobj_array_free(syncobjs, args->count_handles);
>> +
>> +	return ret;
>> +}
>> +
>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void *data,
>> +				   struct drm_file *file_private)
>> +{
>> +	struct drm_syncobj_timeline_array *args = data;
>> +	struct drm_syncobj **syncobjs;
>> +	uint64_t __user *points = u64_to_user_ptr(args->points);
>> +	uint32_t i;
>> +	int ret;
>> +
>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>> +		return -EOPNOTSUPP;
>> +
>> +	if (args->pad != 0)
>> +		return -EINVAL;
>> +
>> +	if (args->count_handles == 0)
>> +		return -EINVAL;
>> +
>> +	ret = drm_syncobj_array_find(file_private,
>> +				     u64_to_user_ptr(args->handles),
>> +				     args->count_handles,
>> +				     &syncobjs);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	for (i = 0; i < args->count_handles; i++) {
>> +		copy_to_user(&points[i], &syncobjs[i]->sideband_payload, sizeof(uint64_t));
>> +		ret = ret ? -EFAULT : 0;
>> +		if (ret)
>> +			break;
>> +	}
>> +
>> +	drm_syncobj_array_free(syncobjs, args->count_handles);
>> +
>> +	return ret;
>> +}
>> +
>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void *data,
>> +				   struct drm_file *file_private)
>> +{
>> +	struct drm_syncobj_timeline_array *args = data;
>> +	struct drm_syncobj **syncobjs;
>> +	uint64_t __user *points = u64_to_user_ptr(args->points);
>> +	uint32_t i;
>> +	int ret;
>> +
>> +	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>> +		return -EOPNOTSUPP;
>> +
>> +	if (args->pad != 0)
>> +		return -EINVAL;
>> +
>> +	if (args->count_handles == 0)
>> +		return -EINVAL;
>> +
>> +	ret = drm_syncobj_array_find(file_private,
>> +				     u64_to_user_ptr(args->handles),
>> +				     args->count_handles,
>> +				     &syncobjs);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	for (i = 0; i < args->count_handles; i++) {
>> +		copy_from_user(&syncobjs[i]->sideband_payload, &points[i], sizeof(uint64_t));
>> +		ret = ret ? -EFAULT : 0;
>> +		if (ret)
>> +			break;
>> +	}
>> +
>>    	drm_syncobj_array_free(syncobjs, args->count_handles);
>>    
>>    	return ret;
>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>> index 6cf7243a1dc5..b587b8e07547 100644
>> --- a/include/drm/drm_syncobj.h
>> +++ b/include/drm/drm_syncobj.h
>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>    	 * @file: A file backing for this syncobj.
>>    	 */
>>    	struct file *file;
>> +	/**
>> +	 * @sideband_payload: A 64bit side band payload.
>> +	 *
>> +	 * We use the sideband payload value to wait on binary syncobj fences
>> +	 * to materialize. It is a reservation mechanism for the signaler to
>> +	 * express that at some point in the future a dma fence with the same
>> +	 * seqno will be put into the syncobj.
>> +	 */
>> +	u64 sideband_payload;
>>    };
>>    
>>    void drm_syncobj_free(struct kref *kref);
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -946,6 +946,8 @@ extern "C" {
>>    #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
>>    #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND	DRM_IOR(0xCE, struct drm_syncobj_timeline_array)
>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND	DRM_IOWR(0xCF, struct drm_syncobj_timeline_array)
>>    
>>    /**
>>     * Device specific ioctls should only be in their respective headers
Chunming Zhou Aug. 8, 2019, 1:42 p.m. UTC | #3
No, I just see your comment "The next vkQueueSubmit()
waiting on a the syncobj will read the sideband payload and wait for a
fence chain element with a seqno superior or equal to the sideband
payload value to be added into the fence chain and use that fence to
trigger the submission on the kernel driver. ".

In that, you mentioned wait for sideband.
So I want to know we how to use that, maybe I miss something in previous 
discussing thread.

-DAvid


在 2019/8/8 21:38, Lionel Landwerlin 写道:
> Interesting question :)
>
> I didn't see any usecase for that.
> This sideband payload value is used for a wait so waiting on the wait 
> value for another wait is bit meta :)
>
> Do you have a use case for that?
>
> -Lionel
>
> On 08/08/2019 16:23, Chunming Zhou wrote:
>> The propursal is fine with me.
>>
>> one question:
>>
>> how to wait sideband payload? Following patch will show that?
>>
>> -David
>>
>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>>> The Vulkan timeline semaphores allow signaling to happen on the point
>>> of the timeline without all of the its dependencies to be created.
>>>
>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on top of
>>> the Linux kernel are using a thread to wait on the dependencies of a
>>> given point to materialize and delay actual submission to the kernel
>>> driver until the wait completes.
>>>
>>> If a binary semaphore is submitted for signaling along the side of a
>>> timeline semaphore waiting for completion that means that the drm
>>> syncobj associated with that binary semaphore will not have a DMA
>>> fence associated with it by the time vkQueueSubmit() returns. This and
>>> the fact that a binary semaphore can be signaled and unsignaled as
>>> before its DMA fences materialize mean that we cannot just rely on the
>>> fence within the syncobj but we also need a sideband payload verifying
>>> that the fence in the syncobj matches the last submission from the
>>> Vulkan API point of view.
>>>
>>> This change adds a sideband payload that is incremented with signaled
>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>>> waiting on a the syncobj will read the sideband payload and wait for a
>>> fence chain element with a seqno superior or equal to the sideband
>>> payload value to be added into the fence chain and use that fence to
>>> trigger the submission on the kernel driver.
>>>
>>> v2: Use a separate ioctl to get/set the sideband value (Christian)
>>>
>>> v3: Use 2 ioctls for get/set (Christian)
>>>
>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>>> ---
>>>    drivers/gpu/drm/drm_internal.h |  4 ++
>>>    drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>>    drivers/gpu/drm/drm_syncobj.c  | 79 
>>> +++++++++++++++++++++++++++++++++-
>>>    include/drm/drm_syncobj.h      |  9 ++++
>>>    include/uapi/drm/drm.h         |  2 +
>>>    5 files changed, 98 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_internal.h 
>>> b/drivers/gpu/drm/drm_internal.h
>>> index 51a2055c8f18..0c9736199df0 100644
>>> --- a/drivers/gpu/drm/drm_internal.h
>>> +++ b/drivers/gpu/drm/drm_internal.h
>>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct 
>>> drm_device *dev, void *data,
>>>                          struct drm_file *file_private);
>>>    int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>                    struct drm_file *file_private);
>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void *data,
>>> +                   struct drm_file *file_private);
>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void *data,
>>> +                   struct drm_file *file_private);
>>>       /* drm_framebuffer.c */
>>>    void drm_framebuffer_print_info(struct drm_printer *p, unsigned 
>>> int indent,
>>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>> index f675a3bb2c88..48500bf62801 100644
>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc drm_ioctls[] 
>>> = {
>>>                  DRM_RENDER_ALLOW),
>>>        DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>>                  DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND, 
>>> drm_syncobj_get_sideband_ioctl,
>>> +              DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND, 
>>> drm_syncobj_set_sideband_ioctl,
>>> +              DRM_RENDER_ALLOW),
>>> +
>>>        DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, 
>>> drm_crtc_get_sequence_ioctl, 0),
>>>        DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, 
>>> drm_crtc_queue_sequence_ioctl, 0),
>>>        DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, 
>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>>> diff --git a/drivers/gpu/drm/drm_syncobj.c 
>>> b/drivers/gpu/drm/drm_syncobj.c
>>> index b927e482e554..c90ef20b9242 100644
>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device 
>>> *dev, void *data,
>>>        if (ret < 0)
>>>            return ret;
>>>    -    for (i = 0; i < args->count_handles; i++)
>>> +    for (i = 0; i < args->count_handles; i++) {
>>>            drm_syncobj_replace_fence(syncobjs[i], NULL);
>>> +        syncobjs[i]->sideband_payload = 0;
>>> +    }
>>>           drm_syncobj_array_free(syncobjs, args->count_handles);
>>>    @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct 
>>> drm_device *dev, void *data,
>>>            if (ret)
>>>                break;
>>>        }
>>> +
>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void *data,
>>> +                   struct drm_file *file_private)
>>> +{
>>> +    struct drm_syncobj_timeline_array *args = data;
>>> +    struct drm_syncobj **syncobjs;
>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>> +    uint32_t i;
>>> +    int ret;
>>> +
>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    if (args->pad != 0)
>>> +        return -EINVAL;
>>> +
>>> +    if (args->count_handles == 0)
>>> +        return -EINVAL;
>>> +
>>> +    ret = drm_syncobj_array_find(file_private,
>>> +                     u64_to_user_ptr(args->handles),
>>> +                     args->count_handles,
>>> +                     &syncobjs);
>>> +    if (ret < 0)
>>> +        return ret;
>>> +
>>> +    for (i = 0; i < args->count_handles; i++) {
>>> +        copy_to_user(&points[i], &syncobjs[i]->sideband_payload, 
>>> sizeof(uint64_t));
>>> +        ret = ret ? -EFAULT : 0;
>>> +        if (ret)
>>> +            break;
>>> +    }
>>> +
>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void *data,
>>> +                   struct drm_file *file_private)
>>> +{
>>> +    struct drm_syncobj_timeline_array *args = data;
>>> +    struct drm_syncobj **syncobjs;
>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>> +    uint32_t i;
>>> +    int ret;
>>> +
>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>> +        return -EOPNOTSUPP;
>>> +
>>> +    if (args->pad != 0)
>>> +        return -EINVAL;
>>> +
>>> +    if (args->count_handles == 0)
>>> +        return -EINVAL;
>>> +
>>> +    ret = drm_syncobj_array_find(file_private,
>>> +                     u64_to_user_ptr(args->handles),
>>> +                     args->count_handles,
>>> +                     &syncobjs);
>>> +    if (ret < 0)
>>> +        return ret;
>>> +
>>> +    for (i = 0; i < args->count_handles; i++) {
>>> +        copy_from_user(&syncobjs[i]->sideband_payload, &points[i], 
>>> sizeof(uint64_t));
>>> +        ret = ret ? -EFAULT : 0;
>>> +        if (ret)
>>> +            break;
>>> +    }
>>> +
>>>        drm_syncobj_array_free(syncobjs, args->count_handles);
>>>           return ret;
>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>> index 6cf7243a1dc5..b587b8e07547 100644
>>> --- a/include/drm/drm_syncobj.h
>>> +++ b/include/drm/drm_syncobj.h
>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>>         * @file: A file backing for this syncobj.
>>>         */
>>>        struct file *file;
>>> +    /**
>>> +     * @sideband_payload: A 64bit side band payload.
>>> +     *
>>> +     * We use the sideband payload value to wait on binary syncobj 
>>> fences
>>> +     * to materialize. It is a reservation mechanism for the 
>>> signaler to
>>> +     * express that at some point in the future a dma fence with 
>>> the same
>>> +     * seqno will be put into the syncobj.
>>> +     */
>>> +    u64 sideband_payload;
>>>    };
>>>       void drm_syncobj_free(struct kref *kref);
>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>>> --- a/include/uapi/drm/drm.h
>>> +++ b/include/uapi/drm/drm.h
>>> @@ -946,6 +946,8 @@ extern "C" {
>>>    #define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct 
>>> drm_syncobj_timeline_array)
>>>    #define DRM_IOCTL_SYNCOBJ_TRANSFER    DRM_IOWR(0xCC, struct 
>>> drm_syncobj_transfer)
>>>    #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL    DRM_IOWR(0xCD, 
>>> struct drm_syncobj_timeline_array)
>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND    DRM_IOR(0xCE, struct 
>>> drm_syncobj_timeline_array)
>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND    DRM_IOWR(0xCF, struct 
>>> drm_syncobj_timeline_array)
>>>       /**
>>>     * Device specific ioctls should only be in their respective headers
>
>
Lionel Landwerlin Aug. 8, 2019, 2:01 p.m. UTC | #4
On 08/08/2019 16:42, Chunming Zhou wrote:
> No, I just see your comment "The next vkQueueSubmit()
> waiting on a the syncobj will read the sideband payload and wait for a
> fence chain element with a seqno superior or equal to the sideband
> payload value to be added into the fence chain and use that fence to
> trigger the submission on the kernel driver. ".


That was meant to say wait on the chain fence to reach the sideband 
payload value.

It a bit confusing but I can't see any other way to word it.


> In that, you mentioned wait for sideband.
> So I want to know we how to use that, maybe I miss something in previous
> discussing thread.


In QueueSubmit(), we start by reading the sideband payloads : 
https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655

Then build everything for the submission and hand it over to the 
submission thread.

Instead of the just waiting on the timeline semaphore values, we also 
wait on the binary semaphore sideband payload values.

Finally before exiting the QueueSubmit() call, we bump the sideband 
payloads of all the binary semaphores have have been signaled : 
https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806


Whoever calls QueueSubmit() after that will pickup the new sideband 
payload values to wait on.


-Lionel



>
> -DAvid
>
>
> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
>> Interesting question :)
>>
>> I didn't see any usecase for that.
>> This sideband payload value is used for a wait so waiting on the wait
>> value for another wait is bit meta :)
>>
>> Do you have a use case for that?
>>
>> -Lionel
>>
>> On 08/08/2019 16:23, Chunming Zhou wrote:
>>> The propursal is fine with me.
>>>
>>> one question:
>>>
>>> how to wait sideband payload? Following patch will show that?
>>>
>>> -David
>>>
>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>>>> The Vulkan timeline semaphores allow signaling to happen on the point
>>>> of the timeline without all of the its dependencies to be created.
>>>>
>>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on top of
>>>> the Linux kernel are using a thread to wait on the dependencies of a
>>>> given point to materialize and delay actual submission to the kernel
>>>> driver until the wait completes.
>>>>
>>>> If a binary semaphore is submitted for signaling along the side of a
>>>> timeline semaphore waiting for completion that means that the drm
>>>> syncobj associated with that binary semaphore will not have a DMA
>>>> fence associated with it by the time vkQueueSubmit() returns. This and
>>>> the fact that a binary semaphore can be signaled and unsignaled as
>>>> before its DMA fences materialize mean that we cannot just rely on the
>>>> fence within the syncobj but we also need a sideband payload verifying
>>>> that the fence in the syncobj matches the last submission from the
>>>> Vulkan API point of view.
>>>>
>>>> This change adds a sideband payload that is incremented with signaled
>>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>>>> waiting on a the syncobj will read the sideband payload and wait for a
>>>> fence chain element with a seqno superior or equal to the sideband
>>>> payload value to be added into the fence chain and use that fence to
>>>> trigger the submission on the kernel driver.
>>>>
>>>> v2: Use a separate ioctl to get/set the sideband value (Christian)
>>>>
>>>> v3: Use 2 ioctls for get/set (Christian)
>>>>
>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>>>> ---
>>>>     drivers/gpu/drm/drm_internal.h |  4 ++
>>>>     drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>>>     drivers/gpu/drm/drm_syncobj.c  | 79
>>>> +++++++++++++++++++++++++++++++++-
>>>>     include/drm/drm_syncobj.h      |  9 ++++
>>>>     include/uapi/drm/drm.h         |  2 +
>>>>     5 files changed, 98 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>>> b/drivers/gpu/drm/drm_internal.h
>>>> index 51a2055c8f18..0c9736199df0 100644
>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct
>>>> drm_device *dev, void *data,
>>>>                           struct drm_file *file_private);
>>>>     int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>>                     struct drm_file *file_private);
>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void *data,
>>>> +                   struct drm_file *file_private);
>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void *data,
>>>> +                   struct drm_file *file_private);
>>>>        /* drm_framebuffer.c */
>>>>     void drm_framebuffer_print_info(struct drm_printer *p, unsigned
>>>> int indent,
>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
>>>> index f675a3bb2c88..48500bf62801 100644
>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc drm_ioctls[]
>>>> = {
>>>>                   DRM_RENDER_ALLOW),
>>>>         DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
>>>>                   DRM_RENDER_ALLOW),
>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
>>>> drm_syncobj_get_sideband_ioctl,
>>>> +              DRM_RENDER_ALLOW),
>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
>>>> drm_syncobj_set_sideband_ioctl,
>>>> +              DRM_RENDER_ALLOW),
>>>> +
>>>>         DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>>> drm_crtc_get_sequence_ioctl, 0),
>>>>         DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>>>> drm_crtc_queue_sequence_ioctl, 0),
>>>>         DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>> index b927e482e554..c90ef20b9242 100644
>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device
>>>> *dev, void *data,
>>>>         if (ret < 0)
>>>>             return ret;
>>>>     -    for (i = 0; i < args->count_handles; i++)
>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>             drm_syncobj_replace_fence(syncobjs[i], NULL);
>>>> +        syncobjs[i]->sideband_payload = 0;
>>>> +    }
>>>>            drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>     @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct
>>>> drm_device *dev, void *data,
>>>>             if (ret)
>>>>                 break;
>>>>         }
>>>> +
>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void *data,
>>>> +                   struct drm_file *file_private)
>>>> +{
>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>> +    struct drm_syncobj **syncobjs;
>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>> +    uint32_t i;
>>>> +    int ret;
>>>> +
>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>> +        return -EOPNOTSUPP;
>>>> +
>>>> +    if (args->pad != 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (args->count_handles == 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    ret = drm_syncobj_array_find(file_private,
>>>> +                     u64_to_user_ptr(args->handles),
>>>> +                     args->count_handles,
>>>> +                     &syncobjs);
>>>> +    if (ret < 0)
>>>> +        return ret;
>>>> +
>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>> +        copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
>>>> sizeof(uint64_t));
>>>> +        ret = ret ? -EFAULT : 0;
>>>> +        if (ret)
>>>> +            break;
>>>> +    }
>>>> +
>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void *data,
>>>> +                   struct drm_file *file_private)
>>>> +{
>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>> +    struct drm_syncobj **syncobjs;
>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>> +    uint32_t i;
>>>> +    int ret;
>>>> +
>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>> +        return -EOPNOTSUPP;
>>>> +
>>>> +    if (args->pad != 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    if (args->count_handles == 0)
>>>> +        return -EINVAL;
>>>> +
>>>> +    ret = drm_syncobj_array_find(file_private,
>>>> +                     u64_to_user_ptr(args->handles),
>>>> +                     args->count_handles,
>>>> +                     &syncobjs);
>>>> +    if (ret < 0)
>>>> +        return ret;
>>>> +
>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>> +        copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
>>>> sizeof(uint64_t));
>>>> +        ret = ret ? -EFAULT : 0;
>>>> +        if (ret)
>>>> +            break;
>>>> +    }
>>>> +
>>>>         drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>            return ret;
>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>> index 6cf7243a1dc5..b587b8e07547 100644
>>>> --- a/include/drm/drm_syncobj.h
>>>> +++ b/include/drm/drm_syncobj.h
>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>>>          * @file: A file backing for this syncobj.
>>>>          */
>>>>         struct file *file;
>>>> +    /**
>>>> +     * @sideband_payload: A 64bit side band payload.
>>>> +     *
>>>> +     * We use the sideband payload value to wait on binary syncobj
>>>> fences
>>>> +     * to materialize. It is a reservation mechanism for the
>>>> signaler to
>>>> +     * express that at some point in the future a dma fence with
>>>> the same
>>>> +     * seqno will be put into the syncobj.
>>>> +     */
>>>> +    u64 sideband_payload;
>>>>     };
>>>>        void drm_syncobj_free(struct kref *kref);
>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>>>> --- a/include/uapi/drm/drm.h
>>>> +++ b/include/uapi/drm/drm.h
>>>> @@ -946,6 +946,8 @@ extern "C" {
>>>>     #define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct
>>>> drm_syncobj_timeline_array)
>>>>     #define DRM_IOCTL_SYNCOBJ_TRANSFER    DRM_IOWR(0xCC, struct
>>>> drm_syncobj_transfer)
>>>>     #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL    DRM_IOWR(0xCD,
>>>> struct drm_syncobj_timeline_array)
>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND    DRM_IOR(0xCE, struct
>>>> drm_syncobj_timeline_array)
>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND    DRM_IOWR(0xCF, struct
>>>> drm_syncobj_timeline_array)
>>>>        /**
>>>>      * Device specific ioctls should only be in their respective headers
>>
Lionel Landwerlin Aug. 8, 2019, 2:04 p.m. UTC | #5
On 08/08/2019 17:01, Lionel Landwerlin wrote:
> On 08/08/2019 16:42, Chunming Zhou wrote:
>> No, I just see your comment "The next vkQueueSubmit()
>> waiting on a the syncobj will read the sideband payload and wait for a
>> fence chain element with a seqno superior or equal to the sideband
>> payload value to be added into the fence chain and use that fence to
>> trigger the submission on the kernel driver. ".
>
>
> That was meant to say wait on the chain fence to reach the sideband 
> payload value.
>
> It a bit confusing but I can't see any other way to word it.
>
>
>> In that, you mentioned wait for sideband.
>> So I want to know we how to use that, maybe I miss something in previous
>> discussing thread.
>
>
> In QueueSubmit(), we start by reading the sideband payloads : 
> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655


We only need to read/write sideband payloads for binary semaphores. The 
behavior for timeline semaphores remains unchanged. :)


-Lionel


>
> Then build everything for the submission and hand it over to the 
> submission thread.
>
> Instead of the just waiting on the timeline semaphore values, we also 
> wait on the binary semaphore sideband payload values.
>
> Finally before exiting the QueueSubmit() call, we bump the sideband 
> payloads of all the binary semaphores have have been signaled : 
> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806
>
>
> Whoever calls QueueSubmit() after that will pickup the new sideband 
> payload values to wait on.
>
>
> -Lionel
>
>
>
>>
>> -DAvid
>>
>>
>> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
>>> Interesting question :)
>>>
>>> I didn't see any usecase for that.
>>> This sideband payload value is used for a wait so waiting on the wait
>>> value for another wait is bit meta :)
>>>
>>> Do you have a use case for that?
>>>
>>> -Lionel
>>>
>>> On 08/08/2019 16:23, Chunming Zhou wrote:
>>>> The propursal is fine with me.
>>>>
>>>> one question:
>>>>
>>>> how to wait sideband payload? Following patch will show that?
>>>>
>>>> -David
>>>>
>>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>>>>> The Vulkan timeline semaphores allow signaling to happen on the point
>>>>> of the timeline without all of the its dependencies to be created.
>>>>>
>>>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on 
>>>>> top of
>>>>> the Linux kernel are using a thread to wait on the dependencies of a
>>>>> given point to materialize and delay actual submission to the kernel
>>>>> driver until the wait completes.
>>>>>
>>>>> If a binary semaphore is submitted for signaling along the side of a
>>>>> timeline semaphore waiting for completion that means that the drm
>>>>> syncobj associated with that binary semaphore will not have a DMA
>>>>> fence associated with it by the time vkQueueSubmit() returns. This 
>>>>> and
>>>>> the fact that a binary semaphore can be signaled and unsignaled as
>>>>> before its DMA fences materialize mean that we cannot just rely on 
>>>>> the
>>>>> fence within the syncobj but we also need a sideband payload 
>>>>> verifying
>>>>> that the fence in the syncobj matches the last submission from the
>>>>> Vulkan API point of view.
>>>>>
>>>>> This change adds a sideband payload that is incremented with signaled
>>>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>>>>> waiting on a the syncobj will read the sideband payload and wait 
>>>>> for a
>>>>> fence chain element with a seqno superior or equal to the sideband
>>>>> payload value to be added into the fence chain and use that fence to
>>>>> trigger the submission on the kernel driver.
>>>>>
>>>>> v2: Use a separate ioctl to get/set the sideband value (Christian)
>>>>>
>>>>> v3: Use 2 ioctls for get/set (Christian)
>>>>>
>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>>>>> ---
>>>>>     drivers/gpu/drm/drm_internal.h |  4 ++
>>>>>     drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>>>>     drivers/gpu/drm/drm_syncobj.c  | 79
>>>>> +++++++++++++++++++++++++++++++++-
>>>>>     include/drm/drm_syncobj.h      |  9 ++++
>>>>>     include/uapi/drm/drm.h         |  2 +
>>>>>     5 files changed, 98 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>>>> b/drivers/gpu/drm/drm_internal.h
>>>>> index 51a2055c8f18..0c9736199df0 100644
>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct
>>>>> drm_device *dev, void *data,
>>>>>                           struct drm_file *file_private);
>>>>>     int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>>>                     struct drm_file *file_private);
>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void 
>>>>> *data,
>>>>> +                   struct drm_file *file_private);
>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void 
>>>>> *data,
>>>>> +                   struct drm_file *file_private);
>>>>>        /* drm_framebuffer.c */
>>>>>     void drm_framebuffer_print_info(struct drm_printer *p, unsigned
>>>>> int indent,
>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c 
>>>>> b/drivers/gpu/drm/drm_ioctl.c
>>>>> index f675a3bb2c88..48500bf62801 100644
>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc drm_ioctls[]
>>>>> = {
>>>>>                   DRM_RENDER_ALLOW),
>>>>>         DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, 
>>>>> drm_syncobj_query_ioctl,
>>>>>                   DRM_RENDER_ALLOW),
>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
>>>>> drm_syncobj_get_sideband_ioctl,
>>>>> +              DRM_RENDER_ALLOW),
>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
>>>>> drm_syncobj_set_sideband_ioctl,
>>>>> +              DRM_RENDER_ALLOW),
>>>>> +
>>>>>         DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>>>> drm_crtc_get_sequence_ioctl, 0),
>>>>>         DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>>>>> drm_crtc_queue_sequence_ioctl, 0),
>>>>>         DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>>> index b927e482e554..c90ef20b9242 100644
>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device
>>>>> *dev, void *data,
>>>>>         if (ret < 0)
>>>>>             return ret;
>>>>>     -    for (i = 0; i < args->count_handles; i++)
>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>             drm_syncobj_replace_fence(syncobjs[i], NULL);
>>>>> +        syncobjs[i]->sideband_payload = 0;
>>>>> +    }
>>>>>            drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>     @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct
>>>>> drm_device *dev, void *data,
>>>>>             if (ret)
>>>>>                 break;
>>>>>         }
>>>>> +
>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void 
>>>>> *data,
>>>>> +                   struct drm_file *file_private)
>>>>> +{
>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>> +    struct drm_syncobj **syncobjs;
>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>> +    uint32_t i;
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>> +        return -EOPNOTSUPP;
>>>>> +
>>>>> +    if (args->pad != 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    if (args->count_handles == 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>> +                     u64_to_user_ptr(args->handles),
>>>>> +                     args->count_handles,
>>>>> +                     &syncobjs);
>>>>> +    if (ret < 0)
>>>>> +        return ret;
>>>>> +
>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>> +        copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
>>>>> sizeof(uint64_t));
>>>>> +        ret = ret ? -EFAULT : 0;
>>>>> +        if (ret)
>>>>> +            break;
>>>>> +    }
>>>>> +
>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void 
>>>>> *data,
>>>>> +                   struct drm_file *file_private)
>>>>> +{
>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>> +    struct drm_syncobj **syncobjs;
>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>> +    uint32_t i;
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>> +        return -EOPNOTSUPP;
>>>>> +
>>>>> +    if (args->pad != 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    if (args->count_handles == 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>> +                     u64_to_user_ptr(args->handles),
>>>>> +                     args->count_handles,
>>>>> +                     &syncobjs);
>>>>> +    if (ret < 0)
>>>>> +        return ret;
>>>>> +
>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>> + copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
>>>>> sizeof(uint64_t));
>>>>> +        ret = ret ? -EFAULT : 0;
>>>>> +        if (ret)
>>>>> +            break;
>>>>> +    }
>>>>> +
>>>>>         drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>            return ret;
>>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>>> index 6cf7243a1dc5..b587b8e07547 100644
>>>>> --- a/include/drm/drm_syncobj.h
>>>>> +++ b/include/drm/drm_syncobj.h
>>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>>>>          * @file: A file backing for this syncobj.
>>>>>          */
>>>>>         struct file *file;
>>>>> +    /**
>>>>> +     * @sideband_payload: A 64bit side band payload.
>>>>> +     *
>>>>> +     * We use the sideband payload value to wait on binary syncobj
>>>>> fences
>>>>> +     * to materialize. It is a reservation mechanism for the
>>>>> signaler to
>>>>> +     * express that at some point in the future a dma fence with
>>>>> the same
>>>>> +     * seqno will be put into the syncobj.
>>>>> +     */
>>>>> +    u64 sideband_payload;
>>>>>     };
>>>>>        void drm_syncobj_free(struct kref *kref);
>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>>>>> --- a/include/uapi/drm/drm.h
>>>>> +++ b/include/uapi/drm/drm.h
>>>>> @@ -946,6 +946,8 @@ extern "C" {
>>>>>     #define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct
>>>>> drm_syncobj_timeline_array)
>>>>>     #define DRM_IOCTL_SYNCOBJ_TRANSFER    DRM_IOWR(0xCC, struct
>>>>> drm_syncobj_transfer)
>>>>>     #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD,
>>>>> struct drm_syncobj_timeline_array)
>>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND    DRM_IOR(0xCE, struct
>>>>> drm_syncobj_timeline_array)
>>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND    DRM_IOWR(0xCF, struct
>>>>> drm_syncobj_timeline_array)
>>>>>        /**
>>>>>      * Device specific ioctls should only be in their respective 
>>>>> headers
>>>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Chunming Zhou Aug. 8, 2019, 2:16 p.m. UTC | #6
在 2019/8/8 22:01, Lionel Landwerlin 写道:
> On 08/08/2019 16:42, Chunming Zhou wrote:
>> No, I just see your comment "The next vkQueueSubmit()
>> waiting on a the syncobj will read the sideband payload and wait for a
>> fence chain element with a seqno superior or equal to the sideband
>> payload value to be added into the fence chain and use that fence to
>> trigger the submission on the kernel driver. ".
>
>
> That was meant to say wait on the chain fence to reach the sideband 
> payload value.
>
> It a bit confusing but I can't see any other way to word it.
>
>
>> In that, you mentioned wait for sideband.
>> So I want to know we how to use that, maybe I miss something in previous
>> discussing thread.
>
>
> In QueueSubmit(), we start by reading the sideband payloads : 
> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655
>
> Then build everything for the submission and hand it over to the 
> submission thread.
>
> Instead of the just waiting on the timeline semaphore values, we also 
> wait on the binary semaphore sideband payload values.

Waiting on timeline values is when finding fence in kernel.

But I don't see how to wait/block in kernel when finding fence for 
binary sideband payload  values.


-David

>
> Finally before exiting the QueueSubmit() call, we bump the sideband 
> payloads of all the binary semaphores have have been signaled : 
> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806
>
>
> Whoever calls QueueSubmit() after that will pickup the new sideband 
> payload values to wait on.
>
>
> -Lionel
>
>
>
>>
>> -DAvid
>>
>>
>> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
>>> Interesting question :)
>>>
>>> I didn't see any usecase for that.
>>> This sideband payload value is used for a wait so waiting on the wait
>>> value for another wait is bit meta :)
>>>
>>> Do you have a use case for that?
>>>
>>> -Lionel
>>>
>>> On 08/08/2019 16:23, Chunming Zhou wrote:
>>>> The propursal is fine with me.
>>>>
>>>> one question:
>>>>
>>>> how to wait sideband payload? Following patch will show that?
>>>>
>>>> -David
>>>>
>>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>>>>> The Vulkan timeline semaphores allow signaling to happen on the point
>>>>> of the timeline without all of the its dependencies to be created.
>>>>>
>>>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on 
>>>>> top of
>>>>> the Linux kernel are using a thread to wait on the dependencies of a
>>>>> given point to materialize and delay actual submission to the kernel
>>>>> driver until the wait completes.
>>>>>
>>>>> If a binary semaphore is submitted for signaling along the side of a
>>>>> timeline semaphore waiting for completion that means that the drm
>>>>> syncobj associated with that binary semaphore will not have a DMA
>>>>> fence associated with it by the time vkQueueSubmit() returns. This 
>>>>> and
>>>>> the fact that a binary semaphore can be signaled and unsignaled as
>>>>> before its DMA fences materialize mean that we cannot just rely on 
>>>>> the
>>>>> fence within the syncobj but we also need a sideband payload 
>>>>> verifying
>>>>> that the fence in the syncobj matches the last submission from the
>>>>> Vulkan API point of view.
>>>>>
>>>>> This change adds a sideband payload that is incremented with signaled
>>>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>>>>> waiting on a the syncobj will read the sideband payload and wait 
>>>>> for a
>>>>> fence chain element with a seqno superior or equal to the sideband
>>>>> payload value to be added into the fence chain and use that fence to
>>>>> trigger the submission on the kernel driver.
>>>>>
>>>>> v2: Use a separate ioctl to get/set the sideband value (Christian)
>>>>>
>>>>> v3: Use 2 ioctls for get/set (Christian)
>>>>>
>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>>>>> ---
>>>>>     drivers/gpu/drm/drm_internal.h |  4 ++
>>>>>     drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>>>>     drivers/gpu/drm/drm_syncobj.c  | 79
>>>>> +++++++++++++++++++++++++++++++++-
>>>>>     include/drm/drm_syncobj.h      |  9 ++++
>>>>>     include/uapi/drm/drm.h         |  2 +
>>>>>     5 files changed, 98 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>>>> b/drivers/gpu/drm/drm_internal.h
>>>>> index 51a2055c8f18..0c9736199df0 100644
>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct
>>>>> drm_device *dev, void *data,
>>>>>                           struct drm_file *file_private);
>>>>>     int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>>>                     struct drm_file *file_private);
>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void 
>>>>> *data,
>>>>> +                   struct drm_file *file_private);
>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void 
>>>>> *data,
>>>>> +                   struct drm_file *file_private);
>>>>>        /* drm_framebuffer.c */
>>>>>     void drm_framebuffer_print_info(struct drm_printer *p, unsigned
>>>>> int indent,
>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c 
>>>>> b/drivers/gpu/drm/drm_ioctl.c
>>>>> index f675a3bb2c88..48500bf62801 100644
>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc drm_ioctls[]
>>>>> = {
>>>>>                   DRM_RENDER_ALLOW),
>>>>>         DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, 
>>>>> drm_syncobj_query_ioctl,
>>>>>                   DRM_RENDER_ALLOW),
>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
>>>>> drm_syncobj_get_sideband_ioctl,
>>>>> +              DRM_RENDER_ALLOW),
>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
>>>>> drm_syncobj_set_sideband_ioctl,
>>>>> +              DRM_RENDER_ALLOW),
>>>>> +
>>>>>         DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>>>> drm_crtc_get_sequence_ioctl, 0),
>>>>>         DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>>>>> drm_crtc_queue_sequence_ioctl, 0),
>>>>>         DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>>> index b927e482e554..c90ef20b9242 100644
>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device
>>>>> *dev, void *data,
>>>>>         if (ret < 0)
>>>>>             return ret;
>>>>>     -    for (i = 0; i < args->count_handles; i++)
>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>             drm_syncobj_replace_fence(syncobjs[i], NULL);
>>>>> +        syncobjs[i]->sideband_payload = 0;
>>>>> +    }
>>>>>            drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>     @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct
>>>>> drm_device *dev, void *data,
>>>>>             if (ret)
>>>>>                 break;
>>>>>         }
>>>>> +
>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void 
>>>>> *data,
>>>>> +                   struct drm_file *file_private)
>>>>> +{
>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>> +    struct drm_syncobj **syncobjs;
>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>> +    uint32_t i;
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>> +        return -EOPNOTSUPP;
>>>>> +
>>>>> +    if (args->pad != 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    if (args->count_handles == 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>> +                     u64_to_user_ptr(args->handles),
>>>>> +                     args->count_handles,
>>>>> +                     &syncobjs);
>>>>> +    if (ret < 0)
>>>>> +        return ret;
>>>>> +
>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>> +        copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
>>>>> sizeof(uint64_t));
>>>>> +        ret = ret ? -EFAULT : 0;
>>>>> +        if (ret)
>>>>> +            break;
>>>>> +    }
>>>>> +
>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void 
>>>>> *data,
>>>>> +                   struct drm_file *file_private)
>>>>> +{
>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>> +    struct drm_syncobj **syncobjs;
>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>> +    uint32_t i;
>>>>> +    int ret;
>>>>> +
>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>> +        return -EOPNOTSUPP;
>>>>> +
>>>>> +    if (args->pad != 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    if (args->count_handles == 0)
>>>>> +        return -EINVAL;
>>>>> +
>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>> +                     u64_to_user_ptr(args->handles),
>>>>> +                     args->count_handles,
>>>>> +                     &syncobjs);
>>>>> +    if (ret < 0)
>>>>> +        return ret;
>>>>> +
>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>> + copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
>>>>> sizeof(uint64_t));
>>>>> +        ret = ret ? -EFAULT : 0;
>>>>> +        if (ret)
>>>>> +            break;
>>>>> +    }
>>>>> +
>>>>>         drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>            return ret;
>>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>>> index 6cf7243a1dc5..b587b8e07547 100644
>>>>> --- a/include/drm/drm_syncobj.h
>>>>> +++ b/include/drm/drm_syncobj.h
>>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>>>>          * @file: A file backing for this syncobj.
>>>>>          */
>>>>>         struct file *file;
>>>>> +    /**
>>>>> +     * @sideband_payload: A 64bit side band payload.
>>>>> +     *
>>>>> +     * We use the sideband payload value to wait on binary syncobj
>>>>> fences
>>>>> +     * to materialize. It is a reservation mechanism for the
>>>>> signaler to
>>>>> +     * express that at some point in the future a dma fence with
>>>>> the same
>>>>> +     * seqno will be put into the syncobj.
>>>>> +     */
>>>>> +    u64 sideband_payload;
>>>>>     };
>>>>>        void drm_syncobj_free(struct kref *kref);
>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>>>>> --- a/include/uapi/drm/drm.h
>>>>> +++ b/include/uapi/drm/drm.h
>>>>> @@ -946,6 +946,8 @@ extern "C" {
>>>>>     #define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct
>>>>> drm_syncobj_timeline_array)
>>>>>     #define DRM_IOCTL_SYNCOBJ_TRANSFER    DRM_IOWR(0xCC, struct
>>>>> drm_syncobj_transfer)
>>>>>     #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD,
>>>>> struct drm_syncobj_timeline_array)
>>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND    DRM_IOR(0xCE, struct
>>>>> drm_syncobj_timeline_array)
>>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND    DRM_IOWR(0xCF, struct
>>>>> drm_syncobj_timeline_array)
>>>>>        /**
>>>>>      * Device specific ioctls should only be in their respective 
>>>>> headers
>>>
>
Lionel Landwerlin Aug. 8, 2019, 2:34 p.m. UTC | #7
On 08/08/2019 17:16, Chunming Zhou wrote:
> 在 2019/8/8 22:01, Lionel Landwerlin 写道:
>> On 08/08/2019 16:42, Chunming Zhou wrote:
>>> No, I just see your comment "The next vkQueueSubmit()
>>> waiting on a the syncobj will read the sideband payload and wait for a
>>> fence chain element with a seqno superior or equal to the sideband
>>> payload value to be added into the fence chain and use that fence to
>>> trigger the submission on the kernel driver. ".
>>
>> That was meant to say wait on the chain fence to reach the sideband
>> payload value.
>>
>> It a bit confusing but I can't see any other way to word it.
>>
>>
>>> In that, you mentioned wait for sideband.
>>> So I want to know we how to use that, maybe I miss something in previous
>>> discussing thread.
>>
>> In QueueSubmit(), we start by reading the sideband payloads :
>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655
>>
>> Then build everything for the submission and hand it over to the
>> submission thread.
>>
>> Instead of the just waiting on the timeline semaphore values, we also
>> wait on the binary semaphore sideband payload values.
> Waiting on timeline values is when finding fence in kernel.


Hmm aren't you waiting in a thread in userspace?


>
> But I don't see how to wait/block in kernel when finding fence for
> binary sideband payload  values.


Essentially our driver now handles timelines & binary semaphore using 
dma-fence-chain in both cases.

Only with timelines we take the values submitted by the vulkan application.

The binary semaphore auto increment on vkQueueSubmit() and that is 
managed by the userspace driver.


-Lionel


>
>
> -David
>
>> Finally before exiting the QueueSubmit() call, we bump the sideband
>> payloads of all the binary semaphores have have been signaled :
>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806
>>
>>
>> Whoever calls QueueSubmit() after that will pickup the new sideband
>> payload values to wait on.
>>
>>
>> -Lionel
>>
>>
>>
>>> -DAvid
>>>
>>>
>>> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
>>>> Interesting question :)
>>>>
>>>> I didn't see any usecase for that.
>>>> This sideband payload value is used for a wait so waiting on the wait
>>>> value for another wait is bit meta :)
>>>>
>>>> Do you have a use case for that?
>>>>
>>>> -Lionel
>>>>
>>>> On 08/08/2019 16:23, Chunming Zhou wrote:
>>>>> The propursal is fine with me.
>>>>>
>>>>> one question:
>>>>>
>>>>> how to wait sideband payload? Following patch will show that?
>>>>>
>>>>> -David
>>>>>
>>>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>>>>>> The Vulkan timeline semaphores allow signaling to happen on the point
>>>>>> of the timeline without all of the its dependencies to be created.
>>>>>>
>>>>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on
>>>>>> top of
>>>>>> the Linux kernel are using a thread to wait on the dependencies of a
>>>>>> given point to materialize and delay actual submission to the kernel
>>>>>> driver until the wait completes.
>>>>>>
>>>>>> If a binary semaphore is submitted for signaling along the side of a
>>>>>> timeline semaphore waiting for completion that means that the drm
>>>>>> syncobj associated with that binary semaphore will not have a DMA
>>>>>> fence associated with it by the time vkQueueSubmit() returns. This
>>>>>> and
>>>>>> the fact that a binary semaphore can be signaled and unsignaled as
>>>>>> before its DMA fences materialize mean that we cannot just rely on
>>>>>> the
>>>>>> fence within the syncobj but we also need a sideband payload
>>>>>> verifying
>>>>>> that the fence in the syncobj matches the last submission from the
>>>>>> Vulkan API point of view.
>>>>>>
>>>>>> This change adds a sideband payload that is incremented with signaled
>>>>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>>>>>> waiting on a the syncobj will read the sideband payload and wait
>>>>>> for a
>>>>>> fence chain element with a seqno superior or equal to the sideband
>>>>>> payload value to be added into the fence chain and use that fence to
>>>>>> trigger the submission on the kernel driver.
>>>>>>
>>>>>> v2: Use a separate ioctl to get/set the sideband value (Christian)
>>>>>>
>>>>>> v3: Use 2 ioctls for get/set (Christian)
>>>>>>
>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>>>>>> ---
>>>>>>      drivers/gpu/drm/drm_internal.h |  4 ++
>>>>>>      drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>>>>>      drivers/gpu/drm/drm_syncobj.c  | 79
>>>>>> +++++++++++++++++++++++++++++++++-
>>>>>>      include/drm/drm_syncobj.h      |  9 ++++
>>>>>>      include/uapi/drm/drm.h         |  2 +
>>>>>>      5 files changed, 98 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>>>>> b/drivers/gpu/drm/drm_internal.h
>>>>>> index 51a2055c8f18..0c9736199df0 100644
>>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct
>>>>>> drm_device *dev, void *data,
>>>>>>                            struct drm_file *file_private);
>>>>>>      int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
>>>>>>                      struct drm_file *file_private);
>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>>>>>> *data,
>>>>>> +                   struct drm_file *file_private);
>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>>>>>> *data,
>>>>>> +                   struct drm_file *file_private);
>>>>>>         /* drm_framebuffer.c */
>>>>>>      void drm_framebuffer_print_info(struct drm_printer *p, unsigned
>>>>>> int indent,
>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c
>>>>>> b/drivers/gpu/drm/drm_ioctl.c
>>>>>> index f675a3bb2c88..48500bf62801 100644
>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc drm_ioctls[]
>>>>>> = {
>>>>>>                    DRM_RENDER_ALLOW),
>>>>>>          DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY,
>>>>>> drm_syncobj_query_ioctl,
>>>>>>                    DRM_RENDER_ALLOW),
>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
>>>>>> drm_syncobj_get_sideband_ioctl,
>>>>>> +              DRM_RENDER_ALLOW),
>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
>>>>>> drm_syncobj_set_sideband_ioctl,
>>>>>> +              DRM_RENDER_ALLOW),
>>>>>> +
>>>>>>          DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>>>>> drm_crtc_get_sequence_ioctl, 0),
>>>>>>          DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>>>>>> drm_crtc_queue_sequence_ioctl, 0),
>>>>>>          DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>>>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>>>> index b927e482e554..c90ef20b9242 100644
>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device
>>>>>> *dev, void *data,
>>>>>>          if (ret < 0)
>>>>>>              return ret;
>>>>>>      -    for (i = 0; i < args->count_handles; i++)
>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>              drm_syncobj_replace_fence(syncobjs[i], NULL);
>>>>>> +        syncobjs[i]->sideband_payload = 0;
>>>>>> +    }
>>>>>>             drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>      @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct
>>>>>> drm_device *dev, void *data,
>>>>>>              if (ret)
>>>>>>                  break;
>>>>>>          }
>>>>>> +
>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>> +
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>>>>>> *data,
>>>>>> +                   struct drm_file *file_private)
>>>>>> +{
>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>>> +    struct drm_syncobj **syncobjs;
>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>>> +    uint32_t i;
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>>> +        return -EOPNOTSUPP;
>>>>>> +
>>>>>> +    if (args->pad != 0)
>>>>>> +        return -EINVAL;
>>>>>> +
>>>>>> +    if (args->count_handles == 0)
>>>>>> +        return -EINVAL;
>>>>>> +
>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>>> +                     u64_to_user_ptr(args->handles),
>>>>>> +                     args->count_handles,
>>>>>> +                     &syncobjs);
>>>>>> +    if (ret < 0)
>>>>>> +        return ret;
>>>>>> +
>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>> +        copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
>>>>>> sizeof(uint64_t));
>>>>>> +        ret = ret ? -EFAULT : 0;
>>>>>> +        if (ret)
>>>>>> +            break;
>>>>>> +    }
>>>>>> +
>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>> +
>>>>>> +    return ret;
>>>>>> +}
>>>>>> +
>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>>>>>> *data,
>>>>>> +                   struct drm_file *file_private)
>>>>>> +{
>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>>> +    struct drm_syncobj **syncobjs;
>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>>> +    uint32_t i;
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>>> +        return -EOPNOTSUPP;
>>>>>> +
>>>>>> +    if (args->pad != 0)
>>>>>> +        return -EINVAL;
>>>>>> +
>>>>>> +    if (args->count_handles == 0)
>>>>>> +        return -EINVAL;
>>>>>> +
>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>>> +                     u64_to_user_ptr(args->handles),
>>>>>> +                     args->count_handles,
>>>>>> +                     &syncobjs);
>>>>>> +    if (ret < 0)
>>>>>> +        return ret;
>>>>>> +
>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>> + copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
>>>>>> sizeof(uint64_t));
>>>>>> +        ret = ret ? -EFAULT : 0;
>>>>>> +        if (ret)
>>>>>> +            break;
>>>>>> +    }
>>>>>> +
>>>>>>          drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>             return ret;
>>>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>>>> index 6cf7243a1dc5..b587b8e07547 100644
>>>>>> --- a/include/drm/drm_syncobj.h
>>>>>> +++ b/include/drm/drm_syncobj.h
>>>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>>>>>           * @file: A file backing for this syncobj.
>>>>>>           */
>>>>>>          struct file *file;
>>>>>> +    /**
>>>>>> +     * @sideband_payload: A 64bit side band payload.
>>>>>> +     *
>>>>>> +     * We use the sideband payload value to wait on binary syncobj
>>>>>> fences
>>>>>> +     * to materialize. It is a reservation mechanism for the
>>>>>> signaler to
>>>>>> +     * express that at some point in the future a dma fence with
>>>>>> the same
>>>>>> +     * seqno will be put into the syncobj.
>>>>>> +     */
>>>>>> +    u64 sideband_payload;
>>>>>>      };
>>>>>>         void drm_syncobj_free(struct kref *kref);
>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>>>>>> --- a/include/uapi/drm/drm.h
>>>>>> +++ b/include/uapi/drm/drm.h
>>>>>> @@ -946,6 +946,8 @@ extern "C" {
>>>>>>      #define DRM_IOCTL_SYNCOBJ_QUERY        DRM_IOWR(0xCB, struct
>>>>>> drm_syncobj_timeline_array)
>>>>>>      #define DRM_IOCTL_SYNCOBJ_TRANSFER    DRM_IOWR(0xCC, struct
>>>>>> drm_syncobj_transfer)
>>>>>>      #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD,
>>>>>> struct drm_syncobj_timeline_array)
>>>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND    DRM_IOR(0xCE, struct
>>>>>> drm_syncobj_timeline_array)
>>>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND    DRM_IOWR(0xCF, struct
>>>>>> drm_syncobj_timeline_array)
>>>>>>         /**
>>>>>>       * Device specific ioctls should only be in their respective
>>>>>> headers
Chunming Zhou Aug. 8, 2019, 2:48 p.m. UTC | #8
在 2019/8/8 22:34, Lionel Landwerlin 写道:
> On 08/08/2019 17:16, Chunming Zhou wrote:
>> 在 2019/8/8 22:01, Lionel Landwerlin 写道:
>>> On 08/08/2019 16:42, Chunming Zhou wrote:
>>>> No, I just see your comment "The next vkQueueSubmit()
>>>> waiting on a the syncobj will read the sideband payload and wait for a
>>>> fence chain element with a seqno superior or equal to the sideband
>>>> payload value to be added into the fence chain and use that fence to
>>>> trigger the submission on the kernel driver. ".
>>>
>>> That was meant to say wait on the chain fence to reach the sideband
>>> payload value.
>>>
>>> It a bit confusing but I can't see any other way to word it.
>>>
>>>
>>>> In that, you mentioned wait for sideband.
>>>> So I want to know we how to use that, maybe I miss something in 
>>>> previous
>>>> discussing thread.
>>>
>>> In QueueSubmit(), we start by reading the sideband payloads :
>>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655 
>>>
>>>
>>> Then build everything for the submission and hand it over to the
>>> submission thread.
>>>
>>> Instead of the just waiting on the timeline semaphore values, we also
>>> wait on the binary semaphore sideband payload values.
>> Waiting on timeline values is when finding fence in kernel.
>
>
> Hmm aren't you waiting in a thread in userspace?

Yes, For timeline case, we can use waitForSyncobj()..., At begin of 
QueueThread, I let it wait in cs_ioctl when drm_syncobj_find_fence.

But I still didn't get how to wait on sideband for binary Syncobj.

Ah, I see, you will compare it in your QueueThread, if sideband value is 
 >= expected, then do submission, otherwise, loop QueueThread, right?

That sounds the QueueThread will be always busy.

-David


>
>
>>
>> But I don't see how to wait/block in kernel when finding fence for
>> binary sideband payload  values.
>
>
> Essentially our driver now handles timelines & binary semaphore using 
> dma-fence-chain in both cases.



>
> Only with timelines we take the values submitted by the vulkan 
> application.


>
> The binary semaphore auto increment on vkQueueSubmit() and that is 
> managed by the userspace driver.
>
>
> -Lionel
>
>
>>
>>
>> -David
>>
>>> Finally before exiting the QueueSubmit() call, we bump the sideband
>>> payloads of all the binary semaphores have have been signaled :
>>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806 
>>>
>>>
>>>
>>> Whoever calls QueueSubmit() after that will pickup the new sideband
>>> payload values to wait on.
>>>
>>>
>>> -Lionel
>>>
>>>
>>>
>>>> -DAvid
>>>>
>>>>
>>>> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
>>>>> Interesting question :)
>>>>>
>>>>> I didn't see any usecase for that.
>>>>> This sideband payload value is used for a wait so waiting on the wait
>>>>> value for another wait is bit meta :)
>>>>>
>>>>> Do you have a use case for that?
>>>>>
>>>>> -Lionel
>>>>>
>>>>> On 08/08/2019 16:23, Chunming Zhou wrote:
>>>>>> The propursal is fine with me.
>>>>>>
>>>>>> one question:
>>>>>>
>>>>>> how to wait sideband payload? Following patch will show that?
>>>>>>
>>>>>> -David
>>>>>>
>>>>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>>>>>>> The Vulkan timeline semaphores allow signaling to happen on the 
>>>>>>> point
>>>>>>> of the timeline without all of the its dependencies to be created.
>>>>>>>
>>>>>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on
>>>>>>> top of
>>>>>>> the Linux kernel are using a thread to wait on the dependencies 
>>>>>>> of a
>>>>>>> given point to materialize and delay actual submission to the 
>>>>>>> kernel
>>>>>>> driver until the wait completes.
>>>>>>>
>>>>>>> If a binary semaphore is submitted for signaling along the side 
>>>>>>> of a
>>>>>>> timeline semaphore waiting for completion that means that the drm
>>>>>>> syncobj associated with that binary semaphore will not have a DMA
>>>>>>> fence associated with it by the time vkQueueSubmit() returns. This
>>>>>>> and
>>>>>>> the fact that a binary semaphore can be signaled and unsignaled as
>>>>>>> before its DMA fences materialize mean that we cannot just rely on
>>>>>>> the
>>>>>>> fence within the syncobj but we also need a sideband payload
>>>>>>> verifying
>>>>>>> that the fence in the syncobj matches the last submission from the
>>>>>>> Vulkan API point of view.
>>>>>>>
>>>>>>> This change adds a sideband payload that is incremented with 
>>>>>>> signaled
>>>>>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>>>>>>> waiting on a the syncobj will read the sideband payload and wait
>>>>>>> for a
>>>>>>> fence chain element with a seqno superior or equal to the sideband
>>>>>>> payload value to be added into the fence chain and use that 
>>>>>>> fence to
>>>>>>> trigger the submission on the kernel driver.
>>>>>>>
>>>>>>> v2: Use a separate ioctl to get/set the sideband value (Christian)
>>>>>>>
>>>>>>> v3: Use 2 ioctls for get/set (Christian)
>>>>>>>
>>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>>>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>>>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>>>>>>> ---
>>>>>>>      drivers/gpu/drm/drm_internal.h |  4 ++
>>>>>>>      drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>>>>>>      drivers/gpu/drm/drm_syncobj.c  | 79
>>>>>>> +++++++++++++++++++++++++++++++++-
>>>>>>>      include/drm/drm_syncobj.h      |  9 ++++
>>>>>>>      include/uapi/drm/drm.h         |  2 +
>>>>>>>      5 files changed, 98 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>>>>>> b/drivers/gpu/drm/drm_internal.h
>>>>>>> index 51a2055c8f18..0c9736199df0 100644
>>>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>>>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct
>>>>>>> drm_device *dev, void *data,
>>>>>>>                            struct drm_file *file_private);
>>>>>>>      int drm_syncobj_query_ioctl(struct drm_device *dev, void 
>>>>>>> *data,
>>>>>>>                      struct drm_file *file_private);
>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>>>>>>> *data,
>>>>>>> +                   struct drm_file *file_private);
>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>>>>>>> *data,
>>>>>>> +                   struct drm_file *file_private);
>>>>>>>         /* drm_framebuffer.c */
>>>>>>>      void drm_framebuffer_print_info(struct drm_printer *p, 
>>>>>>> unsigned
>>>>>>> int indent,
>>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c
>>>>>>> b/drivers/gpu/drm/drm_ioctl.c
>>>>>>> index f675a3bb2c88..48500bf62801 100644
>>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc 
>>>>>>> drm_ioctls[]
>>>>>>> = {
>>>>>>>                    DRM_RENDER_ALLOW),
>>>>>>>          DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY,
>>>>>>> drm_syncobj_query_ioctl,
>>>>>>>                    DRM_RENDER_ALLOW),
>>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
>>>>>>> drm_syncobj_get_sideband_ioctl,
>>>>>>> +              DRM_RENDER_ALLOW),
>>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
>>>>>>> drm_syncobj_set_sideband_ioctl,
>>>>>>> +              DRM_RENDER_ALLOW),
>>>>>>> +
>>>>>>>          DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>>>>>> drm_crtc_get_sequence_ioctl, 0),
>>>>>>>          DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>>>>>>> drm_crtc_queue_sequence_ioctl, 0),
>>>>>>>          DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>>>>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>>>>> index b927e482e554..c90ef20b9242 100644
>>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device
>>>>>>> *dev, void *data,
>>>>>>>          if (ret < 0)
>>>>>>>              return ret;
>>>>>>>      -    for (i = 0; i < args->count_handles; i++)
>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>>              drm_syncobj_replace_fence(syncobjs[i], NULL);
>>>>>>> +        syncobjs[i]->sideband_payload = 0;
>>>>>>> +    }
>>>>>>>             drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>      @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct
>>>>>>> drm_device *dev, void *data,
>>>>>>>              if (ret)
>>>>>>>                  break;
>>>>>>>          }
>>>>>>> +
>>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>>>>>>> *data,
>>>>>>> +                   struct drm_file *file_private)
>>>>>>> +{
>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>>>> +    struct drm_syncobj **syncobjs;
>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>>>> +    uint32_t i;
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>>>> +        return -EOPNOTSUPP;
>>>>>>> +
>>>>>>> +    if (args->pad != 0)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    if (args->count_handles == 0)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>>>> + u64_to_user_ptr(args->handles),
>>>>>>> +                     args->count_handles,
>>>>>>> +                     &syncobjs);
>>>>>>> +    if (ret < 0)
>>>>>>> +        return ret;
>>>>>>> +
>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>> +        copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
>>>>>>> sizeof(uint64_t));
>>>>>>> +        ret = ret ? -EFAULT : 0;
>>>>>>> +        if (ret)
>>>>>>> +            break;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>> +
>>>>>>> +    return ret;
>>>>>>> +}
>>>>>>> +
>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>>>>>>> *data,
>>>>>>> +                   struct drm_file *file_private)
>>>>>>> +{
>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>>>> +    struct drm_syncobj **syncobjs;
>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>>>> +    uint32_t i;
>>>>>>> +    int ret;
>>>>>>> +
>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>>>> +        return -EOPNOTSUPP;
>>>>>>> +
>>>>>>> +    if (args->pad != 0)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    if (args->count_handles == 0)
>>>>>>> +        return -EINVAL;
>>>>>>> +
>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>>>> + u64_to_user_ptr(args->handles),
>>>>>>> +                     args->count_handles,
>>>>>>> +                     &syncobjs);
>>>>>>> +    if (ret < 0)
>>>>>>> +        return ret;
>>>>>>> +
>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>> + copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
>>>>>>> sizeof(uint64_t));
>>>>>>> +        ret = ret ? -EFAULT : 0;
>>>>>>> +        if (ret)
>>>>>>> +            break;
>>>>>>> +    }
>>>>>>> +
>>>>>>>          drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>             return ret;
>>>>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>>>>> index 6cf7243a1dc5..b587b8e07547 100644
>>>>>>> --- a/include/drm/drm_syncobj.h
>>>>>>> +++ b/include/drm/drm_syncobj.h
>>>>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>>>>>>           * @file: A file backing for this syncobj.
>>>>>>>           */
>>>>>>>          struct file *file;
>>>>>>> +    /**
>>>>>>> +     * @sideband_payload: A 64bit side band payload.
>>>>>>> +     *
>>>>>>> +     * We use the sideband payload value to wait on binary syncobj
>>>>>>> fences
>>>>>>> +     * to materialize. It is a reservation mechanism for the
>>>>>>> signaler to
>>>>>>> +     * express that at some point in the future a dma fence with
>>>>>>> the same
>>>>>>> +     * seqno will be put into the syncobj.
>>>>>>> +     */
>>>>>>> +    u64 sideband_payload;
>>>>>>>      };
>>>>>>>         void drm_syncobj_free(struct kref *kref);
>>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>>>>>>> --- a/include/uapi/drm/drm.h
>>>>>>> +++ b/include/uapi/drm/drm.h
>>>>>>> @@ -946,6 +946,8 @@ extern "C" {
>>>>>>>      #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct
>>>>>>> drm_syncobj_timeline_array)
>>>>>>>      #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct
>>>>>>> drm_syncobj_transfer)
>>>>>>>      #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD,
>>>>>>> struct drm_syncobj_timeline_array)
>>>>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND DRM_IOR(0xCE, struct
>>>>>>> drm_syncobj_timeline_array)
>>>>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND DRM_IOWR(0xCF, struct
>>>>>>> drm_syncobj_timeline_array)
>>>>>>>         /**
>>>>>>>       * Device specific ioctls should only be in their respective
>>>>>>> headers
>
>
Lionel Landwerlin Aug. 8, 2019, 3:01 p.m. UTC | #9
On 08/08/2019 17:48, Chunming Zhou wrote:
> 在 2019/8/8 22:34, Lionel Landwerlin 写道:
>> On 08/08/2019 17:16, Chunming Zhou wrote:
>>> 在 2019/8/8 22:01, Lionel Landwerlin 写道:
>>>> On 08/08/2019 16:42, Chunming Zhou wrote:
>>>>> No, I just see your comment "The next vkQueueSubmit()
>>>>> waiting on a the syncobj will read the sideband payload and wait for a
>>>>> fence chain element with a seqno superior or equal to the sideband
>>>>> payload value to be added into the fence chain and use that fence to
>>>>> trigger the submission on the kernel driver. ".
>>>> That was meant to say wait on the chain fence to reach the sideband
>>>> payload value.
>>>>
>>>> It a bit confusing but I can't see any other way to word it.
>>>>
>>>>
>>>>> In that, you mentioned wait for sideband.
>>>>> So I want to know we how to use that, maybe I miss something in
>>>>> previous
>>>>> discussing thread.
>>>> In QueueSubmit(), we start by reading the sideband payloads :
>>>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655
>>>>
>>>>
>>>> Then build everything for the submission and hand it over to the
>>>> submission thread.
>>>>
>>>> Instead of the just waiting on the timeline semaphore values, we also
>>>> wait on the binary semaphore sideband payload values.
>>> Waiting on timeline values is when finding fence in kernel.
>>
>> Hmm aren't you waiting in a thread in userspace?
> Yes, For timeline case, we can use waitForSyncobj()..., At begin of
> QueueThread, I let it wait in cs_ioctl when drm_syncobj_find_fence.
>
> But I still didn't get how to wait on sideband for binary Syncobj.
>
> Ah, I see, you will compare it in your QueueThread, if sideband value is
>   >= expected, then do submission, otherwise, loop QueueThread, right?


Just treat binary semaphores as timelines and waitForSyncobj on the 
sideband payload value.

It should make the submission thread any busier than currently.


-Lionel


>
> That sounds the QueueThread will be always busy.
>
> -David
>
>
>>
>>> But I don't see how to wait/block in kernel when finding fence for
>>> binary sideband payload  values.
>>
>> Essentially our driver now handles timelines & binary semaphore using
>> dma-fence-chain in both cases.
>
>
>> Only with timelines we take the values submitted by the vulkan
>> application.
>
>> The binary semaphore auto increment on vkQueueSubmit() and that is
>> managed by the userspace driver.
>>
>>
>> -Lionel
>>
>>
>>>
>>> -David
>>>
>>>> Finally before exiting the QueueSubmit() call, we bump the sideband
>>>> payloads of all the binary semaphores have have been signaled :
>>>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806
>>>>
>>>>
>>>>
>>>> Whoever calls QueueSubmit() after that will pickup the new sideband
>>>> payload values to wait on.
>>>>
>>>>
>>>> -Lionel
>>>>
>>>>
>>>>
>>>>> -DAvid
>>>>>
>>>>>
>>>>> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
>>>>>> Interesting question :)
>>>>>>
>>>>>> I didn't see any usecase for that.
>>>>>> This sideband payload value is used for a wait so waiting on the wait
>>>>>> value for another wait is bit meta :)
>>>>>>
>>>>>> Do you have a use case for that?
>>>>>>
>>>>>> -Lionel
>>>>>>
>>>>>> On 08/08/2019 16:23, Chunming Zhou wrote:
>>>>>>> The propursal is fine with me.
>>>>>>>
>>>>>>> one question:
>>>>>>>
>>>>>>> how to wait sideband payload? Following patch will show that?
>>>>>>>
>>>>>>> -David
>>>>>>>
>>>>>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>>>>>>>> The Vulkan timeline semaphores allow signaling to happen on the
>>>>>>>> point
>>>>>>>> of the timeline without all of the its dependencies to be created.
>>>>>>>>
>>>>>>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on
>>>>>>>> top of
>>>>>>>> the Linux kernel are using a thread to wait on the dependencies
>>>>>>>> of a
>>>>>>>> given point to materialize and delay actual submission to the
>>>>>>>> kernel
>>>>>>>> driver until the wait completes.
>>>>>>>>
>>>>>>>> If a binary semaphore is submitted for signaling along the side
>>>>>>>> of a
>>>>>>>> timeline semaphore waiting for completion that means that the drm
>>>>>>>> syncobj associated with that binary semaphore will not have a DMA
>>>>>>>> fence associated with it by the time vkQueueSubmit() returns. This
>>>>>>>> and
>>>>>>>> the fact that a binary semaphore can be signaled and unsignaled as
>>>>>>>> before its DMA fences materialize mean that we cannot just rely on
>>>>>>>> the
>>>>>>>> fence within the syncobj but we also need a sideband payload
>>>>>>>> verifying
>>>>>>>> that the fence in the syncobj matches the last submission from the
>>>>>>>> Vulkan API point of view.
>>>>>>>>
>>>>>>>> This change adds a sideband payload that is incremented with
>>>>>>>> signaled
>>>>>>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>>>>>>>> waiting on a the syncobj will read the sideband payload and wait
>>>>>>>> for a
>>>>>>>> fence chain element with a seqno superior or equal to the sideband
>>>>>>>> payload value to be added into the fence chain and use that
>>>>>>>> fence to
>>>>>>>> trigger the submission on the kernel driver.
>>>>>>>>
>>>>>>>> v2: Use a separate ioctl to get/set the sideband value (Christian)
>>>>>>>>
>>>>>>>> v3: Use 2 ioctls for get/set (Christian)
>>>>>>>>
>>>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>>>>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>>>>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>>>>>>>> ---
>>>>>>>>       drivers/gpu/drm/drm_internal.h |  4 ++
>>>>>>>>       drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>>>>>>>       drivers/gpu/drm/drm_syncobj.c  | 79
>>>>>>>> +++++++++++++++++++++++++++++++++-
>>>>>>>>       include/drm/drm_syncobj.h      |  9 ++++
>>>>>>>>       include/uapi/drm/drm.h         |  2 +
>>>>>>>>       5 files changed, 98 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>>>>>>> b/drivers/gpu/drm/drm_internal.h
>>>>>>>> index 51a2055c8f18..0c9736199df0 100644
>>>>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>>>>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct
>>>>>>>> drm_device *dev, void *data,
>>>>>>>>                             struct drm_file *file_private);
>>>>>>>>       int drm_syncobj_query_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>>                       struct drm_file *file_private);
>>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private);
>>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private);
>>>>>>>>          /* drm_framebuffer.c */
>>>>>>>>       void drm_framebuffer_print_info(struct drm_printer *p,
>>>>>>>> unsigned
>>>>>>>> int indent,
>>>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> b/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> index f675a3bb2c88..48500bf62801 100644
>>>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc
>>>>>>>> drm_ioctls[]
>>>>>>>> = {
>>>>>>>>                     DRM_RENDER_ALLOW),
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY,
>>>>>>>> drm_syncobj_query_ioctl,
>>>>>>>>                     DRM_RENDER_ALLOW),
>>>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
>>>>>>>> drm_syncobj_get_sideband_ioctl,
>>>>>>>> +              DRM_RENDER_ALLOW),
>>>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
>>>>>>>> drm_syncobj_set_sideband_ioctl,
>>>>>>>> +              DRM_RENDER_ALLOW),
>>>>>>>> +
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>>>>>>> drm_crtc_get_sequence_ioctl, 0),
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>>>>>>>> drm_crtc_queue_sequence_ioctl, 0),
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>>>>>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>>>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> index b927e482e554..c90ef20b9242 100644
>>>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device
>>>>>>>> *dev, void *data,
>>>>>>>>           if (ret < 0)
>>>>>>>>               return ret;
>>>>>>>>       -    for (i = 0; i < args->count_handles; i++)
>>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>>>               drm_syncobj_replace_fence(syncobjs[i], NULL);
>>>>>>>> +        syncobjs[i]->sideband_payload = 0;
>>>>>>>> +    }
>>>>>>>>              drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>>       @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct
>>>>>>>> drm_device *dev, void *data,
>>>>>>>>               if (ret)
>>>>>>>>                   break;
>>>>>>>>           }
>>>>>>>> +
>>>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private)
>>>>>>>> +{
>>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>>>>> +    struct drm_syncobj **syncobjs;
>>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>>>>> +    uint32_t i;
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>>>>> +        return -EOPNOTSUPP;
>>>>>>>> +
>>>>>>>> +    if (args->pad != 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    if (args->count_handles == 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>>>>> + u64_to_user_ptr(args->handles),
>>>>>>>> +                     args->count_handles,
>>>>>>>> +                     &syncobjs);
>>>>>>>> +    if (ret < 0)
>>>>>>>> +        return ret;
>>>>>>>> +
>>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>>> +        copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
>>>>>>>> sizeof(uint64_t));
>>>>>>>> +        ret = ret ? -EFAULT : 0;
>>>>>>>> +        if (ret)
>>>>>>>> +            break;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private)
>>>>>>>> +{
>>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>>>>> +    struct drm_syncobj **syncobjs;
>>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>>>>> +    uint32_t i;
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>>>>> +        return -EOPNOTSUPP;
>>>>>>>> +
>>>>>>>> +    if (args->pad != 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    if (args->count_handles == 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>>>>> + u64_to_user_ptr(args->handles),
>>>>>>>> +                     args->count_handles,
>>>>>>>> +                     &syncobjs);
>>>>>>>> +    if (ret < 0)
>>>>>>>> +        return ret;
>>>>>>>> +
>>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>>> + copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
>>>>>>>> sizeof(uint64_t));
>>>>>>>> +        ret = ret ? -EFAULT : 0;
>>>>>>>> +        if (ret)
>>>>>>>> +            break;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>>           drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>>              return ret;
>>>>>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>>>>>> index 6cf7243a1dc5..b587b8e07547 100644
>>>>>>>> --- a/include/drm/drm_syncobj.h
>>>>>>>> +++ b/include/drm/drm_syncobj.h
>>>>>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>>>>>>>            * @file: A file backing for this syncobj.
>>>>>>>>            */
>>>>>>>>           struct file *file;
>>>>>>>> +    /**
>>>>>>>> +     * @sideband_payload: A 64bit side band payload.
>>>>>>>> +     *
>>>>>>>> +     * We use the sideband payload value to wait on binary syncobj
>>>>>>>> fences
>>>>>>>> +     * to materialize. It is a reservation mechanism for the
>>>>>>>> signaler to
>>>>>>>> +     * express that at some point in the future a dma fence with
>>>>>>>> the same
>>>>>>>> +     * seqno will be put into the syncobj.
>>>>>>>> +     */
>>>>>>>> +    u64 sideband_payload;
>>>>>>>>       };
>>>>>>>>          void drm_syncobj_free(struct kref *kref);
>>>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>>>>>>>> --- a/include/uapi/drm/drm.h
>>>>>>>> +++ b/include/uapi/drm/drm.h
>>>>>>>> @@ -946,6 +946,8 @@ extern "C" {
>>>>>>>>       #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct
>>>>>>>> drm_syncobj_timeline_array)
>>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct
>>>>>>>> drm_syncobj_transfer)
>>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD,
>>>>>>>> struct drm_syncobj_timeline_array)
>>>>>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND DRM_IOR(0xCE, struct
>>>>>>>> drm_syncobj_timeline_array)
>>>>>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND DRM_IOWR(0xCF, struct
>>>>>>>> drm_syncobj_timeline_array)
>>>>>>>>          /**
>>>>>>>>        * Device specific ioctls should only be in their respective
>>>>>>>> headers
>>
Chunming Zhou Aug. 8, 2019, 3:23 p.m. UTC | #10
Thank you, I got your mean.
when you have sideband payload, you will go into timeline path. Clever!

-David

-------- 原始邮件 --------
主题:Re: [PATCH v3 1/1] drm/syncobj: add sideband payload
发件人:Lionel Landwerlin
收件人:"Zhou, David(ChunMing)" ,dri-devel@freedesktop.org
抄送:"Koenig, Christian" ,Jason Ekstrand ,"Zhou, David(ChunMing)"

On 08/08/2019 17:48, Chunming Zhou wrote:
> 在 2019/8/8 22:34, Lionel Landwerlin 写道:
>> On 08/08/2019 17:16, Chunming Zhou wrote:
>>> 在 2019/8/8 22:01, Lionel Landwerlin 写道:
>>>> On 08/08/2019 16:42, Chunming Zhou wrote:
>>>>> No, I just see your comment "The next vkQueueSubmit()
>>>>> waiting on a the syncobj will read the sideband payload and wait for a
>>>>> fence chain element with a seqno superior or equal to the sideband
>>>>> payload value to be added into the fence chain and use that fence to
>>>>> trigger the submission on the kernel driver. ".
>>>> That was meant to say wait on the chain fence to reach the sideband
>>>> payload value.
>>>>
>>>> It a bit confusing but I can't see any other way to word it.
>>>>
>>>>
>>>>> In that, you mentioned wait for sideband.
>>>>> So I want to know we how to use that, maybe I miss something in
>>>>> previous
>>>>> discussing thread.
>>>> In QueueSubmit(), we start by reading the sideband payloads :
>>>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655
>>>>
>>>>
>>>> Then build everything for the submission and hand it over to the
>>>> submission thread.
>>>>
>>>> Instead of the just waiting on the timeline semaphore values, we also
>>>> wait on the binary semaphore sideband payload values.
>>> Waiting on timeline values is when finding fence in kernel.
>>
>> Hmm aren't you waiting in a thread in userspace?
> Yes, For timeline case, we can use waitForSyncobj()..., At begin of
> QueueThread, I let it wait in cs_ioctl when drm_syncobj_find_fence.
>
> But I still didn't get how to wait on sideband for binary Syncobj.
>
> Ah, I see, you will compare it in your QueueThread, if sideband value is
>   >= expected, then do submission, otherwise, loop QueueThread, right?


Just treat binary semaphores as timelines and waitForSyncobj on the
sideband payload value.

It should make the submission thread any busier than currently.


-Lionel


>
> That sounds the QueueThread will be always busy.
>
> -David
>
>
>>
>>> But I don't see how to wait/block in kernel when finding fence for
>>> binary sideband payload  values.
>>
>> Essentially our driver now handles timelines & binary semaphore using
>> dma-fence-chain in both cases.
>
>
>> Only with timelines we take the values submitted by the vulkan
>> application.
>
>> The binary semaphore auto increment on vkQueueSubmit() and that is
>> managed by the userspace driver.
>>
>>
>> -Lionel
>>
>>
>>>
>>> -David
>>>
>>>> Finally before exiting the QueueSubmit() call, we bump the sideband
>>>> payloads of all the binary semaphores have have been signaled :
>>>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806
>>>>
>>>>
>>>>
>>>> Whoever calls QueueSubmit() after that will pickup the new sideband
>>>> payload values to wait on.
>>>>
>>>>
>>>> -Lionel
>>>>
>>>>
>>>>
>>>>> -DAvid
>>>>>
>>>>>
>>>>> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
>>>>>> Interesting question :)
>>>>>>
>>>>>> I didn't see any usecase for that.
>>>>>> This sideband payload value is used for a wait so waiting on the wait
>>>>>> value for another wait is bit meta :)
>>>>>>
>>>>>> Do you have a use case for that?
>>>>>>
>>>>>> -Lionel
>>>>>>
>>>>>> On 08/08/2019 16:23, Chunming Zhou wrote:
>>>>>>> The propursal is fine with me.
>>>>>>>
>>>>>>> one question:
>>>>>>>
>>>>>>> how to wait sideband payload? Following patch will show that?
>>>>>>>
>>>>>>> -David
>>>>>>>
>>>>>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>>>>>>>> The Vulkan timeline semaphores allow signaling to happen on the
>>>>>>>> point
>>>>>>>> of the timeline without all of the its dependencies to be created.
>>>>>>>>
>>>>>>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on
>>>>>>>> top of
>>>>>>>> the Linux kernel are using a thread to wait on the dependencies
>>>>>>>> of a
>>>>>>>> given point to materialize and delay actual submission to the
>>>>>>>> kernel
>>>>>>>> driver until the wait completes.
>>>>>>>>
>>>>>>>> If a binary semaphore is submitted for signaling along the side
>>>>>>>> of a
>>>>>>>> timeline semaphore waiting for completion that means that the drm
>>>>>>>> syncobj associated with that binary semaphore will not have a DMA
>>>>>>>> fence associated with it by the time vkQueueSubmit() returns. This
>>>>>>>> and
>>>>>>>> the fact that a binary semaphore can be signaled and unsignaled as
>>>>>>>> before its DMA fences materialize mean that we cannot just rely on
>>>>>>>> the
>>>>>>>> fence within the syncobj but we also need a sideband payload
>>>>>>>> verifying
>>>>>>>> that the fence in the syncobj matches the last submission from the
>>>>>>>> Vulkan API point of view.
>>>>>>>>
>>>>>>>> This change adds a sideband payload that is incremented with
>>>>>>>> signaled
>>>>>>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>>>>>>>> waiting on a the syncobj will read the sideband payload and wait
>>>>>>>> for a
>>>>>>>> fence chain element with a seqno superior or equal to the sideband
>>>>>>>> payload value to be added into the fence chain and use that
>>>>>>>> fence to
>>>>>>>> trigger the submission on the kernel driver.
>>>>>>>>
>>>>>>>> v2: Use a separate ioctl to get/set the sideband value (Christian)
>>>>>>>>
>>>>>>>> v3: Use 2 ioctls for get/set (Christian)
>>>>>>>>
>>>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>>>>>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>>>>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>>>>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>>>>>>>> ---
>>>>>>>>       drivers/gpu/drm/drm_internal.h |  4 ++
>>>>>>>>       drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>>>>>>>       drivers/gpu/drm/drm_syncobj.c  | 79
>>>>>>>> +++++++++++++++++++++++++++++++++-
>>>>>>>>       include/drm/drm_syncobj.h      |  9 ++++
>>>>>>>>       include/uapi/drm/drm.h         |  2 +
>>>>>>>>       5 files changed, 98 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>>>>>>> b/drivers/gpu/drm/drm_internal.h
>>>>>>>> index 51a2055c8f18..0c9736199df0 100644
>>>>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>>>>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct
>>>>>>>> drm_device *dev, void *data,
>>>>>>>>                             struct drm_file *file_private);
>>>>>>>>       int drm_syncobj_query_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>>                       struct drm_file *file_private);
>>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private);
>>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private);
>>>>>>>>          /* drm_framebuffer.c */
>>>>>>>>       void drm_framebuffer_print_info(struct drm_printer *p,
>>>>>>>> unsigned
>>>>>>>> int indent,
>>>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> b/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> index f675a3bb2c88..48500bf62801 100644
>>>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc
>>>>>>>> drm_ioctls[]
>>>>>>>> = {
>>>>>>>>                     DRM_RENDER_ALLOW),
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY,
>>>>>>>> drm_syncobj_query_ioctl,
>>>>>>>>                     DRM_RENDER_ALLOW),
>>>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
>>>>>>>> drm_syncobj_get_sideband_ioctl,
>>>>>>>> +              DRM_RENDER_ALLOW),
>>>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
>>>>>>>> drm_syncobj_set_sideband_ioctl,
>>>>>>>> +              DRM_RENDER_ALLOW),
>>>>>>>> +
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>>>>>>> drm_crtc_get_sequence_ioctl, 0),
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>>>>>>>> drm_crtc_queue_sequence_ioctl, 0),
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>>>>>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>>>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> index b927e482e554..c90ef20b9242 100644
>>>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device
>>>>>>>> *dev, void *data,
>>>>>>>>           if (ret < 0)
>>>>>>>>               return ret;
>>>>>>>>       -    for (i = 0; i < args->count_handles; i++)
>>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>>>               drm_syncobj_replace_fence(syncobjs[i], NULL);
>>>>>>>> +        syncobjs[i]->sideband_payload = 0;
>>>>>>>> +    }
>>>>>>>>              drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>>       @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct
>>>>>>>> drm_device *dev, void *data,
>>>>>>>>               if (ret)
>>>>>>>>                   break;
>>>>>>>>           }
>>>>>>>> +
>>>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private)
>>>>>>>> +{
>>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>>>>> +    struct drm_syncobj **syncobjs;
>>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>>>>> +    uint32_t i;
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>>>>> +        return -EOPNOTSUPP;
>>>>>>>> +
>>>>>>>> +    if (args->pad != 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    if (args->count_handles == 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>>>>> + u64_to_user_ptr(args->handles),
>>>>>>>> +                     args->count_handles,
>>>>>>>> +                     &syncobjs);
>>>>>>>> +    if (ret < 0)
>>>>>>>> +        return ret;
>>>>>>>> +
>>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>>> +        copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
>>>>>>>> sizeof(uint64_t));
>>>>>>>> +        ret = ret ? -EFAULT : 0;
>>>>>>>> +        if (ret)
>>>>>>>> +            break;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private)
>>>>>>>> +{
>>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>>>>> +    struct drm_syncobj **syncobjs;
>>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>>>>> +    uint32_t i;
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>>>>> +        return -EOPNOTSUPP;
>>>>>>>> +
>>>>>>>> +    if (args->pad != 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    if (args->count_handles == 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>>>>> + u64_to_user_ptr(args->handles),
>>>>>>>> +                     args->count_handles,
>>>>>>>> +                     &syncobjs);
>>>>>>>> +    if (ret < 0)
>>>>>>>> +        return ret;
>>>>>>>> +
>>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>>> + copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
>>>>>>>> sizeof(uint64_t));
>>>>>>>> +        ret = ret ? -EFAULT : 0;
>>>>>>>> +        if (ret)
>>>>>>>> +            break;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>>           drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>>              return ret;
>>>>>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>>>>>> index 6cf7243a1dc5..b587b8e07547 100644
>>>>>>>> --- a/include/drm/drm_syncobj.h
>>>>>>>> +++ b/include/drm/drm_syncobj.h
>>>>>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>>>>>>>            * @file: A file backing for this syncobj.
>>>>>>>>            */
>>>>>>>>           struct file *file;
>>>>>>>> +    /**
>>>>>>>> +     * @sideband_payload: A 64bit side band payload.
>>>>>>>> +     *
>>>>>>>> +     * We use the sideband payload value to wait on binary syncobj
>>>>>>>> fences
>>>>>>>> +     * to materialize. It is a reservation mechanism for the
>>>>>>>> signaler to
>>>>>>>> +     * express that at some point in the future a dma fence with
>>>>>>>> the same
>>>>>>>> +     * seqno will be put into the syncobj.
>>>>>>>> +     */
>>>>>>>> +    u64 sideband_payload;
>>>>>>>>       };
>>>>>>>>          void drm_syncobj_free(struct kref *kref);
>>>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>>>>>>>> --- a/include/uapi/drm/drm.h
>>>>>>>> +++ b/include/uapi/drm/drm.h
>>>>>>>> @@ -946,6 +946,8 @@ extern "C" {
>>>>>>>>       #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct
>>>>>>>> drm_syncobj_timeline_array)
>>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct
>>>>>>>> drm_syncobj_transfer)
>>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD,
>>>>>>>> struct drm_syncobj_timeline_array)
>>>>>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND DRM_IOR(0xCE, struct
>>>>>>>> drm_syncobj_timeline_array)
>>>>>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND DRM_IOWR(0xCF, struct
>>>>>>>> drm_syncobj_timeline_array)
>>>>>>>>          /**
>>>>>>>>        * Device specific ioctls should only be in their respective
>>>>>>>> headers
>>
Christian König Aug. 8, 2019, 3:26 p.m. UTC | #11
Yeah, the idea is indeed rather clever. I'm just wondering if we don't need to optimize it even more.

In other words could some IOCTL overhead be saved here if we directly take the value for binary semaphores waits and signals?

Christian.

Am 08.08.19 um 17:23 schrieb Zhou, David(ChunMing):
Thank you, I got your mean.
when you have sideband payload, you will go into timeline path. Clever!

-David

-------- 原始邮件 --------
主题:Re: [PATCH v3 1/1] drm/syncobj: add sideband payload
发件人:Lionel Landwerlin
收件人:"Zhou, David(ChunMing)" ,dri-devel@freedesktop.org<mailto:dri-devel@freedesktop.org>
抄送:"Koenig, Christian" ,Jason Ekstrand ,"Zhou, David(ChunMing)"

On 08/08/2019 17:48, Chunming Zhou wrote:
> 在 2019/8/8 22:34, Lionel Landwerlin 写道:
>> On 08/08/2019 17:16, Chunming Zhou wrote:
>>> 在 2019/8/8 22:01, Lionel Landwerlin 写道:
>>>> On 08/08/2019 16:42, Chunming Zhou wrote:
>>>>> No, I just see your comment "The next vkQueueSubmit()
>>>>> waiting on a the syncobj will read the sideband payload and wait for a
>>>>> fence chain element with a seqno superior or equal to the sideband
>>>>> payload value to be added into the fence chain and use that fence to
>>>>> trigger the submission on the kernel driver. ".
>>>> That was meant to say wait on the chain fence to reach the sideband
>>>> payload value.
>>>>
>>>> It a bit confusing but I can't see any other way to word it.
>>>>
>>>>
>>>>> In that, you mentioned wait for sideband.
>>>>> So I want to know we how to use that, maybe I miss something in
>>>>> previous
>>>>> discussing thread.
>>>> In QueueSubmit(), we start by reading the sideband payloads :
>>>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655
>>>>
>>>>
>>>> Then build everything for the submission and hand it over to the
>>>> submission thread.
>>>>
>>>> Instead of the just waiting on the timeline semaphore values, we also
>>>> wait on the binary semaphore sideband payload values.
>>> Waiting on timeline values is when finding fence in kernel.
>>
>> Hmm aren't you waiting in a thread in userspace?
> Yes, For timeline case, we can use waitForSyncobj()..., At begin of
> QueueThread, I let it wait in cs_ioctl when drm_syncobj_find_fence.
>
> But I still didn't get how to wait on sideband for binary Syncobj.
>
> Ah, I see, you will compare it in your QueueThread, if sideband value is
>   >= expected, then do submission, otherwise, loop QueueThread, right?


Just treat binary semaphores as timelines and waitForSyncobj on the
sideband payload value.

It should make the submission thread any busier than currently.


-Lionel


>
> That sounds the QueueThread will be always busy.
>
> -David
>
>
>>
>>> But I don't see how to wait/block in kernel when finding fence for
>>> binary sideband payload  values.
>>
>> Essentially our driver now handles timelines & binary semaphore using
>> dma-fence-chain in both cases.
>
>
>> Only with timelines we take the values submitted by the vulkan
>> application.
>
>> The binary semaphore auto increment on vkQueueSubmit() and that is
>> managed by the userspace driver.
>>
>>
>> -Lionel
>>
>>
>>>
>>> -David
>>>
>>>> Finally before exiting the QueueSubmit() call, we bump the sideband
>>>> payloads of all the binary semaphores have have been signaled :
>>>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806
>>>>
>>>>
>>>>
>>>> Whoever calls QueueSubmit() after that will pickup the new sideband
>>>> payload values to wait on.
>>>>
>>>>
>>>> -Lionel
>>>>
>>>>
>>>>
>>>>> -DAvid
>>>>>
>>>>>
>>>>> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
>>>>>> Interesting question :)
>>>>>>
>>>>>> I didn't see any usecase for that.
>>>>>> This sideband payload value is used for a wait so waiting on the wait
>>>>>> value for another wait is bit meta :)
>>>>>>
>>>>>> Do you have a use case for that?
>>>>>>
>>>>>> -Lionel
>>>>>>
>>>>>> On 08/08/2019 16:23, Chunming Zhou wrote:
>>>>>>> The propursal is fine with me.
>>>>>>>
>>>>>>> one question:
>>>>>>>
>>>>>>> how to wait sideband payload? Following patch will show that?
>>>>>>>
>>>>>>> -David
>>>>>>>
>>>>>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>>>>>>>> The Vulkan timeline semaphores allow signaling to happen on the
>>>>>>>> point
>>>>>>>> of the timeline without all of the its dependencies to be created.
>>>>>>>>
>>>>>>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on
>>>>>>>> top of
>>>>>>>> the Linux kernel are using a thread to wait on the dependencies
>>>>>>>> of a
>>>>>>>> given point to materialize and delay actual submission to the
>>>>>>>> kernel
>>>>>>>> driver until the wait completes.
>>>>>>>>
>>>>>>>> If a binary semaphore is submitted for signaling along the side
>>>>>>>> of a
>>>>>>>> timeline semaphore waiting for completion that means that the drm
>>>>>>>> syncobj associated with that binary semaphore will not have a DMA
>>>>>>>> fence associated with it by the time vkQueueSubmit() returns. This
>>>>>>>> and
>>>>>>>> the fact that a binary semaphore can be signaled and unsignaled as
>>>>>>>> before its DMA fences materialize mean that we cannot just rely on
>>>>>>>> the
>>>>>>>> fence within the syncobj but we also need a sideband payload
>>>>>>>> verifying
>>>>>>>> that the fence in the syncobj matches the last submission from the
>>>>>>>> Vulkan API point of view.
>>>>>>>>
>>>>>>>> This change adds a sideband payload that is incremented with
>>>>>>>> signaled
>>>>>>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>>>>>>>> waiting on a the syncobj will read the sideband payload and wait
>>>>>>>> for a
>>>>>>>> fence chain element with a seqno superior or equal to the sideband
>>>>>>>> payload value to be added into the fence chain and use that
>>>>>>>> fence to
>>>>>>>> trigger the submission on the kernel driver.
>>>>>>>>
>>>>>>>> v2: Use a separate ioctl to get/set the sideband value (Christian)
>>>>>>>>
>>>>>>>> v3: Use 2 ioctls for get/set (Christian)
>>>>>>>>
>>>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com><mailto:lionel.g.landwerlin@intel.com>
>>>>>>>> Cc: Christian Koenig <Christian.Koenig@amd.com><mailto:Christian.Koenig@amd.com>
>>>>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net><mailto:jason@jlekstrand.net>
>>>>>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com><mailto:David1.Zhou@amd.com>
>>>>>>>> ---
>>>>>>>>       drivers/gpu/drm/drm_internal.h |  4 ++
>>>>>>>>       drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>>>>>>>       drivers/gpu/drm/drm_syncobj.c  | 79
>>>>>>>> +++++++++++++++++++++++++++++++++-
>>>>>>>>       include/drm/drm_syncobj.h      |  9 ++++
>>>>>>>>       include/uapi/drm/drm.h         |  2 +
>>>>>>>>       5 files changed, 98 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>>>>>>> b/drivers/gpu/drm/drm_internal.h
>>>>>>>> index 51a2055c8f18..0c9736199df0 100644
>>>>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>>>>>>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct
>>>>>>>> drm_device *dev, void *data,
>>>>>>>>                             struct drm_file *file_private);
>>>>>>>>       int drm_syncobj_query_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>>                       struct drm_file *file_private);
>>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private);
>>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private);
>>>>>>>>          /* drm_framebuffer.c */
>>>>>>>>       void drm_framebuffer_print_info(struct drm_printer *p,
>>>>>>>> unsigned
>>>>>>>> int indent,
>>>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> b/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> index f675a3bb2c88..48500bf62801 100644
>>>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>>>>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc
>>>>>>>> drm_ioctls[]
>>>>>>>> = {
>>>>>>>>                     DRM_RENDER_ALLOW),
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY,
>>>>>>>> drm_syncobj_query_ioctl,
>>>>>>>>                     DRM_RENDER_ALLOW),
>>>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
>>>>>>>> drm_syncobj_get_sideband_ioctl,
>>>>>>>> +              DRM_RENDER_ALLOW),
>>>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
>>>>>>>> drm_syncobj_set_sideband_ioctl,
>>>>>>>> +              DRM_RENDER_ALLOW),
>>>>>>>> +
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>>>>>>> drm_crtc_get_sequence_ioctl, 0),
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>>>>>>>> drm_crtc_queue_sequence_ioctl, 0),
>>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>>>>>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>>>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> index b927e482e554..c90ef20b9242 100644
>>>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>>>>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device
>>>>>>>> *dev, void *data,
>>>>>>>>           if (ret < 0)
>>>>>>>>               return ret;
>>>>>>>>       -    for (i = 0; i < args->count_handles; i++)
>>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>>>               drm_syncobj_replace_fence(syncobjs[i], NULL);
>>>>>>>> +        syncobjs[i]->sideband_payload = 0;
>>>>>>>> +    }
>>>>>>>>              drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>>       @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct
>>>>>>>> drm_device *dev, void *data,
>>>>>>>>               if (ret)
>>>>>>>>                   break;
>>>>>>>>           }
>>>>>>>> +
>>>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private)
>>>>>>>> +{
>>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>>>>> +    struct drm_syncobj **syncobjs;
>>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>>>>> +    uint32_t i;
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>>>>> +        return -EOPNOTSUPP;
>>>>>>>> +
>>>>>>>> +    if (args->pad != 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    if (args->count_handles == 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>>>>> + u64_to_user_ptr(args->handles),
>>>>>>>> +                     args->count_handles,
>>>>>>>> +                     &syncobjs);
>>>>>>>> +    if (ret < 0)
>>>>>>>> +        return ret;
>>>>>>>> +
>>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>>> +        copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
>>>>>>>> sizeof(uint64_t));
>>>>>>>> +        ret = ret ? -EFAULT : 0;
>>>>>>>> +        if (ret)
>>>>>>>> +            break;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>> +
>>>>>>>> +    return ret;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>>>>>>>> *data,
>>>>>>>> +                   struct drm_file *file_private)
>>>>>>>> +{
>>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>>>>>>> +    struct drm_syncobj **syncobjs;
>>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>>>>>>>> +    uint32_t i;
>>>>>>>> +    int ret;
>>>>>>>> +
>>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>>>>>>>> +        return -EOPNOTSUPP;
>>>>>>>> +
>>>>>>>> +    if (args->pad != 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    if (args->count_handles == 0)
>>>>>>>> +        return -EINVAL;
>>>>>>>> +
>>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>>>>>>> + u64_to_user_ptr(args->handles),
>>>>>>>> +                     args->count_handles,
>>>>>>>> +                     &syncobjs);
>>>>>>>> +    if (ret < 0)
>>>>>>>> +        return ret;
>>>>>>>> +
>>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>>>>>>> + copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
>>>>>>>> sizeof(uint64_t));
>>>>>>>> +        ret = ret ? -EFAULT : 0;
>>>>>>>> +        if (ret)
>>>>>>>> +            break;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>>           drm_syncobj_array_free(syncobjs, args->count_handles);
>>>>>>>>              return ret;
>>>>>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
>>>>>>>> index 6cf7243a1dc5..b587b8e07547 100644
>>>>>>>> --- a/include/drm/drm_syncobj.h
>>>>>>>> +++ b/include/drm/drm_syncobj.h
>>>>>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>>>>>>>            * @file: A file backing for this syncobj.
>>>>>>>>            */
>>>>>>>>           struct file *file;
>>>>>>>> +    /**
>>>>>>>> +     * @sideband_payload: A 64bit side band payload.
>>>>>>>> +     *
>>>>>>>> +     * We use the sideband payload value to wait on binary syncobj
>>>>>>>> fences
>>>>>>>> +     * to materialize. It is a reservation mechanism for the
>>>>>>>> signaler to
>>>>>>>> +     * express that at some point in the future a dma fence with
>>>>>>>> the same
>>>>>>>> +     * seqno will be put into the syncobj.
>>>>>>>> +     */
>>>>>>>> +    u64 sideband_payload;
>>>>>>>>       };
>>>>>>>>          void drm_syncobj_free(struct kref *kref);
>>>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>>>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>>>>>>>> --- a/include/uapi/drm/drm.h
>>>>>>>> +++ b/include/uapi/drm/drm.h
>>>>>>>> @@ -946,6 +946,8 @@ extern "C" {
>>>>>>>>       #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct
>>>>>>>> drm_syncobj_timeline_array)
>>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct
>>>>>>>> drm_syncobj_transfer)
>>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD,
>>>>>>>> struct drm_syncobj_timeline_array)
>>>>>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND DRM_IOR(0xCE, struct
>>>>>>>> drm_syncobj_timeline_array)
>>>>>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND DRM_IOWR(0xCF, struct
>>>>>>>> drm_syncobj_timeline_array)
>>>>>>>>          /**
>>>>>>>>        * Device specific ioctls should only be in their respective
>>>>>>>> headers
>>
Jason Ekstrand Aug. 8, 2019, 4:23 p.m. UTC | #12
I've got a couple questions that I think go along the lines of what
Christian wrote:

1. Should it really be called sideband?  It's a very generic term for what
is currently a very generic thing.  However, maybe we don't want it to be
so generic?

2. Do we really want get/set?  Or do we want get/increment?  We have to do
the get/increment during vkQueueSubmit on both the wait and the signal side
but get+add+set is a bit heavy.  I guess you could do one bit get of all
the binary semaphores and then a set for all the signaled ones which sets
them to the previous value + 1.

My concern with making it a super-generic thing is that it has an extremely
specific use and in order for cross-process sharing to work, all users have
to use it exactly the same way.  We could call it a "reserved value" or
something like that to make it more clear what the intention is.  In any
case, we should make sure that it's very clearly documented in the design
doc.

On Thu, Aug 8, 2019 at 10:26 AM Koenig, Christian <Christian.Koenig@amd.com>
wrote:

> Yeah, the idea is indeed rather clever. I'm just wondering if we don't
> need to optimize it even more.
>
> In other words could some IOCTL overhead be saved here if we directly take
> the value for binary semaphores waits and signals?
>
> Christian.
>
> Am 08.08.19 um 17:23 schrieb Zhou, David(ChunMing):
>
> Thank you, I got your mean.
> when you have sideband payload, you will go into timeline path. Clever!
>
> -David
>
> -------- 原始邮件 --------
> 主题:Re: [PATCH v3 1/1] drm/syncobj: add sideband payload
> 发件人:Lionel Landwerlin
> 收件人:"Zhou, David(ChunMing)" ,dri-devel@freedesktop.org
> 抄送:"Koenig, Christian" ,Jason Ekstrand ,"Zhou, David(ChunMing)"
>
> On 08/08/2019 17:48, Chunming Zhou wrote:
> > 在 2019/8/8 22:34, Lionel Landwerlin 写道:
> >> On 08/08/2019 17:16, Chunming Zhou wrote:
> >>> 在 2019/8/8 22:01, Lionel Landwerlin 写道:
> >>>> On 08/08/2019 16:42, Chunming Zhou wrote:
> >>>>> No, I just see your comment "The next vkQueueSubmit()
> >>>>> waiting on a the syncobj will read the sideband payload and wait for
> a
> >>>>> fence chain element with a seqno superior or equal to the sideband
> >>>>> payload value to be added into the fence chain and use that fence to
> >>>>> trigger the submission on the kernel driver. ".
> >>>> That was meant to say wait on the chain fence to reach the sideband
> >>>> payload value.
> >>>>
> >>>> It a bit confusing but I can't see any other way to word it.
> >>>>
> >>>>
> >>>>> In that, you mentioned wait for sideband.
> >>>>> So I want to know we how to use that, maybe I miss something in
> >>>>> previous
> >>>>> discussing thread.
> >>>> In QueueSubmit(), we start by reading the sideband payloads :
> >>>>
> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655
> >>>>
> >>>>
> >>>> Then build everything for the submission and hand it over to the
> >>>> submission thread.
> >>>>
> >>>> Instead of the just waiting on the timeline semaphore values, we also
> >>>> wait on the binary semaphore sideband payload values.
> >>> Waiting on timeline values is when finding fence in kernel.
> >>
> >> Hmm aren't you waiting in a thread in userspace?
> > Yes, For timeline case, we can use waitForSyncobj()..., At begin of
> > QueueThread, I let it wait in cs_ioctl when drm_syncobj_find_fence.
> >
> > But I still didn't get how to wait on sideband for binary Syncobj.
> >
> > Ah, I see, you will compare it in your QueueThread, if sideband value is
> >   >= expected, then do submission, otherwise, loop QueueThread, right?
>
>
> Just treat binary semaphores as timelines and waitForSyncobj on the
> sideband payload value.
>
> It should make the submission thread any busier than currently.
>
>
> -Lionel
>
>
> >
> > That sounds the QueueThread will be always busy.
> >
> > -David
> >
> >
> >>
> >>> But I don't see how to wait/block in kernel when finding fence for
> >>> binary sideband payload  values.
> >>
> >> Essentially our driver now handles timelines & binary semaphore using
> >> dma-fence-chain in both cases.
> >
> >
> >> Only with timelines we take the values submitted by the vulkan
> >> application.
> >
> >> The binary semaphore auto increment on vkQueueSubmit() and that is
> >> managed by the userspace driver.
> >>
> >>
> >> -Lionel
> >>
> >>
> >>>
> >>> -David
> >>>
> >>>> Finally before exiting the QueueSubmit() call, we bump the sideband
> >>>> payloads of all the binary semaphores have have been signaled :
> >>>>
> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806
> >>>>
> >>>>
> >>>>
> >>>> Whoever calls QueueSubmit() after that will pickup the new sideband
> >>>> payload values to wait on.
> >>>>
> >>>>
> >>>> -Lionel
> >>>>
> >>>>
> >>>>
> >>>>> -DAvid
> >>>>>
> >>>>>
> >>>>> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
> >>>>>> Interesting question :)
> >>>>>>
> >>>>>> I didn't see any usecase for that.
> >>>>>> This sideband payload value is used for a wait so waiting on the
> wait
> >>>>>> value for another wait is bit meta :)
> >>>>>>
> >>>>>> Do you have a use case for that?
> >>>>>>
> >>>>>> -Lionel
> >>>>>>
> >>>>>> On 08/08/2019 16:23, Chunming Zhou wrote:
> >>>>>>> The propursal is fine with me.
> >>>>>>>
> >>>>>>> one question:
> >>>>>>>
> >>>>>>> how to wait sideband payload? Following patch will show that?
> >>>>>>>
> >>>>>>> -David
> >>>>>>>
> >>>>>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
> >>>>>>>> The Vulkan timeline semaphores allow signaling to happen on the
> >>>>>>>> point
> >>>>>>>> of the timeline without all of the its dependencies to be created.
> >>>>>>>>
> >>>>>>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on
> >>>>>>>> top of
> >>>>>>>> the Linux kernel are using a thread to wait on the dependencies
> >>>>>>>> of a
> >>>>>>>> given point to materialize and delay actual submission to the
> >>>>>>>> kernel
> >>>>>>>> driver until the wait completes.
> >>>>>>>>
> >>>>>>>> If a binary semaphore is submitted for signaling along the side
> >>>>>>>> of a
> >>>>>>>> timeline semaphore waiting for completion that means that the drm
> >>>>>>>> syncobj associated with that binary semaphore will not have a DMA
> >>>>>>>> fence associated with it by the time vkQueueSubmit() returns. This
> >>>>>>>> and
> >>>>>>>> the fact that a binary semaphore can be signaled and unsignaled as
> >>>>>>>> before its DMA fences materialize mean that we cannot just rely on
> >>>>>>>> the
> >>>>>>>> fence within the syncobj but we also need a sideband payload
> >>>>>>>> verifying
> >>>>>>>> that the fence in the syncobj matches the last submission from the
> >>>>>>>> Vulkan API point of view.
> >>>>>>>>
> >>>>>>>> This change adds a sideband payload that is incremented with
> >>>>>>>> signaled
> >>>>>>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
> >>>>>>>> waiting on a the syncobj will read the sideband payload and wait
> >>>>>>>> for a
> >>>>>>>> fence chain element with a seqno superior or equal to the sideband
> >>>>>>>> payload value to be added into the fence chain and use that
> >>>>>>>> fence to
> >>>>>>>> trigger the submission on the kernel driver.
> >>>>>>>>
> >>>>>>>> v2: Use a separate ioctl to get/set the sideband value (Christian)
> >>>>>>>>
> >>>>>>>> v3: Use 2 ioctls for get/set (Christian)
> >>>>>>>>
> >>>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> <lionel.g.landwerlin@intel.com>
> >>>>>>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
> <Christian.Koenig@amd.com>
> >>>>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net> <jason@jlekstrand.net>
> >>>>>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
> <David1.Zhou@amd.com>
> >>>>>>>> ---
> >>>>>>>>       drivers/gpu/drm/drm_internal.h |  4 ++
> >>>>>>>>       drivers/gpu/drm/drm_ioctl.c    |  5 +++
> >>>>>>>>       drivers/gpu/drm/drm_syncobj.c  | 79
> >>>>>>>> +++++++++++++++++++++++++++++++++-
> >>>>>>>>       include/drm/drm_syncobj.h      |  9 ++++
> >>>>>>>>       include/uapi/drm/drm.h         |  2 +
> >>>>>>>>       5 files changed, 98 insertions(+), 1 deletion(-)
> >>>>>>>>
> >>>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
> >>>>>>>> b/drivers/gpu/drm/drm_internal.h
> >>>>>>>> index 51a2055c8f18..0c9736199df0 100644
> >>>>>>>> --- a/drivers/gpu/drm/drm_internal.h
> >>>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
> >>>>>>>> @@ -208,6 +208,10 @@ int drm_syncobj_timeline_signal_ioctl(struct
> >>>>>>>> drm_device *dev, void *data,
> >>>>>>>>                             struct drm_file *file_private);
> >>>>>>>>       int drm_syncobj_query_ioctl(struct drm_device *dev, void
> >>>>>>>> *data,
> >>>>>>>>                       struct drm_file *file_private);
> >>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
> >>>>>>>> *data,
> >>>>>>>> +                   struct drm_file *file_private);
> >>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
> >>>>>>>> *data,
> >>>>>>>> +                   struct drm_file *file_private);
> >>>>>>>>          /* drm_framebuffer.c */
> >>>>>>>>       void drm_framebuffer_print_info(struct drm_printer *p,
> >>>>>>>> unsigned
> >>>>>>>> int indent,
> >>>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c
> >>>>>>>> b/drivers/gpu/drm/drm_ioctl.c
> >>>>>>>> index f675a3bb2c88..48500bf62801 100644
> >>>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
> >>>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
> >>>>>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc
> >>>>>>>> drm_ioctls[]
> >>>>>>>> = {
> >>>>>>>>                     DRM_RENDER_ALLOW),
> >>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY,
> >>>>>>>> drm_syncobj_query_ioctl,
> >>>>>>>>                     DRM_RENDER_ALLOW),
> >>>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
> >>>>>>>> drm_syncobj_get_sideband_ioctl,
> >>>>>>>> +              DRM_RENDER_ALLOW),
> >>>>>>>> +    DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
> >>>>>>>> drm_syncobj_set_sideband_ioctl,
> >>>>>>>> +              DRM_RENDER_ALLOW),
> >>>>>>>> +
> >>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
> >>>>>>>> drm_crtc_get_sequence_ioctl, 0),
> >>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
> >>>>>>>> drm_crtc_queue_sequence_ioctl, 0),
> >>>>>>>>           DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
> >>>>>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
> >>>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
> >>>>>>>> b/drivers/gpu/drm/drm_syncobj.c
> >>>>>>>> index b927e482e554..c90ef20b9242 100644
> >>>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
> >>>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
> >>>>>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device
> >>>>>>>> *dev, void *data,
> >>>>>>>>           if (ret < 0)
> >>>>>>>>               return ret;
> >>>>>>>>       -    for (i = 0; i < args->count_handles; i++)
> >>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
> >>>>>>>>               drm_syncobj_replace_fence(syncobjs[i], NULL);
> >>>>>>>> +        syncobjs[i]->sideband_payload = 0;
> >>>>>>>> +    }
> >>>>>>>>              drm_syncobj_array_free(syncobjs,
> args->count_handles);
> >>>>>>>>       @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct
> >>>>>>>> drm_device *dev, void *data,
> >>>>>>>>               if (ret)
> >>>>>>>>                   break;
> >>>>>>>>           }
> >>>>>>>> +
> >>>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
> >>>>>>>> +
> >>>>>>>> +    return ret;
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
> >>>>>>>> *data,
> >>>>>>>> +                   struct drm_file *file_private)
> >>>>>>>> +{
> >>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
> >>>>>>>> +    struct drm_syncobj **syncobjs;
> >>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
> >>>>>>>> +    uint32_t i;
> >>>>>>>> +    int ret;
> >>>>>>>> +
> >>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
> >>>>>>>> +        return -EOPNOTSUPP;
> >>>>>>>> +
> >>>>>>>> +    if (args->pad != 0)
> >>>>>>>> +        return -EINVAL;
> >>>>>>>> +
> >>>>>>>> +    if (args->count_handles == 0)
> >>>>>>>> +        return -EINVAL;
> >>>>>>>> +
> >>>>>>>> +    ret = drm_syncobj_array_find(file_private,
> >>>>>>>> + u64_to_user_ptr(args->handles),
> >>>>>>>> +                     args->count_handles,
> >>>>>>>> +                     &syncobjs);
> >>>>>>>> +    if (ret < 0)
> >>>>>>>> +        return ret;
> >>>>>>>> +
> >>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
> >>>>>>>> +        copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
> >>>>>>>> sizeof(uint64_t));
> >>>>>>>> +        ret = ret ? -EFAULT : 0;
> >>>>>>>> +        if (ret)
> >>>>>>>> +            break;
> >>>>>>>> +    }
> >>>>>>>> +
> >>>>>>>> +    drm_syncobj_array_free(syncobjs, args->count_handles);
> >>>>>>>> +
> >>>>>>>> +    return ret;
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
> >>>>>>>> *data,
> >>>>>>>> +                   struct drm_file *file_private)
> >>>>>>>> +{
> >>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
> >>>>>>>> +    struct drm_syncobj **syncobjs;
> >>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
> >>>>>>>> +    uint32_t i;
> >>>>>>>> +    int ret;
> >>>>>>>> +
> >>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
> >>>>>>>> +        return -EOPNOTSUPP;
> >>>>>>>> +
> >>>>>>>> +    if (args->pad != 0)
> >>>>>>>> +        return -EINVAL;
> >>>>>>>> +
> >>>>>>>> +    if (args->count_handles == 0)
> >>>>>>>> +        return -EINVAL;
> >>>>>>>> +
> >>>>>>>> +    ret = drm_syncobj_array_find(file_private,
> >>>>>>>> + u64_to_user_ptr(args->handles),
> >>>>>>>> +                     args->count_handles,
> >>>>>>>> +                     &syncobjs);
> >>>>>>>> +    if (ret < 0)
> >>>>>>>> +        return ret;
> >>>>>>>> +
> >>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
> >>>>>>>> + copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
> >>>>>>>> sizeof(uint64_t));
> >>>>>>>> +        ret = ret ? -EFAULT : 0;
> >>>>>>>> +        if (ret)
> >>>>>>>> +            break;
> >>>>>>>> +    }
> >>>>>>>> +
> >>>>>>>>           drm_syncobj_array_free(syncobjs, args->count_handles);
> >>>>>>>>              return ret;
> >>>>>>>> diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
> >>>>>>>> index 6cf7243a1dc5..b587b8e07547 100644
> >>>>>>>> --- a/include/drm/drm_syncobj.h
> >>>>>>>> +++ b/include/drm/drm_syncobj.h
> >>>>>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
> >>>>>>>>            * @file: A file backing for this syncobj.
> >>>>>>>>            */
> >>>>>>>>           struct file *file;
> >>>>>>>> +    /**
> >>>>>>>> +     * @sideband_payload: A 64bit side band payload.
> >>>>>>>> +     *
> >>>>>>>> +     * We use the sideband payload value to wait on binary
> syncobj
> >>>>>>>> fences
> >>>>>>>> +     * to materialize. It is a reservation mechanism for the
> >>>>>>>> signaler to
> >>>>>>>> +     * express that at some point in the future a dma fence with
> >>>>>>>> the same
> >>>>>>>> +     * seqno will be put into the syncobj.
> >>>>>>>> +     */
> >>>>>>>> +    u64 sideband_payload;
> >>>>>>>>       };
> >>>>>>>>          void drm_syncobj_free(struct kref *kref);
> >>>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> >>>>>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
> >>>>>>>> --- a/include/uapi/drm/drm.h
> >>>>>>>> +++ b/include/uapi/drm/drm.h
> >>>>>>>> @@ -946,6 +946,8 @@ extern "C" {
> >>>>>>>>       #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct
> >>>>>>>> drm_syncobj_timeline_array)
> >>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct
> >>>>>>>> drm_syncobj_transfer)
> >>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD,
> >>>>>>>> struct drm_syncobj_timeline_array)
> >>>>>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND DRM_IOR(0xCE, struct
> >>>>>>>> drm_syncobj_timeline_array)
> >>>>>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND DRM_IOWR(0xCF, struct
> >>>>>>>> drm_syncobj_timeline_array)
> >>>>>>>>          /**
> >>>>>>>>        * Device specific ioctls should only be in their respective
> >>>>>>>> headers
> >>
>
>
>
Lionel Landwerlin Aug. 8, 2019, 9:21 p.m. UTC | #13
On 08/08/2019 19:23, Jason Ekstrand wrote:
> I've got a couple questions that I think go along the lines of what 
> Christian wrote:
>
> 1. Should it really be called sideband?  It's a very generic term for 
> what is currently a very generic thing. However, maybe we don't want 
> it to be so generic?
>
> 2. Do we really want get/set?  Or do we want get/increment?  We have 
> to do the get/increment during vkQueueSubmit on both the wait and the 
> signal side but get+add+set is a bit heavy.  I guess you could do one 
> bit get of all the binary semaphores and then a set for all the 
> signaled ones which sets them to the previous value + 1.


Our last iteration gather all the binary semaphores to read them and 
write them all back with signaled one incremented by one.

It could be batched into one ioctl with a flag that checks whether a 
given syncobj needs to be get+inc or just get.


>
> My concern with making it a super-generic thing is that it has an 
> extremely specific use and in order for cross-process sharing to work, 
> all users have to use it exactly the same way.  We could call it a 
> "reserved value" or something like that to make it more clear what the 
> intention is.  In any case, we should make sure that it's very clearly 
> documented in the design doc.


Agreed, this needs documentation.

I'll add a preceding commit adding timeline documentation and document this.


-Lionel


>
> On Thu, Aug 8, 2019 at 10:26 AM Koenig, Christian 
> <Christian.Koenig@amd.com <mailto:Christian.Koenig@amd.com>> wrote:
>
>     Yeah, the idea is indeed rather clever. I'm just wondering if we
>     don't need to optimize it even more.
>
>     In other words could some IOCTL overhead be saved here if we
>     directly take the value for binary semaphores waits and signals?
>
>     Christian.
>
>     Am 08.08.19 um 17:23 schrieb Zhou, David(ChunMing):
>>     Thank you, I got your mean.
>>     when you have sideband payload, you will go into timeline path.
>>     Clever!
>>
>>     -David
>>
>>     -------- 原始邮件 --------
>>     主题:Re: [PATCH v3 1/1] drm/syncobj: add sideband payload
>>     发件人:Lionel Landwerlin
>>     收件人:"Zhou, David(ChunMing)" ,dri-devel@freedesktop.org
>>     <mailto:dri-devel@freedesktop.org>
>>     抄送:"Koenig, Christian" ,Jason Ekstrand ,"Zhou, David(ChunMing)"
>>
>>     On 08/08/2019 17:48, Chunming Zhou wrote:
>>     > 在 2019/8/8 22:34, Lionel Landwerlin 写道:
>>     >> On 08/08/2019 17:16, Chunming Zhou wrote:
>>     >>> 在 2019/8/8 22:01, Lionel Landwerlin 写道:
>>     >>>> On 08/08/2019 16:42, Chunming Zhou wrote:
>>     >>>>> No, I just see your comment "The next vkQueueSubmit()
>>     >>>>> waiting on a the syncobj will read the sideband payload and
>>     wait for a
>>     >>>>> fence chain element with a seqno superior or equal to the
>>     sideband
>>     >>>>> payload value to be added into the fence chain and use that
>>     fence to
>>     >>>>> trigger the submission on the kernel driver. ".
>>     >>>> That was meant to say wait on the chain fence to reach the
>>     sideband
>>     >>>> payload value.
>>     >>>>
>>     >>>> It a bit confusing but I can't see any other way to word it.
>>     >>>>
>>     >>>>
>>     >>>>> In that, you mentioned wait for sideband.
>>     >>>>> So I want to know we how to use that, maybe I miss something in
>>     >>>>> previous
>>     >>>>> discussing thread.
>>     >>>> In QueueSubmit(), we start by reading the sideband payloads :
>>     >>>>
>>     https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655
>>     >>>>
>>     >>>>
>>     >>>> Then build everything for the submission and hand it over to the
>>     >>>> submission thread.
>>     >>>>
>>     >>>> Instead of the just waiting on the timeline semaphore
>>     values, we also
>>     >>>> wait on the binary semaphore sideband payload values.
>>     >>> Waiting on timeline values is when finding fence in kernel.
>>     >>
>>     >> Hmm aren't you waiting in a thread in userspace?
>>     > Yes, For timeline case, we can use waitForSyncobj()..., At begin of
>>     > QueueThread, I let it wait in cs_ioctl when drm_syncobj_find_fence.
>>     >
>>     > But I still didn't get how to wait on sideband for binary Syncobj.
>>     >
>>     > Ah, I see, you will compare it in your QueueThread, if sideband
>>     value is
>>     >   >= expected, then do submission, otherwise, loop QueueThread,
>>     right?
>>
>>
>>     Just treat binary semaphores as timelines and waitForSyncobj on the
>>     sideband payload value.
>>
>>     It should make the submission thread any busier than currently.
>>
>>
>>     -Lionel
>>
>>
>>     >
>>     > That sounds the QueueThread will be always busy.
>>     >
>>     > -David
>>     >
>>     >
>>     >>
>>     >>> But I don't see how to wait/block in kernel when finding
>>     fence for
>>     >>> binary sideband payload  values.
>>     >>
>>     >> Essentially our driver now handles timelines & binary
>>     semaphore using
>>     >> dma-fence-chain in both cases.
>>     >
>>     >
>>     >> Only with timelines we take the values submitted by the vulkan
>>     >> application.
>>     >
>>     >> The binary semaphore auto increment on vkQueueSubmit() and that is
>>     >> managed by the userspace driver.
>>     >>
>>     >>
>>     >> -Lionel
>>     >>
>>     >>
>>     >>>
>>     >>> -David
>>     >>>
>>     >>>> Finally before exiting the QueueSubmit() call, we bump the
>>     sideband
>>     >>>> payloads of all the binary semaphores have have been signaled :
>>     >>>>
>>     https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806
>>     >>>>
>>     >>>>
>>     >>>>
>>     >>>> Whoever calls QueueSubmit() after that will pickup the new
>>     sideband
>>     >>>> payload values to wait on.
>>     >>>>
>>     >>>>
>>     >>>> -Lionel
>>     >>>>
>>     >>>>
>>     >>>>
>>     >>>>> -DAvid
>>     >>>>>
>>     >>>>>
>>     >>>>> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
>>     >>>>>> Interesting question :)
>>     >>>>>>
>>     >>>>>> I didn't see any usecase for that.
>>     >>>>>> This sideband payload value is used for a wait so waiting
>>     on the wait
>>     >>>>>> value for another wait is bit meta :)
>>     >>>>>>
>>     >>>>>> Do you have a use case for that?
>>     >>>>>>
>>     >>>>>> -Lionel
>>     >>>>>>
>>     >>>>>> On 08/08/2019 16:23, Chunming Zhou wrote:
>>     >>>>>>> The propursal is fine with me.
>>     >>>>>>>
>>     >>>>>>> one question:
>>     >>>>>>>
>>     >>>>>>> how to wait sideband payload? Following patch will show that?
>>     >>>>>>>
>>     >>>>>>> -David
>>     >>>>>>>
>>     >>>>>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>>     >>>>>>>> The Vulkan timeline semaphores allow signaling to happen
>>     on the
>>     >>>>>>>> point
>>     >>>>>>>> of the timeline without all of the its dependencies to
>>     be created.
>>     >>>>>>>>
>>     >>>>>>>> The current 2 implementations (AMD/Intel) of the Vulkan
>>     spec on
>>     >>>>>>>> top of
>>     >>>>>>>> the Linux kernel are using a thread to wait on the
>>     dependencies
>>     >>>>>>>> of a
>>     >>>>>>>> given point to materialize and delay actual submission
>>     to the
>>     >>>>>>>> kernel
>>     >>>>>>>> driver until the wait completes.
>>     >>>>>>>>
>>     >>>>>>>> If a binary semaphore is submitted for signaling along
>>     the side
>>     >>>>>>>> of a
>>     >>>>>>>> timeline semaphore waiting for completion that means
>>     that the drm
>>     >>>>>>>> syncobj associated with that binary semaphore will not
>>     have a DMA
>>     >>>>>>>> fence associated with it by the time vkQueueSubmit()
>>     returns. This
>>     >>>>>>>> and
>>     >>>>>>>> the fact that a binary semaphore can be signaled and
>>     unsignaled as
>>     >>>>>>>> before its DMA fences materialize mean that we cannot
>>     just rely on
>>     >>>>>>>> the
>>     >>>>>>>> fence within the syncobj but we also need a sideband payload
>>     >>>>>>>> verifying
>>     >>>>>>>> that the fence in the syncobj matches the last
>>     submission from the
>>     >>>>>>>> Vulkan API point of view.
>>     >>>>>>>>
>>     >>>>>>>> This change adds a sideband payload that is incremented with
>>     >>>>>>>> signaled
>>     >>>>>>>> syncobj when vkQueueSubmit() is called. The next
>>     vkQueueSubmit()
>>     >>>>>>>> waiting on a the syncobj will read the sideband payload
>>     and wait
>>     >>>>>>>> for a
>>     >>>>>>>> fence chain element with a seqno superior or equal to
>>     the sideband
>>     >>>>>>>> payload value to be added into the fence chain and use that
>>     >>>>>>>> fence to
>>     >>>>>>>> trigger the submission on the kernel driver.
>>     >>>>>>>>
>>     >>>>>>>> v2: Use a separate ioctl to get/set the sideband value
>>     (Christian)
>>     >>>>>>>>
>>     >>>>>>>> v3: Use 2 ioctls for get/set (Christian)
>>     >>>>>>>>
>>     >>>>>>>> Signed-off-by: Lionel Landwerlin
>>     <lionel.g.landwerlin@intel.com>
>>     <mailto:lionel.g.landwerlin@intel.com>
>>     >>>>>>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>>     <mailto:Christian.Koenig@amd.com>
>>     >>>>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>>     <mailto:jason@jlekstrand.net>
>>     >>>>>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>>     <mailto:David1.Zhou@amd.com>
>>     >>>>>>>> ---
>>     >>>>>>>> drivers/gpu/drm/drm_internal.h |  4 ++
>>     >>>>>>>> drivers/gpu/drm/drm_ioctl.c    |  5 +++
>>     >>>>>>>> drivers/gpu/drm/drm_syncobj.c  | 79
>>     >>>>>>>> +++++++++++++++++++++++++++++++++-
>>     >>>>>>>> include/drm/drm_syncobj.h      |  9 ++++
>>     >>>>>>>> include/uapi/drm/drm.h         |  2 +
>>     >>>>>>>>       5 files changed, 98 insertions(+), 1 deletion(-)
>>     >>>>>>>>
>>     >>>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>>     >>>>>>>> b/drivers/gpu/drm/drm_internal.h
>>     >>>>>>>> index 51a2055c8f18..0c9736199df0 100644
>>     >>>>>>>> --- a/drivers/gpu/drm/drm_internal.h
>>     >>>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>>     >>>>>>>> @@ -208,6 +208,10 @@ int
>>     drm_syncobj_timeline_signal_ioctl(struct
>>     >>>>>>>> drm_device *dev, void *data,
>>     >>>>>>>>                            struct drm_file *file_private);
>>     >>>>>>>>       int drm_syncobj_query_ioctl(struct drm_device
>>     *dev, void
>>     >>>>>>>> *data,
>>     >>>>>>>>                      struct drm_file *file_private);
>>     >>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device
>>     *dev, void
>>     >>>>>>>> *data,
>>     >>>>>>>> +                   struct drm_file *file_private);
>>     >>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device
>>     *dev, void
>>     >>>>>>>> *data,
>>     >>>>>>>> +                   struct drm_file *file_private);
>>     >>>>>>>>          /* drm_framebuffer.c */
>>     >>>>>>>>       void drm_framebuffer_print_info(struct drm_printer *p,
>>     >>>>>>>> unsigned
>>     >>>>>>>> int indent,
>>     >>>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c
>>     >>>>>>>> b/drivers/gpu/drm/drm_ioctl.c
>>     >>>>>>>> index f675a3bb2c88..48500bf62801 100644
>>     >>>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>>     >>>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>>     >>>>>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc
>>     >>>>>>>> drm_ioctls[]
>>     >>>>>>>> = {
>>     >>>>>>>>                    DRM_RENDER_ALLOW),
>>     >>>>>>>> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY,
>>     >>>>>>>> drm_syncobj_query_ioctl,
>>     >>>>>>>>                    DRM_RENDER_ALLOW),
>>     >>>>>>>> + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
>>     >>>>>>>> drm_syncobj_get_sideband_ioctl,
>>     >>>>>>>> + DRM_RENDER_ALLOW),
>>     >>>>>>>> + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
>>     >>>>>>>> drm_syncobj_set_sideband_ioctl,
>>     >>>>>>>> + DRM_RENDER_ALLOW),
>>     >>>>>>>> +
>>     >>>>>>>> DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>>     >>>>>>>> drm_crtc_get_sequence_ioctl, 0),
>>     >>>>>>>> DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>>     >>>>>>>> drm_crtc_queue_sequence_ioctl, 0),
>>     >>>>>>>> DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>>     >>>>>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>>     >>>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>>     >>>>>>>> b/drivers/gpu/drm/drm_syncobj.c
>>     >>>>>>>> index b927e482e554..c90ef20b9242 100644
>>     >>>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>>     >>>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>>     >>>>>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct
>>     drm_device
>>     >>>>>>>> *dev, void *data,
>>     >>>>>>>>           if (ret < 0)
>>     >>>>>>>> return ret;
>>     >>>>>>>>       -    for (i = 0; i < args->count_handles; i++)
>>     >>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>     >>>>>>>> drm_syncobj_replace_fence(syncobjs[i], NULL);
>>     >>>>>>>> + syncobjs[i]->sideband_payload = 0;
>>     >>>>>>>> +    }
>>     >>>>>>>> drm_syncobj_array_free(syncobjs, args->count_handles);
>>     >>>>>>>>       @@ -1321,6 +1323,81 @@ int
>>     drm_syncobj_query_ioctl(struct
>>     >>>>>>>> drm_device *dev, void *data,
>>     >>>>>>>>               if (ret)
>>     >>>>>>>> break;
>>     >>>>>>>>           }
>>     >>>>>>>> +
>>     >>>>>>>> + drm_syncobj_array_free(syncobjs, args->count_handles);
>>     >>>>>>>> +
>>     >>>>>>>> +    return ret;
>>     >>>>>>>> +}
>>     >>>>>>>> +
>>     >>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device
>>     *dev, void
>>     >>>>>>>> *data,
>>     >>>>>>>> +                   struct drm_file *file_private)
>>     >>>>>>>> +{
>>     >>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>     >>>>>>>> +    struct drm_syncobj **syncobjs;
>>     >>>>>>>> +    uint64_t __user *points =
>>     u64_to_user_ptr(args->points);
>>     >>>>>>>> +    uint32_t i;
>>     >>>>>>>> +    int ret;
>>     >>>>>>>> +
>>     >>>>>>>> +    if (!drm_core_check_feature(dev,
>>     DRIVER_SYNCOBJ_TIMELINE))
>>     >>>>>>>> +        return -EOPNOTSUPP;
>>     >>>>>>>> +
>>     >>>>>>>> +    if (args->pad != 0)
>>     >>>>>>>> +        return -EINVAL;
>>     >>>>>>>> +
>>     >>>>>>>> +    if (args->count_handles == 0)
>>     >>>>>>>> +        return -EINVAL;
>>     >>>>>>>> +
>>     >>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>     >>>>>>>> + u64_to_user_ptr(args->handles),
>>     >>>>>>>> +                     args->count_handles,
>>     >>>>>>>> +                     &syncobjs);
>>     >>>>>>>> +    if (ret < 0)
>>     >>>>>>>> +        return ret;
>>     >>>>>>>> +
>>     >>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>     >>>>>>>> + copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
>>     >>>>>>>> sizeof(uint64_t));
>>     >>>>>>>> +        ret = ret ? -EFAULT : 0;
>>     >>>>>>>> +        if (ret)
>>     >>>>>>>> + break;
>>     >>>>>>>> +    }
>>     >>>>>>>> +
>>     >>>>>>>> + drm_syncobj_array_free(syncobjs, args->count_handles);
>>     >>>>>>>> +
>>     >>>>>>>> +    return ret;
>>     >>>>>>>> +}
>>     >>>>>>>> +
>>     >>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device
>>     *dev, void
>>     >>>>>>>> *data,
>>     >>>>>>>> +                   struct drm_file *file_private)
>>     >>>>>>>> +{
>>     >>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>>     >>>>>>>> +    struct drm_syncobj **syncobjs;
>>     >>>>>>>> +    uint64_t __user *points =
>>     u64_to_user_ptr(args->points);
>>     >>>>>>>> +    uint32_t i;
>>     >>>>>>>> +    int ret;
>>     >>>>>>>> +
>>     >>>>>>>> +    if (!drm_core_check_feature(dev,
>>     DRIVER_SYNCOBJ_TIMELINE))
>>     >>>>>>>> +        return -EOPNOTSUPP;
>>     >>>>>>>> +
>>     >>>>>>>> +    if (args->pad != 0)
>>     >>>>>>>> +        return -EINVAL;
>>     >>>>>>>> +
>>     >>>>>>>> +    if (args->count_handles == 0)
>>     >>>>>>>> +        return -EINVAL;
>>     >>>>>>>> +
>>     >>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>>     >>>>>>>> + u64_to_user_ptr(args->handles),
>>     >>>>>>>> +                     args->count_handles,
>>     >>>>>>>> +                     &syncobjs);
>>     >>>>>>>> +    if (ret < 0)
>>     >>>>>>>> +        return ret;
>>     >>>>>>>> +
>>     >>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>>     >>>>>>>> + copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
>>     >>>>>>>> sizeof(uint64_t));
>>     >>>>>>>> +        ret = ret ? -EFAULT : 0;
>>     >>>>>>>> +        if (ret)
>>     >>>>>>>> + break;
>>     >>>>>>>> +    }
>>     >>>>>>>> +
>>     >>>>>>>> drm_syncobj_array_free(syncobjs, args->count_handles);
>>     >>>>>>>> return ret;
>>     >>>>>>>> diff --git a/include/drm/drm_syncobj.h
>>     b/include/drm/drm_syncobj.h
>>     >>>>>>>> index 6cf7243a1dc5..b587b8e07547 100644
>>     >>>>>>>> --- a/include/drm/drm_syncobj.h
>>     >>>>>>>> +++ b/include/drm/drm_syncobj.h
>>     >>>>>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>>     >>>>>>>>            * @file: A file backing for this syncobj.
>>     >>>>>>>>            */
>>     >>>>>>>>           struct file *file;
>>     >>>>>>>> +    /**
>>     >>>>>>>> +     * @sideband_payload: A 64bit side band payload.
>>     >>>>>>>> +     *
>>     >>>>>>>> +     * We use the sideband payload value to wait on
>>     binary syncobj
>>     >>>>>>>> fences
>>     >>>>>>>> +     * to materialize. It is a reservation mechanism
>>     for the
>>     >>>>>>>> signaler to
>>     >>>>>>>> +     * express that at some point in the future a dma
>>     fence with
>>     >>>>>>>> the same
>>     >>>>>>>> +     * seqno will be put into the syncobj.
>>     >>>>>>>> +     */
>>     >>>>>>>> +    u64 sideband_payload;
>>     >>>>>>>>       };
>>     >>>>>>>>          void drm_syncobj_free(struct kref *kref);
>>     >>>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>     >>>>>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>>     >>>>>>>> --- a/include/uapi/drm/drm.h
>>     >>>>>>>> +++ b/include/uapi/drm/drm.h
>>     >>>>>>>> @@ -946,6 +946,8 @@ extern "C" {
>>     >>>>>>>>       #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct
>>     >>>>>>>> drm_syncobj_timeline_array)
>>     >>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC,
>>     struct
>>     >>>>>>>> drm_syncobj_transfer)
>>     >>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL
>>     DRM_IOWR(0xCD,
>>     >>>>>>>> struct drm_syncobj_timeline_array)
>>     >>>>>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND DRM_IOR(0xCE, struct
>>     >>>>>>>> drm_syncobj_timeline_array)
>>     >>>>>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND DRM_IOWR(0xCF,
>>     struct
>>     >>>>>>>> drm_syncobj_timeline_array)
>>     >>>>>>>>          /**
>>     >>>>>>>>        * Device specific ioctls should only be in their
>>     respective
>>     >>>>>>>> headers
>>     >>
>>
>
Lionel Landwerlin Aug. 9, 2019, 6:54 a.m. UTC | #14
Are you talking about the new ioctls or the existing wait?

Because we do not use the signal ioctl for binary semaphores.
On the wait side, it's used only for VkFence and we would save a 64bit 
copy per fence we wait on.

I can't see that being a perf bottleneck.

On the new sideband ioctls if we can find a way to have only one it 
would save an ioctl per QueueSubmit.

-Lionel

On 08/08/2019 18:26, Koenig, Christian wrote:
> Yeah, the idea is indeed rather clever. I'm just wondering if we don't 
> need to optimize it even more.
>
> In other words could some IOCTL overhead be saved here if we directly 
> take the value for binary semaphores waits and signals?
>
> Christian.
>
> Am 08.08.19 um 17:23 schrieb Zhou, David(ChunMing):
>> Thank you, I got your mean.
>> when you have sideband payload, you will go into timeline path. Clever!
>>
>> -David
>>
>> -------- 原始邮件 --------
>> 主题:Re: [PATCH v3 1/1] drm/syncobj: add sideband payload
>> 发件人:Lionel Landwerlin
>> 收件人:"Zhou, David(ChunMing)" ,dri-devel@freedesktop.org
>> 抄送:"Koenig, Christian" ,Jason Ekstrand ,"Zhou, David(ChunMing)"
>>
>> On 08/08/2019 17:48, Chunming Zhou wrote:
>> > 在 2019/8/8 22:34, Lionel Landwerlin 写道:
>> >> On 08/08/2019 17:16, Chunming Zhou wrote:
>> >>> 在 2019/8/8 22:01, Lionel Landwerlin 写道:
>> >>>> On 08/08/2019 16:42, Chunming Zhou wrote:
>> >>>>> No, I just see your comment "The next vkQueueSubmit()
>> >>>>> waiting on a the syncobj will read the sideband payload and 
>> wait for a
>> >>>>> fence chain element with a seqno superior or equal to the sideband
>> >>>>> payload value to be added into the fence chain and use that 
>> fence to
>> >>>>> trigger the submission on the kernel driver. ".
>> >>>> That was meant to say wait on the chain fence to reach the sideband
>> >>>> payload value.
>> >>>>
>> >>>> It a bit confusing but I can't see any other way to word it.
>> >>>>
>> >>>>
>> >>>>> In that, you mentioned wait for sideband.
>> >>>>> So I want to know we how to use that, maybe I miss something in
>> >>>>> previous
>> >>>>> discussing thread.
>> >>>> In QueueSubmit(), we start by reading the sideband payloads :
>> >>>> 
>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L655
>> >>>>
>> >>>>
>> >>>> Then build everything for the submission and hand it over to the
>> >>>> submission thread.
>> >>>>
>> >>>> Instead of the just waiting on the timeline semaphore values, we 
>> also
>> >>>> wait on the binary semaphore sideband payload values.
>> >>> Waiting on timeline values is when finding fence in kernel.
>> >>
>> >> Hmm aren't you waiting in a thread in userspace?
>> > Yes, For timeline case, we can use waitForSyncobj()..., At begin of
>> > QueueThread, I let it wait in cs_ioctl when drm_syncobj_find_fence.
>> >
>> > But I still didn't get how to wait on sideband for binary Syncobj.
>> >
>> > Ah, I see, you will compare it in your QueueThread, if sideband 
>> value is
>> >   >= expected, then do submission, otherwise, loop QueueThread, right?
>>
>>
>> Just treat binary semaphores as timelines and waitForSyncobj on the
>> sideband payload value.
>>
>> It should make the submission thread any busier than currently.
>>
>>
>> -Lionel
>>
>>
>> >
>> > That sounds the QueueThread will be always busy.
>> >
>> > -David
>> >
>> >
>> >>
>> >>> But I don't see how to wait/block in kernel when finding fence for
>> >>> binary sideband payload  values.
>> >>
>> >> Essentially our driver now handles timelines & binary semaphore using
>> >> dma-fence-chain in both cases.
>> >
>> >
>> >> Only with timelines we take the values submitted by the vulkan
>> >> application.
>> >
>> >> The binary semaphore auto increment on vkQueueSubmit() and that is
>> >> managed by the userspace driver.
>> >>
>> >>
>> >> -Lionel
>> >>
>> >>
>> >>>
>> >>> -David
>> >>>
>> >>>> Finally before exiting the QueueSubmit() call, we bump the sideband
>> >>>> payloads of all the binary semaphores have have been signaled :
>> >>>> 
>> https://gitlab.freedesktop.org/llandwerlin/mesa/blob/review/anv-timeline_semaphore_prep/src/intel/vulkan/anv_queue.c#L806
>> >>>>
>> >>>>
>> >>>>
>> >>>> Whoever calls QueueSubmit() after that will pickup the new sideband
>> >>>> payload values to wait on.
>> >>>>
>> >>>>
>> >>>> -Lionel
>> >>>>
>> >>>>
>> >>>>
>> >>>>> -DAvid
>> >>>>>
>> >>>>>
>> >>>>> 在 2019/8/8 21:38, Lionel Landwerlin 写道:
>> >>>>>> Interesting question :)
>> >>>>>>
>> >>>>>> I didn't see any usecase for that.
>> >>>>>> This sideband payload value is used for a wait so waiting on 
>> the wait
>> >>>>>> value for another wait is bit meta :)
>> >>>>>>
>> >>>>>> Do you have a use case for that?
>> >>>>>>
>> >>>>>> -Lionel
>> >>>>>>
>> >>>>>> On 08/08/2019 16:23, Chunming Zhou wrote:
>> >>>>>>> The propursal is fine with me.
>> >>>>>>>
>> >>>>>>> one question:
>> >>>>>>>
>> >>>>>>> how to wait sideband payload? Following patch will show that?
>> >>>>>>>
>> >>>>>>> -David
>> >>>>>>>
>> >>>>>>> 在 2019/8/8 21:14, Lionel Landwerlin 写道:
>> >>>>>>>> The Vulkan timeline semaphores allow signaling to happen on the
>> >>>>>>>> point
>> >>>>>>>> of the timeline without all of the its dependencies to be 
>> created.
>> >>>>>>>>
>> >>>>>>>> The current 2 implementations (AMD/Intel) of the Vulkan spec on
>> >>>>>>>> top of
>> >>>>>>>> the Linux kernel are using a thread to wait on the dependencies
>> >>>>>>>> of a
>> >>>>>>>> given point to materialize and delay actual submission to the
>> >>>>>>>> kernel
>> >>>>>>>> driver until the wait completes.
>> >>>>>>>>
>> >>>>>>>> If a binary semaphore is submitted for signaling along the side
>> >>>>>>>> of a
>> >>>>>>>> timeline semaphore waiting for completion that means that 
>> the drm
>> >>>>>>>> syncobj associated with that binary semaphore will not have 
>> a DMA
>> >>>>>>>> fence associated with it by the time vkQueueSubmit() 
>> returns. This
>> >>>>>>>> and
>> >>>>>>>> the fact that a binary semaphore can be signaled and 
>> unsignaled as
>> >>>>>>>> before its DMA fences materialize mean that we cannot just 
>> rely on
>> >>>>>>>> the
>> >>>>>>>> fence within the syncobj but we also need a sideband payload
>> >>>>>>>> verifying
>> >>>>>>>> that the fence in the syncobj matches the last submission 
>> from the
>> >>>>>>>> Vulkan API point of view.
>> >>>>>>>>
>> >>>>>>>> This change adds a sideband payload that is incremented with
>> >>>>>>>> signaled
>> >>>>>>>> syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
>> >>>>>>>> waiting on a the syncobj will read the sideband payload and wait
>> >>>>>>>> for a
>> >>>>>>>> fence chain element with a seqno superior or equal to the 
>> sideband
>> >>>>>>>> payload value to be added into the fence chain and use that
>> >>>>>>>> fence to
>> >>>>>>>> trigger the submission on the kernel driver.
>> >>>>>>>>
>> >>>>>>>> v2: Use a separate ioctl to get/set the sideband value 
>> (Christian)
>> >>>>>>>>
>> >>>>>>>> v3: Use 2 ioctls for get/set (Christian)
>> >>>>>>>>
>> >>>>>>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> >>>>>>>> Cc: Christian Koenig <Christian.Koenig@amd.com>
>> >>>>>>>> Cc: Jason Ekstrand <jason@jlekstrand.net>
>> >>>>>>>> Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
>> >>>>>>>> ---
>> >>>>>>>> drivers/gpu/drm/drm_internal.h |  4 ++
>> >>>>>>>> drivers/gpu/drm/drm_ioctl.c    |  5 +++
>> >>>>>>>> drivers/gpu/drm/drm_syncobj.c  | 79
>> >>>>>>>> +++++++++++++++++++++++++++++++++-
>> >>>>>>>> include/drm/drm_syncobj.h      |  9 ++++
>> >>>>>>>> include/uapi/drm/drm.h         |  2 +
>> >>>>>>>>       5 files changed, 98 insertions(+), 1 deletion(-)
>> >>>>>>>>
>> >>>>>>>> diff --git a/drivers/gpu/drm/drm_internal.h
>> >>>>>>>> b/drivers/gpu/drm/drm_internal.h
>> >>>>>>>> index 51a2055c8f18..0c9736199df0 100644
>> >>>>>>>> --- a/drivers/gpu/drm/drm_internal.h
>> >>>>>>>> +++ b/drivers/gpu/drm/drm_internal.h
>> >>>>>>>> @@ -208,6 +208,10 @@ int 
>> drm_syncobj_timeline_signal_ioctl(struct
>> >>>>>>>> drm_device *dev, void *data,
>> >>>>>>>>                            struct drm_file *file_private);
>> >>>>>>>>       int drm_syncobj_query_ioctl(struct drm_device *dev, void
>> >>>>>>>> *data,
>> >>>>>>>> struct drm_file *file_private);
>> >>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>> >>>>>>>> *data,
>> >>>>>>>> + struct drm_file *file_private);
>> >>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>> >>>>>>>> *data,
>> >>>>>>>> + struct drm_file *file_private);
>> >>>>>>>>          /* drm_framebuffer.c */
>> >>>>>>>>       void drm_framebuffer_print_info(struct drm_printer *p,
>> >>>>>>>> unsigned
>> >>>>>>>> int indent,
>> >>>>>>>> diff --git a/drivers/gpu/drm/drm_ioctl.c
>> >>>>>>>> b/drivers/gpu/drm/drm_ioctl.c
>> >>>>>>>> index f675a3bb2c88..48500bf62801 100644
>> >>>>>>>> --- a/drivers/gpu/drm/drm_ioctl.c
>> >>>>>>>> +++ b/drivers/gpu/drm/drm_ioctl.c
>> >>>>>>>> @@ -703,6 +703,11 @@ static const struct drm_ioctl_desc
>> >>>>>>>> drm_ioctls[]
>> >>>>>>>> = {
>> >>>>>>>> DRM_RENDER_ALLOW),
>> >>>>>>>> DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY,
>> >>>>>>>> drm_syncobj_query_ioctl,
>> >>>>>>>> DRM_RENDER_ALLOW),
>> >>>>>>>> + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND,
>> >>>>>>>> drm_syncobj_get_sideband_ioctl,
>> >>>>>>>> + DRM_RENDER_ALLOW),
>> >>>>>>>> + DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND,
>> >>>>>>>> drm_syncobj_set_sideband_ioctl,
>> >>>>>>>> + DRM_RENDER_ALLOW),
>> >>>>>>>> +
>> >>>>>>>> DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE,
>> >>>>>>>> drm_crtc_get_sequence_ioctl, 0),
>> >>>>>>>> DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE,
>> >>>>>>>> drm_crtc_queue_sequence_ioctl, 0),
>> >>>>>>>> DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE,
>> >>>>>>>> drm_mode_create_lease_ioctl, DRM_MASTER),
>> >>>>>>>> diff --git a/drivers/gpu/drm/drm_syncobj.c
>> >>>>>>>> b/drivers/gpu/drm/drm_syncobj.c
>> >>>>>>>> index b927e482e554..c90ef20b9242 100644
>> >>>>>>>> --- a/drivers/gpu/drm/drm_syncobj.c
>> >>>>>>>> +++ b/drivers/gpu/drm/drm_syncobj.c
>> >>>>>>>> @@ -1150,8 +1150,10 @@ drm_syncobj_reset_ioctl(struct drm_device
>> >>>>>>>> *dev, void *data,
>> >>>>>>>>           if (ret < 0)
>> >>>>>>>>               return ret;
>> >>>>>>>>       -    for (i = 0; i < args->count_handles; i++)
>> >>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>> >>>>>>>> drm_syncobj_replace_fence(syncobjs[i], NULL);
>> >>>>>>>> + syncobjs[i]->sideband_payload = 0;
>> >>>>>>>> +    }
>> >>>>>>>> drm_syncobj_array_free(syncobjs, args->count_handles);
>> >>>>>>>>       @@ -1321,6 +1323,81 @@ int drm_syncobj_query_ioctl(struct
>> >>>>>>>> drm_device *dev, void *data,
>> >>>>>>>>               if (ret)
>> >>>>>>>>                   break;
>> >>>>>>>>           }
>> >>>>>>>> +
>> >>>>>>>> + drm_syncobj_array_free(syncobjs, args->count_handles);
>> >>>>>>>> +
>> >>>>>>>> +    return ret;
>> >>>>>>>> +}
>> >>>>>>>> +
>> >>>>>>>> +int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void
>> >>>>>>>> *data,
>> >>>>>>>> + struct drm_file *file_private)
>> >>>>>>>> +{
>> >>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>> >>>>>>>> +    struct drm_syncobj **syncobjs;
>> >>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>> >>>>>>>> +    uint32_t i;
>> >>>>>>>> +    int ret;
>> >>>>>>>> +
>> >>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>> >>>>>>>> +        return -EOPNOTSUPP;
>> >>>>>>>> +
>> >>>>>>>> +    if (args->pad != 0)
>> >>>>>>>> +        return -EINVAL;
>> >>>>>>>> +
>> >>>>>>>> +    if (args->count_handles == 0)
>> >>>>>>>> +        return -EINVAL;
>> >>>>>>>> +
>> >>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>> >>>>>>>> + u64_to_user_ptr(args->handles),
>> >>>>>>>> + args->count_handles,
>> >>>>>>>> + &syncobjs);
>> >>>>>>>> +    if (ret < 0)
>> >>>>>>>> +        return ret;
>> >>>>>>>> +
>> >>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>> >>>>>>>> + copy_to_user(&points[i], &syncobjs[i]->sideband_payload,
>> >>>>>>>> sizeof(uint64_t));
>> >>>>>>>> +        ret = ret ? -EFAULT : 0;
>> >>>>>>>> +        if (ret)
>> >>>>>>>> +            break;
>> >>>>>>>> +    }
>> >>>>>>>> +
>> >>>>>>>> + drm_syncobj_array_free(syncobjs, args->count_handles);
>> >>>>>>>> +
>> >>>>>>>> +    return ret;
>> >>>>>>>> +}
>> >>>>>>>> +
>> >>>>>>>> +int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void
>> >>>>>>>> *data,
>> >>>>>>>> + struct drm_file *file_private)
>> >>>>>>>> +{
>> >>>>>>>> +    struct drm_syncobj_timeline_array *args = data;
>> >>>>>>>> +    struct drm_syncobj **syncobjs;
>> >>>>>>>> +    uint64_t __user *points = u64_to_user_ptr(args->points);
>> >>>>>>>> +    uint32_t i;
>> >>>>>>>> +    int ret;
>> >>>>>>>> +
>> >>>>>>>> +    if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>> >>>>>>>> +        return -EOPNOTSUPP;
>> >>>>>>>> +
>> >>>>>>>> +    if (args->pad != 0)
>> >>>>>>>> +        return -EINVAL;
>> >>>>>>>> +
>> >>>>>>>> +    if (args->count_handles == 0)
>> >>>>>>>> +        return -EINVAL;
>> >>>>>>>> +
>> >>>>>>>> +    ret = drm_syncobj_array_find(file_private,
>> >>>>>>>> + u64_to_user_ptr(args->handles),
>> >>>>>>>> + args->count_handles,
>> >>>>>>>> + &syncobjs);
>> >>>>>>>> +    if (ret < 0)
>> >>>>>>>> +        return ret;
>> >>>>>>>> +
>> >>>>>>>> +    for (i = 0; i < args->count_handles; i++) {
>> >>>>>>>> + copy_from_user(&syncobjs[i]->sideband_payload, &points[i],
>> >>>>>>>> sizeof(uint64_t));
>> >>>>>>>> +        ret = ret ? -EFAULT : 0;
>> >>>>>>>> +        if (ret)
>> >>>>>>>> +            break;
>> >>>>>>>> +    }
>> >>>>>>>> +
>> >>>>>>>> drm_syncobj_array_free(syncobjs, args->count_handles);
>> >>>>>>>>              return ret;
>> >>>>>>>> diff --git a/include/drm/drm_syncobj.h 
>> b/include/drm/drm_syncobj.h
>> >>>>>>>> index 6cf7243a1dc5..b587b8e07547 100644
>> >>>>>>>> --- a/include/drm/drm_syncobj.h
>> >>>>>>>> +++ b/include/drm/drm_syncobj.h
>> >>>>>>>> @@ -61,6 +61,15 @@ struct drm_syncobj {
>> >>>>>>>>            * @file: A file backing for this syncobj.
>> >>>>>>>>            */
>> >>>>>>>>           struct file *file;
>> >>>>>>>> +    /**
>> >>>>>>>> +     * @sideband_payload: A 64bit side band payload.
>> >>>>>>>> +     *
>> >>>>>>>> +     * We use the sideband payload value to wait on binary 
>> syncobj
>> >>>>>>>> fences
>> >>>>>>>> +     * to materialize. It is a reservation mechanism for the
>> >>>>>>>> signaler to
>> >>>>>>>> +     * express that at some point in the future a dma fence 
>> with
>> >>>>>>>> the same
>> >>>>>>>> +     * seqno will be put into the syncobj.
>> >>>>>>>> +     */
>> >>>>>>>> +    u64 sideband_payload;
>> >>>>>>>>       };
>> >>>>>>>>          void drm_syncobj_free(struct kref *kref);
>> >>>>>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> >>>>>>>> index 8a5b2f8f8eb9..cffdc6c9772c 100644
>> >>>>>>>> --- a/include/uapi/drm/drm.h
>> >>>>>>>> +++ b/include/uapi/drm/drm.h
>> >>>>>>>> @@ -946,6 +946,8 @@ extern "C" {
>> >>>>>>>>       #define DRM_IOCTL_SYNCOBJ_QUERY DRM_IOWR(0xCB, struct
>> >>>>>>>> drm_syncobj_timeline_array)
>> >>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct
>> >>>>>>>> drm_syncobj_transfer)
>> >>>>>>>>       #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD,
>> >>>>>>>> struct drm_syncobj_timeline_array)
>> >>>>>>>> +#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND DRM_IOR(0xCE, struct
>> >>>>>>>> drm_syncobj_timeline_array)
>> >>>>>>>> +#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND DRM_IOWR(0xCF, struct
>> >>>>>>>> drm_syncobj_timeline_array)
>> >>>>>>>>          /**
>> >>>>>>>>        * Device specific ioctls should only be in their 
>> respective
>> >>>>>>>> headers
>> >>
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 51a2055c8f18..0c9736199df0 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -208,6 +208,10 @@  int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
 				      struct drm_file *file_private);
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
+int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file_private);
+int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file_private);
 
 /* drm_framebuffer.c */
 void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index f675a3bb2c88..48500bf62801 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -703,6 +703,11 @@  static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
 		      DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_GET_SIDEBAND, drm_syncobj_get_sideband_ioctl,
+		      DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_SET_SIDEBAND, drm_syncobj_set_sideband_ioctl,
+		      DRM_RENDER_ALLOW),
+
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index b927e482e554..c90ef20b9242 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1150,8 +1150,10 @@  drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 	if (ret < 0)
 		return ret;
 
-	for (i = 0; i < args->count_handles; i++)
+	for (i = 0; i < args->count_handles; i++) {
 		drm_syncobj_replace_fence(syncobjs[i], NULL);
+		syncobjs[i]->sideband_payload = 0;
+	}
 
 	drm_syncobj_array_free(syncobjs, args->count_handles);
 
@@ -1321,6 +1323,81 @@  int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 		if (ret)
 			break;
 	}
+
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
+
+int drm_syncobj_get_sideband_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_array *args = data;
+	struct drm_syncobj **syncobjs;
+	uint64_t __user *points = u64_to_user_ptr(args->points);
+	uint32_t i;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
+		return -EOPNOTSUPP;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < args->count_handles; i++) {
+		copy_to_user(&points[i], &syncobjs[i]->sideband_payload, sizeof(uint64_t));
+		ret = ret ? -EFAULT : 0;
+		if (ret)
+			break;
+	}
+
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
+
+int drm_syncobj_set_sideband_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file_private)
+{
+	struct drm_syncobj_timeline_array *args = data;
+	struct drm_syncobj **syncobjs;
+	uint64_t __user *points = u64_to_user_ptr(args->points);
+	uint32_t i;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
+		return -EOPNOTSUPP;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < args->count_handles; i++) {
+		copy_from_user(&syncobjs[i]->sideband_payload, &points[i], sizeof(uint64_t));
+		ret = ret ? -EFAULT : 0;
+		if (ret)
+			break;
+	}
+
 	drm_syncobj_array_free(syncobjs, args->count_handles);
 
 	return ret;
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 6cf7243a1dc5..b587b8e07547 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -61,6 +61,15 @@  struct drm_syncobj {
 	 * @file: A file backing for this syncobj.
 	 */
 	struct file *file;
+	/**
+	 * @sideband_payload: A 64bit side band payload.
+	 *
+	 * We use the sideband payload value to wait on binary syncobj fences
+	 * to materialize. It is a reservation mechanism for the signaler to
+	 * express that at some point in the future a dma fence with the same
+	 * seqno will be put into the syncobj.
+	 */
+	u64 sideband_payload;
 };
 
 void drm_syncobj_free(struct kref *kref);
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 8a5b2f8f8eb9..cffdc6c9772c 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -946,6 +946,8 @@  extern "C" {
 #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
 #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
 #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_GET_SIDEBAND	DRM_IOR(0xCE, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_SET_SIDEBAND	DRM_IOWR(0xCF, struct drm_syncobj_timeline_array)
 
 /**
  * Device specific ioctls should only be in their respective headers