diff mbox series

[v4,1/1] drm/syncobj: add sideband payload

Message ID 20190809113030.2547-2-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/syncobj: add syncobj sideband payload for threaded submission | expand

Commit Message

Lionel Landwerlin Aug. 9, 2019, 11:30 a.m. UTC
The Vulkan timeline semaphores allow signaling to happen on the point
of the timeline without all of the its dependencies to be created.

The current 2 implementations (AMD/Intel) of the Vulkan spec on top of
the Linux kernel are using a thread to wait on the dependencies of a
given point to materialize and delay actual submission to the kernel
driver until the wait completes.

If a binary semaphore is submitted for signaling along the side of a
timeline semaphore waiting for completion that means that the drm
syncobj associated with that binary semaphore will not have a DMA
fence associated with it by the time vkQueueSubmit() returns. This and
the fact that a binary semaphore can be signaled and unsignaled as
before its DMA fences materialize mean that we cannot just rely on the
fence within the syncobj but we also need a sideband payload verifying
that the fence in the syncobj matches the last submission from the
Vulkan API point of view.

This change adds a sideband payload that is incremented with signaled
syncobj when vkQueueSubmit() is called. The next vkQueueSubmit()
waiting on a the syncobj will read the sideband payload and wait for a
fence chain element with a seqno superior or equal to the sideband
payload value to be added into the fence chain and use that fence to
trigger the submission on the kernel driver.

v2: Use a separate ioctl to get/set the sideband value (Christian)

v3: Use 2 ioctls for get/set (Christian)

v4: Use a single new ioctl

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Cc: Christian Koenig <Christian.Koenig@amd.com>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: David(ChunMing) Zhou <David1.Zhou@amd.com>
---
 drivers/gpu/drm/drm_internal.h |  2 ++
 drivers/gpu/drm/drm_ioctl.c    |  3 ++
 drivers/gpu/drm/drm_syncobj.c  | 55 +++++++++++++++++++++++++++++++++-
 include/drm/drm_syncobj.h      |  9 ++++++
 include/uapi/drm/drm.h         | 17 +++++++++++
 5 files changed, 85 insertions(+), 1 deletion(-)

Comments

Chris Wilson Aug. 9, 2019, 11:44 a.m. UTC | #1
Quoting Lionel Landwerlin (2019-08-09 12:30:30)
> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
> index 8a5b2f8f8eb9..1ce83853f997 100644
> --- a/include/uapi/drm/drm.h
> +++ b/include/uapi/drm/drm.h
> @@ -785,6 +785,22 @@ struct drm_syncobj_timeline_array {
>         __u32 pad;
>  };
>  
> +struct drm_syncobj_binary_array {
> +       /* A pointer to an array of u32 syncobj handles. */
> +       __u64 handles;
> +       /* A pointer to an array of u32 access flags for each handle. */
> +       __u64 access_flags;
> +       /* The binary value of a syncobj is read before it is incremented. */
> +#define I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_READ (1u << 0)
> +#define I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_INC  (1u << 1)

You're not in Kansas anymore ;)
-Chris
Chris Wilson Aug. 9, 2019, 11:58 a.m. UTC | #2
Quoting Lionel Landwerlin (2019-08-09 12:30:30)
> +int drm_syncobj_binary_ioctl(struct drm_device *dev, void *data,
> +                            struct drm_file *file_private)
> +{
> +       struct drm_syncobj_binary_array *args = data;
> +       struct drm_syncobj **syncobjs;
> +       u32 __user *access_flags = u64_to_user_ptr(args->access_flags);
> +       u64 __user *values = u64_to_user_ptr(args->values);
> +       u32 i;
> +       int ret;
> +
> +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
> +               return -EOPNOTSUPP;
> +
> +       if (args->pad != 0)
> +               return -EINVAL;
> +
> +       if (args->count_handles == 0)
> +               return -EINVAL;

You may find it easier to just return success for 0 handles. Slightly less
obnoxious error handling?

> +       ret = drm_syncobj_array_find(file_private,
> +                                    u64_to_user_ptr(args->handles),
> +                                    args->count_handles,
> +                                    &syncobjs);
> +       if (ret < 0)
> +               return ret;
> +
> +       for (i = 0; i < args->count_handles; i++) {
> +               u32 flags;
> +
> +               copy_from_user(&flags, &access_flags[i], sizeof(flags));
> +               ret = ret ? -EFAULT : 0;

Magic?

if (get_user(flags, &access_flags[i[))
	return -EFAULT;

> +               if (ret)
> +                       break;
> +
> +               if (flags & I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_READ) {
> +                       copy_to_user(&values[i], &syncobjs[i]->binary_payload, sizeof(values[i]));
> +                       ret = ret ? -EFAULT : 0;

More magic.

if (put_user(&syncobjs[i]->binary_payload, &values[i]))
	return -EFAULT;

> +                       if (ret)
> +                               break;
> +               }
> +
> +               if (flags & I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_INC)
> +                       syncobjs[i]->binary_payload++;

So if an error occurs how does the user know which syncobj were
advanced before the error? (Or explain why it doesn't actually matter)
The clue I guess is with read/inc, but confirmation of design would be
nice.

Not atomic (the u64 write should really be to avoid total corruption)
and nothing prevents userspace from racing. How safe is that in the
overall design?

What would happen if the binary_payload was initialised to -1?

> +       }
> +
>         drm_syncobj_array_free(syncobjs, args->count_handles);
>  
>         return ret;
Lionel Landwerlin Aug. 9, 2019, 12:26 p.m. UTC | #3
On 09/08/2019 14:44, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-09 12:30:30)
>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>> index 8a5b2f8f8eb9..1ce83853f997 100644
>> --- a/include/uapi/drm/drm.h
>> +++ b/include/uapi/drm/drm.h
>> @@ -785,6 +785,22 @@ struct drm_syncobj_timeline_array {
>>          __u32 pad;
>>   };
>>   
>> +struct drm_syncobj_binary_array {
>> +       /* A pointer to an array of u32 syncobj handles. */
>> +       __u64 handles;
>> +       /* A pointer to an array of u32 access flags for each handle. */
>> +       __u64 access_flags;
>> +       /* The binary value of a syncobj is read before it is incremented. */
>> +#define I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_READ (1u << 0)
>> +#define I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_INC  (1u << 1)
> You're not in Kansas anymore ;)
> -Chris
>
Which means? :)


-Lionel
Christian König Aug. 9, 2019, 12:27 p.m. UTC | #4
Am 09.08.19 um 14:26 schrieb Lionel Landwerlin:
> On 09/08/2019 14:44, Chris Wilson wrote:
>> Quoting Lionel Landwerlin (2019-08-09 12:30:30)
>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>> index 8a5b2f8f8eb9..1ce83853f997 100644
>>> --- a/include/uapi/drm/drm.h
>>> +++ b/include/uapi/drm/drm.h
>>> @@ -785,6 +785,22 @@ struct drm_syncobj_timeline_array {
>>>          __u32 pad;
>>>   };
>>>   +struct drm_syncobj_binary_array {
>>> +       /* A pointer to an array of u32 syncobj handles. */
>>> +       __u64 handles;
>>> +       /* A pointer to an array of u32 access flags for each 
>>> handle. */
>>> +       __u64 access_flags;
>>> +       /* The binary value of a syncobj is read before it is 
>>> incremented. */
>>> +#define I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_READ (1u << 0)
>>> +#define I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_INC  (1u << 1)
>> You're not in Kansas anymore ;)
>> -Chris
>>
> Which means? :)

You are in common DRM code, but the new defines start with I915_....

Cheers,
Christian.

>
>
> -Lionel
>
Chris Wilson Aug. 9, 2019, 12:38 p.m. UTC | #5
Quoting Chris Wilson (2019-08-09 12:58:51)
> Quoting Lionel Landwerlin (2019-08-09 12:30:30)
> > +               if (flags & I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_READ) {
> > +                       copy_to_user(&values[i], &syncobjs[i]->binary_payload, sizeof(values[i]));
> > +                       ret = ret ? -EFAULT : 0;
> 
> More magic.
> 
> if (put_user(&syncobjs[i]->binary_payload, &values[i]))
>         return -EFAULT;

(break not yet)

Should just be put_user(syncobjs[i]->binary_payload, &values[i])
The value of, not its address.
-Chris
Lionel Landwerlin Aug. 9, 2019, 12:38 p.m. UTC | #6
On 09/08/2019 14:58, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-09 12:30:30)
>> +int drm_syncobj_binary_ioctl(struct drm_device *dev, void *data,
>> +                            struct drm_file *file_private)
>> +{
>> +       struct drm_syncobj_binary_array *args = data;
>> +       struct drm_syncobj **syncobjs;
>> +       u32 __user *access_flags = u64_to_user_ptr(args->access_flags);
>> +       u64 __user *values = u64_to_user_ptr(args->values);
>> +       u32 i;
>> +       int ret;
>> +
>> +       if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
>> +               return -EOPNOTSUPP;
>> +
>> +       if (args->pad != 0)
>> +               return -EINVAL;
>> +
>> +       if (args->count_handles == 0)
>> +               return -EINVAL;
> You may find it easier to just return success for 0 handles. Slightly less
> obnoxious error handling?


All the other ioctls in this file return EINVAL in that case. I'm just 
going for consistency.

It's also a good indication for the application it can save itself an 
ioctl really :)


>
>> +       ret = drm_syncobj_array_find(file_private,
>> +                                    u64_to_user_ptr(args->handles),
>> +                                    args->count_handles,
>> +                                    &syncobjs);
>> +       if (ret < 0)
>> +               return ret;
>> +
>> +       for (i = 0; i < args->count_handles; i++) {
>> +               u32 flags;
>> +
>> +               copy_from_user(&flags, &access_flags[i], sizeof(flags));
>> +               ret = ret ? -EFAULT : 0;
> Magic?
>
> if (get_user(flags, &access_flags[i[))
> 	return -EFAULT;


I give this no testing, I'm just trying to get some feedback about the 
direction.

Thanks though :)


>
>> +               if (ret)
>> +                       break;
>> +
>> +               if (flags & I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_READ) {
>> +                       copy_to_user(&values[i], &syncobjs[i]->binary_payload, sizeof(values[i]));
>> +                       ret = ret ? -EFAULT : 0;
> More magic.
>
> if (put_user(&syncobjs[i]->binary_payload, &values[i]))
> 	return -EFAULT;
>
>> +                       if (ret)
>> +                               break;
>> +               }
>> +
>> +               if (flags & I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_INC)
>> +                       syncobjs[i]->binary_payload++;
> So if an error occurs how does the user know which syncobj were
> advanced before the error? (Or explain why it doesn't actually matter)
> The clue I guess is with read/inc, but confirmation of design would be
> nice.


I guess we could toggle the access flag bits to notify that the actions 
were completed.


>
> Not atomic (the u64 write should really be to avoid total corruption)
> and nothing prevents userspace from racing. How safe is that in the
> overall design?


Atomic would prevent issue related to 2 processes/threads seeing 
different values because of caching?


If not then it's not really interesting for the use case. The increment 
should happen during the vkQueueSubmit() call and the value is only 
valid upon returning.

The application is responsible for not having 
vkQueueSubmit()/vkWaitForFences() race.


Not opposed to switch to atomic though.


>
> What would happen if the binary_payload was initialised to -1?


The 0 value is problematic because it's also used for "whatever fence in 
the syncobj".

I think we need to stick to the same rules as the timeline values : 0 is 
always signaled


Thanks,


-Lionel


>
>> +       }
>> +
>>          drm_syncobj_array_free(syncobjs, args->count_handles);
>>   
>>          return ret;
Chris Wilson Aug. 9, 2019, 12:49 p.m. UTC | #7
Quoting Lionel Landwerlin (2019-08-09 13:38:57)
> On 09/08/2019 14:58, Chris Wilson wrote:
> > Not atomic (the u64 write should really be to avoid total corruption)
> > and nothing prevents userspace from racing. How safe is that in the
> > overall design?
> 
> 
> Atomic would prevent issue related to 2 processes/threads seeing 
> different values because of caching?

No, the kernel atomics themselves do not guarantee memory barriers in
all cases. The issue I see here is that we can not safely do a u64
increment on all platforms without write tearing. E.g. two clients
simultaneously incrementing from U32_MAX becomes 0x1 (both would report
U32_MAX)
-Chris
Lionel Landwerlin Aug. 9, 2019, 12:53 p.m. UTC | #8
On 09/08/2019 15:27, Koenig, Christian wrote:
> Am 09.08.19 um 14:26 schrieb Lionel Landwerlin:
>> On 09/08/2019 14:44, Chris Wilson wrote:
>>> Quoting Lionel Landwerlin (2019-08-09 12:30:30)
>>>> diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
>>>> index 8a5b2f8f8eb9..1ce83853f997 100644
>>>> --- a/include/uapi/drm/drm.h
>>>> +++ b/include/uapi/drm/drm.h
>>>> @@ -785,6 +785,22 @@ struct drm_syncobj_timeline_array {
>>>>           __u32 pad;
>>>>    };
>>>>    +struct drm_syncobj_binary_array {
>>>> +       /* A pointer to an array of u32 syncobj handles. */
>>>> +       __u64 handles;
>>>> +       /* A pointer to an array of u32 access flags for each
>>>> handle. */
>>>> +       __u64 access_flags;
>>>> +       /* The binary value of a syncobj is read before it is
>>>> incremented. */
>>>> +#define I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_READ (1u << 0)
>>>> +#define I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_INC  (1u << 1)
>>> You're not in Kansas anymore ;)
>>> -Chris
>>>
>> Which means? :)
> You are in common DRM code, but the new defines start with I915_....
>
> Cheers,
> Christian.


Oh dear...


-Lionel


>
>>
>> -Lionel
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 51a2055c8f18..e297dfd85019 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -208,6 +208,8 @@  int drm_syncobj_timeline_signal_ioctl(struct drm_device *dev, void *data,
 				      struct drm_file *file_private);
 int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_private);
+int drm_syncobj_binary_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_private);
 
 /* drm_framebuffer.c */
 void drm_framebuffer_print_info(struct drm_printer *p, unsigned int indent,
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index f675a3bb2c88..644d0bc800a4 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -703,6 +703,9 @@  static const struct drm_ioctl_desc drm_ioctls[] = {
 		      DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_QUERY, drm_syncobj_query_ioctl,
 		      DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_BINARY, drm_syncobj_binary_ioctl,
+		      DRM_RENDER_ALLOW),
+
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, 0),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER),
diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c
index b927e482e554..daced258d5ce 100644
--- a/drivers/gpu/drm/drm_syncobj.c
+++ b/drivers/gpu/drm/drm_syncobj.c
@@ -1150,8 +1150,10 @@  drm_syncobj_reset_ioctl(struct drm_device *dev, void *data,
 	if (ret < 0)
 		return ret;
 
-	for (i = 0; i < args->count_handles; i++)
+	for (i = 0; i < args->count_handles; i++) {
 		drm_syncobj_replace_fence(syncobjs[i], NULL);
+		syncobjs[i]->binary_payload = 0;
+	}
 
 	drm_syncobj_array_free(syncobjs, args->count_handles);
 
@@ -1321,6 +1323,57 @@  int drm_syncobj_query_ioctl(struct drm_device *dev, void *data,
 		if (ret)
 			break;
 	}
+
+	drm_syncobj_array_free(syncobjs, args->count_handles);
+
+	return ret;
+}
+
+int drm_syncobj_binary_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_private)
+{
+	struct drm_syncobj_binary_array *args = data;
+	struct drm_syncobj **syncobjs;
+	u32 __user *access_flags = u64_to_user_ptr(args->access_flags);
+	u64 __user *values = u64_to_user_ptr(args->values);
+	u32 i;
+	int ret;
+
+	if (!drm_core_check_feature(dev, DRIVER_SYNCOBJ_TIMELINE))
+		return -EOPNOTSUPP;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	if (args->count_handles == 0)
+		return -EINVAL;
+
+	ret = drm_syncobj_array_find(file_private,
+				     u64_to_user_ptr(args->handles),
+				     args->count_handles,
+				     &syncobjs);
+	if (ret < 0)
+		return ret;
+
+	for (i = 0; i < args->count_handles; i++) {
+		u32 flags;
+
+		copy_from_user(&flags, &access_flags[i], sizeof(flags));
+		ret = ret ? -EFAULT : 0;
+		if (ret)
+			break;
+
+		if (flags & I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_READ) {
+			copy_to_user(&values[i], &syncobjs[i]->binary_payload, sizeof(values[i]));
+			ret = ret ? -EFAULT : 0;
+			if (ret)
+				break;
+		}
+
+		if (flags & I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_INC)
+			syncobjs[i]->binary_payload++;
+	}
+
 	drm_syncobj_array_free(syncobjs, args->count_handles);
 
 	return ret;
diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h
index 6cf7243a1dc5..99d552647d6c 100644
--- a/include/drm/drm_syncobj.h
+++ b/include/drm/drm_syncobj.h
@@ -61,6 +61,15 @@  struct drm_syncobj {
 	 * @file: A file backing for this syncobj.
 	 */
 	struct file *file;
+	/**
+	 * @binary_payload: A 64bit payload for binary syncobjs.
+	 *
+	 * We use the payload value to wait on binary syncobj fences to
+	 * materialize. It is a reservation mechanism for the signaler to
+	 * express that at some point in the future a dma fence with the same
+	 * seqno will be put into the syncobj.
+	 */
+	u64 binary_payload;
 };
 
 void drm_syncobj_free(struct kref *kref);
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 8a5b2f8f8eb9..1ce83853f997 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -785,6 +785,22 @@  struct drm_syncobj_timeline_array {
 	__u32 pad;
 };
 
+struct drm_syncobj_binary_array {
+	/* A pointer to an array of u32 syncobj handles. */
+	__u64 handles;
+	/* A pointer to an array of u32 access flags for each handle. */
+	__u64 access_flags;
+	/* The binary value of a syncobj is read before it is incremented. */
+#define I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_READ (1u << 0)
+#define I915_DRM_SYNCOBJ_BINARY_ITEM_VALUE_INC  (1u << 1)
+	/* A pointer to an array of u64 values written to by the kernel if the
+	 * handle is flagged for reading.
+	 */
+	__u64 values;
+	/* The length of the 3 arrays above. */
+	__u32 count_handles;
+	__u32 pad;
+};
 
 /* Query current scanout sequence number */
 struct drm_crtc_get_sequence {
@@ -946,6 +962,7 @@  extern "C" {
 #define DRM_IOCTL_SYNCOBJ_QUERY		DRM_IOWR(0xCB, struct drm_syncobj_timeline_array)
 #define DRM_IOCTL_SYNCOBJ_TRANSFER	DRM_IOWR(0xCC, struct drm_syncobj_transfer)
 #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL	DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_SYNCOBJ_BINARY	DRM_IOWR(0xCE, struct drm_syncobj_binary_array)
 
 /**
  * Device specific ioctls should only be in their respective headers