
[v6,07/11] drm/i915: add syncobj timeline support

Message ID 20190701113437.4043-8-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Series drm/i915: Vulkan performance query support

Commit Message

Lionel Landwerlin July 1, 2019, 11:34 a.m. UTC
Introduces new parameters to execbuf so that we can specify syncobj
handles as well as timeline points.

v2: Reuse i915_user_extension_fn

v3: Check that the chained extension is only present once (Chris)

v4: Check that dma_fence_chain_find_seqno returns a non-NULL fence (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 288 ++++++++++++++----
 drivers/gpu/drm/i915/i915_drv.c               |   4 +-
 include/uapi/drm/i915_drm.h                   |  38 +++
 3 files changed, 273 insertions(+), 57 deletions(-)
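
For context, a hypothetical userspace sketch of the new extension follows. The
syncobj handle, the point values and the way the extension is chained into the
execbuf (via the I915_EXEC_EXT mechanism from earlier in this series) are
assumptions for illustration, not part of this patch:

	/* Wait on point 1 of a timeline syncobj, then signal point 2 on submission. */
	struct drm_i915_gem_exec_fence fences[2] = {
		{ .handle = syncobj_handle, .flags = I915_EXEC_FENCE_WAIT },
		{ .handle = syncobj_handle, .flags = I915_EXEC_FENCE_SIGNAL },
	};
	__u64 points[2] = { 1, 2 };

	struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
		.base = { .name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES },
		.fence_count = 2,
		.handles_ptr = (__u64)(uintptr_t)fences,
		.values_ptr  = (__u64)(uintptr_t)points,
	};
	/* The extension would then be linked into the execbuf's extension chain. */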

Comments

Chris Wilson July 1, 2019, 1:13 p.m. UTC | #1
Quoting Lionel Landwerlin (2019-07-01 12:34:33)
>  struct i915_execbuffer {
>         struct drm_i915_private *i915; /** i915 backpointer */
>         struct drm_file *file; /** per-file lookup tables and limits */
> @@ -275,6 +282,7 @@ struct i915_execbuffer {
>  
>         struct {
>                 u64 flags; /** Available extensions parameters */
> +               struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
>         } extensions;
>  };
> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
> +{
> +       struct i915_execbuffer *eb = data;
> +
> +       /* Timeline fences are incompatible with the fence array flag. */
> +       if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> +               return -EINVAL;
> +
> +       if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> +               return -EINVAL;

flags is 64b, so wiser if we use BIT_ULL() from the start. You don't
want to copy my bugs ;)
-Chris
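
For reference, the distinction being pointed out: BIT() expands to an unsigned
long shift, which is only 32 bits wide on 32-bit kernels, while BIT_ULL() is
always 64 bits wide. A minimal illustration (macro shapes as in
include/linux/bits.h):

	#define BIT(nr)		(1UL << (nr))	/* 32 bits on 32-bit builds */
	#define BIT_ULL(nr)	(1ULL << (nr))	/* always 64 bits */

	u64 flags = 0;
	flags |= BIT(40);	/* truncated on a 32-bit kernel */
	flags |= BIT_ULL(40);	/* safe for a 64-bit flags field */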
Lionel Landwerlin July 1, 2019, 1:15 p.m. UTC | #2
On 01/07/2019 16:13, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-07-01 12:34:33)
>>   struct i915_execbuffer {
>>          struct drm_i915_private *i915; /** i915 backpointer */
>>          struct drm_file *file; /** per-file lookup tables and limits */
>> @@ -275,6 +282,7 @@ struct i915_execbuffer {
>>   
>>          struct {
>>                  u64 flags; /** Available extensions parameters */
>> +               struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
>>          } extensions;
>>   };
>> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
>> +{
>> +       struct i915_execbuffer *eb = data;
>> +
>> +       /* Timeline fences are incompatible with the fence array flag. */
>> +       if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
>> +               return -EINVAL;
>> +
>> +       if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
>> +               return -EINVAL;
> flags is 64b, so wiser if we use BIT_ULL() from the start. You don't
> want to copy my bugs ;)
> -Chris
>
Dammit! Why aren't all bit macros 64 bits? :)


-Lionel
Chris Wilson July 1, 2019, 1:18 p.m. UTC | #3
Quoting Lionel Landwerlin (2019-07-01 12:34:33)
> +               /*
> +                * For timeline syncobjs we need to preallocate chains for
> +                * later signaling.
> +                */
> +               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> +                       fences[n].chain_fence =
> +                               kmalloc(sizeof(*fences[n].chain_fence),
> +                                       GFP_KERNEL);
> +                       if (!fences[n].chain_fence) {
> +                               dma_fence_put(fence);
> +                               drm_syncobj_put(syncobj);
> +                               err = -ENOMEM;
> +                               DRM_DEBUG("Unable to alloc chain_fence\n");

This is like throwing a grenade, waiting for the explosion, and then
saying "bang" under your breath. :)
-Chris
Lionel Landwerlin July 1, 2019, 1:22 p.m. UTC | #4
On 01/07/2019 16:18, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-07-01 12:34:33)
>> +               /*
>> +                * For timeline syncobjs we need to preallocate chains for
>> +                * later signaling.
>> +                */
>> +               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
>> +                       fences[n].chain_fence =
>> +                               kmalloc(sizeof(*fences[n].chain_fence),
>> +                                       GFP_KERNEL);
>> +                       if (!fences[n].chain_fence) {
>> +                               dma_fence_put(fence);
>> +                               drm_syncobj_put(syncobj);
>> +                               err = -ENOMEM;
>> +                               DRM_DEBUG("Unable to alloc chain_fence\n");
> This is like throwing a grenade, waiting for the explosion, and then
> saying "bang" under your breath. :)
> -Chris
>
I don't get your point :)


-Lionel
Chris Wilson July 3, 2019, 8:56 a.m. UTC | #5
Quoting Lionel Landwerlin (2019-07-01 12:34:33)
> +               syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> +               if (!syncobj) {
> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
> +                       err = -EINVAL;
> +                       goto err;
> +               }
> +
> +               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> +                       fence = drm_syncobj_fence_get(syncobj);
> +                       if (!fence) {
> +                               DRM_DEBUG("Syncobj handle has no fence\n");
> +                               drm_syncobj_put(syncobj);
> +                               err = -EINVAL;
> +                               goto err;
> +                       }
> +
> +                       err = dma_fence_chain_find_seqno(&fence, point);

I'm very dubious about chain_find_seqno().

It returns -EINVAL if the point is older than the first in the chain --
it is in an unknown state, but may be signaled since we remove signaled
links from the chain. If we are waiting for an already signaled syncpt,
we should not be erring out!

Do we allow later requests to insert earlier syncpt into the chain? If
so, then the request we wait on here may be woefully inaccurate and
quite easily lead to cycles in the fence tree. We have no way of
resolving such deadlocks -- we would have to treat this fence as a
foreign fence and install a backup timer. Alternatively, we only allow
this to return the exact fence for a syncpt, and proxies for the rest.

> +                       if (err || !fence) {
> +                               DRM_DEBUG("Syncobj handle missing requested point\n");
> +                               drm_syncobj_put(syncobj);
> +                               err = err != 0 ? err : -EINVAL;
> +                               goto err;
> +                       }
> +               }
> +
> +               /*
> +                * For timeline syncobjs we need to preallocate chains for
> +                * later signaling.
> +                */
> +               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> +                       fences[n].chain_fence =
> +                               kmalloc(sizeof(*fences[n].chain_fence),
> +                                       GFP_KERNEL);
> +                       if (!fences[n].chain_fence) {
> +                               dma_fence_put(fence);
> +                               drm_syncobj_put(syncobj);
> +                               err = -ENOMEM;
> +                               DRM_DEBUG("Unable to alloc chain_fence\n");
> +                               goto err;
> +                       }

What happens if we later try to insert two fences for the same syncpt?
Should we not reserve the slot in the chain to reject duplicates?
-Chris
Lionel Landwerlin July 3, 2019, 9:17 a.m. UTC | #6
On 03/07/2019 11:56, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-07-01 12:34:33)
>> +               syncobj = drm_syncobj_find(eb->file, user_fence.handle);
>> +               if (!syncobj) {
>> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
>> +                       err = -EINVAL;
>> +                       goto err;
>> +               }
>> +
>> +               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
>> +                       fence = drm_syncobj_fence_get(syncobj);
>> +                       if (!fence) {
>> +                               DRM_DEBUG("Syncobj handle has no fence\n");
>> +                               drm_syncobj_put(syncobj);
>> +                               err = -EINVAL;
>> +                               goto err;
>> +                       }
>> +
>> +                       err = dma_fence_chain_find_seqno(&fence, point);
> I'm very dubious about chain_find_seqno().
>
> It returns -EINVAL if the point is older than the first in the chain --
> it is in an unknown state, but may be signaled since we remove signaled
> links from the chain. If we are waiting for an already signaled syncpt,
> we should not be erring out!


You're right, I got this wrong.

We can get fence = NULL if the point is already signaled.

The easiest would be to skip it from the list, or add the stub fence.


I guess the CTS got lucky that it always got the point needed before it 
was garbage collected...
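
A minimal sketch of the "skip it or add the stub fence" option (assuming
dma_fence_get_stub() would be acceptable in this path; this is not the code
from the patch):

	err = dma_fence_chain_find_seqno(&fence, point);
	if (err) {
		/* Point is malformed / in an unknown state: reject. */
		drm_syncobj_put(syncobj);
		goto err;
	}
	if (!fence) {
		/*
		 * The point was already signaled and collected from the
		 * chain: nothing left to wait on, so either skip this entry
		 * or wait on the (already signaled) stub fence.
		 */
		fence = dma_fence_get_stub();
	}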


>
> Do we allow later requests to insert earlier syncpt into the chain? If
> so, then the request we wait on here may be woefully inaccurate and
> quite easily lead to cycles in the fence tree. We have no way of
> resolving such deadlocks -- we would have to treat this fence as a
> foreign fence and install a backup timer. Alternatively, we only allow
> this to return the exact fence for a syncpt, and proxies for the rest.


Adding points < latest added point is forbidden.

I wish we enforced it a bit more than what's currently done in 
drm_syncobj_add_point().

In my view we should:

     - lock the syncobj in get_timeline_fence_array() and do the sanity 
check there.

     - keep the lock until we add the point to the timeline

     - unlock once added


That way we would ensure that the application cannot generate invalid 
timelines and error out if it does.

We could do the same for host signaling in 
drm_syncobj_timeline_signal_ioctl/drm_syncobj_transfer_to_timeline 
(there the locking is a lot shorter).

That requires holding the lock for longer than other drivers might 
prefer.


Ccing Christian who can tell whether that's out of the question for AMD.


Cheers,


-Lionel


>> +                       if (err || !fence) {
>> +                               DRM_DEBUG("Syncobj handle missing requested point\n");
>> +                               drm_syncobj_put(syncobj);
>> +                               err = err != 0 ? err : -EINVAL;
>> +                               goto err;
>> +                       }
>> +               }
>> +
>> +               /*
>> +                * For timeline syncobjs we need to preallocate chains for
>> +                * later signaling.
>> +                */
>> +               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
>> +                       fences[n].chain_fence =
>> +                               kmalloc(sizeof(*fences[n].chain_fence),
>> +                                       GFP_KERNEL);
>> +                       if (!fences[n].chain_fence) {
>> +                               dma_fence_put(fence);
>> +                               drm_syncobj_put(syncobj);
>> +                               err = -ENOMEM;
>> +                               DRM_DEBUG("Unable to alloc chain_fence\n");
>> +                               goto err;
>> +                       }
> What happens if we later try to insert two fences for the same syncpt?
> Should we not reserve the slot in the chain to reject duplicates?
> -Chris
>
Christian König July 15, 2019, 11:30 a.m. UTC | #7
Hi Lionel,

sorry for the delayed response, I'm just back from vacation.

Am 03.07.19 um 11:17 schrieb Lionel Landwerlin:
> On 03/07/2019 11:56, Chris Wilson wrote:
>> Quoting Lionel Landwerlin (2019-07-01 12:34:33)
>>> +               syncobj = drm_syncobj_find(eb->file, 
>>> user_fence.handle);
>>> +               if (!syncobj) {
>>> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
>>> +                       err = -EINVAL;
>>> +                       goto err;
>>> +               }
>>> +
>>> +               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
>>> +                       fence = drm_syncobj_fence_get(syncobj);
>>> +                       if (!fence) {
>>> +                               DRM_DEBUG("Syncobj handle has no 
>>> fence\n");
>>> +                               drm_syncobj_put(syncobj);
>>> +                               err = -EINVAL;
>>> +                               goto err;
>>> +                       }
>>> +
>>> +                       err = dma_fence_chain_find_seqno(&fence, 
>>> point);
>> I'm very dubious about chain_find_seqno().
>>
>> It returns -EINVAL if the point is older than the first in the chain --
>> it is in an unknown state, but may be signaled since we remove signaled
>> links from the chain. If we are waiting for an already signaled syncpt,
>> we should not be erring out!
>
>
> You're right, I got this wrong.
>
> We can get fence = NULL if the point is already signaled.
>
> The easiest would be to skip it from the list, or add the stub fence.
>
>
> I guess the CTS got lucky that it always got the point needed before 
> it was garbage collected...

The topmost point is never garbage collected. So IIRC the check is 
actually correct and you should never get NULL here.

>>
>> Do we allow later requests to insert earlier syncpt into the chain? If
>> so, then the request we wait on here may be woefully inaccurate and
>> quite easily lead to cycles in the fence tree. We have no way of
>> resolving such deadlocks -- we would have to treat this fence as a
>> foreign fence and install a backup timer. Alternatively, we only allow
>> this to return the exact fence for a syncpt, and proxies for the rest.
>
>
> Adding points < latest added point is forbidden.
>
> I wish we enforced it a bit more than what's currently done in 
> drm_syncobj_add_point().
>
> In my view we should :
>
>     - lock the syncobj in get_timeline_fence_array() do the sanity 
> check there.
>
>     - keep the lock until we add the point to the timeline
>
>     - unlock once added
>
>
> That way we would ensure that the application cannot generate invalid 
> timelines and error out if it does.
>
> We could do the same for host signaling in 
> drm_syncobj_timeline_signal_ioctl/drm_syncobj_transfer_to_timeline 
> (there the locking a lot shorter).
>
> That requires holding the lock for longer than maybe other driver 
> would prefer.
>
>
> Ccing Christian who can tell whether that's out of question for AMD.

Yeah, adding the lock was the only other option I could see as well, but 
we intentionally decided against that.

Since we have multiple out sync objects we would need to use a ww_mutex 
as lock here.

That in turn would result in another rather complicated dance for 
deadlock avoidance. Something which each driver would have to implement 
correctly.

That doesn't sound like a good idea to me just to improve error checking.

As long as it is only in the same process, userspace could check that as 
well before doing the submission.

Regards,
Christian.
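
For context, the ww_mutex "dance" referred to above looks roughly like the
following. This is a generic sketch of the wound/wait API for just two locks,
with error handling abbreviated and names purely illustrative:

	static DEFINE_WW_CLASS(syncobj_ww_class);

	struct ww_acquire_ctx ctx;
	int ret;

	ww_acquire_init(&ctx, &syncobj_ww_class);

	ret = ww_mutex_lock(a, &ctx);	/* first lock in a fresh context cannot deadlock */
	ret = ww_mutex_lock(b, &ctx);
	if (ret == -EDEADLK) {
		/* Back off: drop everything held, then sleep on the contended lock. */
		ww_mutex_unlock(a);
		ww_mutex_lock_slow(b, &ctx);
		ret = ww_mutex_lock(a, &ctx);	/* in general this has to loop */
	}
	ww_acquire_done(&ctx);

	/* ... install the chain nodes while every syncobj is locked ... */

	ww_mutex_unlock(b);
	ww_mutex_unlock(a);
	ww_acquire_fini(&ctx);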



>
>
> Cheers,
>
>
> -Lionel
>
>
>>> +                       if (err || !fence) {
>>> +                               DRM_DEBUG("Syncobj handle missing 
>>> requested point\n");
>>> +                               drm_syncobj_put(syncobj);
>>> +                               err = err != 0 ? err : -EINVAL;
>>> +                               goto err;
>>> +                       }
>>> +               }
>>> +
>>> +               /*
>>> +                * For timeline syncobjs we need to preallocate 
>>> chains for
>>> +                * later signaling.
>>> +                */
>>> +               if (point != 0 && user_fence.flags & 
>>> I915_EXEC_FENCE_SIGNAL) {
>>> +                       fences[n].chain_fence =
>>> +                               kmalloc(sizeof(*fences[n].chain_fence),
>>> +                                       GFP_KERNEL);
>>> +                       if (!fences[n].chain_fence) {
>>> +                               dma_fence_put(fence);
>>> +                               drm_syncobj_put(syncobj);
>>> +                               err = -ENOMEM;
>>> +                               DRM_DEBUG("Unable to alloc 
>>> chain_fence\n");
>>> +                               goto err;
>>> +                       }
>> What happens if we later try to insert two fences for the same syncpt?
>> Should we not reserve the slot in the chain to reject duplicates?
>> -Chris
>>
>
Lionel Landwerlin July 16, 2019, 8:17 a.m. UTC | #8
On 15/07/2019 14:30, Koenig, Christian wrote:
> Hi Lionel,
>
> sorry for the delayed response, I'm just back from vacation.
>
> Am 03.07.19 um 11:17 schrieb Lionel Landwerlin:
>> On 03/07/2019 11:56, Chris Wilson wrote:
>>> Quoting Lionel Landwerlin (2019-07-01 12:34:33)
>>>> +               syncobj = drm_syncobj_find(eb->file,
>>>> user_fence.handle);
>>>> +               if (!syncobj) {
>>>> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
>>>> +                       err = -EINVAL;
>>>> +                       goto err;
>>>> +               }
>>>> +
>>>> +               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
>>>> +                       fence = drm_syncobj_fence_get(syncobj);
>>>> +                       if (!fence) {
>>>> +                               DRM_DEBUG("Syncobj handle has no
>>>> fence\n");
>>>> +                               drm_syncobj_put(syncobj);
>>>> +                               err = -EINVAL;
>>>> +                               goto err;
>>>> +                       }
>>>> +
>>>> +                       err = dma_fence_chain_find_seqno(&fence,
>>>> point);
>>> I'm very dubious about chain_find_seqno().
>>>
>>> It returns -EINVAL if the point is older than the first in the chain --
>>> it is in an unknown state, but may be signaled since we remove signaled
>>> links from the chain. If we are waiting for an already signaled syncpt,
>>> we should not be erring out!
>>
>> You're right, I got this wrong.
>>
>> We can get fence = NULL if the point is already signaled.
>>
>> The easiest would be to skip it from the list, or add the stub fence.
>>
>>
>> I guess the CTS got lucky that it always got the point needed before
>> it was garbage collected...
> The topmost point is never garbage collected. So IIRC the check is
> actually correct and you should never get NULL here.
>
>>> Do we allow later requests to insert earlier syncpt into the chain? If
>>> so, then the request we wait on here may be woefully inaccurate and
>>> quite easily lead to cycles in the fence tree. We have no way of
>>> resolving such deadlocks -- we would have to treat this fence as a
>>> foreign fence and install a backup timer. Alternatively, we only allow
>>> this to return the exact fence for a syncpt, and proxies for the rest.
>>
>> Adding points < latest added point is forbidden.
>>
>> I wish we enforced it a bit more than what's currently done in
>> drm_syncobj_add_point().
>>
>> In my view we should :
>>
>>      - lock the syncobj in get_timeline_fence_array() do the sanity
>> check there.
>>
>>      - keep the lock until we add the point to the timeline
>>
>>      - unlock once added
>>
>>
>> That way we would ensure that the application cannot generate invalid
>> timelines and error out if it does.
>>
>> We could do the same for host signaling in
>> drm_syncobj_timeline_signal_ioctl/drm_syncobj_transfer_to_timeline
>> (there the locking a lot shorter).
>>
>> That requires holding the lock for longer than maybe other driver
>> would prefer.
>>
>>
>> Ccing Christian who can tell whether that's out of question for AMD.
> Yeah, adding the lock was the only other option I could see as well, but
> we intentionally decided against that.
>
> Since we have multiple out sync objects we would need to use a ww_mutex
> as lock here.
>
> That in turn would result in a another rather complicated dance for
> deadlock avoidance. Something which each driver would have to implement
> correctly.
>
> That doesn't sounds like a good idea to me just to improve error checking.
>
> As long as it is only in the same process userspace could check that as
> well before doing the submission.


Thanks Christian,


Would you be opposed to exposing a _locked() version of 
drm_syncobj_add_point() and having a static inline do the locking?

I don't think it would make a difference for your driver, and we could add 
checking with the proxy fence Chris suggested on our side.


We could also do checks in drm_syncobj_timeline_signal_ioctl().
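
A rough sketch of that split (hypothetical signatures for discussion, not an
existing API; the inline wrapper would keep today's behaviour for drivers that
do not need the wider critical section):

	/* Caller holds syncobj->lock. */
	void drm_syncobj_add_point_locked(struct drm_syncobj *syncobj,
					  struct dma_fence_chain *chain,
					  struct dma_fence *fence,
					  uint64_t point);

	static inline void drm_syncobj_add_point(struct drm_syncobj *syncobj,
						 struct dma_fence_chain *chain,
						 struct dma_fence *fence,
						 uint64_t point)
	{
		spin_lock(&syncobj->lock);
		drm_syncobj_add_point_locked(syncobj, chain, fence, point);
		spin_unlock(&syncobj->lock);
	}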


-Lionel


>
> Regards,
> Christian.
>
>
>
>>
>> Cheers,
>>
>>
>> -Lionel
>>
>>
>>>> +                       if (err || !fence) {
>>>> +                               DRM_DEBUG("Syncobj handle missing
>>>> requested point\n");
>>>> +                               drm_syncobj_put(syncobj);
>>>> +                               err = err != 0 ? err : -EINVAL;
>>>> +                               goto err;
>>>> +                       }
>>>> +               }
>>>> +
>>>> +               /*
>>>> +                * For timeline syncobjs we need to preallocate
>>>> chains for
>>>> +                * later signaling.
>>>> +                */
>>>> +               if (point != 0 && user_fence.flags &
>>>> I915_EXEC_FENCE_SIGNAL) {
>>>> +                       fences[n].chain_fence =
>>>> +                               kmalloc(sizeof(*fences[n].chain_fence),
>>>> +                                       GFP_KERNEL);
>>>> +                       if (!fences[n].chain_fence) {
>>>> +                               dma_fence_put(fence);
>>>> +                               drm_syncobj_put(syncobj);
>>>> +                               err = -ENOMEM;
>>>> +                               DRM_DEBUG("Unable to alloc
>>>> chain_fence\n");
>>>> +                               goto err;
>>>> +                       }
>>> What happens if we later try to insert two fences for the same syncpt?
>>> Should we not reserve the slot in the chain to reject duplicates?
>>> -Chris
>>>

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 9887fa9e3ac8..d3004fc1f995 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -213,6 +213,13 @@  enum {
  * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
  */
 
+struct i915_eb_fences {
+	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+	struct dma_fence *dma_fence;
+	u64 value;
+	struct dma_fence_chain *chain_fence;
+};
+
 struct i915_execbuffer {
 	struct drm_i915_private *i915; /** i915 backpointer */
 	struct drm_file *file; /** per-file lookup tables and limits */
@@ -275,6 +282,7 @@  struct i915_execbuffer {
 
 	struct {
 		u64 flags; /** Available extensions parameters */
+		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
 	} extensions;
 };
 
@@ -2224,67 +2232,198 @@  eb_select_engine(struct i915_execbuffer *eb,
 }
 
 static void
-__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
 {
-	while (n--)
-		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+	while (n--) {
+		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+		dma_fence_put(fences[n].dma_fence);
+		kfree(fences[n].chain_fence);
+	}
 	kvfree(fences);
 }
 
-static struct drm_syncobj **
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct drm_file *file)
+static struct i915_eb_fences *
+get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+	struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
+		&eb->extensions.timeline_fences;
+	struct drm_i915_gem_exec_fence __user *user_fences;
+	struct i915_eb_fences *fences;
+	u64 __user *user_values;
+	const u64 num_fences = timeline_fences->fence_count;
+	unsigned long n;
+	int err;
+
+	*out_n_fences = num_fences;
+
+	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
+	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+	if (num_fences > min_t(unsigned long,
+			       ULONG_MAX / sizeof(*user_fences),
+			       SIZE_MAX / sizeof(*fences)))
+		return ERR_PTR(-EINVAL);
+
+	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+	if (!access_ok(user_fences, num_fences * sizeof(*user_fences)))
+		return ERR_PTR(-EFAULT);
+
+	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+	if (!access_ok(user_values, num_fences * sizeof(*user_values)))
+		return ERR_PTR(-EFAULT);
+
+	fences = kvmalloc_array(num_fences, sizeof(*fences),
+				__GFP_NOWARN | GFP_KERNEL);
+	if (!fences)
+		return ERR_PTR(-ENOMEM);
+
+	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+	for (n = 0; n < num_fences; n++) {
+		struct drm_i915_gem_exec_fence user_fence;
+		struct drm_syncobj *syncobj;
+		struct dma_fence *fence = NULL;
+		u64 point;
+
+		if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+			err = -EINVAL;
+			goto err;
+		}
+
+		if (__get_user(point, user_values++)) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+		if (!syncobj) {
+			DRM_DEBUG("Invalid syncobj handle provided\n");
+			err = -EINVAL;
+			goto err;
+		}
+
+		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+			fence = drm_syncobj_fence_get(syncobj);
+			if (!fence) {
+				DRM_DEBUG("Syncobj handle has no fence\n");
+				drm_syncobj_put(syncobj);
+				err = -EINVAL;
+				goto err;
+			}
+
+			err = dma_fence_chain_find_seqno(&fence, point);
+			if (err || !fence) {
+				DRM_DEBUG("Syncobj handle missing requested point\n");
+				drm_syncobj_put(syncobj);
+				err = err != 0 ? err : -EINVAL;
+				goto err;
+			}
+		}
+
+		/*
+		 * For timeline syncobjs we need to preallocate chains for
+		 * later signaling.
+		 */
+		if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
+			fences[n].chain_fence =
+				kmalloc(sizeof(*fences[n].chain_fence),
+					GFP_KERNEL);
+			if (!fences[n].chain_fence) {
+				dma_fence_put(fence);
+				drm_syncobj_put(syncobj);
+				err = -ENOMEM;
+				DRM_DEBUG("Unable to alloc chain_fence\n");
+				goto err;
+			}
+		} else {
+			fences[n].chain_fence = NULL;
+		}
+
+		fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+		fences[n].dma_fence = fence;
+		fences[n].value = point;
+	}
+
+	return fences;
+
+err:
+	__free_fence_array(fences, n);
+	return ERR_PTR(err);
+}
+
+static struct i915_eb_fences *
+get_legacy_fence_array(struct i915_execbuffer *eb,
+		       int *out_n_fences)
 {
-	const unsigned long nfences = args->num_cliprects;
+	struct drm_i915_gem_execbuffer2 *args = eb->args;
 	struct drm_i915_gem_exec_fence __user *user;
-	struct drm_syncobj **fences;
+	struct i915_eb_fences *fences;
+	const u32 num_fences = args->num_cliprects;
 	unsigned long n;
 	int err;
 
-	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
-		return NULL;
+	*out_n_fences = num_fences;
 
 	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
 	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
-	if (nfences > min_t(unsigned long,
-			    ULONG_MAX / sizeof(*user),
-			    SIZE_MAX / sizeof(*fences)))
+	if (*out_n_fences > min_t(unsigned long,
+				  ULONG_MAX / sizeof(*user),
+				  SIZE_MAX / sizeof(*fences)))
 		return ERR_PTR(-EINVAL);
 
 	user = u64_to_user_ptr(args->cliprects_ptr);
-	if (!access_ok(user, nfences * sizeof(*user)))
+	if (!access_ok(user, *out_n_fences * sizeof(*user)))
 		return ERR_PTR(-EFAULT);
 
-	fences = kvmalloc_array(nfences, sizeof(*fences),
+	fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
 				__GFP_NOWARN | GFP_KERNEL);
 	if (!fences)
 		return ERR_PTR(-ENOMEM);
 
-	for (n = 0; n < nfences; n++) {
-		struct drm_i915_gem_exec_fence fence;
+	for (n = 0; n < *out_n_fences; n++) {
+		struct drm_i915_gem_exec_fence user_fence;
 		struct drm_syncobj *syncobj;
+		struct dma_fence *fence = NULL;
 
-		if (__copy_from_user(&fence, user++, sizeof(fence))) {
+		if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
 			err = -EFAULT;
 			goto err;
 		}
 
-		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
 			err = -EINVAL;
 			goto err;
 		}
 
-		syncobj = drm_syncobj_find(file, fence.handle);
+		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
 		if (!syncobj) {
 			DRM_DEBUG("Invalid syncobj handle provided\n");
 			err = -ENOENT;
 			goto err;
 		}
 
+		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+			fence = drm_syncobj_fence_get(syncobj);
+			if (!fence) {
+				DRM_DEBUG("Syncobj handle has no fence\n");
+				drm_syncobj_put(syncobj);
+				err = -EINVAL;
+				goto err;
+			}
+		}
+
 		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
 
-		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+		fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+		fences[n].dma_fence = fence;
+		fences[n].value = 0;
+		fences[n].chain_fence = NULL;
 	}
 
 	return fences;
@@ -2294,37 +2433,44 @@  get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 	return ERR_PTR(err);
 }
 
+static struct i915_eb_fences *
+get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return get_legacy_fence_array(eb, out_n_fences);
+
+	if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+		return get_timeline_fence_array(eb, out_n_fences);
+
+	*out_n_fences = 0;
+	return NULL;
+}
+
 static void
-put_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct drm_syncobj **fences)
+put_fence_array(struct i915_eb_fences *fences, int nfences)
 {
 	if (fences)
-		__free_fence_array(fences, args->num_cliprects);
+		__free_fence_array(fences, nfences);
 }
 
 static int
 await_fence_array(struct i915_execbuffer *eb,
-		  struct drm_syncobj **fences)
+		  struct i915_eb_fences *fences,
+		  int nfences)
 {
-	const unsigned int nfences = eb->args->num_cliprects;
 	unsigned int n;
 	int err;
 
 	for (n = 0; n < nfences; n++) {
 		struct drm_syncobj *syncobj;
-		struct dma_fence *fence;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
 		if (!(flags & I915_EXEC_FENCE_WAIT))
 			continue;
 
-		fence = drm_syncobj_fence_get(syncobj);
-		if (!fence)
-			return -EINVAL;
-
-		err = i915_request_await_dma_fence(eb->request, fence);
-		dma_fence_put(fence);
+		err = i915_request_await_dma_fence(eb->request,
+						   fences[n].dma_fence);
 		if (err < 0)
 			return err;
 	}
@@ -2334,9 +2480,9 @@  await_fence_array(struct i915_execbuffer *eb,
 
 static void
 signal_fence_array(struct i915_execbuffer *eb,
-		   struct drm_syncobj **fences)
+		   struct i915_eb_fences *fences,
+		   int nfences)
 {
-	const unsigned int nfences = eb->args->num_cliprects;
 	struct dma_fence * const fence = &eb->request->fence;
 	unsigned int n;
 
@@ -2344,15 +2490,46 @@  signal_fence_array(struct i915_execbuffer *eb,
 		struct drm_syncobj *syncobj;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
 		if (!(flags & I915_EXEC_FENCE_SIGNAL))
 			continue;
 
-		drm_syncobj_replace_fence(syncobj, fence);
+		if (fences[n].chain_fence) {
+			drm_syncobj_add_point(syncobj, fences[n].chain_fence,
+					      fence, fences[n].value);
+			/*
+			 * The chain's ownership is transfered to the
+			 * The chain's ownership is transferred to the
+			 */
+			fences[n].chain_fence = NULL;
+		} else {
+			drm_syncobj_replace_fence(syncobj, fence);
+		}
 	}
 }
 
+static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+
+	/* Timeline fences are incompatible with the fence array flag. */
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return -EINVAL;
+
+	if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+		return -EINVAL;
+
+	if (copy_from_user(&eb->extensions.timeline_fences, ext,
+			   sizeof(eb->extensions.timeline_fences)))
+		return -EFAULT;
+
+	eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
+
+	return 0;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
+        [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
 };
 
 static int
@@ -2377,14 +2554,15 @@  static int
 i915_gem_do_execbuffer(struct drm_device *dev,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
-		       struct drm_i915_gem_exec_object2 *exec,
-		       struct drm_syncobj **fences)
+		       struct drm_i915_gem_exec_object2 *exec)
 {
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
 	struct dma_fence *exec_fence = NULL;
 	struct sync_file *out_fence = NULL;
+	struct i915_eb_fences *fences = NULL;
 	int out_fence_fd = -1;
+	int nfences = 0;
 	int err;
 
 	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
@@ -2423,10 +2601,16 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	if (err)
 		return err;
 
+	fences = get_fence_array(&eb, &nfences);
+	if (IS_ERR(fences))
+		return PTR_ERR(fences);
+
 	if (args->flags & I915_EXEC_FENCE_IN) {
 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
-		if (!in_fence)
-			return -EINVAL;
+		if (!in_fence) {
+			err = -EINVAL;
+			goto err_fences;
+		}
 	}
 
 	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
@@ -2584,7 +2768,7 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	}
 
 	if (fences) {
-		err = await_fence_array(&eb, fences);
+		err = await_fence_array(&eb, fences, nfences);
 		if (err)
 			goto err_request;
 	}
@@ -2613,7 +2797,7 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	i915_request_add(eb.request);
 
 	if (fences)
-		signal_fence_array(&eb, fences);
+		signal_fence_array(&eb, fences, nfences);
 
 	if (out_fence) {
 		if (err == 0) {
@@ -2648,6 +2832,8 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	dma_fence_put(exec_fence);
 err_in_fence:
 	dma_fence_put(in_fence);
+err_fences:
+	put_fence_array(fences, nfences);
 	return err;
 }
 
@@ -2741,7 +2927,7 @@  i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 			exec2_list[i].flags = 0;
 	}
 
-	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
+	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
 	if (exec2.flags & __EXEC_HAS_RELOC) {
 		struct drm_i915_gem_exec_object __user *user_exec_list =
 			u64_to_user_ptr(args->buffers_ptr);
@@ -2772,7 +2958,6 @@  i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list;
-	struct drm_syncobj **fences = NULL;
 	const size_t count = args->buffer_count;
 	int err;
 
@@ -2800,15 +2985,7 @@  i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		return -EFAULT;
 	}
 
-	if (args->flags & I915_EXEC_FENCE_ARRAY) {
-		fences = get_fence_array(args, file);
-		if (IS_ERR(fences)) {
-			kvfree(exec2_list);
-			return PTR_ERR(fences);
-		}
-	}
-
-	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
+	err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
 
 	/*
 	 * Now that we have begun execution of the batchbuffer, we ignore
@@ -2848,7 +3025,6 @@  end:;
 	}
 
 	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
-	put_fence_array(args, fences);
 	kvfree(exec2_list);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index fa02e8f033d7..088f9d2af3fa 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -457,6 +457,7 @@  static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
 	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+	case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -3220,7 +3221,8 @@  static struct drm_driver driver = {
 	 */
 	.driver_features =
 	    DRIVER_GEM |
-	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
+	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE,
 	.release = i915_driver_release,
 	.open = i915_driver_open,
 	.lastclose = i915_driver_lastclose,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index efa195d6994e..6bd76a0d29e5 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -617,6 +617,12 @@  typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION	54
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobjs through drm_i915_gem_execbuffer_ext_timeline_fences. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1014,9 +1020,41 @@  struct drm_i915_gem_exec_fence {
 };
 
 enum drm_i915_gem_execbuffer_ext {
+	/**
+	 * See drm_i915_gem_execbuffer_ext_timeline_fences.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 0,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+	struct i915_user_extension base;
+
+	/**
+	 * Number of elements in the handles_ptr & values_ptr arrays.
+	 */
+	__u64 fence_count;
+
+	/**
+	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
+	 * fence_count.
+	 */
+	__u64 handles_ptr;
+
+	/**
+	 * Pointer to an array of u64 values of length fence_count. Values
+	 * must be 0 for a binary drm_syncobj. A value of 0 for a timeline
+	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
+	 */
+	__u64 values_ptr;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs