diff mbox series

[v12,2/3] drm/i915: add syncobj timeline support

Message ID 20200708131751.334457-3-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: timeline semaphore support | expand

Commit Message

Lionel Landwerlin July 8, 2020, 1:17 p.m. UTC
Introduces a new parameters to execbuf so that we can specify syncobj
handles as well as timeline points.

v2: Reuse i915_user_extension_fn

v3: Check that the chained extension is only present once (Chris)

v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel)

v5: Use BIT_ULL (Chris)

v6: Fix issue with already signaled timeline points,
    dma_fence_chain_find_seqno() setting fence to NULL (Chris)

v7: Report ENOENT with invalid syncobj handle (Lionel)

v8: Check for out of order timeline point insertion (Chris)

v9: After explanations on
    https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
    drop the ordering check from v8 (Lionel)

v10: Set first extension enum item to 1 (Jason)

v11: Rebase

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 167 +++++++++++++++++-
 drivers/gpu/drm/i915/i915_drv.c               |   3 +-
 drivers/gpu/drm/i915/i915_getparam.c          |   1 +
 include/uapi/drm/i915_drm.h                   |  39 ++++
 4 files changed, 203 insertions(+), 7 deletions(-)

Comments

Daniel Vetter July 29, 2020, 12:24 p.m. UTC | #1
Hi Kees&Linus,

Not sure you're the right person, but figured I start with you with an
idea I had while looking at this patch. Cut out everything except the
relevant part:

On Wed, Jul 8, 2020 at 3:18 PM Lionel Landwerlin <lionel.g.landwerlin@intel.com> wrote:
> +       /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> +       BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> +       if (num_user_fences > min_t(unsigned long,
> +                                   ULONG_MAX / sizeof(*user_fences),
> +                                   SIZE_MAX / sizeof(*fences)))
> +               return -EINVAL;
> +
> +       user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> +       if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> +               return -EFAULT;
> +
> +       user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> +       if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> +               return -EFAULT;

Do we have a access_ok_array or so? Instead of duplicating overflow checks
everywhere and getting it all wrong ... Similar I guess for
copy_from/to_user_array.

Just random idea that crossed my mind and I figured I'll throw it out
there.

Cheers, Daniel

> +
> +       fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> +                               __GFP_NOWARN | GFP_KERNEL);
> +       if (!fences)
> +               return -ENOMEM;
> +
> +       BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> +                    ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> +
> +       for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> +               struct drm_i915_gem_exec_fence fence;
> +               struct drm_syncobj *syncobj;
> +               u64 point;
> +
> +               if (__copy_from_user(&fence, user_fences++, sizeof(fence))) {
> +                       err = -EFAULT;
> +                       goto err;
> +               }
> +
> +               if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> +                       err = -EINVAL;
> +                       goto err;
> +               }
> +
> +               if (__get_user(point, user_values++)) {
> +                       err = -EFAULT;
> +                       goto err;
> +               }
> +
> +               syncobj = drm_syncobj_find(eb->file, fence.handle);
> +               if (!syncobj) {
> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
> +                       err = -ENOENT;
> +                       goto err;
> +               }
> +
> +               /*
> +                * For timeline syncobjs we need to preallocate chains for
> +                * later signaling.
> +                */
> +               if (point != 0 && fence.flags & I915_EXEC_FENCE_SIGNAL) {
> +                       /*
> +                        * Waiting and signaling the same point (when point !=
> +                        * 0) would break the timeline.
> +                        */
> +                       if (fence.flags & I915_EXEC_FENCE_WAIT) {
> +                               DRM_DEBUG("Tring to wait & signal the same timeline point.\n");
> +                               err = -EINVAL;
> +                               drm_syncobj_put(syncobj);
> +                               goto err;
> +                       }
> +
> +                       fences[num_fences].chain_fence =
> +                               kmalloc(sizeof(*fences[num_fences].chain_fence),
> +                                       GFP_KERNEL);
> +                       if (!fences[num_fences].chain_fence) {
> +                               drm_syncobj_put(syncobj);
> +                               err = -ENOMEM;
> +                               DRM_DEBUG("Unable to alloc chain_fence\n");
> +                               goto err;
> +                       }
> +               } else {
> +                       fences[num_fences].chain_fence = NULL;
> +               }
> +
> +               fences[num_fences].syncobj = ptr_pack_bits(syncobj, fence.flags, 2);
> +               fences[num_fences].value = point;
> +               num_fences++;
> +       }
> +
> +       eb->fences = fences;
> +       eb->n_fences = num_fences;
> +
> +       return 0;
> +
> +err:
> +       __free_fence_array(fences, num_fences);
> +       return err;
> +}
> +
> +static int
> +get_legacy_fence_array(struct drm_i915_gem_execbuffer2 *args,
> +                      struct i915_execbuffer *eb)
>  {
>         const unsigned long nfences = args->num_cliprects;
>         struct drm_i915_gem_exec_fence __user *user;
> @@ -2235,7 +2344,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>         BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
>         if (nfences > min_t(unsigned long,
>                             ULONG_MAX / sizeof(*user),
> -                           SIZE_MAX / sizeof(*fences)))
> +                           SIZE_MAX / sizeof(*eb->fences)))
>                 return -EINVAL;
>
>         user = u64_to_user_ptr(args->cliprects_ptr);
> @@ -2272,6 +2381,8 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>                              ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>
>                 fences[n].syncobj = ptr_pack_bits(syncobj, fence.flags, 2);
> +               fences[n].value = 0;
> +               fences[n].chain_fence = NULL;
>         }
>
>         eb->fences = fences;
> @@ -2303,6 +2414,15 @@ await_fence_array(struct i915_execbuffer *eb)
>                 if (!fence)
>                         return -EINVAL;
>
> +               if (eb->fences[n].value) {
> +                       err = dma_fence_chain_find_seqno(&fence, eb->fences[n].value);
> +                       if (err)
> +                               return err;
> +
> +                       if (!fence)
> +                               continue;
> +               }
> +
>                 err = i915_request_await_dma_fence(eb->request, fence);
>                 dma_fence_put(fence);
>                 if (err < 0)
> @@ -2326,7 +2446,17 @@ signal_fence_array(struct i915_execbuffer *eb)
>                 if (!(flags & I915_EXEC_FENCE_SIGNAL))
>                         continue;
>
> -               drm_syncobj_replace_fence(syncobj, fence);
> +               if (eb->fences[n].chain_fence) {
> +                       drm_syncobj_add_point(syncobj, eb->fences[n].chain_fence,
> +                                             fence, eb->fences[n].value);
> +                       /*
> +                        * The chain's ownership is transferred to the
> +                        * timeline.
> +                        */
> +                       eb->fences[n].chain_fence = NULL;
> +               } else {
> +                       drm_syncobj_replace_fence(syncobj, fence);
> +               }
>         }
>  }
>
> @@ -2371,7 +2501,32 @@ static void eb_request_add(struct i915_execbuffer *eb)
>         mutex_unlock(&tl->mutex);
>  }
>
> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
> +{
> +       struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
> +       struct i915_execbuffer *eb = data;
> +
> +       /* Timeline fences are incompatible with the fence array flag. */
> +       if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> +               return -EINVAL;
> +
> +       /*
> +        * Prevent the drm_i915_gem_execbuffer_ext_timeline_fences structure
> +        * to be included multiple times.
> +        */
> +       if (eb->extension_flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> +               return -EINVAL;
> +
> +       if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
> +               return -EFAULT;
> +
> +       eb->extension_flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
> +
> +       return get_timeline_fence_array(&timeline_fences, eb);
> +}
> +
>  static const i915_user_extension_fn execbuf_extensions[] = {
> +       [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
>  };
>
>  static int
> @@ -2475,7 +2630,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>         if (err)
>                 goto err_out_fence;
>
> -       err = get_fence_array(args, &eb);
> +       err = get_legacy_fence_array(args, &eb);
>         if (err)
>                 goto err_arr_fence;
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 67102dc26fce..f450d8782ac1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1851,7 +1851,8 @@ static struct drm_driver driver = {
>          */
>         .driver_features =
>             DRIVER_GEM |
> -           DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
> +           DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
> +           DRIVER_SYNCOBJ_TIMELINE,
>         .release = i915_driver_release,
>         .open = i915_driver_open,
>         .lastclose = i915_driver_lastclose,
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> index 40390b2352b1..e50bdf676955 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
>         case I915_PARAM_HAS_EXEC_BATCH_FIRST:
>         case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
>         case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> +       case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
>                 /* For the time being all of these are always true;
>                  * if some supported hardware does not have one of these
>                  * features this value needs to be provided from
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7ea38aa6502c..85ff36faf15a 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -619,6 +619,13 @@ typedef struct drm_i915_irq_wait {
>   */
>  #define I915_PARAM_PERF_REVISION       54
>
> +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
> + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
> + * I915_EXEC_USE_EXTENSIONS.
> + */
> +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
> +
> +
>  /* Must be kept compact -- no holes and well documented */
>
>  typedef struct drm_i915_getparam {
> @@ -1047,9 +1054,41 @@ struct drm_i915_gem_exec_fence {
>  };
>
>  enum drm_i915_gem_execbuffer_ext {
> +       /**
> +        * See drm_i915_gem_execbuf_ext_timeline_fences.
> +        */
> +       DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
> +
>         DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
>  };
>
> +/**
> + * This structure describes an array of drm_syncobj and associated points for
> + * timeline variants of drm_syncobj. It is invalid to append this structure to
> + * the execbuf if I915_EXEC_FENCE_ARRAY is set.
> + */
> +struct drm_i915_gem_execbuffer_ext_timeline_fences {
> +       struct i915_user_extension base;
> +
> +       /**
> +        * Number of element in the handles_ptr & value_ptr arrays.
> +        */
> +       __u64 fence_count;
> +
> +       /**
> +        * Pointer to an array of struct drm_i915_gem_exec_fence of length
> +        * fence_count.
> +        */
> +       __u64 handles_ptr;
> +
> +       /**
> +        * Pointer to an array of u64 values of length fence_count. Values
> +        * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline
> +        * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
> +        */
> +       __u64 values_ptr;
> +};
> +
>  struct drm_i915_gem_execbuffer2 {
>         /**
>          * List of gem_exec_object2 structs
> --
> 2.27.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter July 29, 2020, 12:34 p.m. UTC | #2
On Wed, Jul 08, 2020 at 04:17:50PM +0300, Lionel Landwerlin wrote:
> Introduces a new parameters to execbuf so that we can specify syncobj
> handles as well as timeline points.
> 
> v2: Reuse i915_user_extension_fn
> 
> v3: Check that the chained extension is only present once (Chris)
> 
> v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel)
> 
> v5: Use BIT_ULL (Chris)
> 
> v6: Fix issue with already signaled timeline points,
>     dma_fence_chain_find_seqno() setting fence to NULL (Chris)
> 
> v7: Report ENOENT with invalid syncobj handle (Lionel)
> 
> v8: Check for out of order timeline point insertion (Chris)
> 
> v9: After explanations on
>     https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
>     drop the ordering check from v8 (Lionel)
> 
> v10: Set first extension enum item to 1 (Jason)
> 
> v11: Rebase
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>  .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 167 +++++++++++++++++-
>  drivers/gpu/drm/i915/i915_drv.c               |   3 +-
>  drivers/gpu/drm/i915/i915_getparam.c          |   1 +
>  include/uapi/drm/i915_drm.h                   |  39 ++++
>  4 files changed, 203 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 03a656feb7b8..d8814e637e71 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -285,6 +285,8 @@ struct i915_execbuffer {
>  
>  	struct i915_eb_fence {
>  		struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
> +		u64 value;
> +		struct dma_fence_chain *chain_fence;
>  	} *fences;
>  	u32 n_fences;
>  
> @@ -2213,14 +2215,121 @@ eb_pin_engine(struct i915_execbuffer *eb,
>  static void
>  __free_fence_array(struct i915_eb_fence *fences, unsigned int n)
>  {
> -	while (n--)
> +	while (n--) {
>  		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
> +		kfree(fences[n].chain_fence);
> +	}
>  	kvfree(fences);
>  }
>  
>  static int
> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> -		struct i915_execbuffer *eb)
> +get_timeline_fence_array(const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences,
> +			 struct i915_execbuffer *eb)
> +{
> +	struct drm_i915_gem_exec_fence __user *user_fences;
> +	struct i915_eb_fence *fences;
> +	u64 __user *user_values;
> +	u64 num_fences, num_user_fences = timeline_fences->fence_count;
> +	unsigned long n;
> +	int err;
> +
> +	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
> +	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> +	if (num_user_fences > min_t(unsigned long,
> +				    ULONG_MAX / sizeof(*user_fences),
> +				    SIZE_MAX / sizeof(*fences)))
> +		return -EINVAL;
> +
> +	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> +	if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> +		return -EFAULT;
> +
> +	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> +	if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> +		return -EFAULT;
> +
> +	fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> +				__GFP_NOWARN | GFP_KERNEL);
> +	if (!fences)
> +		return -ENOMEM;
> +
> +	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> +		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> +
> +	for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> +		struct drm_i915_gem_exec_fence fence;
> +		struct drm_syncobj *syncobj;
> +		u64 point;
> +
> +		if (__copy_from_user(&fence, user_fences++, sizeof(fence))) {
> +			err = -EFAULT;
> +			goto err;
> +		}
> +
> +		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		if (__get_user(point, user_values++)) {
> +			err = -EFAULT;
> +			goto err;
> +		}
> +
> +		syncobj = drm_syncobj_find(eb->file, fence.handle);
> +		if (!syncobj) {
> +			DRM_DEBUG("Invalid syncobj handle provided\n");
> +			err = -ENOENT;
> +			goto err;
> +		}
> +
> +		/*
> +		 * For timeline syncobjs we need to preallocate chains for
> +		 * later signaling.
> +		 */
> +		if (point != 0 && fence.flags & I915_EXEC_FENCE_SIGNAL) {
> +			/*
> +			 * Waiting and signaling the same point (when point !=
> +			 * 0) would break the timeline.
> +			 */
> +			if (fence.flags & I915_EXEC_FENCE_WAIT) {
> +				DRM_DEBUG("Tring to wait & signal the same timeline point.\n");
> +				err = -EINVAL;
> +				drm_syncobj_put(syncobj);
> +				goto err;
> +			}
> +
> +			fences[num_fences].chain_fence =
> +				kmalloc(sizeof(*fences[num_fences].chain_fence),
> +					GFP_KERNEL);

Uh this kmalloc here I don't like, it's kinda fragile, plus we might want
to use a kmemcache with slab freeing for these eventually. But amdgpu and
msm do the same thing, so maybe a follow-up patch which adds a
dma_fence_chain_alloc() which is used everywhere?

The thing is that drm_syncobj.c allocates with kzalloc, so guaranteed all
cleared to 0.

Not a blocker for this, but would be really nice if you volunteer for this
:-)

> +			if (!fences[num_fences].chain_fence) {
> +				drm_syncobj_put(syncobj);
> +				err = -ENOMEM;
> +				DRM_DEBUG("Unable to alloc chain_fence\n");
> +				goto err;
> +			}
> +		} else {
> +			fences[num_fences].chain_fence = NULL;
> +		}
> +
> +		fences[num_fences].syncobj = ptr_pack_bits(syncobj, fence.flags, 2);
> +		fences[num_fences].value = point;
> +		num_fences++;
> +	}
> +
> +	eb->fences = fences;
> +	eb->n_fences = num_fences;
> +
> +	return 0;
> +
> +err:
> +	__free_fence_array(fences, num_fences);
> +	return err;
> +}
> +
> +static int
> +get_legacy_fence_array(struct drm_i915_gem_execbuffer2 *args,
> +		       struct i915_execbuffer *eb)
>  {
>  	const unsigned long nfences = args->num_cliprects;
>  	struct drm_i915_gem_exec_fence __user *user;
> @@ -2235,7 +2344,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>  	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
>  	if (nfences > min_t(unsigned long,
>  			    ULONG_MAX / sizeof(*user),
> -			    SIZE_MAX / sizeof(*fences)))
> +			    SIZE_MAX / sizeof(*eb->fences)))
>  		return -EINVAL;
>  
>  	user = u64_to_user_ptr(args->cliprects_ptr);
> @@ -2272,6 +2381,8 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>  			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>  
>  		fences[n].syncobj = ptr_pack_bits(syncobj, fence.flags, 2);
> +		fences[n].value = 0;
> +		fences[n].chain_fence = NULL;
>  	}
>  
>  	eb->fences = fences;
> @@ -2303,6 +2414,15 @@ await_fence_array(struct i915_execbuffer *eb)
>  		if (!fence)
>  			return -EINVAL;
>  
> +		if (eb->fences[n].value) {
> +			err = dma_fence_chain_find_seqno(&fence, eb->fences[n].value);
> +			if (err)
> +				return err;
> +
> +			if (!fence)
> +				continue;
> +		}
> +
>  		err = i915_request_await_dma_fence(eb->request, fence);
>  		dma_fence_put(fence);
>  		if (err < 0)
> @@ -2326,7 +2446,17 @@ signal_fence_array(struct i915_execbuffer *eb)
>  		if (!(flags & I915_EXEC_FENCE_SIGNAL))
>  			continue;
>  
> -		drm_syncobj_replace_fence(syncobj, fence);
> +		if (eb->fences[n].chain_fence) {
> +			drm_syncobj_add_point(syncobj, eb->fences[n].chain_fence,
> +					      fence, eb->fences[n].value);
> +			/*
> +			 * The chain's ownership is transferred to the
> +			 * timeline.
> +			 */
> +			eb->fences[n].chain_fence = NULL;
> +		} else {
> +			drm_syncobj_replace_fence(syncobj, fence);
> +		}
>  	}
>  }
>  
> @@ -2371,7 +2501,32 @@ static void eb_request_add(struct i915_execbuffer *eb)
>  	mutex_unlock(&tl->mutex);
>  }
>  
> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
> +{
> +	struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
> +	struct i915_execbuffer *eb = data;
> +
> +	/* Timeline fences are incompatible with the fence array flag. */
> +	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> +		return -EINVAL;
> +
> +	/*
> +	 * Prevent the drm_i915_gem_execbuffer_ext_timeline_fences structure
> +	 * to be included multiple times.
> +	 */
> +	if (eb->extension_flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> +		return -EINVAL;
> +
> +	if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
> +		return -EFAULT;
> +
> +	eb->extension_flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
> +
> +	return get_timeline_fence_array(&timeline_fences, eb);
> +}
> +
>  static const i915_user_extension_fn execbuf_extensions[] = {
> +	[DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
>  };
>  
>  static int
> @@ -2475,7 +2630,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>  	if (err)
>  		goto err_out_fence;
>  
> -	err = get_fence_array(args, &eb);
> +	err = get_legacy_fence_array(args, &eb);
>  	if (err)
>  		goto err_arr_fence;
>  
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 67102dc26fce..f450d8782ac1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1851,7 +1851,8 @@ static struct drm_driver driver = {
>  	 */
>  	.driver_features =
>  	    DRIVER_GEM |
> -	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
> +	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
> +	    DRIVER_SYNCOBJ_TIMELINE,
>  	.release = i915_driver_release,
>  	.open = i915_driver_open,
>  	.lastclose = i915_driver_lastclose,
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> index 40390b2352b1..e50bdf676955 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
>  	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
>  	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
>  	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> +	case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
>  		/* For the time being all of these are always true;
>  		 * if some supported hardware does not have one of these
>  		 * features this value needs to be provided from
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7ea38aa6502c..85ff36faf15a 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -619,6 +619,13 @@ typedef struct drm_i915_irq_wait {
>   */
>  #define I915_PARAM_PERF_REVISION	54
>  
> +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
> + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
> + * I915_EXEC_USE_EXTENSIONS.
> + */
> +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
> +
> +
>  /* Must be kept compact -- no holes and well documented */
>  
>  typedef struct drm_i915_getparam {
> @@ -1047,9 +1054,41 @@ struct drm_i915_gem_exec_fence {
>  };
>  
>  enum drm_i915_gem_execbuffer_ext {
> +	/**
> +	 * See drm_i915_gem_execbuf_ext_timeline_fences.
> +	 */
> +	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
> +
>  	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
>  };
>  
> +/**
> + * This structure describes an array of drm_syncobj and associated points for
> + * timeline variants of drm_syncobj. It is invalid to append this structure to
> + * the execbuf if I915_EXEC_FENCE_ARRAY is set.
> + */
> +struct drm_i915_gem_execbuffer_ext_timeline_fences {
> +	struct i915_user_extension base;
> +
> +	/**
> +	 * Number of element in the handles_ptr & value_ptr arrays.
> +	 */
> +	__u64 fence_count;
> +
> +	/**
> +	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
> +	 * fence_count.
> +	 */
> +	__u64 handles_ptr;
> +
> +	/**
> +	 * Pointer to an array of u64 values of length fence_count. Values
> +	 * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline
> +	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
> +	 */
> +	__u64 values_ptr;
> +};
> +
>  struct drm_i915_gem_execbuffer2 {
>  	/**
>  	 * List of gem_exec_object2 structs
> -- 
> 2.27.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Linus Torvalds July 29, 2020, 5:51 p.m. UTC | #3
On Wed, Jul 29, 2020 at 5:24 AM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> Do we have a access_ok_array or so? Instead of duplicating overflow checks
> everywhere and getting it all wrong ...

I really really think you should get away from access_ok() entirely.

Please just get rid of it, and use "copy_from_user()" instead.

Seriously.

access_ok() is completely wrong, because

 (a) it doesn't actually protect from any fault returns, it only doe
sthe high-level check of "is the pointer even ok".

So you can't say "ok, I did access_ok(), so I don't have to check the
return value", and you're actually making the source code more
complicated, and only introducing the possibility of bugs.

Overflow is just _one_ such bug. Missing the access_ok() entirely
because it was in some caller but not another is another common bug.

 (b) it no longer even makes the code faster.

It never really did for the the copy_to/from_user() case _anyway_, it
was always for the "I will now do several get/put_user() accesses and
I only want to do the range check once".

And that has simply not been true for the last few CPU generations -
because the cost isn't in the range check any more. Now allk the real
costs are about CLAC/STAC. The range check takes two cycles and
schedules well (so it's generally not even visible). The CLAC/STAC
takes 30+ cycles, and stalls the pipeline.

>Similar I guess for copy_from/to_user_array.

No. I refuse to add complexity onto the garbage that is access_ok().

Just remove it. It's not helping. People who think it's helping
haven't actually looked at profiles, or are working with hardware that
is no longer relevant.

                Linus
Kees Cook July 29, 2020, 7:19 p.m. UTC | #4
On Wed, Jul 29, 2020 at 10:51:23AM -0700, Linus Torvalds wrote:
> On Wed, Jul 29, 2020 at 5:24 AM Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > Do we have a access_ok_array or so? Instead of duplicating overflow checks
> > everywhere and getting it all wrong ...
> 
> I really really think you should get away from access_ok() entirely.
> 
> Please just get rid of it, and use "copy_from_user()" instead.

Yes please. :) It also makes code SO much easier to audit (both
automatically and manually).
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 03a656feb7b8..d8814e637e71 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -285,6 +285,8 @@  struct i915_execbuffer {
 
 	struct i915_eb_fence {
 		struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+		u64 value;
+		struct dma_fence_chain *chain_fence;
 	} *fences;
 	u32 n_fences;
 
@@ -2213,14 +2215,121 @@  eb_pin_engine(struct i915_execbuffer *eb,
 static void
 __free_fence_array(struct i915_eb_fence *fences, unsigned int n)
 {
-	while (n--)
+	while (n--) {
 		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+		kfree(fences[n].chain_fence);
+	}
 	kvfree(fences);
 }
 
 static int
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct i915_execbuffer *eb)
+get_timeline_fence_array(const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences,
+			 struct i915_execbuffer *eb)
+{
+	struct drm_i915_gem_exec_fence __user *user_fences;
+	struct i915_eb_fence *fences;
+	u64 __user *user_values;
+	u64 num_fences, num_user_fences = timeline_fences->fence_count;
+	unsigned long n;
+	int err;
+
+	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
+	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+	if (num_user_fences > min_t(unsigned long,
+				    ULONG_MAX / sizeof(*user_fences),
+				    SIZE_MAX / sizeof(*fences)))
+		return -EINVAL;
+
+	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+	if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
+		return -EFAULT;
+
+	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+	if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
+		return -EFAULT;
+
+	fences = kvmalloc_array(num_user_fences, sizeof(*fences),
+				__GFP_NOWARN | GFP_KERNEL);
+	if (!fences)
+		return -ENOMEM;
+
+	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+	for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
+		struct drm_i915_gem_exec_fence fence;
+		struct drm_syncobj *syncobj;
+		u64 point;
+
+		if (__copy_from_user(&fence, user_fences++, sizeof(fence))) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+			err = -EINVAL;
+			goto err;
+		}
+
+		if (__get_user(point, user_values++)) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		syncobj = drm_syncobj_find(eb->file, fence.handle);
+		if (!syncobj) {
+			DRM_DEBUG("Invalid syncobj handle provided\n");
+			err = -ENOENT;
+			goto err;
+		}
+
+		/*
+		 * For timeline syncobjs we need to preallocate chains for
+		 * later signaling.
+		 */
+		if (point != 0 && fence.flags & I915_EXEC_FENCE_SIGNAL) {
+			/*
+			 * Waiting and signaling the same point (when point !=
+			 * 0) would break the timeline.
+			 */
+			if (fence.flags & I915_EXEC_FENCE_WAIT) {
+				DRM_DEBUG("Tring to wait & signal the same timeline point.\n");
+				err = -EINVAL;
+				drm_syncobj_put(syncobj);
+				goto err;
+			}
+
+			fences[num_fences].chain_fence =
+				kmalloc(sizeof(*fences[num_fences].chain_fence),
+					GFP_KERNEL);
+			if (!fences[num_fences].chain_fence) {
+				drm_syncobj_put(syncobj);
+				err = -ENOMEM;
+				DRM_DEBUG("Unable to alloc chain_fence\n");
+				goto err;
+			}
+		} else {
+			fences[num_fences].chain_fence = NULL;
+		}
+
+		fences[num_fences].syncobj = ptr_pack_bits(syncobj, fence.flags, 2);
+		fences[num_fences].value = point;
+		num_fences++;
+	}
+
+	eb->fences = fences;
+	eb->n_fences = num_fences;
+
+	return 0;
+
+err:
+	__free_fence_array(fences, num_fences);
+	return err;
+}
+
+static int
+get_legacy_fence_array(struct drm_i915_gem_execbuffer2 *args,
+		       struct i915_execbuffer *eb)
 {
 	const unsigned long nfences = args->num_cliprects;
 	struct drm_i915_gem_exec_fence __user *user;
@@ -2235,7 +2344,7 @@  get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
 	if (nfences > min_t(unsigned long,
 			    ULONG_MAX / sizeof(*user),
-			    SIZE_MAX / sizeof(*fences)))
+			    SIZE_MAX / sizeof(*eb->fences)))
 		return -EINVAL;
 
 	user = u64_to_user_ptr(args->cliprects_ptr);
@@ -2272,6 +2381,8 @@  get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
 
 		fences[n].syncobj = ptr_pack_bits(syncobj, fence.flags, 2);
+		fences[n].value = 0;
+		fences[n].chain_fence = NULL;
 	}
 
 	eb->fences = fences;
@@ -2303,6 +2414,15 @@  await_fence_array(struct i915_execbuffer *eb)
 		if (!fence)
 			return -EINVAL;
 
+		if (eb->fences[n].value) {
+			err = dma_fence_chain_find_seqno(&fence, eb->fences[n].value);
+			if (err)
+				return err;
+
+			if (!fence)
+				continue;
+		}
+
 		err = i915_request_await_dma_fence(eb->request, fence);
 		dma_fence_put(fence);
 		if (err < 0)
@@ -2326,7 +2446,17 @@  signal_fence_array(struct i915_execbuffer *eb)
 		if (!(flags & I915_EXEC_FENCE_SIGNAL))
 			continue;
 
-		drm_syncobj_replace_fence(syncobj, fence);
+		if (eb->fences[n].chain_fence) {
+			drm_syncobj_add_point(syncobj, eb->fences[n].chain_fence,
+					      fence, eb->fences[n].value);
+			/*
+			 * The chain's ownership is transferred to the
+			 * timeline.
+			 */
+			eb->fences[n].chain_fence = NULL;
+		} else {
+			drm_syncobj_replace_fence(syncobj, fence);
+		}
 	}
 }
 
@@ -2371,7 +2501,32 @@  static void eb_request_add(struct i915_execbuffer *eb)
 	mutex_unlock(&tl->mutex);
 }
 
+static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+	struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+	struct i915_execbuffer *eb = data;
+
+	/* Timeline fences are incompatible with the fence array flag. */
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return -EINVAL;
+
+	/*
+	 * Prevent the drm_i915_gem_execbuffer_ext_timeline_fences structure
+	 * to be included multiple times.
+	 */
+	if (eb->extension_flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+		return -EINVAL;
+
+	if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
+		return -EFAULT;
+
+	eb->extension_flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
+
+	return get_timeline_fence_array(&timeline_fences, eb);
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
+	[DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
 };
 
 static int
@@ -2475,7 +2630,7 @@  i915_gem_do_execbuffer(struct drm_device *dev,
 	if (err)
 		goto err_out_fence;
 
-	err = get_fence_array(args, &eb);
+	err = get_legacy_fence_array(args, &eb);
 	if (err)
 		goto err_arr_fence;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 67102dc26fce..f450d8782ac1 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1851,7 +1851,8 @@  static struct drm_driver driver = {
 	 */
 	.driver_features =
 	    DRIVER_GEM |
-	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
+	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE,
 	.release = i915_driver_release,
 	.open = i915_driver_open,
 	.lastclose = i915_driver_lastclose,
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 40390b2352b1..e50bdf676955 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -132,6 +132,7 @@  int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
 	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+	case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7ea38aa6502c..85ff36faf15a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -619,6 +619,13 @@  typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION	54
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
+ * I915_EXEC_USE_EXTENSIONS.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1047,9 +1054,41 @@  struct drm_i915_gem_exec_fence {
 };
 
 enum drm_i915_gem_execbuffer_ext {
+	/**
+	 * See drm_i915_gem_execbuf_ext_timeline_fences.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+	struct i915_user_extension base;
+
+	/**
+	 * Number of element in the handles_ptr & value_ptr arrays.
+	 */
+	__u64 fence_count;
+
+	/**
+	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
+	 * fence_count.
+	 */
+	__u64 handles_ptr;
+
+	/**
+	 * Pointer to an array of u64 values of length fence_count. Values
+	 * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline
+	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
+	 */
+	__u64 values_ptr;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs