Message ID | 20191213215614.24558-3-niranjana.vishwanathapura@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915/svm: Add SVM support | expand |
On Fri, Dec 13, 2019 at 4:07 PM Niranjana Vishwanathapura < niranjana.vishwanathapura@intel.com> wrote: > Shared Virtual Memory (SVM) runtime allocator support allows > binding a shared virtual address to a buffer object (BO) in the > device page table through an ioctl call. > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Jon Bloomfield <jon.bloomfield@intel.com> > Cc: Daniel Vetter <daniel.vetter@intel.com> > Cc: Sudeep Dutt <sudeep.dutt@intel.com> > Signed-off-by: Niranjana Vishwanathapura < > niranjana.vishwanathapura@intel.com> > --- > drivers/gpu/drm/i915/Kconfig | 11 ++++ > drivers/gpu/drm/i915/Makefile | 3 + > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 58 ++++++++++++++---- > drivers/gpu/drm/i915/gem/i915_gem_svm.c | 60 +++++++++++++++++++ > drivers/gpu/drm/i915/gem/i915_gem_svm.h | 22 +++++++ > drivers/gpu/drm/i915/i915_drv.c | 21 +++++++ > drivers/gpu/drm/i915/i915_drv.h | 22 +++++++ > drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + > drivers/gpu/drm/i915/i915_gem_gtt.h | 13 ++++ > include/uapi/drm/i915_drm.h | 27 +++++++++ > 10 files changed, 227 insertions(+), 11 deletions(-) > create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_svm.c > create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_svm.h > > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig > index ba9595960bbe..c2e48710eec8 100644 > --- a/drivers/gpu/drm/i915/Kconfig > +++ b/drivers/gpu/drm/i915/Kconfig > @@ -137,6 +137,16 @@ config DRM_I915_GVT_KVMGT > Choose this option if you want to enable KVMGT support for > Intel GVT-g. > > +config DRM_I915_SVM > + bool "Enable Shared Virtual Memory support in i915" > + depends on STAGING > + depends on DRM_I915 > + default n > + help > + Choose this option if you want Shared Virtual Memory (SVM) > + support in i915. With SVM support, one can share the virtual > + address space between a process and the GPU. 
> + > menu "drm/i915 Debugging" > depends on DRM_I915 > depends on EXPERT > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index e0fd10c0cfb8..75fe45633779 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -153,6 +153,9 @@ i915-y += \ > intel_region_lmem.o \ > intel_wopcm.o > > +# SVM code > +i915-$(CONFIG_DRM_I915_SVM) += gem/i915_gem_svm.o > + > # general-purpose microcontroller (GuC) support > obj-y += gt/uc/ > i915-y += gt/uc/intel_uc.o \ > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 5003e616a1ad..af360238a392 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -2836,10 +2836,14 @@ int > i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > struct drm_file *file) > { > + struct drm_i915_gem_exec_object2 *exec2_list, *exec2_list_user; > struct drm_i915_gem_execbuffer2 *args = data; > - struct drm_i915_gem_exec_object2 *exec2_list; > - struct drm_syncobj **fences = NULL; > const size_t count = args->buffer_count; > + struct drm_syncobj **fences = NULL; > + unsigned int i = 0, svm_count = 0; > + struct i915_address_space *vm; > + struct i915_gem_context *ctx; > + struct i915_svm_obj *svm_obj; > int err; > > if (!check_buffer_count(count)) { > @@ -2851,15 +2855,46 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, > void *data, > if (err) > return err; > > + ctx = i915_gem_context_lookup(file->driver_priv, args->rsvd1); > + if (!ctx || !rcu_access_pointer(ctx->vm)) > + return -ENOENT; > + > + rcu_read_lock(); > + vm = i915_vm_get(ctx->vm); > + rcu_read_unlock(); > + > +alloc_again: > + svm_count = vm->svm_count; > /* Allocate an extra slot for use by the command parser */ > - exec2_list = kvmalloc_array(count + 1, eb_element_size(), > + exec2_list = kvmalloc_array(count + svm_count + 1, > eb_element_size(), > __GFP_NOWARN | GFP_KERNEL); > if 
(exec2_list == NULL) { > DRM_DEBUG("Failed to allocate exec list for %zd buffers\n", > - count); > + count + svm_count); > return -ENOMEM; > } > - if (copy_from_user(exec2_list, > + mutex_lock(&vm->mutex); > + if (svm_count != vm->svm_count) { > + mutex_unlock(&vm->mutex); > + kvfree(exec2_list); > + goto alloc_again; > + } > + > + list_for_each_entry(svm_obj, &vm->svm_list, link) { > + memset(&exec2_list[i], 0, sizeof(*exec2_list)); > + exec2_list[i].handle = svm_obj->handle; > + exec2_list[i].offset = svm_obj->offset; > + exec2_list[i].flags = EXEC_OBJECT_PINNED | > + EXEC_OBJECT_SUPPORTS_48B_ADDRESS; > + i++; > + } > + exec2_list_user = &exec2_list[i]; > + args->buffer_count += svm_count; > + mutex_unlock(&vm->mutex); > + i915_vm_put(vm); > + i915_gem_context_put(ctx); > + > + if (copy_from_user(exec2_list_user, > u64_to_user_ptr(args->buffers_ptr), > sizeof(*exec2_list) * count)) { > DRM_DEBUG("copy %zd exec entries failed\n", count); > @@ -2876,6 +2911,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, > void *data, > } > > err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); > + args->buffer_count -= svm_count; > > /* > * Now that we have begun execution of the batchbuffer, we ignore > @@ -2886,7 +2922,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, > void *data, > if (args->flags & __EXEC_HAS_RELOC) { > struct drm_i915_gem_exec_object2 __user *user_exec_list = > u64_to_user_ptr(args->buffers_ptr); > - unsigned int i; > > /* Copy the new buffer offsets back to the user's exec > list. 
*/ > /* > @@ -2900,13 +2935,14 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, > void *data, > goto end; > > for (i = 0; i < args->buffer_count; i++) { > - if (!(exec2_list[i].offset & UPDATE)) > + u64 *offset = &exec2_list_user[i].offset; > + > + if (!(*offset & UPDATE)) > continue; > > - exec2_list[i].offset = > - gen8_canonical_addr(exec2_list[i].offset & > PIN_OFFSET_MASK); > - unsafe_put_user(exec2_list[i].offset, > - &user_exec_list[i].offset, > + *offset = gen8_canonical_addr(*offset & > + PIN_OFFSET_MASK); > + unsafe_put_user(*offset, &user_exec_list[i].offset, > end_user); > } > end_user: > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_svm.c > b/drivers/gpu/drm/i915/gem/i915_gem_svm.c > new file mode 100644 > index 000000000000..882fe56138e2 > --- /dev/null > +++ b/drivers/gpu/drm/i915/gem/i915_gem_svm.c > @@ -0,0 +1,60 @@ > +// SPDX-License-Identifier: MIT > +/* > + * Copyright © 2019 Intel Corporation > + */ > + > +#include "i915_drv.h" > +#include "i915_gem_gtt.h" > +#include "i915_gem_lmem.h" > + > +int i915_gem_vm_bind_svm_obj(struct i915_address_space *vm, > + struct drm_i915_gem_vm_bind *args, > + struct drm_file *file) > +{ > + struct i915_svm_obj *svm_obj, *tmp; > + struct drm_i915_gem_object *obj; > + int ret = 0; > + > + obj = i915_gem_object_lookup(file, args->handle); > + if (!obj) > + return -ENOENT; > + > + /* For dgfx, ensure the obj is in device local memory only */ > + if (IS_DGFX(vm->i915) && !i915_gem_object_is_lmem(obj)) > + return -EINVAL; > + > + /* FIXME: Need to handle case with unending batch buffers */ > + if (!(args->flags & I915_GEM_VM_BIND_UNBIND)) { > + svm_obj = kmalloc(sizeof(*svm_obj), GFP_KERNEL); > + if (!svm_obj) { > + ret = -ENOMEM; > + goto put_obj; > + } > + svm_obj->handle = args->handle; > + svm_obj->offset = args->start; > + } > + > + mutex_lock(&vm->mutex); > + if (!(args->flags & I915_GEM_VM_BIND_UNBIND)) { > + list_add(&svm_obj->link, &vm->svm_list); > + vm->svm_count++; > + } else { > + /* > + * FIXME: 
Need to handle case where object is > migrated/closed > + * without unbinding first. > + */ > + list_for_each_entry_safe(svm_obj, tmp, &vm->svm_list, > link) { > + if (svm_obj->handle != args->handle) > + continue; > + > + list_del_init(&svm_obj->link); > + vm->svm_count--; > + kfree(svm_obj); > + break; > + } > + } > + mutex_unlock(&vm->mutex); > +put_obj: > + i915_gem_object_put(obj); > + return ret; > +} > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_svm.h > b/drivers/gpu/drm/i915/gem/i915_gem_svm.h > new file mode 100644 > index 000000000000..d60b35c7d21a > --- /dev/null > +++ b/drivers/gpu/drm/i915/gem/i915_gem_svm.h > @@ -0,0 +1,22 @@ > +/* SPDX-License-Identifier: MIT */ > +/* > + * Copyright © 2019 Intel Corporation > + */ > + > +#ifndef __I915_GEM_SVM_H > +#define __I915_GEM_SVM_H > + > +#include "i915_drv.h" > + > +#if defined(CONFIG_DRM_I915_SVM) > +int i915_gem_vm_bind_svm_obj(struct i915_address_space *vm, > + struct drm_i915_gem_vm_bind *args, > + struct drm_file *file); > +#else > +static inline int i915_gem_vm_bind_svm_obj(struct i915_address_space *vm, > + struct drm_i915_gem_vm_bind > *args, > + struct drm_file *file) > +{ return -ENOTSUPP; } > +#endif > + > +#endif /* __I915_GEM_SVM_H */ > diff --git a/drivers/gpu/drm/i915/i915_drv.c > b/drivers/gpu/drm/i915/i915_drv.c > index 2a11f60c4fd2..d452ea8e40b3 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -2680,6 +2680,26 @@ i915_gem_reject_pin_ioctl(struct drm_device *dev, > void *data, > return -ENODEV; > } > > +static int i915_gem_vm_bind_ioctl(struct drm_device *dev, void *data, > + struct drm_file *file) > +{ > + struct drm_i915_gem_vm_bind *args = data; > + struct i915_address_space *vm; > + int ret = -EINVAL; > + > + vm = i915_gem_address_space_lookup(file->driver_priv, args->vm_id); > + if (unlikely(!vm)) > + return -ENOENT; > + > + switch (args->type) { > + case I915_GEM_VM_BIND_SVM_OBJ: > + ret = i915_gem_vm_bind_svm_obj(vm, args, file); > + } > 
+ > + i915_vm_put(vm); > + return ret; > +} > + > static const struct drm_ioctl_desc i915_ioctls[] = { > DRM_IOCTL_DEF_DRV(I915_INIT, drm_noop, > DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), > DRM_IOCTL_DEF_DRV(I915_FLUSH, drm_noop, DRM_AUTH), > @@ -2739,6 +2759,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { > DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, > DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, > DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF_DRV(I915_GEM_VM_BIND, i915_gem_vm_bind_ioctl, > DRM_RENDER_ALLOW), > }; > > static struct drm_driver driver = { > diff --git a/drivers/gpu/drm/i915/i915_drv.h > b/drivers/gpu/drm/i915/i915_drv.h > index ce130e1f1e47..2d0a7cd2dc44 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1909,6 +1909,28 @@ i915_gem_context_lookup(struct > drm_i915_file_private *file_priv, u32 id) > return ctx; > } > > +static inline struct i915_address_space * > +__i915_gem_address_space_lookup_rcu(struct drm_i915_file_private > *file_priv, > + u32 id) > +{ > + return idr_find(&file_priv->vm_idr, id); > +} > + > +static inline struct i915_address_space * > +i915_gem_address_space_lookup(struct drm_i915_file_private *file_priv, > + u32 id) > +{ > + struct i915_address_space *vm; > + > + rcu_read_lock(); > + vm = __i915_gem_address_space_lookup_rcu(file_priv, id); > + if (vm) > + vm = i915_vm_get(vm); > + rcu_read_unlock(); > + > + return vm; > +} > + > /* i915_gem_evict.c */ > int __must_check i915_gem_evict_something(struct i915_address_space *vm, > u64 min_size, u64 alignment, > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c > b/drivers/gpu/drm/i915/i915_gem_gtt.c > index be36719e7987..7d4f5fa84b02 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -586,6 +586,7 @@ static void i915_address_space_init(struct > i915_address_space *vm, 
int subclass) > stash_init(&vm->free_pages); > > INIT_LIST_HEAD(&vm->bound_list); > + INIT_LIST_HEAD(&vm->svm_list); > } > > static int __setup_page_dma(struct i915_address_space *vm, > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h > b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 31a4a96ddd0d..7c1b54c9677d 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -285,6 +285,13 @@ struct pagestash { > struct pagevec pvec; > }; > > +struct i915_svm_obj { > + /** This obj's place in the SVM object list */ > + struct list_head link; > + u32 handle; > + u64 offset; > +}; > + > struct i915_address_space { > struct kref ref; > struct rcu_work rcu; > @@ -329,6 +336,12 @@ struct i915_address_space { > */ > struct list_head bound_list; > > + /** > + * List of SVM bind objects. > + */ > + struct list_head svm_list; > + unsigned int svm_count; > + > struct pagestash free_pages; > > /* Global GTT */ > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 20314eea632a..e10d7bf2cf9f 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -360,6 +360,7 @@ typedef struct _drm_i915_sarea { > #define DRM_I915_GEM_VM_CREATE 0x3a > #define DRM_I915_GEM_VM_DESTROY 0x3b > #define DRM_I915_GEM_OBJECT_SETPARAM DRM_I915_GEM_CONTEXT_SETPARAM > +#define DRM_I915_GEM_VM_BIND 0x3c > /* Must be kept compact -- no holes */ > > #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + > DRM_I915_INIT, drm_i915_init_t) > @@ -424,6 +425,7 @@ typedef struct _drm_i915_sarea { > #define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + > DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control) > #define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + > DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control) > #define DRM_IOCTL_I915_GEM_OBJECT_SETPARAM DRM_IOWR(DRM_COMMAND_BASE > + DRM_I915_GEM_OBJECT_SETPARAM, struct drm_i915_gem_object_param) > +#define DRM_IOCTL_I915_GEM_VM_BIND 
DRM_IOWR(DRM_COMMAND_BASE > + DRM_I915_GEM_VM_BIND, struct drm_i915_gem_vm_bind) > > /* Allow drivers to submit batchbuffers directly to hardware, relying > * on the security mechanisms provided by hardware. > @@ -2300,6 +2302,31 @@ struct drm_i915_query_perf_config { > __u8 data[]; > }; > > +/** > + * struct drm_i915_gem_vm_bind > + * > + * Bind an object in a vm's page table. > First off, this is something I've wanted for a while for Vulkan, it's just never made its way high enough up the priority list. However, it's going to have to come one way or another soon. I'm glad to see kernel API for this being proposed. I do, however, have a few high-level comments/questions about the API: 1. In order to be useful for sparse memory support, the API has to go the other way around so that it binds a VA range to a range within the BO. It also needs to be able to handle overlapping where two different VA ranges may map to the same underlying bytes in the BO. This likely means that unbind needs to also take a VA range and only unbind that range. 2. If this is going to be useful for managing GL's address space where we have lots of BOs, we probably want it to take a list of ranges so we aren't making one ioctl for each thing we want to bind. 3. Why are there no ways to synchronize this with anything? For binding, this probably isn't really needed as long as the VA range you're binding is empty. However, if you want to move bindings around or unbind something, the only option is to block in userspace and then call bind/unbind. This can be done but it means even more threads in the UMD which is unpleasant. One could argue that that's more or less what the kernel is going to have to do so we may as well do it in userspace. However, I'm not 100% convinced that's true. 
--Jason > + */ > +struct drm_i915_gem_vm_bind { > + /** VA start to bind **/ > + __u64 start; > + > + /** Type of memory to [un]bind **/ > + __u32 type; > +#define I915_GEM_VM_BIND_SVM_OBJ 0 > + > + /** Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ type **/ > + __u32 handle; > + > + /** vm to [un]bind **/ > + __u32 vm_id; > + > + /** Flags **/ > + __u32 flags; > +#define I915_GEM_VM_BIND_UNBIND (1 << 0) > +#define I915_GEM_VM_BIND_READONLY (1 << 1) > +}; > + > #if defined(__cplusplus) > } > #endif > -- > 2.21.0.rc0.32.g243a4c7e27 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx >
On Fri, Dec 13, 2019 at 04:58:42PM -0600, Jason Ekstrand wrote: > > +/** > + * struct drm_i915_gem_vm_bind > + * > + * Bind an object in a vm's page table. > > First off, this is something I've wanted for a while for Vulkan, it's just > never made its way high enough up the priority list. However, it's going > to have to come one way or another soon. I'm glad to see kernel API for > this being proposed. > I do, however, have a few high-level comments/questions about the API: > 1. In order to be useful for sparse memory support, the API has to go the > other way around so that it binds a VA range to a range within the BO. It > also needs to be able to handle overlapping where two different VA ranges > may map to the same underlying bytes in the BO. This likely means that > unbind needs to also take a VA range and only unbind that range. > 2. If this is going to be useful for managing GL's address space where we > have lots of BOs, we probably want it to take a list of ranges so we > aren't making one ioctl for each thing we want to bind. Hi Jason, Yah, some of these requirements came up. They are not being done here due to time and effort involved in defining those requirements, implementing and validating. However, this ioctl can be extended in a backward compatible way to handle those requirements if required. > 3. Why are there no ways to synchronize this with anything? For binding, > this probably isn't really needed as long as the VA range you're binding > is empty. However, if you want to move bindings around or unbind > something, the only option is to block in userspace and then call > bind/unbind. This can be done but it means even more threads in the UMD > which is unpleasant. One could argue that that's more or less what the > kernel is going to have to do so we may as well do it in userspace. > However, I'm not 100% convinced that's true. > --Jason > Yah, that is the thought. 
But as SVM feature evolves, I think we can consider handling some such cases if handling those in driver does make a whole lot of sense. Thanks, Niranjana > > + */ > +struct drm_i915_gem_vm_bind { > + /** VA start to bind **/ > + __u64 start; > + > + /** Type of memory to [un]bind **/ > + __u32 type; > +#define I915_GEM_VM_BIND_SVM_OBJ 0 > + > + /** Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ type > **/ > + __u32 handle; > + > + /** vm to [un]bind **/ > + __u32 vm_id; > + > + /** Flags **/ > + __u32 flags; > +#define I915_GEM_VM_BIND_UNBIND (1 << 0) > +#define I915_GEM_VM_BIND_READONLY (1 << 1) > +}; > + > #if defined(__cplusplus) > } > #endif > -- > 2.21.0.rc0.32.g243a4c7e27 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Fri, Dec 13, 2019 at 5:24 PM Niranjan Vishwanathapura < niranjana.vishwanathapura@intel.com> wrote: > On Fri, Dec 13, 2019 at 04:58:42PM -0600, Jason Ekstrand wrote: > > > > +/** > > + * struct drm_i915_gem_vm_bind > > + * > > + * Bind an object in a vm's page table. > > > > First off, this is something I've wanted for a while for Vulkan, it's > just > > never made its way high enough up the priority list. However, it's > going > > to have to come one way or another soon. I'm glad to see kernel API > for > > this being proposed. > > I do, however, have a few high-level comments/questions about the API: > > 1. In order to be useful for sparse memory support, the API has to go > the > > other way around so that it binds a VA range to a range within the > BO. It > > also needs to be able to handle overlapping where two different VA > ranges > > may map to the same underlying bytes in the BO. This likely means that > > unbind needs to also take a VA range and only unbind that range. > > 2. If this is going to be useful for managing GL's address space > where we > > have lots of BOs, we probably want it to take a list of ranges so we > > aren't making one ioctl for each thing we want to bind. > > Hi Jason, > > Yah, some of these requirements came up. > Yes, I have raised them every single time an API like this has come across my e-mail inbox for years and they continue to get ignored. Why are we landing an API that we know isn't the API we want especially when it's pretty obvious roughly what the API we want is? It may be less time in the short term, but long-term it means two ioctls and two implementations in i915, IGT tests for both code paths, and code in all UMDs to call one or the other depending on what kernel you're running on, and we have to maintain all that code going forward forever. Sure, that's a price we pay today for a variety of things but that's because they all seemed like the right thing at the time. 
Landing the wrong API when we know it's the wrong API seems foolish. They are not being done here due to time and effort involved in defining > those requirements, implementing and validating. > For #1, yes, it would require more effort but for #2, it really doesn't take any extra effort to make it take an array... > However, this ioctl can be extended in a backward compatible way to handle > those requirements if required. > > > 3. Why are there no ways to synchronize this with anything? For > binding, > > this probably isn't really needed as long as the VA range you're > binding > > is empty. However, if you want to move bindings around or unbind > > something, the only option is to block in userspace and then call > > bind/unbind. This can be done but it means even more threads in the > UMD > > which is unpleasant. One could argue that that's more or less what the > > kernel is going to have to do so we may as well do it in userspace. > > However, I'm not 100% convinced that's true. > > --Jason > > > > Yah, that is the thought. > But as SVM feature evolves, I think we can consider handling some such > cases > if hadling those in driver does make whole lot sense. > Sparse binding exists as a feature. It's been in D3D for some time and it's in Vulkan. We pretty much know what the requirements are. If you go look at how it's supposed to work in Vulkan, you have a binding queue and it waits on semaphores before [un]binding and signals semaphores after [un]binding. The biggest problem from an API (as opposed to implementation) POV with doing that in i915 is that we have too many synchronization primitives to choose from. 
:-( --Jason > Thanks, > Niranjana > > > > > + */ > > +struct drm_i915_gem_vm_bind { > > + /** VA start to bind **/ > > + __u64 start; > > + > > + /** Type of memory to [un]bind **/ > > + __u32 type; > > +#define I915_GEM_VM_BIND_SVM_OBJ 0 > > + > > + /** Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ > type > > **/ > > + __u32 handle; > > + > > + /** vm to [un]bind **/ > > + __u32 vm_id; > > + > > + /** Flags **/ > > + __u32 flags; > > +#define I915_GEM_VM_BIND_UNBIND (1 << 0) > > +#define I915_GEM_VM_BIND_READONLY (1 << 1) > > +}; > > + > > #if defined(__cplusplus) > > } > > #endif > > -- > > 2.21.0.rc0.32.g243a4c7e27 > > > > _______________________________________________ > > Intel-gfx mailing list > > Intel-gfx@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/intel-gfx >
Quoting Jason Ekstrand (2019-12-14 00:36:19) > On Fri, Dec 13, 2019 at 5:24 PM Niranjan Vishwanathapura < > niranjana.vishwanathapura@intel.com> wrote: > > On Fri, Dec 13, 2019 at 04:58:42PM -0600, Jason Ekstrand wrote: > > > > +/** > > + * struct drm_i915_gem_vm_bind > > + * > > + * Bind an object in a vm's page table. > > > > First off, this is something I've wanted for a while for Vulkan, it's > just > > never made its way high enough up the priority list. However, it's > going > > to have to come one way or another soon. I'm glad to see kernel API > for > > this being proposed. > > I do, however, have a few high-level comments/questions about the API: > > 1. In order to be useful for sparse memory support, the API has to go > the > > other way around so that it binds a VA range to a range within the BO. > It > > also needs to be able to handle overlapping where two different VA > ranges > > may map to the same underlying bytes in the BO. This likely means that > > unbind needs to also take a VA range and only unbind that range. > > 2. If this is going to be useful for managing GL's address space where > we > > have lots of BOs, we probably want it to take a list of ranges so we > > aren't making one ioctl for each thing we want to bind. > > Hi Jason, > > Yah, some of these requirements came up. > > > Yes, I have raised them every single time an API like this has come across my > e-mail inbox for years and they continue to get ignored. Why are we landing an > API that we know isn't the API we want especially when it's pretty obvious > roughly what the API we want is? It may be less time in the short term, but > long-term it means two ioctls and two implementations in i915, IGT tests for > both code paths, and code in all UMDs to call one or the other depending on > what kernel you're running on, and we have to maintain all that code going > forward forever. 
Sure, that's a price we pay today for a variety of things but > that's because they all seemed like the right thing at the time. Landing the > wrong API when we know it's the wrong API seems foolish. Exactly. This is not even close to the uAPI we need. Reposting an RFC without taking in the concerns last time (or the time before that, or the time before that...) suggests that you aren't really requesting for comments at all. -Chris
Quoting Niranjana Vishwanathapura (2019-12-13 21:56:04) > Shared Virtual Memory (SVM) runtime allocator support allows > binding a shared virtual address to a buffer object (BO) in the > device page table through an ioctl call. > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Jon Bloomfield <jon.bloomfield@intel.com> > Cc: Daniel Vetter <daniel.vetter@intel.com> > Cc: Sudeep Dutt <sudeep.dutt@intel.com> > Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com> > --- > drivers/gpu/drm/i915/Kconfig | 11 ++++ > drivers/gpu/drm/i915/Makefile | 3 + > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 58 ++++++++++++++---- > drivers/gpu/drm/i915/gem/i915_gem_svm.c | 60 +++++++++++++++++++ > drivers/gpu/drm/i915/gem/i915_gem_svm.h | 22 +++++++ > drivers/gpu/drm/i915/i915_drv.c | 21 +++++++ > drivers/gpu/drm/i915/i915_drv.h | 22 +++++++ > drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + > drivers/gpu/drm/i915/i915_gem_gtt.h | 13 ++++ > include/uapi/drm/i915_drm.h | 27 +++++++++ > 10 files changed, 227 insertions(+), 11 deletions(-) > create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_svm.c > create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_svm.h > > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig > index ba9595960bbe..c2e48710eec8 100644 > --- a/drivers/gpu/drm/i915/Kconfig > +++ b/drivers/gpu/drm/i915/Kconfig > @@ -137,6 +137,16 @@ config DRM_I915_GVT_KVMGT > Choose this option if you want to enable KVMGT support for > Intel GVT-g. > > +config DRM_I915_SVM > + bool "Enable Shared Virtual Memory support in i915" > + depends on STAGING > + depends on DRM_I915 > + default n > + help > + Choose this option if you want Shared Virtual Memory (SVM) > + support in i915. With SVM support, one can share the virtual > + address space between a process and the GPU. 
> + > menu "drm/i915 Debugging" > depends on DRM_I915 > depends on EXPERT > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile > index e0fd10c0cfb8..75fe45633779 100644 > --- a/drivers/gpu/drm/i915/Makefile > +++ b/drivers/gpu/drm/i915/Makefile > @@ -153,6 +153,9 @@ i915-y += \ > intel_region_lmem.o \ > intel_wopcm.o > > +# SVM code > +i915-$(CONFIG_DRM_I915_SVM) += gem/i915_gem_svm.o > + > # general-purpose microcontroller (GuC) support > obj-y += gt/uc/ > i915-y += gt/uc/intel_uc.o \ > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 5003e616a1ad..af360238a392 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -2836,10 +2836,14 @@ int > i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > struct drm_file *file) > { > + struct drm_i915_gem_exec_object2 *exec2_list, *exec2_list_user; > struct drm_i915_gem_execbuffer2 *args = data; > - struct drm_i915_gem_exec_object2 *exec2_list; > - struct drm_syncobj **fences = NULL; > const size_t count = args->buffer_count; > + struct drm_syncobj **fences = NULL; > + unsigned int i = 0, svm_count = 0; > + struct i915_address_space *vm; > + struct i915_gem_context *ctx; > + struct i915_svm_obj *svm_obj; > int err; > > if (!check_buffer_count(count)) { > @@ -2851,15 +2855,46 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > if (err) > return err; > > + ctx = i915_gem_context_lookup(file->driver_priv, args->rsvd1); > + if (!ctx || !rcu_access_pointer(ctx->vm)) > + return -ENOENT; This is just hopelessly wrong. For persistence, the _ce_->vm will have a list of must-be-present vma, with a flag for whether they need prefaulting (!svm everything must be prefaulted obviously). 
Then during reservation we ensure that all those persistent vma are in place (so we probably use an eviction list to keep track of those we need to instantiate on this execbuf). We don't even want to individually track activity on those vma, preferring to assume they are used by every request and so on change they need serialising [for explicit uAPI unbind, where possible we strive to do it async for endless, or at least sync against iova semaphore] against the last request in the vm (so we need a vm->active). However, we do need an EXT_EXTENSION to mark writes for implicit fencing (e.g. exported dmabuf) to replace the information lost from execobject[] > +struct drm_i915_gem_vm_bind { > + /** VA start to bind **/ > + __u64 start; iova; offset; /* into handle */ length; /* from offset */ > + > + /** Type of memory to [un]bind **/ > + __u32 type; > +#define I915_GEM_VM_BIND_SVM_OBJ 0 > + > + /** Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ type **/ > + __u32 handle; > + > + /** vm to [un]bind **/ > + __u32 vm_id; > + > + /** Flags **/ > + __u32 flags; > +#define I915_GEM_VM_BIND_UNBIND (1 << 0) > +#define I915_GEM_VM_BIND_READONLY (1 << 1) And don't forget extensions so that we can define the synchronisation controls. -Chris
On Sat, Dec 14, 2019 at 10:31:37AM +0000, Chris Wilson wrote: >Quoting Jason Ekstrand (2019-12-14 00:36:19) >> On Fri, Dec 13, 2019 at 5:24 PM Niranjan Vishwanathapura < >> niranjana.vishwanathapura@intel.com> wrote: >> >> On Fri, Dec 13, 2019 at 04:58:42PM -0600, Jason Ekstrand wrote: >> > >> > +/** >> > + * struct drm_i915_gem_vm_bind >> > + * >> > + * Bind an object in a vm's page table. >> > >> > First off, this is something I've wanted for a while for Vulkan, it's >> just >> > never made its way high enough up the priority list. However, it's >> going >> > to have to come one way or another soon. I'm glad to see kernel API >> for >> > this being proposed. >> > I do, however, have a few high-level comments/questions about the API: >> > 1. In order to be useful for sparse memory support, the API has to go >> the >> > other way around so that it binds a VA range to a range within the BO. >> It >> > also needs to be able to handle overlapping where two different VA >> ranges >> > may map to the same underlying bytes in the BO. This likely means that >> > unbind needs to also take a VA range and only unbind that range. >> > 2. If this is going to be useful for managing GL's address space where >> we >> > have lots of BOs, we probably want it to take a list of ranges so we >> > aren't making one ioctl for each thing we want to bind. >> >> Hi Jason, >> >> Yah, some of these requirements came up. >> >> >> Yes, I have raised them every single time an API like this has come across my >> e-mail inbox for years and they continue to get ignored. Why are we landing an >> API that we know isn't the API we want especially when it's pretty obvious >> roughly what the API we want is? It may be less time in the short term, but >> long-term it means two ioctls and two implementations in i915, IGT tests for >> both code paths, and code in all UMDs to call one or the other depending on >> what kernel you're running on, and we have to maintain all that code going >> forward forever. 
Sure, that's a price we pay today for a variety of things but >> that's because they all seemed like the right thing at the time. Landing the >> wrong API when we know it's the wrong API seems foolish. > >Exactly. This is not even close to the uAPI we need. Reposting an RFC >without taking in the concerns last time (or the time before that, or >the time before that...) suggests that you aren't really requesting for >comments at all. Thanks Jason for detailed explanation. Chris, all comments and guidance are much appreciated :) I haven't looked in detail, but my concern is that implementing partial object binding (offset, length) from vma down to [un]binding in ppgtt might be a lot of work to include in this SVM patch series. I believe we need the partial object binding in non-SVM scenario as well? Ok, let me change the interface as below. struct drm_i915_gem_vm_bind_va { /** VA start to bind **/ __u64 start; /** Offset in Object to bind for I915_GEM_VM_BIND_SVM_OBJ type **/ __u64 offset; /** VA length to [un]bind **/ __u64 length; /** Type of memory to [un]bind **/ __u32 type; #define I915_GEM_VM_BIND_SVM_OBJ 0 #define I915_GEM_VM_BIND_SVM_BUFFER 1 /** Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ type **/ __u32 handle; /** Flags **/ __u32 flags; #define I915_GEM_VM_BIND_UNBIND (1 << 0) #define I915_GEM_VM_BIND_READONLY (1 << 1) } struct drm_i915_gem_vm_bind { /** vm to [un]bind **/ __u32 vm_id; /** number of VAs to bind **/ __u32 num_vas; /** Array of VAs to bind **/ struct drm_i915_gem_vm_bind_va *bind_vas; /** User extensions **/ __u64 extensions; }; When synchronization control is added as extension, it applies to all VAs in the array. Does this look good? Niranjana >-Chris
On Sat, Dec 14, 2019 at 10:56:54AM +0000, Chris Wilson wrote: >Quoting Niranjana Vishwanathapura (2019-12-13 21:56:04) >> Shared Virtual Memory (SVM) runtime allocator support allows >> binding a shared virtual address to a buffer object (BO) in the >> device page table through an ioctl call. >> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> >> Cc: Jon Bloomfield <jon.bloomfield@intel.com> >> Cc: Daniel Vetter <daniel.vetter@intel.com> >> Cc: Sudeep Dutt <sudeep.dutt@intel.com> >> Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com> >> --- >> drivers/gpu/drm/i915/Kconfig | 11 ++++ >> drivers/gpu/drm/i915/Makefile | 3 + >> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 58 ++++++++++++++---- >> drivers/gpu/drm/i915/gem/i915_gem_svm.c | 60 +++++++++++++++++++ >> drivers/gpu/drm/i915/gem/i915_gem_svm.h | 22 +++++++ >> drivers/gpu/drm/i915/i915_drv.c | 21 +++++++ >> drivers/gpu/drm/i915/i915_drv.h | 22 +++++++ >> drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + >> drivers/gpu/drm/i915/i915_gem_gtt.h | 13 ++++ >> include/uapi/drm/i915_drm.h | 27 +++++++++ >> 10 files changed, 227 insertions(+), 11 deletions(-) >> create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_svm.c >> create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_svm.h >> >> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig >> index ba9595960bbe..c2e48710eec8 100644 >> --- a/drivers/gpu/drm/i915/Kconfig >> +++ b/drivers/gpu/drm/i915/Kconfig >> @@ -137,6 +137,16 @@ config DRM_I915_GVT_KVMGT >> Choose this option if you want to enable KVMGT support for >> Intel GVT-g. >> >> +config DRM_I915_SVM >> + bool "Enable Shared Virtual Memory support in i915" >> + depends on STAGING >> + depends on DRM_I915 >> + default n >> + help >> + Choose this option if you want Shared Virtual Memory (SVM) >> + support in i915. With SVM support, one can share the virtual >> + address space between a process and the GPU. 
>> + >> menu "drm/i915 Debugging" >> depends on DRM_I915 >> depends on EXPERT >> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile >> index e0fd10c0cfb8..75fe45633779 100644 >> --- a/drivers/gpu/drm/i915/Makefile >> +++ b/drivers/gpu/drm/i915/Makefile >> @@ -153,6 +153,9 @@ i915-y += \ >> intel_region_lmem.o \ >> intel_wopcm.o >> >> +# SVM code >> +i915-$(CONFIG_DRM_I915_SVM) += gem/i915_gem_svm.o >> + >> # general-purpose microcontroller (GuC) support >> obj-y += gt/uc/ >> i915-y += gt/uc/intel_uc.o \ >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >> index 5003e616a1ad..af360238a392 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >> @@ -2836,10 +2836,14 @@ int >> i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, >> struct drm_file *file) >> { >> + struct drm_i915_gem_exec_object2 *exec2_list, *exec2_list_user; >> struct drm_i915_gem_execbuffer2 *args = data; >> - struct drm_i915_gem_exec_object2 *exec2_list; >> - struct drm_syncobj **fences = NULL; >> const size_t count = args->buffer_count; >> + struct drm_syncobj **fences = NULL; >> + unsigned int i = 0, svm_count = 0; >> + struct i915_address_space *vm; >> + struct i915_gem_context *ctx; >> + struct i915_svm_obj *svm_obj; >> int err; >> >> if (!check_buffer_count(count)) { >> @@ -2851,15 +2855,46 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, >> if (err) >> return err; >> >> + ctx = i915_gem_context_lookup(file->driver_priv, args->rsvd1); >> + if (!ctx || !rcu_access_pointer(ctx->vm)) >> + return -ENOENT; > >This is just hopelessly wrong. > >For persistence, the _ce_->vm will have a list of must-be-present >vma, with a flag for whether they need prefaulting (!svm everything must >be prefaulted obviously). 
Then during reservation we ensure that all those >persistent vma are in place (so we probably use an eviction list to keep >track of those we need to instantiate on this execbuf). We don't even >want to individually track activity on those vma, preferring to assume >they are used by every request and so on change they need serialising >[for explicit uAPI unbind, where possible we strive to do it async for >endless, or at least sync against iova semaphore] against the last request >in the vm (so we need a vm->active). However, we do need an EXT_EXTENSION >to mark writes for implicit fencing (e.g. exported dmabuf) to replace >the information lost from execobject[] > I did not understand some points above. I am no expert here, and appreciate the feedback. My understanding is that [excluding endless batch buffer scenario which is not supported in this patch series,] VM_BIND is no different than the soft-pinning of objects we have today in the execbuf path. Hence the idea here is to add those VM_BIND objects to the execobject[] and let the execbuffer path to take care of the rest. Persistence of bindings across multiple requests is something not considered. Do we need this flag in execobject[] as well in execbuff path (with & without soft-pinning)? Other than that, we do have a list of VM_BIND objects in a per 'vm' list as you are suggesting above. Let me sync with you to better understand this. >> +struct drm_i915_gem_vm_bind { >> + /** VA start to bind **/ >> + __u64 start; > >iova; >offset; /* into handle */ >length; /* from offset */ > Here iova is same as 'start' above? 
>> + >> + /** Type of memory to [un]bind **/ >> + __u32 type; >> +#define I915_GEM_VM_BIND_SVM_OBJ 0 >> + >> + /** Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ type **/ >> + __u32 handle; >> + >> + /** vm to [un]bind **/ >> + __u32 vm_id; >> + >> + /** Flags **/ >> + __u32 flags; >> +#define I915_GEM_VM_BIND_UNBIND (1 << 0) >> +#define I915_GEM_VM_BIND_READONLY (1 << 1) > >And don't forget extensions so that we can define the synchronisation >controls. OK. Niranjana >-Chris
On Sun, Dec 15, 2019 at 10:24 PM Niranjan Vishwanathapura < niranjana.vishwanathapura@intel.com> wrote: > On Sat, Dec 14, 2019 at 10:31:37AM +0000, Chris Wilson wrote: > >Quoting Jason Ekstrand (2019-12-14 00:36:19) > >> On Fri, Dec 13, 2019 at 5:24 PM Niranjan Vishwanathapura < > >> niranjana.vishwanathapura@intel.com> wrote: > >> > >> On Fri, Dec 13, 2019 at 04:58:42PM -0600, Jason Ekstrand wrote: > >> > > >> > +/** > >> > + * struct drm_i915_gem_vm_bind > >> > + * > >> > + * Bind an object in a vm's page table. > >> > > >> > First off, this is something I've wanted for a while for > Vulkan, it's > >> just > >> > never made its way high enough up the priority list. However, > it's > >> going > >> > to have to come one way or another soon. I'm glad to see > kernel API > >> for > >> > this being proposed. > >> > I do, however, have a few high-level comments/questions about > the API: > >> > 1. In order to be useful for sparse memory support, the API > has to go > >> the > >> > other way around so that it binds a VA range to a range within > the BO. > >> It > >> > also needs to be able to handle overlapping where two different > VA > >> ranges > >> > may map to the same underlying bytes in the BO. This likely > means that > >> > unbind needs to also take a VA range and only unbind that range. > >> > 2. If this is going to be useful for managing GL's address > space where > >> we > >> > have lots of BOs, we probably want it to take a list of ranges > so we > >> > aren't making one ioctl for each thing we want to bind. > >> > >> Hi Jason, > >> > >> Yah, some of these requirements came up. > >> > >> > >> Yes, I have raised them every single time an API like this has come > across my > >> e-mail inbox for years and they continue to get ignored. Why are we > landing an > >> API that we know isn't the API we want especially when it's pretty > obvious > >> roughly what the API we want is? 
It may be less time in the short > term, but > >> long-term it means two ioctls and two implementations in i915, IGT > tests for > >> both code paths, and code in all UMDs to call one or the other > depending on > >> what kernel you're running on, and we have to maintain all that code > going > >> forward forever. Sure, that's a price we pay today for a variety of > things but > >> that's because they all seemed like the right thing at the time. > Landing the > >> wrong API when we know it's the wrong API seems foolish. > > > >Exactly. This is not even close to the uAPI we need. Reposting an RFC > >without taking in the concerns last time (or the time before that, or > >the time before that...) suggests that you aren't really requesting for > >comments at all. > > Thanks Jason for detailed exlanation. > Chris, all comments and guidance are much appreciated :) > > I haven't looked in detail, but my concern is that implementing > partial object binding (offset, lenght) from vma down to [un]binding > in ppgtt might be a lot of work to include in this SVM patch series. > I believe we need the partial object binding in non-SVM scenario > as well? > Yes, the Vulkan APIs require both partial binding and aliasing. It may be worth pointing out that we're already doing some of this stuff today, although in a rather backwards way. Our non-softpin model for Vulkan uses a memfd which we then map in userspace and turn into a BO via userptr. Due to the way we handle threading in the driver, we end up with multiple BOs pointing to the same overlapping range in the memfd and hence the same pages. That doesn't mean that everything in the path is already set up for what you need but the VA -> page mappings should be. Also, avoiding these kinds of shinanigans is exactly why we want a "real" kernel API for this. :-) > Ok, let me change the interface as below. 
> > struct drm_i915_gem_vm_bind_va > { > /** VA start to bind **/ > __u64 start; > > /** Offset in Object to bind for I915_GEM_VM_BIND_SVM_OBJ type **/ > __u64 offset; > > /** VA length to [un]bind **/ > __u64 length; > > /** Type of memory to [un]bind **/ > __u32 type; > #define I915_GEM_VM_BIND_SVM_OBJ 0 > #define I915_GEM_VM_BIND_SVM_BUFFER 1 > > /** Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ type **/ > __u32 handle; > > /** Flags **/ > __u32 flags; > #define I915_GEM_VM_BIND_UNBIND (1 << 0) > #define I915_GEM_VM_BIND_READONLY (1 << 1) > } > > struct drm_i915_gem_vm_bind { > /** vm to [un]bind **/ > __u32 vm_id; > > /** number of VAs to bind **/ > __u32 num_vas; > > /** Array of VAs to bind **/ > struct drm_i915_gem_vm_bind_va *bind_vas; > > /** User extensions **/ > __u64 extensions; > }; > > When synchronization control is added as extension, it applies to all VAs > in the array. > Does this looks good? > Yes, I think that header looks good. It's probably fine if synchronization comes later. I have two more questions (more for my own education than anything): 1. What is the difference between a SVM object and a buffer? 2. I see a vm_id but there is no context. What (if any) are the synchronization guarantees between the VM_BIND ioctl and EXECBUF? If I VM_BIND followed by EXECBUF is it guaranteed to happen in that order? What if I EXECBUF and then VM_BIND to unbind something? If I VM_BIND while an execbuf is happening but I have some way of delaying the GPU work from the CPU and I unblock it once the VM_BIND comes back, is that ok? If those questions are answered by other patches, feel free to just point me at them instead of answering in detail here. --Jason
On Fri, Dec 13, 2019 at 01:56:04PM -0800, Niranjana Vishwanathapura wrote: > + ctx = i915_gem_context_lookup(file->driver_priv, args->rsvd1); > + if (!ctx || !rcu_access_pointer(ctx->vm)) > + return -ENOENT; > + > + rcu_read_lock(); > + vm = i915_vm_get(ctx->vm); > + rcu_read_unlock(); This looks like wrong use of RCU to me. Getting a rcu lock and not calling rcu_dereference under it is basically guarenteed to be wrong.. Jason
On Sun, Dec 15, 2019 at 08:15:24PM -0800, Niranjan Vishwanathapura wrote: >On Sat, Dec 14, 2019 at 10:56:54AM +0000, Chris Wilson wrote: >>Quoting Niranjana Vishwanathapura (2019-12-13 21:56:04) >>>Shared Virtual Memory (SVM) runtime allocator support allows >>>binding a shared virtual address to a buffer object (BO) in the >>>device page table through an ioctl call. >>> >>>Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> >>>Cc: Jon Bloomfield <jon.bloomfield@intel.com> >>>Cc: Daniel Vetter <daniel.vetter@intel.com> >>>Cc: Sudeep Dutt <sudeep.dutt@intel.com> >>>Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com> >>>--- >>> drivers/gpu/drm/i915/Kconfig | 11 ++++ >>> drivers/gpu/drm/i915/Makefile | 3 + >>> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 58 ++++++++++++++---- >>> drivers/gpu/drm/i915/gem/i915_gem_svm.c | 60 +++++++++++++++++++ >>> drivers/gpu/drm/i915/gem/i915_gem_svm.h | 22 +++++++ >>> drivers/gpu/drm/i915/i915_drv.c | 21 +++++++ >>> drivers/gpu/drm/i915/i915_drv.h | 22 +++++++ >>> drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + >>> drivers/gpu/drm/i915/i915_gem_gtt.h | 13 ++++ >>> include/uapi/drm/i915_drm.h | 27 +++++++++ >>> 10 files changed, 227 insertions(+), 11 deletions(-) >>> create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_svm.c >>> create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_svm.h >>> >>>diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig >>>index ba9595960bbe..c2e48710eec8 100644 >>>--- a/drivers/gpu/drm/i915/Kconfig >>>+++ b/drivers/gpu/drm/i915/Kconfig >>>@@ -137,6 +137,16 @@ config DRM_I915_GVT_KVMGT >>> Choose this option if you want to enable KVMGT support for >>> Intel GVT-g. >>> >>>+config DRM_I915_SVM >>>+ bool "Enable Shared Virtual Memory support in i915" >>>+ depends on STAGING >>>+ depends on DRM_I915 >>>+ default n >>>+ help >>>+ Choose this option if you want Shared Virtual Memory (SVM) >>>+ support in i915. 
With SVM support, one can share the virtual >>>+ address space between a process and the GPU. >>>+ >>> menu "drm/i915 Debugging" >>> depends on DRM_I915 >>> depends on EXPERT >>>diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile >>>index e0fd10c0cfb8..75fe45633779 100644 >>>--- a/drivers/gpu/drm/i915/Makefile >>>+++ b/drivers/gpu/drm/i915/Makefile >>>@@ -153,6 +153,9 @@ i915-y += \ >>> intel_region_lmem.o \ >>> intel_wopcm.o >>> >>>+# SVM code >>>+i915-$(CONFIG_DRM_I915_SVM) += gem/i915_gem_svm.o >>>+ >>> # general-purpose microcontroller (GuC) support >>> obj-y += gt/uc/ >>> i915-y += gt/uc/intel_uc.o \ >>>diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >>>index 5003e616a1ad..af360238a392 100644 >>>--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >>>+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >>>@@ -2836,10 +2836,14 @@ int >>> i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, >>> struct drm_file *file) >>> { >>>+ struct drm_i915_gem_exec_object2 *exec2_list, *exec2_list_user; >>> struct drm_i915_gem_execbuffer2 *args = data; >>>- struct drm_i915_gem_exec_object2 *exec2_list; >>>- struct drm_syncobj **fences = NULL; >>> const size_t count = args->buffer_count; >>>+ struct drm_syncobj **fences = NULL; >>>+ unsigned int i = 0, svm_count = 0; >>>+ struct i915_address_space *vm; >>>+ struct i915_gem_context *ctx; >>>+ struct i915_svm_obj *svm_obj; >>> int err; >>> >>> if (!check_buffer_count(count)) { >>>@@ -2851,15 +2855,46 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, >>> if (err) >>> return err; >>> >>>+ ctx = i915_gem_context_lookup(file->driver_priv, args->rsvd1); >>>+ if (!ctx || !rcu_access_pointer(ctx->vm)) >>>+ return -ENOENT; >> >>This is just hopelessly wrong. 
>> >>For persistence, the _ce_->vm will have a list of must-be-present >>vma, with a flag for whether they need prefaulting (!svm everything must >>be prefaulted obviously). Then during reservation we ensure that all those >>persistent vma are in place (so we probably use an eviction list to keep >>track of those we need to instantiate on this execbuf). We don't even >>want to individually track activity on those vma, preferring to assume >>they are used by every request and so on change they need serialising >>[for explicit uAPI unbind, where possible we strive to do it async for >>endless, or at least sync against iova semaphore] against the last request >>in the vm (so we need a vm->active). However, we do need an EXT_EXTENSION >>to mark writes for implicit fencing (e.g. exported dmabuf) to replace >>the information lost from execobject[] >> > >I did not understand some points above. >I am no expert here, and appreciate the feedback. >My understanding is that [excluding endless batch buffer scenario which >is not supported in this patch series,] VM_BIND is no different than the >soft-pinning of objects we have today in the execbuf path. Hence the idea >here is to add those VM_BIND objects to the execobject[] and let the >execbuffer path to take care of the rest. Persistence of bindings across >multiple requests is something not considered. Do we need this flag in >execobject[] as well in execbuff path (with & without soft-pinning)? >Other than that, we do have a list of VM_BIND objects in a per 'vm' list >as you are suggesting above. >Let me sync with you to better understand this. > Ok, we discussed it offline. I will look into some of the requirement/usecases above to capture change required to uapi (if any) including around synchronization between VM_BIND and execbuff paths. 
Thanks, Niranjana >>>+struct drm_i915_gem_vm_bind { >>>+ /** VA start to bind **/ >>>+ __u64 start; >> >>iova; >>offset; /* into handle */ >>length; /* from offset */ >> > >Here iova is same as 'start' above? > >>>+ >>>+ /** Type of memory to [un]bind **/ >>>+ __u32 type; >>>+#define I915_GEM_VM_BIND_SVM_OBJ 0 >>>+ >>>+ /** Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ type **/ >>>+ __u32 handle; >>>+ >>>+ /** vm to [un]bind **/ >>>+ __u32 vm_id; >>>+ >>>+ /** Flags **/ >>>+ __u32 flags; >>>+#define I915_GEM_VM_BIND_UNBIND (1 << 0) >>>+#define I915_GEM_VM_BIND_READONLY (1 << 1) >> >>And don't forget extensions so that we can define the synchronisation >>controls. > >OK. > >Niranjana >>-Chris
On Tue, Dec 17, 2019 at 12:01:26PM -0600, Jason Ekstrand wrote: > On Sun, Dec 15, 2019 at 10:24 PM Niranjan Vishwanathapura > <niranjana.vishwanathapura@intel.com> wrote: > > On Sat, Dec 14, 2019 at 10:31:37AM +0000, Chris Wilson wrote: > >Quoting Jason Ekstrand (2019-12-14 00:36:19) > >> On Fri, Dec 13, 2019 at 5:24 PM Niranjan Vishwanathapura < > >> niranjana.vishwanathapura@intel.com> wrote: > >> > >> On Fri, Dec 13, 2019 at 04:58:42PM -0600, Jason Ekstrand wrote: > >> > > >> > +/** > >> > + * struct drm_i915_gem_vm_bind > >> > + * > >> > + * Bind an object in a vm's page table. > >> > > >> > First off, this is something I've wanted for a while for > Vulkan, it's > >> just > >> > never made its way high enough up the priority list. > However, it's > >> going > >> > to have to come one way or another soon. I'm glad to see > kernel API > >> for > >> > this being proposed. > >> > I do, however, have a few high-level comments/questions about > the API: > >> > 1. In order to be useful for sparse memory support, the API > has to go > >> the > >> > other way around so that it binds a VA range to a range > within the BO. > >> It > >> > also needs to be able to handle overlapping where two > different VA > >> ranges > >> > may map to the same underlying bytes in the BO. This likely > means that > >> > unbind needs to also take a VA range and only unbind that > range. > >> > 2. If this is going to be useful for managing GL's address > space where > >> we > >> > have lots of BOs, we probably want it to take a list of > ranges so we > >> > aren't making one ioctl for each thing we want to bind. > >> > >> Hi Jason, > >> > >> Yah, some of these requirements came up. > >> > >> > >> Yes, I have raised them every single time an API like this has come > across my > >> e-mail inbox for years and they continue to get ignored. Why are we > landing an > >> API that we know isn't the API we want especially when it's pretty > obvious > >> roughly what the API we want is? 
It may be less time in the short > term, but > >> long-term it means two ioctls and two implementations in i915, IGT > tests for > >> both code paths, and code in all UMDs to call one or the other > depending on > >> what kernel you're running on, and we have to maintain all that code > going > >> forward forever. Sure, that's a price we pay today for a variety of > things but > >> that's because they all seemed like the right thing at the time. > Landing the > >> wrong API when we know it's the wrong API seems foolish. > > > >Exactly. This is not even close to the uAPI we need. Reposting an RFC > >without taking in the concerns last time (or the time before that, or > >the time before that...) suggests that you aren't really requesting for > >comments at all. > > Thanks Jason for detailed exlanation. > Chris, all comments and guidance are much appreciated :) > > I haven't looked in detail, but my concern is that implementing > partial object binding (offset, lenght) from vma down to [un]binding > in ppgtt might be a lot of work to include in this SVM patch series. > I believe we need the partial object binding in non-SVM scenario > as well? > > Yes, the Vulkan APIs require both partial binding and aliasing. > It may be worth pointing out that we're already doing some of this stuff > today, although in a rather backwards way. Our non-softpin model for > Vulkan uses a memfd which we then map in userspace and turn into a BO via > userptr. Due to the way we handle threading in the driver, we end up with > multiple BOs pointing to the same overlapping range in the memfd and hence > the same pages. That doesn't mean that everything in the path is already > set up for what you need but the VA -> page mappings should be. Also, > avoiding these kinds of shinanigans is exactly why we want a "real" kernel > API for this. :-) > Ok thanks Jason for the explantion. Will look into supporting this here. > > Ok, let me change the interface as below. 
> > struct drm_i915_gem_vm_bind_va > { > /** VA start to bind **/ > __u64 start; > > /** Offset in Object to bind for I915_GEM_VM_BIND_SVM_OBJ type > **/ > __u64 offset; > > /** VA length to [un]bind **/ > __u64 length; > > /** Type of memory to [un]bind **/ > __u32 type; > #define I915_GEM_VM_BIND_SVM_OBJ 0 > #define I915_GEM_VM_BIND_SVM_BUFFER 1 > > /** Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ type > **/ > __u32 handle; > > /** Flags **/ > __u32 flags; > #define I915_GEM_VM_BIND_UNBIND (1 << 0) > #define I915_GEM_VM_BIND_READONLY (1 << 1) > } > > struct drm_i915_gem_vm_bind { > /** vm to [un]bind **/ > __u32 vm_id; > > /** number of VAs to bind **/ > __u32 num_vas; > > /** Array of VAs to bind **/ > struct drm_i915_gem_vm_bind_va *bind_vas; > > /** User extensions **/ > __u64 extensions; > }; > > When synchronization control is added as extension, it applies to all > VAs in the array. > Does this looks good? > > Yes, I think that header looks good. It's probably fine if > synchronization comes later. > I have two more questions (more for my own education than anything): > 1. What is the difference between a SVM object and a buffer? SVM object is the GEM BO. By buffer I mean system allocated memory (say malloc()). I have some documentation in patch 01. https://lists.freedesktop.org/archives/intel-gfx/2019-December/223481.html > 2. I see a vm_id but there is no context. What (if any) are the > synchronization guarantees between the VM_BIND ioctl and EXECBUF? If I > VM_BIND followed by EXECBUF is it guaranteed to happen in that order? > What if I EXECBUF and then VM_BIND to unbind something? If I VM_BIND > while an execbuf is happening but I have some way of delaying the GPU work > from the CPU and I unblock it once the VM_BIND comes back, is that ok? > If those questions are answered by other patches, feel free to just point > me at them instead of answering in detail here. Binding/unbinding is w.r.t to a device page table (hence the vm_id). 
With current implementation, Yes, EXECBUF after the VM_BIND will see those binds and ensure that those VA ranges are bound in device page table. VM_BIND to bind/unbind after issuing EXECBUF which is already running (e.g., endless batch buffer), is not currently supported by this patch. But yes, I expect your above scenario should be allowed and supported eventually. I agree we need to set right expectation here for some of the use cases. Let me look into this synchronization b/w two paths a bit more and sync with you. Thanks, Niranjana > --Jason
On Tue, Dec 17, 2019 at 08:18:21PM +0000, Jason Gunthorpe wrote: >On Fri, Dec 13, 2019 at 01:56:04PM -0800, Niranjana Vishwanathapura wrote: > >> + ctx = i915_gem_context_lookup(file->driver_priv, args->rsvd1); >> + if (!ctx || !rcu_access_pointer(ctx->vm)) >> + return -ENOENT; >> + >> + rcu_read_lock(); >> + vm = i915_vm_get(ctx->vm); >> + rcu_read_unlock(); > >This looks like wrong use of RCU to me. > >Getting a rcu lock and not calling rcu_dereference under it is >basically guarenteed to be wrong.. > Oops, yah, I should be just calling i915_gem_context_get_vm_rcu(). Thanks, Niranjana >Jason
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index ba9595960bbe..c2e48710eec8 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -137,6 +137,16 @@ config DRM_I915_GVT_KVMGT Choose this option if you want to enable KVMGT support for Intel GVT-g. +config DRM_I915_SVM + bool "Enable Shared Virtual Memory support in i915" + depends on STAGING + depends on DRM_I915 + default n + help + Choose this option if you want Shared Virtual Memory (SVM) + support in i915. With SVM support, one can share the virtual + address space between a process and the GPU. + menu "drm/i915 Debugging" depends on DRM_I915 depends on EXPERT diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e0fd10c0cfb8..75fe45633779 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -153,6 +153,9 @@ i915-y += \ intel_region_lmem.o \ intel_wopcm.o +# SVM code +i915-$(CONFIG_DRM_I915_SVM) += gem/i915_gem_svm.o + # general-purpose microcontroller (GuC) support obj-y += gt/uc/ i915-y += gt/uc/intel_uc.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 5003e616a1ad..af360238a392 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2836,10 +2836,14 @@ int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_gem_exec_object2 *exec2_list, *exec2_list_user; struct drm_i915_gem_execbuffer2 *args = data; - struct drm_i915_gem_exec_object2 *exec2_list; - struct drm_syncobj **fences = NULL; const size_t count = args->buffer_count; + struct drm_syncobj **fences = NULL; + unsigned int i = 0, svm_count = 0; + struct i915_address_space *vm; + struct i915_gem_context *ctx; + struct i915_svm_obj *svm_obj; int err; if (!check_buffer_count(count)) { @@ -2851,15 +2855,46 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void 
*data, if (err) return err; + ctx = i915_gem_context_lookup(file->driver_priv, args->rsvd1); + if (!ctx || !rcu_access_pointer(ctx->vm)) + return -ENOENT; + + rcu_read_lock(); + vm = i915_vm_get(ctx->vm); + rcu_read_unlock(); + +alloc_again: + svm_count = vm->svm_count; /* Allocate an extra slot for use by the command parser */ - exec2_list = kvmalloc_array(count + 1, eb_element_size(), + exec2_list = kvmalloc_array(count + svm_count + 1, eb_element_size(), __GFP_NOWARN | GFP_KERNEL); if (exec2_list == NULL) { DRM_DEBUG("Failed to allocate exec list for %zd buffers\n", - count); + count + svm_count); return -ENOMEM; } - if (copy_from_user(exec2_list, + mutex_lock(&vm->mutex); + if (svm_count != vm->svm_count) { + mutex_unlock(&vm->mutex); + kvfree(exec2_list); + goto alloc_again; + } + + list_for_each_entry(svm_obj, &vm->svm_list, link) { + memset(&exec2_list[i], 0, sizeof(*exec2_list)); + exec2_list[i].handle = svm_obj->handle; + exec2_list[i].offset = svm_obj->offset; + exec2_list[i].flags = EXEC_OBJECT_PINNED | + EXEC_OBJECT_SUPPORTS_48B_ADDRESS; + i++; + } + exec2_list_user = &exec2_list[i]; + args->buffer_count += svm_count; + mutex_unlock(&vm->mutex); + i915_vm_put(vm); + i915_gem_context_put(ctx); + + if (copy_from_user(exec2_list_user, u64_to_user_ptr(args->buffers_ptr), sizeof(*exec2_list) * count)) { DRM_DEBUG("copy %zd exec entries failed\n", count); @@ -2876,6 +2911,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, } err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); + args->buffer_count -= svm_count; /* * Now that we have begun execution of the batchbuffer, we ignore @@ -2886,7 +2922,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, if (args->flags & __EXEC_HAS_RELOC) { struct drm_i915_gem_exec_object2 __user *user_exec_list = u64_to_user_ptr(args->buffers_ptr); - unsigned int i; /* Copy the new buffer offsets back to the user's exec list. 
*/ /* @@ -2900,13 +2935,14 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, goto end; for (i = 0; i < args->buffer_count; i++) { - if (!(exec2_list[i].offset & UPDATE)) + u64 *offset = &exec2_list_user[i].offset; + + if (!(*offset & UPDATE)) continue; - exec2_list[i].offset = - gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK); - unsafe_put_user(exec2_list[i].offset, - &user_exec_list[i].offset, + *offset = gen8_canonical_addr(*offset & + PIN_OFFSET_MASK); + unsafe_put_user(*offset, &user_exec_list[i].offset, end_user); } end_user: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_svm.c b/drivers/gpu/drm/i915/gem/i915_gem_svm.c new file mode 100644 index 000000000000..882fe56138e2 --- /dev/null +++ b/drivers/gpu/drm/i915/gem/i915_gem_svm.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2019 Intel Corporation + */ + +#include "i915_drv.h" +#include "i915_gem_gtt.h" +#include "i915_gem_lmem.h" + +int i915_gem_vm_bind_svm_obj(struct i915_address_space *vm, + struct drm_i915_gem_vm_bind *args, + struct drm_file *file) +{ + struct i915_svm_obj *svm_obj, *tmp; + struct drm_i915_gem_object *obj; + int ret = 0; + + obj = i915_gem_object_lookup(file, args->handle); + if (!obj) + return -ENOENT; + + /* For dgfx, ensure the obj is in device local memory only */ + if (IS_DGFX(vm->i915) && !i915_gem_object_is_lmem(obj)) + return -EINVAL; + + /* FIXME: Need to handle case with unending batch buffers */ + if (!(args->flags & I915_GEM_VM_BIND_UNBIND)) { + svm_obj = kmalloc(sizeof(*svm_obj), GFP_KERNEL); + if (!svm_obj) { + ret = -ENOMEM; + goto put_obj; + } + svm_obj->handle = args->handle; + svm_obj->offset = args->start; + } + + mutex_lock(&vm->mutex); + if (!(args->flags & I915_GEM_VM_BIND_UNBIND)) { + list_add(&svm_obj->link, &vm->svm_list); + vm->svm_count++; + } else { + /* + * FIXME: Need to handle case where object is migrated/closed + * without unbinding first. 
+		 */
+		list_for_each_entry_safe(svm_obj, tmp, &vm->svm_list, link) {
+			if (svm_obj->handle != args->handle)
+				continue;
+
+			list_del_init(&svm_obj->link);
+			vm->svm_count--;
+			kfree(svm_obj);
+			break;
+		}
+	}
+	mutex_unlock(&vm->mutex);
+put_obj:
+	i915_gem_object_put(obj);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_svm.h b/drivers/gpu/drm/i915/gem/i915_gem_svm.h
new file mode 100644
index 000000000000..d60b35c7d21a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_svm.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_SVM_H
+#define __I915_GEM_SVM_H
+
+#include "i915_drv.h"
+
+#if defined(CONFIG_DRM_I915_SVM)
+int i915_gem_vm_bind_svm_obj(struct i915_address_space *vm,
+			     struct drm_i915_gem_vm_bind *args,
+			     struct drm_file *file);
+#else
+static inline int i915_gem_vm_bind_svm_obj(struct i915_address_space *vm,
+					   struct drm_i915_gem_vm_bind *args,
+					   struct drm_file *file)
+{ return -EOPNOTSUPP; }
+#endif
+
+#endif /* __I915_GEM_SVM_H */
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 2a11f60c4fd2..d452ea8e40b3 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2680,6 +2680,26 @@ i915_gem_reject_pin_ioctl(struct drm_device *dev, void *data,
 	return -ENODEV;
 }
 
+static int i915_gem_vm_bind_ioctl(struct drm_device *dev, void *data,
+				  struct drm_file *file)
+{
+	struct drm_i915_gem_vm_bind *args = data;
+	struct i915_address_space *vm;
+	int ret = -EINVAL;
+
+	vm = i915_gem_address_space_lookup(file->driver_priv, args->vm_id);
+	if (unlikely(!vm))
+		return -ENOENT;
+
+	switch (args->type) {
+	case I915_GEM_VM_BIND_SVM_OBJ:
+		ret = i915_gem_vm_bind_svm_obj(vm, args, file);
+	}
+
+	i915_vm_put(vm);
+	return ret;
+}
+
 static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_FLUSH, drm_noop, DRM_AUTH),
@@ -2739,6
+2759,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VM_BIND, i915_gem_vm_bind_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver driver = {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ce130e1f1e47..2d0a7cd2dc44 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1909,6 +1909,28 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
 	return ctx;
 }
 
+static inline struct i915_address_space *
+__i915_gem_address_space_lookup_rcu(struct drm_i915_file_private *file_priv,
+				    u32 id)
+{
+	return idr_find(&file_priv->vm_idr, id);
+}
+
+static inline struct i915_address_space *
+i915_gem_address_space_lookup(struct drm_i915_file_private *file_priv,
+			      u32 id)
+{
+	struct i915_address_space *vm;
+
+	rcu_read_lock();
+	vm = __i915_gem_address_space_lookup_rcu(file_priv, id);
+	if (vm)
+		vm = i915_vm_get(vm);
+	rcu_read_unlock();
+
+	return vm;
+}
+
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
 					  u64 min_size, u64 alignment,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index be36719e7987..7d4f5fa84b02 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -586,6 +586,7 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass)
 	stash_init(&vm->free_pages);
 
 	INIT_LIST_HEAD(&vm->bound_list);
+	INIT_LIST_HEAD(&vm->svm_list);
 }
 
 static int __setup_page_dma(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 31a4a96ddd0d..7c1b54c9677d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -285,6 +285,13 @@ struct pagestash {
 	struct pagevec pvec;
 };
 
+struct i915_svm_obj {
+	/** This obj's place in the SVM object list */
+	struct list_head link;
+	u32 handle;
+	u64 offset;
+};
+
 struct i915_address_space {
 	struct kref ref;
 	struct rcu_work rcu;
@@ -329,6 +336,12 @@ struct i915_address_space {
 	 */
 	struct list_head bound_list;
 
+	/**
+	 * List of SVM bind objects.
+	 */
+	struct list_head svm_list;
+	unsigned int svm_count;
+
 	struct pagestash free_pages;
 
 	/* Global GTT */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 20314eea632a..e10d7bf2cf9f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -360,6 +360,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_VM_CREATE		0x3a
 #define DRM_I915_GEM_VM_DESTROY	0x3b
 #define DRM_I915_GEM_OBJECT_SETPARAM	DRM_I915_GEM_CONTEXT_SETPARAM
+#define DRM_I915_GEM_VM_BIND		0x3c
 /* Must be kept compact -- no holes */
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
@@ -424,6 +425,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_VM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
 #define DRM_IOCTL_I915_GEM_VM_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
 #define DRM_IOCTL_I915_GEM_OBJECT_SETPARAM	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_OBJECT_SETPARAM, struct drm_i915_gem_object_param)
+#define DRM_IOCTL_I915_GEM_VM_BIND	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_BIND, struct drm_i915_gem_vm_bind)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -2300,6 +2302,31 @@ struct drm_i915_query_perf_config {
 	__u8 data[];
 };
 
+/**
+ * struct drm_i915_gem_vm_bind
+ *
+ * Bind an object in a vm's page table.
+ */
+struct drm_i915_gem_vm_bind {
+	/** @start: VA start to bind */
+	__u64 start;
+
+	/** @type: Type of memory to [un]bind */
+	__u32 type;
+#define I915_GEM_VM_BIND_SVM_OBJ      0
+
+	/** @handle: Object handle to [un]bind for I915_GEM_VM_BIND_SVM_OBJ type */
+	__u32 handle;
+
+	/** @vm_id: vm to [un]bind */
+	__u32 vm_id;
+
+	/** @flags: Flags */
+	__u32 flags;
+#define I915_GEM_VM_BIND_UNBIND      (1 << 0)
+#define I915_GEM_VM_BIND_READONLY    (1 << 1)
+};
+
 #if defined(__cplusplus)
 }
 #endif
Shared Virtual Memory (SVM) runtime allocator support allows binding a shared virtual address to a buffer object (BO) in the device page table through an ioctl call. Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Sudeep Dutt <sudeep.dutt@intel.com> Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com> --- drivers/gpu/drm/i915/Kconfig | 11 ++++ drivers/gpu/drm/i915/Makefile | 3 + .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 58 ++++++++++++++---- drivers/gpu/drm/i915/gem/i915_gem_svm.c | 60 +++++++++++++++++++ drivers/gpu/drm/i915/gem/i915_gem_svm.h | 22 +++++++ drivers/gpu/drm/i915/i915_drv.c | 21 +++++++ drivers/gpu/drm/i915/i915_drv.h | 22 +++++++ drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + drivers/gpu/drm/i915/i915_gem_gtt.h | 13 ++++ include/uapi/drm/i915_drm.h | 27 +++++++++ 10 files changed, 227 insertions(+), 11 deletions(-) create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_svm.c create mode 100644 drivers/gpu/drm/i915/gem/i915_gem_svm.h