@@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
i915_gem_gtt.o \
i915_gem_stolen.o \
i915_gem_tiling.o \
+ i915_gem_userptr.o \
i915_sysfs.o \
i915_trace_points.o \
intel_display.o \
@@ -1899,6 +1899,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+ DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_ROOT_ONLY|DRM_UNLOCKED),
};
int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
@@ -40,6 +40,7 @@
#include <linux/backlight.h>
#include <linux/intel-iommu.h>
#include <linux/kref.h>
+#include <linux/mmu_notifier.h>
/* General customization:
*/
@@ -927,6 +928,7 @@ struct drm_i915_gem_object_ops {
*/
int (*get_pages)(struct drm_i915_gem_object *);
void (*put_pages)(struct drm_i915_gem_object *);
+ void (*release)(struct drm_i915_gem_object *);
};
struct drm_i915_gem_object {
@@ -1072,6 +1074,23 @@ struct drm_i915_gem_object {
atomic_t pending_flip;
};
+struct i915_gem_userptr_object {
+ struct drm_i915_gem_object gem;
+ uintptr_t user_ptr;
+ size_t user_size;
+ int read_only;
+
+ struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+ struct mmu_notifier mn;
+#endif
+};
+
+union drm_i915_gem_objects {
+ struct drm_i915_gem_object base;
+ struct i915_gem_userptr_object userptr;
+};
+
inline static bool i915_gem_object_is_prime(struct drm_i915_gem_object *obj)
{
return obj->base.import_attach != NULL;
@@ -1333,6 +1352,8 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
int i915_gem_set_tiling(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_get_tiling(struct drm_device *dev, void *data,
@@ -2565,9 +2565,9 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
/* Avoid an unnecessary call to unbind on rebind. */
obj->map_and_fenceable = true;
+ obj->gtt_offset -= obj->gtt_space->start;
drm_mm_put_block(obj->gtt_space);
obj->gtt_space = NULL;
- obj->gtt_offset = 0;
return 0;
}
@@ -3083,7 +3083,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
- obj->gtt_offset = obj->gtt_space->start;
+ obj->gtt_offset += obj->gtt_space->start;
fenceable =
obj->gtt_space->size == fence_size &&
@@ -3876,6 +3876,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
if (obj->base.import_attach)
drm_prime_gem_destroy(&obj->base, NULL);
+ if (obj->ops->release)
+ obj->ops->release(obj);
+
drm_gem_object_release(&obj->base);
i915_gem_info_remove_obj(dev_priv, obj->base.size);
@@ -4263,7 +4266,7 @@ i915_gem_load(struct drm_device *dev)
dev_priv->slab =
kmem_cache_create("i915_gem_object",
- sizeof(struct drm_i915_gem_object), 0,
+ sizeof(union drm_i915_gem_objects), 0,
SLAB_HWCACHE_ALIGN,
NULL);
new file mode 100644
@@ -0,0 +1,277 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+
+static struct i915_gem_userptr_object *to_userptr_object(struct drm_i915_gem_object *obj)
+{
+ return container_of(obj, struct i915_gem_userptr_object, gem);
+}
+
+#if defined(CONFIG_MMU_NOTIFIER)
+static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
+ struct mm_struct *mm)
+{
+ struct i915_gem_userptr_object *vmap;
+
+ vmap = container_of(mn, struct i915_gem_userptr_object, mn);
+ BUG_ON(vmap->mm != mm);
+ vmap->mm = NULL;
+
+ /* XXX Schedule an eventual unbind? E.g. hook into retire requests?
+ * However, locking will be complicated.
+ */
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+ .release = i915_gem_userptr_mn_release,
+};
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+ if (vmap->mm) {
+ mmu_notifier_unregister(&vmap->mn, vmap->mm);
+ BUG_ON(vmap->mm);
+ }
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+ vmap->mn.ops = &i915_gem_userptr_notifier;
+ return mmu_notifier_register(&vmap->mn, vmap->mm);
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+ return 0;
+}
+#endif
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+ struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+ int num_pages = obj->base.size >> PAGE_SHIFT;
+ struct sg_table *st;
+ struct scatterlist *sg;
+ struct page **pvec;
+ int n, pinned, ret;
+
+ if (vmap->mm == NULL)
+ return -EFAULT;
+
+ if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
+ (char __user *)vmap->user_ptr, vmap->user_size))
+ return -EFAULT;
+
+ /* If userspace should engineer that these pages are replaced in
+ * the vma between us binding these pages into the GTT and completion
+ * of rendering... Their loss. If they change the mapping of their
+ * pages they need to create a new bo to point to the new vma.
+ *
+ * However, that still leaves open the possibility of the vma
+ * being copied upon fork. Which falls under the same userspace
+ * synchronisation issue as a regular bo, except that this time
+ * the process may not be expecting that a particular piece of
+ * memory is tied to the GPU.
+ */
+
+ pvec = kmalloc(num_pages*sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+ if (pvec == NULL) {
+ pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+ if (pvec == NULL)
+ return -ENOMEM;
+ }
+
+ pinned = 0;
+ if (vmap->mm == current->mm)
+ pinned = __get_user_pages_fast(vmap->user_ptr, num_pages,
+ !vmap->read_only, pvec);
+ if (pinned < num_pages) {
+ struct mm_struct *mm = vmap->mm;
+ ret = 0;
+ mutex_unlock(&obj->base.dev->struct_mutex);
+ down_read(&mm->mmap_sem);
+ if (vmap->mm != NULL)
+ ret = get_user_pages(current, mm,
+ vmap->user_ptr + (pinned << PAGE_SHIFT),
+ num_pages - pinned,
+ !vmap->read_only, 0,
+ pvec + pinned,
+ NULL);
+ up_read(&mm->mmap_sem);
+ mutex_lock(&obj->base.dev->struct_mutex);
+ if (ret > 0)
+ pinned += ret;
+
+ if (obj->pages || pinned < num_pages) {
+ ret = obj->pages ? 0 : -EFAULT;
+ goto cleanup_pinned;
+ }
+ }
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (st == NULL) {
+ ret = -ENOMEM;
+ goto cleanup_pinned;
+ }
+
+ if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto cleanup_st;
+ }
+
+ for_each_sg(st->sgl, sg, num_pages, n)
+ sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+ drm_free_large(pvec);
+
+ obj->pages = st;
+ return 0;
+
+cleanup_st:
+ kfree(st);
+cleanup_pinned:
+ release_pages(pvec, pinned, 0);
+ drm_free_large(pvec);
+ return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+ struct scatterlist *sg;
+ int i;
+
+ if (obj->madv != I915_MADV_WILLNEED)
+ obj->dirty = 0;
+
+ for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+ struct page *page = sg_page(sg);
+
+ if (obj->dirty)
+ set_page_dirty(page);
+
+ mark_page_accessed(page);
+ page_cache_release(page);
+ }
+ obj->dirty = 0;
+
+ sg_free_table(obj->pages);
+ kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+ struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+
+ i915_gem_userptr_release__mmu_notifier(vmap);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+ .get_pages = i915_gem_userptr_get_pages,
+ .put_pages = i915_gem_userptr_put_pages,
+ .release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some user memory.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_gem_userptr *args = data;
+ struct i915_gem_userptr_object *obj;
+ loff_t first_data_page, last_data_page;
+ int num_pages;
+ int ret;
+ u32 handle;
+
+ first_data_page = args->user_ptr / PAGE_SIZE;
+ last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
+ num_pages = last_data_page - first_data_page + 1;
+ if (num_pages * PAGE_SIZE > dev_priv->mm.gtt_total)
+ return -E2BIG;
+
+ ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->user_ptr,
+ args->user_size);
+ if (ret)
+ return ret;
+
+ /* Allocate the new object */
+ obj = i915_gem_object_alloc(dev);
+ if (obj == NULL)
+ return -ENOMEM;
+
+ if (drm_gem_private_object_init(dev, &obj->gem.base,
+ num_pages * PAGE_SIZE)) {
+ i915_gem_object_free(&obj->gem);
+ return -ENOMEM;
+ }
+
+ i915_gem_object_init(&obj->gem, &i915_gem_userptr_ops);
+ obj->gem.cache_level = I915_CACHE_LLC_MLC;
+
+ obj->gem.gtt_offset = offset_in_page(args->user_ptr);
+ obj->user_ptr = args->user_ptr;
+ obj->user_size = args->user_size;
+ obj->read_only = args->flags & I915_USERPTR_READ_ONLY;
+
+ /* And keep a pointer to the current->mm for resolving the user pages
+ * at binding. This means that we need to hook into the mmu_notifier
+ * in order to detect if the mmu is destroyed.
+ */
+ obj->mm = current->mm;
+ ret = i915_gem_userptr_init__mmu_notifier(obj);
+ if (ret)
+ return ret;
+
+ ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_unreference(&obj->gem.base);
+ if (ret)
+ return ret;
+
+ args->handle = handle;
+ return 0;
+}
@@ -206,6 +206,7 @@ typedef struct _drm_i915_sarea {
#define DRM_I915_GEM_SET_CACHEING 0x2f
#define DRM_I915_GEM_GET_CACHEING 0x30
#define DRM_I915_REG_READ 0x31
+#define DRM_I915_GEM_USERPTR 0x32
#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -255,6 +256,7 @@ typedef struct _drm_i915_sarea {
#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
#define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
/* Allow drivers to submit batchbuffers directly to hardware, relying
* on the security mechanisms provided by hardware.
@@ -476,6 +478,19 @@ struct drm_i915_gem_mmap_gtt {
__u64 offset;
};
+struct drm_i915_gem_userptr {
+ __u64 user_ptr;
+ __u32 user_size;
+ __u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+ /**
+ * Returned handle for the object.
+ *
+ * Object handles are nonzero.
+ */
+ __u32 handle;
+};
+
struct drm_i915_gem_set_domain {
/** Handle for the object */
__u32 handle;
By exporting the ability to map user addresses and inserting PTEs
representing their backing pages into the GTT, we can exploit UMA in
order to utilize normal application data as a texture source or even as
a render target (depending upon the capabilities of the chipset). This
has a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications, from
faster rendering of client-side software rasterisers (chromium) and
mitigation of stalls due to readback (firefox) to faster pipelining of
texture data (such as pixel buffer objects in GL).

v2: Compile with CONFIG_MMU_NOTIFIER

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile           |   1 +
 drivers/gpu/drm/i915/i915_dma.c         |   1 +
 drivers/gpu/drm/i915/i915_drv.h         |  21 +++
 drivers/gpu/drm/i915/i915_gem.c         |   9 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c | 277 +++++++++++++++++++++++++++++++
 include/drm/i915_drm.h                  |  15 ++
 6 files changed, 321 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c
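
For illustration only (not part of the patch): a minimal userspace sketch of
how a client might wrap an existing, page-aligned allocation with the new
ioctl. The gem_userptr() helper name is invented for this example, libdrm's
drmIoctl() and the include paths are assumptions, and note that in this
version the ioctl is registered with DRM_ROOT_ONLY, so the caller must have
root privileges:

#include <errno.h>
#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>	/* the uapi header extended above; path may vary */

/* Wrap a page-aligned CPU allocation in a GEM handle via the new ioctl. */
static int gem_userptr(int fd, void *ptr, uint32_t size, int read_only,
		       uint32_t *handle)
{
	struct drm_i915_gem_userptr arg = {
		.user_ptr = (uintptr_t)ptr,
		.user_size = size,
		.flags = read_only ? I915_USERPTR_READ_ONLY : 0,
	};

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return -errno;

	*handle = arg.handle;	/* object handles are nonzero */
	return 0;
}

The returned handle then behaves like any other GEM handle (e.g. as an
execbuffer relocation target), subject to the userspace synchronisation
caveats noted in i915_gem_userptr_get_pages().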