diff mbox

[24/24] drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl

Message ID 1346340679-7699-25-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State Superseded
Headers show

Commit Message

Chris Wilson Aug. 30, 2012, 3:31 p.m. UTC
By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has widespread implications, from
faster rendering of partial software fallbacks (xterm!) to faster
pipelining of texture data (such as pixel buffer objects in GL).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile           |    1 +
 drivers/gpu/drm/i915/i915_dma.c         |    1 +
 drivers/gpu/drm/i915/i915_drv.h         |   14 +++
 drivers/gpu/drm/i915/i915_gem.c         |    6 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c |  209 +++++++++++++++++++++++++++++++
 include/drm/i915_drm.h                  |   15 +++
 6 files changed, 243 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0f2c549..754d665 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -14,6 +14,7 @@  i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
 	  intel_display.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index f2e3439..837fc63 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1883,6 +1883,7 @@  struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_MASTER|DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ec8c0fc..1467cc1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1050,6 +1050,18 @@  struct drm_i915_gem_object {
 	atomic_t pending_flip;
 };
 
+struct i915_gem_userptr_object {
+	struct drm_i915_gem_object gem; /* embedded base object; container_of() target */
+	uintptr_t user_ptr; /* userspace start address, copied from the ioctl args */
+	size_t user_size; /* length of the user range in bytes, from the ioctl args */
+	int read_only; /* non-zero when I915_USERPTR_READ_ONLY was passed */
+};
+
+union drm_i915_gem_objects { /* its sizeof sets the "i915_gem_object" slab size */
+	struct drm_i915_gem_object base;
+	struct i915_gem_userptr_object userptr;
+};
+
 inline static bool i915_gem_object_is_prime(struct drm_i915_gem_object *obj)
 {
 	return obj->base.import_attach != NULL;
@@ -1308,6 +1320,8 @@  int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_set_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_get_tiling(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 70f6c53..69972f6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2556,9 +2556,9 @@  i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	/* Avoid an unnecessary call to unbind on rebind. */
 	obj->map_and_fenceable = true;
 
+	obj->gtt_offset -= obj->gtt_space->start;
 	drm_mm_put_block(obj->gtt_space);
 	obj->gtt_space = NULL;
-	obj->gtt_offset = 0;
 
 	return 0;
 }
@@ -3074,7 +3074,7 @@  i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	obj->gtt_offset = obj->gtt_space->start;
+	obj->gtt_offset += obj->gtt_space->start;
 
 	fenceable =
 		obj->gtt_space->size == fence_size &&
@@ -4253,7 +4253,7 @@  i915_gem_load(struct drm_device *dev)
 
 	dev_priv->slab =
 		kmem_cache_create("i915_gem_object",
-				  sizeof(struct drm_i915_gem_object), 0,
+				  sizeof(union drm_i915_gem_objects), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 0000000..8604dad
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,209 @@ 
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/swap.h>
+
+static struct i915_gem_userptr_object *to_userptr_object(struct drm_i915_gem_object *obj)
+{ /* Upcast: obj must be the 'gem' member embedded in a userptr object. */
+	return container_of(obj, struct i915_gem_userptr_object, gem);
+}
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+	int num_pages = obj->base.size >> PAGE_SHIFT; /* object size in pages */
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct page **pvec;
+	int n, pinned, ret;
+	/* Pin the user pages backing obj and publish them as an sg_table in obj->pages. */
+	if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)vmap->user_ptr, vmap->user_size))
+		return -EFAULT;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 */
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL) { /* quiet kmalloc attempt failed; fall back to drm_malloc_ab */
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+		if (pvec == NULL)
+			return -ENOMEM;
+	}
+
+	pinned = __get_user_pages_fast(vmap->user_ptr, num_pages,
+				       !vmap->read_only, pvec);
+	if (pinned < num_pages) { /* fast path fell short; pin the rest under mmap_sem */
+		struct mm_struct *mm = current->mm;
+
+		mutex_unlock(&obj->base.dev->struct_mutex); /* dropped while mmap_sem is held */
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm,
+				     vmap->user_ptr + (pinned << PAGE_SHIFT),
+				     num_pages - pinned,
+				     !vmap->read_only, 0,
+				     pvec + pinned,
+				     NULL);
+		up_read(&mm->mmap_sem);
+		mutex_lock(&obj->base.dev->struct_mutex);
+		if (ret > 0) /* a negative ret is not propagated; it becomes -EFAULT below */
+			pinned += ret;
+
+		if (obj->pages || pinned < num_pages) {
+			ret = obj->pages ? 0 : -EFAULT; /* pages set while unlocked: success, drop ours */
+			goto cleanup_pinned;
+		}
+	}
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL) {
+		ret = -ENOMEM;
+		goto cleanup_pinned;
+	}
+
+	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto cleanup_st;
+	}
+
+	for_each_sg(st->sgl, sg, num_pages, n) /* one PAGE_SIZE entry per pinned page */
+		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+	drm_free_large(pvec); /* the sg table now holds the page pointers */
+
+	obj->pages = st;
+	return 0;
+
+cleanup_st:
+	kfree(st);
+cleanup_pinned:
+	release_pages(pvec, pinned, 0); /* undo the pins taken so far */
+	drm_free_large(pvec);
+	return ret;
+}
+
+/* Drop the page references held via obj->pages and free that sg table. */
+static void i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct sg_table *table = obj->pages;
+	struct scatterlist *entry;
+	int idx;
+
+	/* Dirty state is only kept for objects marked I915_MADV_WILLNEED. */
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(table->sgl, entry, table->nents, idx) {
+		struct page *pg = sg_page(entry);
+
+		if (obj->dirty)
+			set_page_dirty(pg);
+		mark_page_accessed(pg);
+		page_cache_release(pg);
+	}
+	obj->dirty = 0;
+	sg_free_table(table);
+	kfree(table);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages, /* pins the user pages into obj->pages */
+	.put_pages = i915_gem_userptr_put_pages, /* releases them and frees the sg table */
+};
+
+/**
+ * Wraps user memory in a new GEM object; -EINVAL on empty size/unknown flags.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct i915_gem_userptr_object *obj;
+	loff_t first_data_page, last_data_page;
+	int num_pages, ret;
+	u32 handle;
+
+	if (args->user_size == 0 || (args->flags & ~I915_USERPTR_READ_ONLY))
+		return -EINVAL; /* empty range, or flag bits this ioctl does not define */
+
+	first_data_page = args->user_ptr / PAGE_SIZE;
+	last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
+	num_pages = last_data_page - first_data_page + 1;
+	if (num_pages * PAGE_SIZE > dev_priv->mm.gtt_total)
+		return -E2BIG; /* the page span exceeds the total GTT size */
+
+	ret = fault_in_multipages_readable((char __user *)(uintptr_t)args->user_ptr,
+					   args->user_size);
+	if (ret)
+		return ret;
+
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	if (drm_gem_private_object_init(dev, &obj->gem.base,
+					num_pages * PAGE_SIZE)) {
+		i915_gem_object_free(&obj->gem);
+		return -ENOMEM;
+	}
+
+	i915_gem_object_init(&obj->gem, &i915_gem_userptr_ops);
+	obj->gem.cache_level = I915_CACHE_LLC_MLC;
+	obj->gem.gtt_offset = offset_in_page(args->user_ptr); /* sub-page start offset */
+	obj->user_ptr = args->user_ptr;
+	obj->user_size = args->user_size;
+	obj->read_only = args->flags & I915_USERPTR_READ_ONLY;
+
+	ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
+	if (ret) {
+		drm_gem_object_release(&obj->gem.base);
+		dev_priv->mm.object_count--;
+		dev_priv->mm.object_memory -= obj->gem.base.size;
+		i915_gem_object_free(&obj->gem);
+		return ret;
+	}
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference(&obj->gem.base);
+
+	args->handle = handle;
+	return 0;
+}
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 05e24d3..0a0b881 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -206,6 +206,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SET_CACHEING	0x2f
 #define DRM_I915_GEM_GET_CACHEING	0x30
 #define DRM_I915_REG_READ		0x31
+#define DRM_I915_GEM_USERPTR		0x32
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -255,6 +256,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -474,6 +476,19 @@  struct drm_i915_gem_mmap_gtt {
 	__u64 offset;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr; /* start of the user range; need not be page aligned */
+	__u32 user_size; /* length of the user range in bytes */
+	__u32 flags; /* I915_USERPTR_* bits */
+#define I915_USERPTR_READ_ONLY 0x1
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle; /* NOTE(review): members total 20 bytes; confirm tail padding matches on 32/64-bit */
+};
+
 struct drm_i915_gem_set_domain {
 	/** Handle for the object */
 	__u32 handle;