
[PATCH 21/21] drm/i915: Introduce vmap (mapping of user pages into video memory) ioctl

Message ID 1302945465-32115-22-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson April 16, 2011, 9:17 a.m. UTC
By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses: zero-copy downloads to the GPU and efficient readback
make the intermixed streaming of CPU and GPU operations fairly
efficient. This ability has widespread implications, from faster
rendering of partial software fallbacks (xterm!) to faster pipelining
of texture data (such as pixel buffer objects in GL).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dave Airlie <airlied@linux.ie>
---
 drivers/gpu/drm/drm_gem.c            |    3 +-
 drivers/gpu/drm/i915/Makefile        |    1 +
 drivers/gpu/drm/i915/i915_dma.c      |    4 +
 drivers/gpu/drm/i915/i915_drv.h      |   33 +++++++-
 drivers/gpu/drm/i915/i915_gem.c      |   77 +++++++++++------
 drivers/gpu/drm/i915/i915_gem_vmap.c |  149 ++++++++++++++++++++++++++++++++++
 include/drm/i915_drm.h               |   16 ++++
 7 files changed, 254 insertions(+), 29 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_vmap.c
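
As a minimal sketch of how userspace might exercise the new interface,
assuming libdrm's drmIoctl() helper and a device node at /dev/dri/card0
(both illustrative; the ioctl number, parameter and structure below come
from the patched i915_drm.h), the following queries I915_PARAM_HAS_VMAP
and then wraps a page-aligned anonymous allocation as a GEM object:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xf86drm.h>
#include "i915_drm.h"

int main(void)
{
	struct drm_i915_gem_vmap vmap;
	struct drm_i915_getparam gp;
	int fd, has_vmap = 0;
	void *buf;

	fd = open("/dev/dri/card0", O_RDWR);
	if (fd < 0)
		return 1;

	/* Check that the kernel exposes the vmap interface. */
	memset(&gp, 0, sizeof(gp));
	gp.param = I915_PARAM_HAS_VMAP;
	gp.value = &has_vmap;
	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) || !has_vmap)
		return 1;

	/* The user pages must remain valid for the lifetime of the object. */
	if (posix_memalign(&buf, 4096, 1 << 20))
		return 1;

	memset(&vmap, 0, sizeof(vmap));
	vmap.user_ptr = (uintptr_t)buf;
	vmap.user_size = 1 << 20;
	vmap.flags = 0; /* or I915_VMAP_READ_ONLY for pure texture sources */

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_VMAP, &vmap))
		return 1;

	/* vmap.handle is now an ordinary GEM handle backed by user pages. */
	printf("vmap handle %u\n", vmap.handle);
	return 0;
}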

Patch

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 74e4ff5..03ca40a 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -426,7 +426,8 @@  drm_gem_release(struct drm_device *dev, struct drm_file *file_private)
 void
 drm_gem_object_release(struct drm_gem_object *obj)
 {
-	fput(obj->filp);
+	if (obj->filp)
+		fput(obj->filp);
 }
 EXPORT_SYMBOL(drm_gem_object_release);
 
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0ae6a7c..0bbc404 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -12,6 +12,7 @@  i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
 	  i915_gem_execbuffer.o \
 	  i915_gem_gtt.o \
 	  i915_gem_tiling.o \
+	  i915_gem_vmap.o \
 	  i915_trace_points.o \
 	  intel_display.o \
 	  intel_crt.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index d8269f3..3979ed8 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -782,6 +782,9 @@  static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_RELAXED_DELTA:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_VMAP:
+		value = dev_priv->has_gem;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
@@ -2279,6 +2282,7 @@  struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VMAP, i915_gem_vmap_ioctl, DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a8733ac..90eac1d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -721,6 +721,11 @@  enum i915_cache_level {
 	I915_CACHE_LLC_MLC, /* gen6+ */
 };
 
+struct drm_i915_gem_object_ops {
+	int (*get_pages)(struct drm_i915_gem_object *, gfp_t, u32 *offset);
+	void (*put_pages)(struct drm_i915_gem_object *);
+};
+
 struct drm_i915_gem_object {
 	struct drm_gem_object base;
 
@@ -867,6 +872,18 @@  struct drm_i915_gem_object {
 	atomic_t pending_flip;
 };
 
+struct i915_gem_vmap_object {
+	struct drm_i915_gem_object gem;
+	uintptr_t user_ptr;
+	size_t user_size;
+	int read_only;
+};
+
+union drm_i915_gem_objects {
+	struct drm_i915_gem_object base;
+	struct i915_gem_vmap_object vmap;
+};
+
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -1122,6 +1139,8 @@  int __must_check i915_gem_flush_ring(struct intel_ring_buffer *ring,
 				     uint32_t flush_domains);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops);
 void i915_gem_free_object(struct drm_gem_object *obj);
 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     uint32_t alignment,
@@ -1143,7 +1162,19 @@  int i915_gem_dumb_create(struct drm_file *file_priv,
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
 int i915_gem_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
-			  uint32_t handle);			  
+			  uint32_t handle);
+
+/* i915_gem_vmap.c */
+int i915_gem_vmap_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+
+int
+i915_gem_get_user_pages(struct drm_device *dev,
+			unsigned long addr,
+			bool write,
+			int *num_pages,
+			struct page ***pages_out);
+
 /**
  * Returns true if seq1 is later than seq2.
  */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f554273..6cb2331 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -274,7 +274,7 @@  static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
  * Returns: an error code *and* the number of user pages acquired. Even
  * on an error, you must iterate over the returned pages and release them.
  */
-static int
+int
 i915_gem_get_user_pages(struct drm_device *dev,
 			unsigned long addr,
 			bool write,
@@ -1585,12 +1585,13 @@  i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 
 static int
 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
-			      gfp_t gfpmask)
+			      gfp_t gfpmask,
+			      u32 *offset)
 {
-	int page_count, i;
 	struct address_space *mapping;
 	struct inode *inode;
 	struct page *page;
+	int i, page_count;
 
 	/* Get the list of pages out of our struct file.  They'll be pinned
 	 * at this point until we release them.
@@ -1618,6 +1619,7 @@  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
 	if (obj->tiling_mode != I915_TILING_NONE)
 		i915_gem_object_do_bit_17_swizzle(obj);
 
+	*offset = 0;
 	return 0;
 
 err_pages:
@@ -1785,6 +1787,9 @@  i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 {
 	struct inode *inode;
 
+	if (obj->base.filp == NULL)
+		return;
+
 	/* Our goal here is to return as much of the memory as
 	 * is possible back to the system as we are called from OOM.
 	 * To do this we must instruct the shmfs to drop all of its
@@ -2269,6 +2274,7 @@  static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
 int
 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
+	const struct drm_i915_gem_object_ops *ops = obj->base.driver_private;
 	int ret = 0;
 
 	if (obj->gtt_space == NULL)
@@ -2313,7 +2319,7 @@  i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	trace_i915_gem_object_unbind(obj);
 
 	i915_gem_gtt_unbind_object(obj);
-	i915_gem_object_put_pages_gtt(obj);
+	ops->put_pages(obj);
 
 	list_del_init(&obj->gtt_list);
 	list_del_init(&obj->mm_list);
@@ -2859,11 +2865,14 @@  i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 			    unsigned alignment,
 			    bool map_and_fenceable)
 {
+	const struct drm_i915_gem_object_ops *ops = obj->base.driver_private;
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_mm_node *free_space;
 	gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
-	u32 size, fence_size, fence_alignment, unfenced_alignment;
+	u32 fence_size, fence_alignment;
+	u32 unfenced_alignment;
+	u32 size, offset;
 	bool mappable, fenceable;
 	int ret;
 
@@ -2929,7 +2938,7 @@  i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		goto search_free;
 	}
 
-	ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
+	ret = ops->get_pages(obj, gfpmask, &offset);
 	if (ret) {
 		drm_mm_put_block(obj->gtt_space);
 		obj->gtt_space = NULL;
@@ -2955,7 +2964,7 @@  i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 
 	ret = i915_gem_gtt_bind_object(obj);
 	if (ret) {
-		i915_gem_object_put_pages_gtt(obj);
+		ops->put_pages(obj);
 		drm_mm_put_block(obj->gtt_space);
 		obj->gtt_space = NULL;
 
@@ -2975,11 +2984,11 @@  i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
 	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
 
-	obj->gtt_offset = obj->gtt_space->start;
+	obj->gtt_offset = obj->gtt_space->start + offset;
 
 	fenceable =
 		obj->gtt_space->size == fence_size &&
-		(obj->gtt_space->start & (fence_alignment -1)) == 0;
+		(obj->gtt_offset & (fence_alignment -1)) == 0;
 
 	mappable =
 		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
@@ -3779,27 +3788,16 @@  unlock:
 	return ret;
 }
 
-struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
-						  size_t size)
+void
+i915_gem_object_init(struct drm_i915_gem_object *obj,
+		     const struct drm_i915_gem_object_ops *ops)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *obj;
-
-	obj = kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
-	if (obj == NULL)
-		return NULL;
-
-	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
-		kfree(obj);
-		return NULL;
-	}
-
-	i915_gem_info_add_obj(dev_priv, size);
+	obj->base.driver_private = (void *)ops;
 
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
-	if (IS_GEN6(dev)) {
+	if (IS_GEN6(obj->base.dev)) {
 		/* On Gen6, we can have the GPU use the LLC (the CPU
 		 * cache) for about a 10% performance improvement
 		 * compared to uncached.  Graphics requests other than
@@ -3816,7 +3814,6 @@  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	} else
 		obj->cache_level = I915_CACHE_NONE;
 
-	obj->base.driver_private = NULL;
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj->mm_list);
 	INIT_LIST_HEAD(&obj->gtt_list);
@@ -3824,9 +3821,35 @@  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	INIT_LIST_HEAD(&obj->exec_list);
 	INIT_LIST_HEAD(&obj->gpu_write_list);
 	obj->madv = I915_MADV_WILLNEED;
+
 	/* Avoid an unnecessary call to unbind on the first bind. */
 	obj->map_and_fenceable = true;
 
+	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
+	.get_pages = i915_gem_object_get_pages_gtt,
+	.put_pages = i915_gem_object_put_pages_gtt,
+};
+
+struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
+						  size_t size)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj;
+
+	obj = kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
+	if (obj == NULL)
+		return NULL;
+
+	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
+		kfree(obj);
+		return NULL;
+	}
+
+	i915_gem_object_init(obj, &i915_gem_object_ops);
+
 	return obj;
 }
 
@@ -4056,7 +4079,7 @@  i915_gem_load(struct drm_device *dev)
 
 	dev_priv->slab =
 		kmem_cache_create("i915_gem_object",
-				  sizeof(struct drm_i915_gem_object), 0,
+				  sizeof(union drm_i915_gem_objects), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_vmap.c b/drivers/gpu/drm/i915/i915_gem_vmap.c
new file mode 100644
index 0000000..89a4ac4
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_vmap.c
@@ -0,0 +1,149 @@ 
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/swap.h>
+
+static struct i915_gem_vmap_object *to_vmap_object(struct drm_i915_gem_object *obj)
+{
+	return container_of(obj, struct i915_gem_vmap_object, gem);
+}
+
+static int
+i915_gem_vmap_get_pages(struct drm_i915_gem_object *obj, gfp_t gfp, u32 *offset)
+{
+	struct i915_gem_vmap_object *vmap = to_vmap_object(obj);
+	int num_pages = vmap->gem.base.size >> PAGE_SHIFT;
+	struct page **pages;
+	int i;
+
+	if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)vmap->user_ptr, vmap->user_size))
+		return -EFAULT;
+
+	if (i915_gem_get_user_pages(obj->base.dev,
+				    vmap->user_ptr,
+				    !vmap->read_only,
+				    &num_pages,
+				    &pages))
+		goto err;
+
+	vmap->gem.pages = pages;
+	*offset = offset_in_page(vmap->user_ptr);
+	return 0;
+
+err:
+	for (i = 0; i < num_pages; i++)
+		page_cache_release(pages[i]);
+	drm_free_large(pages);
+
+	return vmap->gem.pages ? -EAGAIN : -EFAULT;
+}
+
+static void
+i915_gem_vmap_put_pages(struct drm_i915_gem_object *obj)
+{
+	int num_pages = obj->base.size >> PAGE_SHIFT;
+	int i;
+
+	for (i = 0; i < num_pages; i++) {
+		if (obj->dirty)
+			set_page_dirty(obj->pages[i]);
+
+		mark_page_accessed(obj->pages[i]);
+		page_cache_release(obj->pages[i]);
+	}
+
+	obj->dirty = 0;
+	drm_free_large(obj->pages);
+	obj->pages = NULL;
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_vmap_ops = {
+	.get_pages = i915_gem_vmap_get_pages,
+	.put_pages = i915_gem_vmap_put_pages,
+};
+
+/**
+ * Creates a new mm object that wraps some user memory.
+ */
+int
+i915_gem_vmap_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_vmap *args = data;
+	struct i915_gem_vmap_object *obj;
+	loff_t first_data_page, last_data_page;
+	int num_pages;
+	int ret;
+	u32 handle;
+
+	first_data_page = args->user_ptr / PAGE_SIZE;
+	last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
+	num_pages = last_data_page - first_data_page + 1;
+	if (num_pages * PAGE_SIZE > dev_priv->mm.gtt_total)
+		return -E2BIG;
+
+	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->user_ptr,
+				      args->user_size);
+	if (ret)
+		return ret;
+
+	/* Allocate the new object */
+	obj = kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	obj->gem.base.dev = dev;
+	obj->gem.base.size = num_pages * PAGE_SIZE;
+
+	kref_init(&obj->gem.base.refcount);
+	atomic_set(&obj->gem.base.handle_count, 0);
+
+	i915_gem_object_init(&obj->gem, &i915_gem_vmap_ops);
+	obj->gem.cache_level = I915_CACHE_LLC_MLC;
+
+	obj->user_ptr = args->user_ptr;
+	obj->user_size = args->user_size;
+	obj->read_only = args->flags & I915_VMAP_READ_ONLY;
+
+	ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
+	if (ret) {
+		drm_gem_object_release(&obj->gem.base);
+		kfree(obj);
+		return ret;
+	}
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference(&obj->gem.base);
+
+	args->handle = handle;
+	return 0;
+}
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index c4d6dbf..f02f6d7 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -198,6 +198,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_VMAP	0x2a
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -239,6 +240,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_MADVISE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
 #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE	DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
 #define DRM_IOCTL_I915_OVERLAY_ATTRS	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_GEM_VMAP	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_VMAP, struct drm_i915_gem_vmap)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -291,6 +293,7 @@  typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_COHERENT_RINGS	 13
 #define I915_PARAM_HAS_EXEC_CONSTANTS	 14
 #define I915_PARAM_HAS_RELAXED_DELTA	 15
+#define I915_PARAM_HAS_VMAP		 16
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -388,6 +391,19 @@  struct drm_i915_gem_create {
 	__u32 pad;
 };
 
+struct drm_i915_gem_vmap {
+	__u64 user_ptr;
+	__u32 user_size;
+	__u32 flags;
+#define I915_VMAP_READ_ONLY 0x1
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 struct drm_i915_gem_pread {
 	/** Handle for the object being read. */
 	__u32 handle;