From patchwork Sat Apr 16 09:17:45 2011
X-Patchwork-Submitter: Chris Wilson
X-Patchwork-Id: 712071
From: Chris Wilson
To: intel-gfx@lists.freedesktop.org
Cc: Dave Airlie
Date: Sat, 16 Apr 2011 10:17:45 +0100
Message-Id: <1302945465-32115-22-git-send-email-chris@chris-wilson.co.uk>
X-Mailer: git-send-email 1.7.4.1
In-Reply-To: <1302945465-32115-1-git-send-email-chris@chris-wilson.co.uk>
References: <1302945465-32115-1-git-send-email-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 21/21] drm/i915: Introduce vmap (mapping of user pages into video memory) ioctl

By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA to
use normal application data as a texture source or even as a render
target (depending upon the capabilities of the chipset). This has a
number of uses: zero-copy downloads to the GPU and efficient readback
make the intermixed streaming of CPU and GPU operations fairly
efficient. The implications are widespread, from faster rendering of
partial software fallbacks (xterm!) to faster pipelining of texture
data (such as pixel buffer objects in GL).
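To illustrate the intended usage, a minimal userspace sketch follows
(not part of this patch: gem_vmap() is a hypothetical wrapper name, it
assumes an already-open DRM device fd and a page-aligned allocation,
and real code should first be gated on the I915_PARAM_HAS_VMAP
getparam added below; see the sketch after the patch):

    #include <stdint.h>
    #include <string.h>
    #include <sys/ioctl.h>

    #include "i915_drm.h"

    /* Wrap an existing user allocation in a GEM handle. Returns the
     * new handle, or 0 on error; the GPU may then read (and, unless
     * read_only is set, write) the pages in place. */
    static uint32_t gem_vmap(int fd, void *ptr, uint32_t size, int read_only)
    {
            struct drm_i915_gem_vmap vmap;

            memset(&vmap, 0, sizeof(vmap));
            vmap.user_ptr = (uintptr_t)ptr;
            vmap.user_size = size;
            vmap.flags = read_only ? I915_VMAP_READ_ONLY : 0;

            if (ioctl(fd, DRM_IOCTL_I915_GEM_VMAP, &vmap))
                    return 0;

            return vmap.handle;
    }

The returned handle then behaves like any other GEM object handle, so
the wrapped pages can be used, for example, as a zero-copy texture
source in an execbuffer.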
Signed-off-by: Chris Wilson
Cc: Dave Airlie
---
 drivers/gpu/drm/drm_gem.c            |    3 +-
 drivers/gpu/drm/i915/Makefile        |    1 +
 drivers/gpu/drm/i915/i915_dma.c      |    4 +
 drivers/gpu/drm/i915/i915_drv.h      |   33 +++++++-
 drivers/gpu/drm/i915/i915_gem.c      |   77 +++++++++++------
 drivers/gpu/drm/i915/i915_gem_vmap.c |  149 ++++++++++++++++++++++++++++++++++
 include/drm/i915_drm.h               |   16 ++++
 7 files changed, 254 insertions(+), 29 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_vmap.c

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 74e4ff5..03ca40a 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -426,7 +426,8 @@ drm_gem_release(struct drm_device *dev, struct drm_file *file_private)
 void
 drm_gem_object_release(struct drm_gem_object *obj)
 {
-	fput(obj->filp);
+	if (obj->filp)
+		fput(obj->filp);
 }
 EXPORT_SYMBOL(drm_gem_object_release);
 
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 0ae6a7c..0bbc404 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -12,6 +12,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
 	  i915_gem_execbuffer.o \
 	  i915_gem_gtt.o \
 	  i915_gem_tiling.o \
+	  i915_gem_vmap.o \
 	  i915_trace_points.o \
 	  intel_display.o \
 	  intel_crt.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index d8269f3..3979ed8 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -782,6 +782,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_RELAXED_DELTA:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_VMAP:
+		value = dev_priv->has_gem;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
@@ -2279,6 +2282,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_VMAP, i915_gem_vmap_ioctl, DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a8733ac..90eac1d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -721,6 +721,11 @@ enum i915_cache_level {
 	I915_CACHE_LLC_MLC, /* gen6+ */
 };
 
+struct drm_i915_gem_object_ops {
+	int (*get_pages)(struct drm_i915_gem_object *, gfp_t, u32 *offset);
+	void (*put_pages)(struct drm_i915_gem_object *);
+};
+
 struct drm_i915_gem_object {
 	struct drm_gem_object base;
 
@@ -867,6 +872,18 @@ struct drm_i915_gem_object {
 	atomic_t pending_flip;
 };
 
+struct i915_gem_vmap_object {
+	struct drm_i915_gem_object gem;
+	uintptr_t user_ptr;
+	size_t user_size;
+	int read_only;
+};
+
+union drm_i915_gem_objects {
+	struct drm_i915_gem_object base;
+	struct i915_gem_vmap_object vmap;
+};
+
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -1122,6 +1139,8 @@ int __must_check i915_gem_flush_ring(struct intel_ring_buffer *ring,
 				     uint32_t flush_domains);
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+			  const struct drm_i915_gem_object_ops *ops);
 void i915_gem_free_object(struct drm_gem_object *obj);
 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     uint32_t alignment,
@@ -1143,7 +1162,19 @@ int i915_gem_dumb_create(struct drm_file *file_priv,
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
 int i915_gem_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
-			  uint32_t handle);
+			  uint32_t handle);
+
+/* i915_gem_vmap.c */
+int i915_gem_vmap_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file);
+
+int
+i915_gem_get_user_pages(struct drm_device *dev,
+			unsigned long addr,
+			bool write,
+			int *num_pages,
+			struct page ***pages_out);
+
 /**
  * Returns true if seq1 is later than seq2.
  */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f554273..6cb2331 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -274,7 +274,7 @@ static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
  * Returns: an error code *and* the number of user pages acquired. Even
  * on an error, you must iterate over the returned pages and release them.
  */
-static int
+int
 i915_gem_get_user_pages(struct drm_device *dev,
 			unsigned long addr,
 			bool write,
@@ -1585,12 +1585,13 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
 
 static int
 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
-			      gfp_t gfpmask)
+			      gfp_t gfpmask,
+			      u32 *offset)
 {
-	int page_count, i;
 	struct address_space *mapping;
 	struct inode *inode;
 	struct page *page;
+	int i, page_count;
 
 	/* Get the list of pages out of our struct file.  They'll be pinned
 	 * at this point until we release them.
@@ -1618,6 +1619,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
 	if (obj->tiling_mode != I915_TILING_NONE)
 		i915_gem_object_do_bit_17_swizzle(obj);
 
+	*offset = 0;
 	return 0;
 
 err_pages:
@@ -1785,6 +1787,9 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj)
 {
 	struct inode *inode;
 
+	if (obj->base.filp == NULL)
+		return;
+
 	/* Our goal here is to return as much of the memory as
 	 * is possible back to the system as we are called from OOM.
 	 * To do this we must instruct the shmfs to drop all of its
@@ -2269,6 +2274,7 @@ static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
 int
 i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 {
+	const struct drm_i915_gem_object_ops *ops = obj->base.driver_private;
 	int ret = 0;
 
 	if (obj->gtt_space == NULL)
@@ -2313,7 +2319,7 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	trace_i915_gem_object_unbind(obj);
 
 	i915_gem_gtt_unbind_object(obj);
-	i915_gem_object_put_pages_gtt(obj);
+	ops->put_pages(obj);
 
 	list_del_init(&obj->gtt_list);
 	list_del_init(&obj->mm_list);
@@ -2859,11 +2865,14 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 			    unsigned alignment,
 			    bool map_and_fenceable)
 {
+	const struct drm_i915_gem_object_ops *ops = obj->base.driver_private;
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_mm_node *free_space;
 	gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
-	u32 size, fence_size, fence_alignment, unfenced_alignment;
+	u32 fence_size, fence_alignment;
+	u32 unfenced_alignment;
+	u32 size, offset;
 	bool mappable, fenceable;
 	int ret;
 
@@ -2929,7 +2938,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 		goto search_free;
 	}
 
-	ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
+	ret = ops->get_pages(obj, gfpmask, &offset);
 	if (ret) {
 		drm_mm_put_block(obj->gtt_space);
 		obj->gtt_space = NULL;
@@ -2955,7 +2964,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 
 	ret = i915_gem_gtt_bind_object(obj);
 	if (ret) {
-		i915_gem_object_put_pages_gtt(obj);
+		ops->put_pages(obj);
 		drm_mm_put_block(obj->gtt_space);
 		obj->gtt_space = NULL;
 
@@ -2975,11 +2984,11 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
 	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
 
-	obj->gtt_offset = obj->gtt_space->start;
+	obj->gtt_offset = obj->gtt_space->start + offset;
 
 	fenceable =
 		obj->gtt_space->size == fence_size &&
-		(obj->gtt_space->start & (fence_alignment -1)) == 0;
+		(obj->gtt_offset & (fence_alignment -1)) == 0;
 
 	mappable =
 		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
@@ -3779,27 +3788,16 @@ unlock:
 	return ret;
 }
 
-struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
-						  size_t size)
+void
+i915_gem_object_init(struct drm_i915_gem_object *obj,
+		     const struct drm_i915_gem_object_ops *ops)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *obj;
-
-	obj = kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
-	if (obj == NULL)
-		return NULL;
-
-	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
-		kfree(obj);
-		return NULL;
-	}
-
-	i915_gem_info_add_obj(dev_priv, size);
+	obj->base.driver_private = (void *)ops;
 
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
-	if (IS_GEN6(dev)) {
+	if (IS_GEN6(obj->base.dev)) {
 		/* On Gen6, we can have the GPU use the LLC (the CPU
 		 * cache) for about a 10% performance improvement
 		 * compared to uncached.  Graphics requests other than
@@ -3816,7 +3814,6 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	} else
 		obj->cache_level = I915_CACHE_NONE;
 
-	obj->base.driver_private = NULL;
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj->mm_list);
 	INIT_LIST_HEAD(&obj->gtt_list);
@@ -3824,9 +3821,35 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	INIT_LIST_HEAD(&obj->exec_list);
 	INIT_LIST_HEAD(&obj->gpu_write_list);
 	obj->madv = I915_MADV_WILLNEED;
+	/* Avoid an unnecessary call to unbind on the first bind. */
 	obj->map_and_fenceable = true;
 
+	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
+	.get_pages = i915_gem_object_get_pages_gtt,
+	.put_pages = i915_gem_object_put_pages_gtt,
+};
+
+struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
+						  size_t size)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj;
+
+	obj = kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
+	if (obj == NULL)
+		return NULL;
+
+	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
+		kfree(obj);
+		return NULL;
+	}
+
+	i915_gem_object_init(obj, &i915_gem_object_ops);
+
 	return obj;
 }
 
@@ -4056,7 +4079,7 @@ i915_gem_load(struct drm_device *dev)
 
 	dev_priv->slab =
 		kmem_cache_create("i915_gem_object",
-				  sizeof(struct drm_i915_gem_object), 0,
+				  sizeof(union drm_i915_gem_objects), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
diff --git a/drivers/gpu/drm/i915/i915_gem_vmap.c b/drivers/gpu/drm/i915/i915_gem_vmap.c
new file mode 100644
index 0000000..89a4ac4
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_vmap.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/swap.h>
+
+static struct i915_gem_vmap_object *to_vmap_object(struct drm_i915_gem_object *obj)
+{
+	return container_of(obj, struct i915_gem_vmap_object, gem);
+}
+
+static int
+i915_gem_vmap_get_pages(struct drm_i915_gem_object *obj, gfp_t gfp, u32 *offset)
+{
+	struct i915_gem_vmap_object *vmap = to_vmap_object(obj);
+	int num_pages = vmap->gem.base.size >> PAGE_SHIFT;
+	struct page **pages;
+	int i;
+
+	if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)vmap->user_ptr, vmap->user_size))
+		return -EFAULT;
+
+	if (i915_gem_get_user_pages(obj->base.dev,
+				    vmap->user_ptr,
+				    !vmap->read_only,
+				    &num_pages,
+				    &pages))
+		goto err;
+
+	vmap->gem.pages = pages;
+	*offset = offset_in_page(vmap->user_ptr);
+	return 0;
+
+err:
+	for (i = 0; i < num_pages; i++)
+		page_cache_release(pages[i]);
+	drm_free_large(pages);
+
+	return vmap->gem.pages ? -EAGAIN : -EFAULT;
+}
+
+static void
+i915_gem_vmap_put_pages(struct drm_i915_gem_object *obj)
+{
+	int num_pages = obj->base.size >> PAGE_SHIFT;
+	int i;
+
+	for (i = 0; i < num_pages; i++) {
+		if (obj->dirty)
+			set_page_dirty(obj->pages[i]);
+
+		mark_page_accessed(obj->pages[i]);
+		page_cache_release(obj->pages[i]);
+	}
+
+	obj->dirty = 0;
+	drm_free_large(obj->pages);
+	obj->pages = NULL;
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_vmap_ops = {
+	.get_pages = i915_gem_vmap_get_pages,
+	.put_pages = i915_gem_vmap_put_pages,
+};
+
+/**
+ * Creates a new mm object that wraps some user memory.
+ */
+int
+i915_gem_vmap_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_vmap *args = data;
+	struct i915_gem_vmap_object *obj;
+	loff_t first_data_page, last_data_page;
+	int num_pages;
+	int ret;
+	u32 handle;
+
+	first_data_page = args->user_ptr / PAGE_SIZE;
+	last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
+	num_pages = last_data_page - first_data_page + 1;
+	if (num_pages * PAGE_SIZE > dev_priv->mm.gtt_total)
+		return -E2BIG;
+
+	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->user_ptr,
+				      args->user_size);
+	if (ret)
+		return ret;
+
+	/* Allocate the new object */
+	obj = kmem_cache_alloc(dev_priv->slab, GFP_KERNEL | __GFP_ZERO);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	obj->gem.base.dev = dev;
+	obj->gem.base.size = num_pages * PAGE_SIZE;
+
+	kref_init(&obj->gem.base.refcount);
+	atomic_set(&obj->gem.base.handle_count, 0);
+
+	i915_gem_object_init(&obj->gem, &i915_gem_vmap_ops);
+	obj->gem.cache_level = I915_CACHE_LLC_MLC;
+
+	obj->user_ptr = args->user_ptr;
+	obj->user_size = args->user_size;
+	obj->read_only = args->flags & I915_VMAP_READ_ONLY;
+
+	ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
+	if (ret) {
+		drm_gem_object_release(&obj->gem.base);
+		kfree(obj);
+		return ret;
+	}
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference(&obj->gem.base);
+
+	args->handle = handle;
+	return 0;
+}
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index c4d6dbf..f02f6d7 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_VMAP	0x2a
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -239,6 +240,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_MADVISE	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
 #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE	DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
 #define DRM_IOCTL_I915_OVERLAY_ATTRS	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_GEM_VMAP		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VMAP, struct drm_i915_gem_vmap)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -291,6 +293,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_COHERENT_RINGS	 13
 #define I915_PARAM_HAS_EXEC_CONSTANTS	 14
 #define I915_PARAM_HAS_RELAXED_DELTA	 15
+#define I915_PARAM_HAS_VMAP		 16
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -388,6 +391,19 @@ struct drm_i915_gem_create {
 	__u32 pad;
 };
 
+struct drm_i915_gem_vmap {
+	__u64 user_ptr;
+	__u32 user_size;
+	__u32 flags;
+#define I915_VMAP_READ_ONLY 0x1
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
+
 struct drm_i915_gem_pread {
 	/** Handle for the object being read. */
 	__u32 handle;
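
For completeness, a sketch of how userspace might detect the new
parameter before calling the ioctl (again illustrative, not part of
the patch; fd is an open DRM device, and drm_i915_getparam_t and
DRM_IOCTL_I915_GETPARAM are the pre-existing getparam interface):

    #include <sys/ioctl.h>

    #include "i915_drm.h"

    /* Returns nonzero if the running kernel advertises the vmap ioctl. */
    static int has_vmap(int fd)
    {
            drm_i915_getparam_t gp;
            int value = 0;

            gp.param = I915_PARAM_HAS_VMAP;
            gp.value = &value;

            if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
                    return 0;

            return value;
    }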