Message ID: 1399374658-19525-1-git-send-email-sourab.gupta@intel.com (mailing list archive)
State: New, archived
On Tue, May 06, 2014 at 04:40:58PM +0530, sourab.gupta@intel.com wrote:
> From: Sourab Gupta <sourab.gupta@intel.com>
>
> This patch is a continuation of, and depends on, the earlier patch series to 'reduce the time for which device mutex is kept locked' (http://lists.freedesktop.org/archives/intel-gfx/2014-May/044596.html).
>
> This patch aims to reduce the time taken to allocate pages from shmem by using the blitter engine to clear freshly allocated pages. This is used only for fresh pages allocated by the shmem_preallocate routines in the execbuffer and page-fault paths.
>
> Even though the CPU memset routine is optimized, the time spent clearing the pages of a large buffer can still be on the order of milliseconds. We intend to make this operation asynchronous by using the blitter engine, so that the execbuffer ioctl processing time is unaffected by the size of the buffer to be cleared. Using the blitter engine will make the overall execution time of the execbuffer ioctl shorter for large buffers.
>
> There may be power implications of using the blitter engine here, which we have to evaluate. As a next step, we can selectively enable this HW-based memset only for large buffers, where the overhead of adding commands to a blitter ring (which would otherwise be idle) and of cross-ring synchronization is negligible compared to clearing the buffer with the CPU.

You leave a lot of holes by which you leak the uncleared pages to userspace.
-Chris
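As a rough sketch of the "only for large buffers" idea in the commit message above: a size cut-off could steer small clears to the CPU and large ones to the blitter. This is an illustration only, not part of the patch; the threshold value and the helper i915_gem_clear_object_cpu() are hypothetical, and only i915_gem_memset_obj_hw() comes from the patch below.

/* Hypothetical sketch of the size cut-off discussed above; not from the patch. */
#define I915_BLT_CLEAR_THRESHOLD        (512 * 1024)    /* assumed cut-off, needs measurement */

static int i915_gem_clear_object(struct drm_i915_gem_object *obj)
{
        /* Small buffers: a CPU memset finishes quickly and avoids waking an
         * otherwise idle blitter ring. i915_gem_clear_object_cpu() is a
         * hypothetical helper for that path. */
        if (obj->base.size < I915_BLT_CLEAR_THRESHOLD)
                return i915_gem_clear_object_cpu(obj);

        /* Large buffers: queue the clear on the blitter ring so the caller
         * does not pay for it synchronously (function added by this patch). */
        return i915_gem_memset_obj_hw(obj);
}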
On Tue, 2014-05-06 at 11:34 +0000, Chris Wilson wrote:
> On Tue, May 06, 2014 at 04:40:58PM +0530, sourab.gupta@intel.com wrote:
> > From: Sourab Gupta <sourab.gupta@intel.com>
> >
> > This patch is a continuation of, and depends on, the earlier patch series to 'reduce the time for which device mutex is kept locked' (http://lists.freedesktop.org/archives/intel-gfx/2014-May/044596.html).
> >
> > This patch aims to reduce the time taken to allocate pages from shmem by using the blitter engine to clear freshly allocated pages. This is used only for fresh pages allocated by the shmem_preallocate routines in the execbuffer and page-fault paths.
> >
> > Even though the CPU memset routine is optimized, the time spent clearing the pages of a large buffer can still be on the order of milliseconds. We intend to make this operation asynchronous by using the blitter engine, so that the execbuffer ioctl processing time is unaffected by the size of the buffer to be cleared. Using the blitter engine will make the overall execution time of the execbuffer ioctl shorter for large buffers.
> >
> > There may be power implications of using the blitter engine here, which we have to evaluate. As a next step, we can selectively enable this HW-based memset only for large buffers, where the overhead of adding commands to a blitter ring (which would otherwise be idle) and of cross-ring synchronization is negligible compared to clearing the buffer with the CPU.
>
> You leave a lot of holes by which you leak the uncleared pages to userspace.
> -Chris

Hi Chris,

Are you OK with the overall design as such, and with the shmem_read_mapping_page_gfp_noclear interface? Is the leak of uncleared pages happening due to implementation issues? If so, we'll try to mitigate them.

Regards,
Sourab
On Tue, May 06, 2014 at 12:59:37PM +0000, Gupta, Sourab wrote:
> On Tue, 2014-05-06 at 11:34 +0000, Chris Wilson wrote:
> > On Tue, May 06, 2014 at 04:40:58PM +0530, sourab.gupta@intel.com wrote:
> > > From: Sourab Gupta <sourab.gupta@intel.com>
> > >
> > > This patch is a continuation of, and depends on, the earlier patch series to 'reduce the time for which device mutex is kept locked' (http://lists.freedesktop.org/archives/intel-gfx/2014-May/044596.html).
> > >
> > > This patch aims to reduce the time taken to allocate pages from shmem by using the blitter engine to clear freshly allocated pages. This is used only for fresh pages allocated by the shmem_preallocate routines in the execbuffer and page-fault paths.
> > >
> > > Even though the CPU memset routine is optimized, the time spent clearing the pages of a large buffer can still be on the order of milliseconds. We intend to make this operation asynchronous by using the blitter engine, so that the execbuffer ioctl processing time is unaffected by the size of the buffer to be cleared. Using the blitter engine will make the overall execution time of the execbuffer ioctl shorter for large buffers.
> > >
> > > There may be power implications of using the blitter engine here, which we have to evaluate. As a next step, we can selectively enable this HW-based memset only for large buffers, where the overhead of adding commands to a blitter ring (which would otherwise be idle) and of cross-ring synchronization is negligible compared to clearing the buffer with the CPU.
> >
> > You leave a lot of holes by which you leak the uncleared pages to userspace.
> > -Chris
>
> Hi Chris,
>
> Are you OK with the overall design as such, and with the shmem_read_mapping_page_gfp_noclear interface? Is the leak of uncleared pages happening due to implementation issues? If so, we'll try to mitigate them.

Actually, along similar lines there is an even more fundamental issue. You should only clear the objects if the pages have not been prepopulated.
-Chris
sourab.gupta@intel.com writes:
> From: Sourab Gupta <sourab.gupta@intel.com>
>
> This patch is a continuation of, and depends on, the earlier patch series to 'reduce the time for which device mutex is kept locked' (http://lists.freedesktop.org/archives/intel-gfx/2014-May/044596.html).

One of userspace's assumptions is that when you allocate a new BO, you can map it and start writing data into it without needing to wait on the GPU. I expect this patch to mostly hurt performance on apps that get more stalls as a result (and I note that the patch doesn't come with any actual performance data).

More importantly, though, it breaks existing userspace that relies on buffers being idle on allocation, for the unsynchronized maps used in intel_bufferobj_subdata() and intel_bufferobj_map_range(GL_INVALIDATE_BUFFER_BIT | GL_UNSYNCHRONIZED_BIT).
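For reference, the userspace pattern Eric describes looks roughly like the following libdrm_intel sketch. This is my own illustration, not code from Mesa; the device node path, sizes and buffer name are arbitrary, and error handling is omitted.

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <intel_bufmgr.h>

static void write_fresh_bo(void)
{
        int fd = open("/dev/dri/card0", O_RDWR);
        drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);

        /* A freshly allocated BO is assumed to be idle on the GPU. */
        drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "data", 1024 * 1024, 4096);

        /* Unsynchronized map: libdrm deliberately skips the set-domain wait.
         * With the proposed patch, the first GTT fault would queue a blitter
         * clear and stall on it before this CPU write can proceed, which is
         * the behaviour change being discussed in this thread. */
        drm_intel_gem_bo_map_unsynchronized(bo);
        memset(bo->virtual, 0xa5, 1024 * 1024);
        drm_intel_bo_unmap(bo);

        drm_intel_bo_unreference(bo);
        drm_intel_bufmgr_destroy(bufmgr);
        close(fd);
}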
On Tue, 2014-05-06 at 13:12 +0000, Chris Wilson wrote:
> On Tue, May 06, 2014 at 12:59:37PM +0000, Gupta, Sourab wrote:
> > On Tue, 2014-05-06 at 11:34 +0000, Chris Wilson wrote:
> > > On Tue, May 06, 2014 at 04:40:58PM +0530, sourab.gupta@intel.com wrote:
> > > > From: Sourab Gupta <sourab.gupta@intel.com>
> > > >
> > > > This patch is a continuation of, and depends on, the earlier patch series to 'reduce the time for which device mutex is kept locked' (http://lists.freedesktop.org/archives/intel-gfx/2014-May/044596.html).
> > > >
> > > > This patch aims to reduce the time taken to allocate pages from shmem by using the blitter engine to clear freshly allocated pages. This is used only for fresh pages allocated by the shmem_preallocate routines in the execbuffer and page-fault paths.
> > > >
> > > > Even though the CPU memset routine is optimized, the time spent clearing the pages of a large buffer can still be on the order of milliseconds. We intend to make this operation asynchronous by using the blitter engine, so that the execbuffer ioctl processing time is unaffected by the size of the buffer to be cleared. Using the blitter engine will make the overall execution time of the execbuffer ioctl shorter for large buffers.
> > > >
> > > > There may be power implications of using the blitter engine here, which we have to evaluate. As a next step, we can selectively enable this HW-based memset only for large buffers, where the overhead of adding commands to a blitter ring (which would otherwise be idle) and of cross-ring synchronization is negligible compared to clearing the buffer with the CPU.
> > >
> > > You leave a lot of holes by which you leak the uncleared pages to userspace.
> > > -Chris
> >
> > Hi Chris,
> >
> > Are you OK with the overall design as such, and with the shmem_read_mapping_page_gfp_noclear interface? Is the leak of uncleared pages happening due to implementation issues? If so, we'll try to mitigate them.
>
> Actually, along similar lines there is an even more fundamental issue. You should only clear the objects if the pages have not been prepopulated.
> -Chris

Hi Chris,

This patch is in continuation of the shmem preallocate patch sent by Akash earlier (http://lists.freedesktop.org/archives/intel-gfx/2014-May/044597.html).

We employ this method only in the preallocate routine, which is called on the first page fault of the object and results in a fresh allocation of pages. This is controlled by a flag, 'require_clear', which is set in the preallocate routine and so only comes into the picture for a fresh allocation. If pages are already populated for the object, this path is not taken.

Also, we'll try to fix the leak of uncleared pages caused by any implementation issues.

Regards,
Sourab
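On the leak question, the user-visible guarantee can be exercised with a small test that creates a fresh GEM object, CPU-mmaps it and checks that it reads back as zeroes. The sketch below is added purely for illustration (it is not an existing i-g-t test); the device node path is an assumption.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/i915_drm.h>

int main(void)
{
        int fd = open("/dev/dri/card0", O_RDWR);
        struct drm_i915_gem_create create = { .size = 1024 * 1024 };
        struct drm_i915_gem_mmap arg = { 0 };
        unsigned char *ptr;
        uint64_t i;

        if (fd < 0 || ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create))
                return 1;

        /* CPU mmap of the object's backing pages. */
        arg.handle = create.handle;
        arg.size = create.size;
        if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg))
                return 1;
        ptr = (unsigned char *)(uintptr_t)arg.addr_ptr;

        /* A newly created object must never expose stale data to userspace. */
        for (i = 0; i < create.size; i++) {
                if (ptr[i] != 0) {
                        printf("stale byte at offset %llu\n", (unsigned long long)i);
                        return 1;
                }
        }

        printf("object reads back as zeroes\n");
        close(fd);
        return 0;
}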
On Tue, 2014-05-06 at 17:56 +0000, Eric Anholt wrote:
> sourab.gupta@intel.com writes:
> > From: Sourab Gupta <sourab.gupta@intel.com>
> >
> > This patch is a continuation of, and depends on, the earlier patch series to 'reduce the time for which device mutex is kept locked' (http://lists.freedesktop.org/archives/intel-gfx/2014-May/044596.html).
>
> One of userspace's assumptions is that when you allocate a new BO, you can map it and start writing data into it without needing to wait on the GPU. I expect this patch to mostly hurt performance on apps that get more stalls as a result (and I note that the patch doesn't come with any actual performance data).

Hi Eric,

Yes, it may hurt the performance of apps in the case of small buffers and when the blitter engine is busy, as there is a synchronous wait for rendering in the gem_fault handler. If that is the case, we can drop this from the gem_fault routine and employ it only in the do_execbuffer routine. It is useful there because no synchronous wait is required in software, thanks to cross-ring synchronization.

We'll gather the numbers to quantify the performance benefit of using the blitter engine in this way for different buffer sizes.

> More importantly, though, it breaks existing userspace that relies on buffers being idle on allocation, for the unsynchronized maps used in intel_bufferobj_subdata() and intel_bufferobj_map_range(GL_INVALIDATE_BUFFER_BIT | GL_UNSYNCHRONIZED_BIT).

Sorry, I'm missing your point here. It may not break this assumption, because we employ this method only in the preallocate routine, which is called on the first page fault of the object (the gem_fault handler) and results in a fresh allocation of pages.

So, in the case of unsynchronized maps, there may be a wait involved on the first page fault. That wait time may also be less than the time required for a CPU memset (resulting in no performance hit). There won't be any subsequent waits for that buffer object afterwards.

We will, however, take a performance hit when the blitter engine is already busy and cannot immediately start the memset of freshly allocated mmapped buffers.

Am I missing something here? Does the userspace requirement for unsynchronized mapped objects demand complete idleness of the object on the GPU even when the object page-faults for the first time?

Regards,
Sourab
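For context, the "cross-ring synchronization" mentioned above refers to the driver's existing object-sync path: when a render batch uses an object whose last write was the blitter clear, the kernel inserts a ring-to-ring wait instead of blocking in the ioctl. A minimal sketch, using the i915_gem_object_sync() interface of this era of the driver; this is my illustration, not code from the patch.

/* Sketch only: rely on the existing object-sync path so the CPU never
 * blocks; the render ring waits for the blitter's write to complete. */
static int sync_clear_with_render(struct drm_i915_gem_object *obj,
                                  struct intel_ring_buffer *render_ring)
{
        /* obj->ring is the BCS ring that queued the clear; this emits a
         * semaphore wait, or falls back to a CPU wait when semaphores are
         * disabled. */
        return i915_gem_object_sync(obj, render_ring);
}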
"Gupta, Sourab" <sourab.gupta@intel.com> writes: > On Tue, 2014-05-06 at 17:56 +0000, Eric Anholt wrote: >> sourab.gupta@intel.com writes: >> >> > From: Sourab Gupta <sourab.gupta@intel.com> >> > >> > This patch is in continuation of and is dependent on earlier patch >> > series to 'reduce the time for which device mutex is kept locked'. >> > (http://lists.freedesktop.org/archives/intel-gfx/2014-May/044596.html) >> >> One of userspace's assumptions is that when you allocate a new BO, you >> can map it and start writing data into it without needing to wait on the >> GPU. I expect this patch to mostly hurt performance on apps (and I note >> that the patch doesn't come with any actual performance data) that get >> more stalls as a result. >> > Hi Eric, > Yes, it may hurt the performance on apps, in case of small buffers and > if blitter engine is busy as there is a synchronous wait for rendering > in the gem_fault handler. If that is the case, we can drop this from the > gem_fault routine and employ it only in the do_execbuffer routine. Its > useful there because there is no synchronous wait required in sw, due > to cross ring synchronization. > We'll gather the numbers to quantify the performance benefit we have > while using blitter engines in this way for different buffer sizes. > >> More importantly, though, it breaks existing userspace that relies on >> buffers being idle on allocation, for the unsychronized maps used in >> intel_bufferobj_subdata() and >> intel_bufferobj_map_range(GL_INVALIDATE_BUFFER_BIT | >> GL_UNSYNCHRONIZED_BIT) > > Sorry, I miss your point here. It may not break this assumption due to > the fact that we employ this method only in case of the preallocate > routine, which will be called in the first page fault of the object > (gem_fault handler) resulting in fresh allocation of pages. > > > So, in case of unsynchronized maps, there may be a wait involved in the > first page fault. Also, that wait time may be lesser than the time > required for CPU memset (resulting in no performance hit). > There won't be any subsequent waits afterwards for that buffer object. > > Though, we'll have performance hit in the case when blitter engine is > already busy and may not be available to immediately start the memset of > freshly allocated mmaped buffers. > > Am I missing something here? Does the userspace requirement for > unsynchronized mapped objects involve complete idleness of object on gpu > even when object page faults for the first time? Oh, I mised how this works. So at pagefault time, you're firing off the blit, then immediately stalling on it? This sounds even less like a possible performance win than I was initially thinking.
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 6dc579a..c3844da 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1596,6 +1596,10 @@ struct drm_i915_gem_object {
         unsigned int has_aliasing_ppgtt_mapping:1;
         unsigned int has_global_gtt_mapping:1;
         unsigned int has_dma_mapping:1;
+        /*
+         * Do the pages of object need to be cleared after shmem allocation
+         */
+        unsigned int require_clear:1;
 
         struct sg_table *pages;
         int pages_pin_count;
@@ -2120,6 +2124,8 @@ int i915_gem_mmap_gtt(struct drm_file *file_priv,
                       struct drm_device *dev,
                       uint32_t handle, uint64_t *offset);
 void i915_gem_object_shmem_preallocate(struct drm_i915_gem_object *obj);
+int i915_add_clear_obj_cmd(struct drm_i915_gem_object *obj);
+int i915_gem_memset_obj_hw(struct drm_i915_gem_object *obj);
 
 /**
  * Returns true if seq1 is later than seq2.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 867da2d..972695a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1376,6 +1376,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
         return 0;
 }
 
+
 /**
  * i915_gem_fault - fault a page into the GTT
  * vma: VMA in question
@@ -1436,6 +1437,12 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
         if (ret)
                 goto unlock;
 
+        if (obj->require_clear) {
+                i915_gem_object_flush_cpu_write_domain(obj, false);
+                i915_gem_memset_obj_hw(obj);
+                obj->require_clear = false;
+        }
+
         ret = i915_gem_object_set_to_gtt_domain(obj, write);
         if (ret)
                 goto unpin;
@@ -1927,12 +1934,13 @@ i915_gem_object_shmem_preallocate(struct drm_i915_gem_object *obj)
         /* Get the list of pages out of our struct file
          * Fail silently without starting the shrinker
          */
+        obj->require_clear = 1;
         mapping = file_inode(obj->base.filp)->i_mapping;
         gfp = mapping_gfp_mask(mapping);
         gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
         gfp &= ~(__GFP_IO | __GFP_WAIT);
         for (i = 0; i < page_count; i++) {
-                page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+                page = shmem_read_mapping_page_gfp_noclear(mapping, i, gfp);
                 if (IS_ERR(page)) {
                         DRM_DEBUG_DRIVER("Failure for obj(%p), size(%x) at page(%d)\n",
                                          obj, obj->base.size, i);
@@ -2173,6 +2181,76 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
         WARN_ON(i915_verify_lists(dev));
 }
 
+int i915_add_clear_obj_cmd(struct drm_i915_gem_object *obj)
+{
+        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+        struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
+        u32 offset = i915_gem_obj_ggtt_offset(obj);
+        int ret;
+
+        ret = intel_ring_begin(ring, 6);
+        if (ret)
+                return ret;
+
+        intel_ring_emit(ring, (0x2 << 29) | (0x40 << 22) |
+                        XY_SRC_COPY_BLT_WRITE_ALPHA |
+                        XY_SRC_COPY_BLT_WRITE_RGB |
+                        0x3);
+        intel_ring_emit(ring, BLT_DEPTH_32 | (0xF0 << 16) | 4096);
+        intel_ring_emit(ring,
+                        (DIV_ROUND_UP(obj->base.size, 4096) << 16) | 4096);
+        intel_ring_emit(ring, offset);
+        intel_ring_emit(ring, 0);
+        intel_ring_emit(ring, MI_NOOP);
+        intel_ring_advance(ring);
+
+        return 0;
+}
+
+int i915_gem_memset_obj_hw(struct drm_i915_gem_object *obj)
+{
+        struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+        struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
+        u32 seqno;
+        int ret;
+
+        ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
+        if (ret) {
+                DRM_ERROR("Mapping of User FB to GTT failed\n");
+                return ret;
+        }
+
+        /* Adding commands to the blitter ring to
+         * clear out the contents of the buffer object
+         */
+        ret = i915_add_clear_obj_cmd(obj);
+        if (ret) {
+                DRM_ERROR("couldn't add commands in blitter ring\n");
+                i915_gem_object_ggtt_unpin(obj);
+                return ret;
+        }
+
+        seqno = intel_ring_get_seqno(ring);
+
+        obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
+        obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
+
+        i915_gem_object_move_to_active(obj, ring);
+
+        obj->dirty = 1;
+        obj->last_write_seqno = seqno;
+
+        /* Unconditionally force add_request to emit a full flush. */
+        ring->gpu_caches_dirty = true;
+
+        /* Add a breadcrumb for the completion of the batch buffer */
+        (void)i915_add_request(ring, NULL);
+
+        i915_gem_object_ggtt_unpin(obj);
+
+        return 0;
+}
+
 static void
 i915_gem_object_retire(struct drm_i915_gem_object *obj)
 {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index cb9e143..47e6946 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -895,6 +895,45 @@ i915_gem_execbuffer_preallocate_objs(struct list_head *objects)
         }
 }
 
+static int
+i915_gem_execbuffer_clear_objs(struct drm_device *dev, struct list_head *objects)
+{
+        struct drm_i915_private *dev_priv = dev->dev_private;
+        struct intel_ring_buffer *ring = &dev_priv->ring[BCS];
+        struct drm_i915_gem_object *obj;
+        u32 seqno;
+        int ret;
+
+        list_for_each_entry(obj, objects, obj_exec_link) {
+                if (obj->require_clear) {
+                        /* Flush the CPU write domain for the object if it's dirty. */
+                        if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+                                i915_gem_clflush_object(obj, false);
+
+                        ret = i915_add_clear_obj_cmd(obj);
+                        if (ret)
+                                return ret;
+
+                        seqno = intel_ring_get_seqno(ring);
+
+                        obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
+                        obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
+                        i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
+
+                        obj->dirty = 1;
+                        obj->last_write_seqno = seqno;
+                        obj->require_clear = false;
+                }
+        }
+        /* Unconditionally force add_request to emit a full flush. */
+        ring->gpu_caches_dirty = true;
+
+        /* Add a breadcrumb for the completion of the batch buffer */
+        (void)i915_add_request(ring, NULL);
+
+        return 0;
+}
+
 static bool
 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 {
@@ -1286,6 +1325,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
         if (ret)
                 goto err;
 
+        ret = i915_gem_execbuffer_clear_objs(dev, &eb->objects);
+        if (ret)
+                goto err;
+
         /* The objects are in their final locations, apply the relocations. */
         if (need_relocs)
                 ret = i915_gem_execbuffer_relocate(eb);
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c..ac976c8 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -55,6 +55,8 @@ extern bool shmem_mapping(struct address_space *mapping);
 extern void shmem_unlock_mapping(struct address_space *mapping);
 extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
                                         pgoff_t index, gfp_t gfp_mask);
+extern struct page *shmem_read_mapping_page_gfp_noclear(struct address_space *mapping,
+                                        pgoff_t index, gfp_t gfp_mask);
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
diff --git a/mm/shmem.c b/mm/shmem.c
index 9f70e02..66d3a61 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -98,6 +98,7 @@ enum sgp_type {
         SGP_DIRTY,      /* like SGP_CACHE, but set new page dirty */
         SGP_WRITE,      /* may exceed i_size, may allocate !Uptodate page */
         SGP_FALLOC,     /* like SGP_WRITE, but make existing page Uptodate */
+        SGP_CACHE_NOCLEAR, /* like SGP_CACHE, but don't clear alloced pages */
 };
 
 #ifdef CONFIG_TMPFS
@@ -1169,7 +1170,8 @@ clear:
          * it now, lest undo on failure cancel our earlier guarantee.
          */
         if (sgp != SGP_WRITE) {
-                clear_highpage(page);
+                if (sgp != SGP_CACHE_NOCLEAR)
+                        clear_highpage(page);
                 flush_dcache_page(page);
                 SetPageUptodate(page);
         }
@@ -2966,3 +2968,43 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 #endif
 }
 EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
+
+
+/**
+ * shmem_read_mapping_page_gfp_noclear - read into page cache, using specified
+ * page allocation flags. Do not clear the pages, in case of newly allocated page.
+ * It is the responsibility of caller to clear the pages returned by this function.
+ * @mapping:    the page's address_space
+ * @index:      the page index
+ * @gfp:        the page allocator flags to use if allocating
+ *
+ * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)",
+ * with any new page allocations done using the specified allocation flags.
+ * But read_cache_page_gfp() uses the ->readpage() method: which does not
+ * suit tmpfs, since it may have pages in swapcache, and needs to find those
+ * for itself; although drivers/gpu/drm i915 and ttm rely upon this support.
+ *
+ */
+struct page *shmem_read_mapping_page_gfp_noclear(struct address_space *mapping,
+                                        pgoff_t index, gfp_t gfp)
+{
+#ifdef CONFIG_SHMEM
+        struct inode *inode = mapping->host;
+        struct page *page;
+        int error;
+
+        BUG_ON(mapping->a_ops != &shmem_aops);
+        error = shmem_getpage_gfp(inode, index, &page, SGP_CACHE_NOCLEAR, gfp, NULL);
+        if (error)
+                page = ERR_PTR(error);
+        else
+                unlock_page(page);
+        return page;
+#else
+        /*
+         * The tiny !SHMEM case uses ramfs without swap
+         */
+        return read_cache_page_gfp(mapping, index, gfp);
+#endif
+}
+EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp_noclear);
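One note on the new shmem interface: shmem_read_mapping_page_gfp_noclear() shifts responsibility for clearing to the caller, so a caller that cannot guarantee a later GPU clear must clear on the CPU before the page can ever reach userspace. The following is only a sketch of that caller-side contract, not part of the patch; the helper name and the gpu_clear_queued parameter are hypothetical.

/* Illustration of the caller-side contract of the _noclear variant. */
static struct page *get_page_for_userspace(struct address_space *mapping,
                                           pgoff_t index, gfp_t gfp,
                                           bool gpu_clear_queued)
{
        struct page *page = shmem_read_mapping_page_gfp_noclear(mapping, index, gfp);

        if (IS_ERR(page))
                return page;

        /* If no blitter clear is guaranteed to run before userspace can map
         * this page, fall back to clearing it here to avoid leaking stale
         * data (the hole pointed out in the review above). */
        if (!gpu_clear_queued)
                clear_highpage(page);

        return page;
}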