
[2/2] intel: Add the caching logic for BOs created using create2 ioctl

Message ID 1402932739-16727-3-git-send-email-sourab.gupta@intel.com (mailing list archive)
State New, archived

Commit Message

sourab.gupta@intel.com June 16, 2014, 3:32 p.m. UTC
From: Sourab Gupta <sourab.gupta@intel.com>

This patch adds the caching logic for buffer objects created using the
create2 ioctl. The caching logic is in principle similar to the caching
employed for the earlier create ioctl.
Caching is not employed if the object is created in the stolen region,
if an offset is specified for the object, or if it is marked
I915_MADV_DONTNEED.

Signed-off-by: Sourab Gupta <sourab.gupta@intel.com>
---
 intel/intel_bufmgr_gem.c | 201 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 153 insertions(+), 48 deletions(-)
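For reviewers skimming the diff below, the gist of the new path can be condensed
into two small helpers: a gate that decides whether a create2 allocation is
eligible for the bucket cache, and the size rounding applied when it is. This is
a minimal standalone sketch and not part of the patch; all sketch_* names and
flag values are hypothetical stand-ins for the real I915_* definitions used in
drm_intel_gem_bo_alloc_create2(). A second sketch of the reuse policy follows
the patch.

/*
 * Sketch (not part of the patch): cache eligibility and size rounding for
 * create2 allocations.  sketch_* names and values are hypothetical.
 */
#include <stdbool.h>
#include <unistd.h>

#define SKETCH_PLACEMENT_STOLEN	1		/* stands in for I915_CREATE_PLACEMENT_STOLEN */
#define SKETCH_OFFSET_VALID	(1UL << 0)	/* stands in for I915_CREATE_OFFSET_VALID */
#define SKETCH_MADV_DONTNEED	1		/* stands in for I915_MADV_DONTNEED */

struct sketch_create2_params {
	unsigned int placement;
	unsigned long offset;
	unsigned int madvise;
};

/* Stolen, fixed-offset and DONTNEED objects bypass the cache entirely. */
static bool sketch_bo_is_cacheable(const struct sketch_create2_params *p)
{
	if (p->placement == SKETCH_PLACEMENT_STOLEN)
		return false;
	if (p->offset & SKETCH_OFFSET_VALID)
		return false;
	if (p->madvise == SKETCH_MADV_DONTNEED)
		return false;
	return true;
}

/* bucket_size == 0 means no cache bucket exists for this size: keep the
 * requested size but never drop below one page.  Otherwise round up to the
 * bucket size so the object can be reused later. */
static unsigned long sketch_round_bo_size(unsigned long size,
					  unsigned long bucket_size)
{
	unsigned long page = (unsigned long) getpagesize();

	if (bucket_size == 0)
		return size < page ? page : size;
	return bucket_size;
}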

Patch

diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 67430b9..4cefec2 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -781,62 +781,165 @@  drm_intel_gem_bo_alloc_create2(drm_intel_bufmgr *bufmgr,
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
 	drm_intel_bo_gem *bo_gem;
-	struct drm_i915_gem_create2 create;
-	int ret;
-
-	bo_gem = calloc(1, sizeof(*bo_gem));
-	if (!bo_gem)
-		return NULL;
-
-	bo_gem->bo.size = size;
+	unsigned int page_size = getpagesize();
+	struct drm_intel_gem_bo_bucket *bucket;
+	bool alloc_from_cache;
+	unsigned long bo_size;
+	bool for_render = false;
+	int ret, stride;
 
-	VG_CLEAR(create);
-	create.size = size;
-	if(params->offset & I915_CREATE_OFFSET_VALID) {
-		create.offset = params->offset;
-		create.context = params->context;
-	}
+	if (params->flags & BO_ALLOC_FOR_RENDER)
+		for_render = true;
 
 	if (params->tiling_mode == I915_TILING_NONE)
-		create.stride = 0;
+		stride = 0;
 	else
-		create.stride = params->stride;
+		stride = params->stride;
 
-	create.placement = params->placement;
-	create.domain = params->cache_domain;
-	create.caching = params->caching;
-	create.tiling_mode = params->tiling_mode;
-	create.madvise = params->madvise;
+	if (params->placement == I915_CREATE_PLACEMENT_STOLEN ||
+			params->offset & I915_CREATE_OFFSET_VALID ||
+			params->madvise == I915_MADV_DONTNEED) {
+		bucket = NULL;
+		bo_size = size;
+	} else {
+		/* Round the allocated size up to a power of two
+		 * number of pages.
+		 */
+		bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
+		/* If we don't have caching at this size, don't actually
+		 * round the allocation up.
+		 */
+		if (bucket == NULL) {
+			bo_size = size;
+			if (bo_size < page_size)
+				bo_size = page_size;
+		} else {
+			bo_size = bucket->size;
+		}
+	}
+	pthread_mutex_lock(&bufmgr_gem->lock);
+	/* Get a buffer out of the cache if available */
+retry:
+	alloc_from_cache = false;
+	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
+		if (for_render) {
+			/* Allocate new render-target BOs from the tail (MRU)
+			 * of the list, as it will likely be hot in the GPU
+			 * cache and in the aperture for us.
+			 */
+			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+					      bucket->head.prev, head);
+			DRMLISTDEL(&bo_gem->head);
+			alloc_from_cache = true;
+		} else {
+			/* For non-render-target BOs (where we're probably
+			 * going to map it first thing in order to fill it
+			 * with data), check if the last BO in the cache is
+			 * unbusy, and only reuse in that case. Otherwise,
+			 * allocating a new buffer is probably faster than
+			 * waiting for the GPU to finish.
+			 */
+			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+					      bucket->head.next, head);
+			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
+				alloc_from_cache = true;
+				DRMLISTDEL(&bo_gem->head);
+			}
+		}
+		if (alloc_from_cache) {
+			struct drm_i915_gem_caching bo_cache;
+			struct drm_i915_gem_set_domain set_domain;
+			/* We need to call set_caching, set_domain, set_tiling
+			 * and madvise here, as these parameters may differ
+			 * for the BO found in the cache.
+			 */
+			if (!drm_intel_gem_bo_madvise_internal
+					(bufmgr_gem, bo_gem,
+					 I915_MADV_WILLNEED)) {
+				drm_intel_gem_bo_free(&bo_gem->bo);
+				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
+						bucket);
+				goto retry;
+			}
 
-	ret = drmIoctl(bufmgr_gem->fd,
-			DRM_IOCTL_I915_GEM_CREATE2,
-			&create);
-	bo_gem->gem_handle = create.handle;
-	bo_gem->bo.handle = bo_gem->gem_handle;
-	if (ret != 0) {
-		free(bo_gem);
-		return NULL;
+			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
+						params->tiling_mode,
+						stride)) {
+				drm_intel_gem_bo_free(&bo_gem->bo);
+				goto retry;
+			}
+			VG_CLEAR(bo_cache);
+			bo_cache.handle = bo_gem->gem_handle;
+			bo_cache.caching = params->caching;
+			if (drmIoctl(bufmgr_gem->fd,
+						DRM_IOCTL_I915_GEM_SET_CACHING,
+						&bo_cache)) {
+				drm_intel_gem_bo_free(&bo_gem->bo);
+				goto retry;
+			}
+			VG_CLEAR(set_domain);
+			set_domain.handle = bo_gem->gem_handle;
+			set_domain.read_domains = params->cache_domain;
+			set_domain.write_domain = params->cache_domain;
+			if (drmIoctl(bufmgr_gem->fd,
+						DRM_IOCTL_I915_GEM_SET_DOMAIN,
+						&set_domain)) {
+				drm_intel_gem_bo_free(&bo_gem->bo);
+				goto retry;
+			}
+		}
 	}
-	bo_gem->bo.bufmgr = bufmgr;
+	pthread_mutex_unlock(&bufmgr_gem->lock);
 
-	/* We have to call the set_tiling ioctl, as create2 ioctl
-	 * doesn't return the swizzle mode
-	 * TODO: Is this required? Can the functionality be put in
-	 * create2 ioctl?
-	 */
-	bo_gem->tiling_mode = create.tiling_mode;
-	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
-	bo_gem->stride = create.stride;
+	if (!alloc_from_cache) {
+		struct drm_i915_gem_create2 create;
+		bo_gem = calloc(1, sizeof(*bo_gem));
+		if (!bo_gem)
+			return NULL;
+		bo_gem->bo.size = bo_size;
+		VG_CLEAR(create);
+		create.size = bo_size;
+		if (params->offset & I915_CREATE_OFFSET_VALID) {
+			create.offset = params->offset;
+			create.context = params->context;
+		}
 
-	if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
-				create.tiling_mode,
-				create.stride)) {
-		drm_intel_gem_bo_free(&bo_gem->bo);
-		return NULL;
-	}
+		create.stride = stride;
+		create.placement = params->placement;
+		create.domain = params->cache_domain;
+		create.caching = params->caching;
+		create.tiling_mode = params->tiling_mode;
+		create.madvise = params->madvise;
 
-	DRMINITLISTHEAD(&bo_gem->name_list);
-	DRMINITLISTHEAD(&bo_gem->vma_list);
+		ret = drmIoctl(bufmgr_gem->fd,
+				DRM_IOCTL_I915_GEM_CREATE2,
+				&create);
+		bo_gem->gem_handle = create.handle;
+		bo_gem->bo.handle = bo_gem->gem_handle;
+		if (ret != 0) {
+			free(bo_gem);
+			return NULL;
+		}
+		bo_gem->bo.bufmgr = bufmgr;
+
+		/* We have to call the set_tiling ioctl, as the create2 ioctl
+		 * doesn't return the swizzle mode.
+		 * TODO: Can create2 be extended to return it?
+		 */
+		bo_gem->tiling_mode = create.tiling_mode;
+		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+		bo_gem->stride = stride;
+
+		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
+					create.tiling_mode,
+					stride)) {
+			drm_intel_gem_bo_free(&bo_gem->bo);
+			return NULL;
+		}
+
+		DRMINITLISTHEAD(&bo_gem->name_list);
+		DRMINITLISTHEAD(&bo_gem->vma_list);
+	}
 
 	bo_gem->name = params->name;
 	atomic_set(&bo_gem->refcount, 1);
@@ -844,8 +947,10 @@  drm_intel_gem_bo_alloc_create2(drm_intel_bufmgr *bufmgr,
 	bo_gem->reloc_tree_fences = 0;
 	bo_gem->used_as_reloc_target = false;
 	bo_gem->has_error = false;
-	/* set reusable to false, as caching is not employed currently*/
-	bo_gem->reusable = false;
+	if (params->placement == I915_CREATE_PLACEMENT_STOLEN)
+		bo_gem->reusable = false;
+	else
+		bo_gem->reusable = true;
 	bo_gem->aub_annotations = NULL;
 	bo_gem->aub_annotation_count = 0;
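
For completeness, the reuse policy in the retry loop above mirrors the existing
drm_intel_gem_bo_alloc_internal() behaviour: render targets are taken from the
MRU end of the bucket (likely still hot in the GPU cache and aperture), while
other BOs only reuse the LRU entry if it is already idle. Below is a minimal
sketch with hypothetical sketch_* names; the patch itself uses the DRMLIST*
macros and drm_intel_gem_bo_busy().

/* Sketch (not part of the patch) of the bucket reuse policy. */
#include <stdbool.h>
#include <stddef.h>

struct sketch_bo {
	struct sketch_bo *prev, *next;	/* links within a cache bucket */
	bool busy;			/* still in use by the GPU? */
};

struct sketch_bucket {
	struct sketch_bo head;		/* sentinel: head.next is LRU, head.prev is MRU */
};

static struct sketch_bo *
sketch_pick_from_bucket(struct sketch_bucket *b, bool for_render)
{
	if (b->head.next == &b->head)	/* bucket is empty */
		return NULL;

	/* Render targets: take the most recently freed BO, as it is likely
	 * still hot in the GPU cache and bound in the aperture. */
	if (for_render)
		return b->head.prev;

	/* CPU-filled BOs: only reuse the oldest entry if it is idle;
	 * otherwise a fresh allocation beats waiting on the GPU. */
	return b->head.next->busy ? NULL : b->head.next;
}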