
[v4,3/3] drm/i915: optimise i915_gem_object_vmap_range() for small objects

Message ID 1456154308-9342-4-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived

Commit Message

Dave Gordon Feb. 22, 2016, 3:18 p.m. UTC
Now that we use this function for ringbuffers and other "small" objects,
it's worth avoiding an extra kmalloc()/kfree() cycle if the page array
is small enough to put on the stack. Here we've chosen an arbitrary
cutoff of 32 (4k) pages, which is big enough for a ringbuffer (4 pages)
or a context image (currently up to 22 pages).

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Alex Dai <yu.dai@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)
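
For illustration, a minimal sketch of the pattern the patch applies: keep a small
fixed array on the stack and fall back to a heap allocation only when the caller
needs more slots. Function and variable names here are hypothetical, not the actual
i915 code; on a 64-bit kernel the 32-pointer array costs 32 * 8 = 256 bytes of stack.

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Hypothetical helper: vmap the first npages pages of a scatterlist. */
static void *vmap_sg_example(struct sg_table *st, unsigned int npages)
{
	struct page *stack_pages[32];	/* enough for ringbuffers and contexts */
	struct page **pages = stack_pages;
	struct sg_page_iter sg_iter;
	unsigned int i = 0;
	void *addr;

	if (npages > ARRAY_SIZE(stack_pages)) {
		/* Too big for the stack: allocate a temporary array instead. */
		pages = kmalloc_array(npages, sizeof(*pages), GFP_TEMPORARY);
		if (pages == NULL)
			return NULL;
	}

	/* Gather page pointers; the real code walks the object's sg list. */
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		pages[i++] = sg_page_iter_page(&sg_iter);
		if (i == npages)
			break;
	}

	addr = vmap(pages, npages, 0, PAGE_KERNEL);

	if (pages != stack_pages)	/* only free what we actually allocated */
		kfree(pages);

	return addr;
}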

Comments

Chris Wilson Feb. 23, 2016, 10:16 a.m. UTC | #1
On Mon, Feb 22, 2016 at 03:18:28PM +0000, Dave Gordon wrote:
> Now that we use this function for ringbuffers and other "small" objects,
> it's worth avoiding an extra kmalloc()/kfree() cycle if the page array
> is small enough to put on the stack. Here we've chosen an arbitrary
> cutoff of 32 (4k) pages, which is big enough for a ringbuffer (4 pages)
> or a context image (currently up to 22 pages).
> 
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Alex Dai <yu.dai@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem.c | 18 +++++++++++++-----
>  1 file changed, 13 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 14942cf..effb69b 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2422,6 +2422,7 @@ void *i915_gem_object_vmap_range(struct drm_i915_gem_object *obj,
>  				 unsigned int npages)
>  {
>  	struct sg_page_iter sg_iter;
> +	struct page *mypages[32];

Use stack or stack_pages; that's the pattern we've used elsewhere.
Though pages_on_stack would be more in line with the rest of the kernel.

>  	struct page **pages;
>  	void *addr;
>  	int i;
> @@ -2431,10 +2432,16 @@ void *i915_gem_object_vmap_range(struct drm_i915_gem_object *obj,
>  		return NULL;
>  	}
>  
> -	pages = drm_malloc_ab(npages, sizeof(*pages));
> -	if (pages == NULL) {
> -		DRM_DEBUG_DRIVER("Failed to get space for pages\n");
> -		return NULL;
> +	if (npages <= ARRAY_SIZE(mypages))
> +		pages = mypages;

If one branch requires braces, add them to all.

> +	else {
> +		pages = kmalloc(npages*sizeof(*pages), GFP_TEMPORARY | __GFP_NOWARN);

drm_malloc_gfp

Otherwise, seems an ok idea. The deepest vmap is the
ioctl/execbuffer/cmdparser path, for which using a further 256 bytes of stack
is acceptable.
-Chris
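
Taken together, the suggestions above would make the allocation branch look roughly
like the sketch below. This is illustrative only, not the committed fix, and it
assumes the drm_malloc_gfp() helper Chris refers to takes (nmemb, size) like
drm_malloc_ab() plus a gfp_t argument:

	struct page *stack_pages[32];
	struct page **pages = stack_pages;

	/* Inverting the test leaves a single braced branch. */
	if (npages > ARRAY_SIZE(stack_pages)) {
		/* Assumed: drm_malloc_gfp() falls back to vmalloc internally. */
		pages = drm_malloc_gfp(npages, sizeof(*pages),
				       GFP_TEMPORARY | __GFP_NOWARN);
		if (pages == NULL) {
			DRM_DEBUG_DRIVER("Failed to get space for pages\n");
			return NULL;
		}
	}

	/* ... fill pages[] and vmap() as before ... */

	if (pages != stack_pages)
		drm_free_large(pages);	/* handles both kmalloc and vmalloc memory */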

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 14942cf..effb69b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2422,6 +2422,7 @@  void *i915_gem_object_vmap_range(struct drm_i915_gem_object *obj,
 				 unsigned int npages)
 {
 	struct sg_page_iter sg_iter;
+	struct page *mypages[32];
 	struct page **pages;
 	void *addr;
 	int i;
@@ -2431,10 +2432,16 @@  void *i915_gem_object_vmap_range(struct drm_i915_gem_object *obj,
 		return NULL;
 	}
 
-	pages = drm_malloc_ab(npages, sizeof(*pages));
-	if (pages == NULL) {
-		DRM_DEBUG_DRIVER("Failed to get space for pages\n");
-		return NULL;
+	if (npages <= ARRAY_SIZE(mypages))
+		pages = mypages;
+	else {
+		pages = kmalloc(npages*sizeof(*pages), GFP_TEMPORARY | __GFP_NOWARN);
+		if (pages == NULL)
+			pages = drm_malloc_ab(npages, sizeof(*pages));
+		if (pages == NULL) {
+			DRM_DEBUG_DRIVER("Failed to get space for pages\n");
+			return NULL;
+		}
 	}
 
 	i = 0;
@@ -2447,7 +2454,8 @@  void *i915_gem_object_vmap_range(struct drm_i915_gem_object *obj,
 	addr = vmap(pages, npages, 0, PAGE_KERNEL);
 	if (addr == NULL)
 		DRM_DEBUG_DRIVER("Failed to vmap pages\n");
-	drm_free_large(pages);
+	if (pages != mypages)
+		drm_free_large(pages);
 
 	return addr;
 }