Message ID | 1461087608-34346-1-git-send-email-david.s.gordon@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Apr 19, 2016 at 06:40:07PM +0100, Dave Gordon wrote: > From: Alex Dai <yu.dai@intel.com> > > The recently-added i915_gem_object_pin_map() can be further optimised > for "small" objects. To facilitate this, and simplify the error paths > before adding the new code, this patch pulls out the "mapping" part of > the operation (involving local allocations which must be undone before > return) into its own subfunction. > > The next patch will then insert the new optimisation into the middle of > the now-separated subfunction. > > This reorganisation will probably not affect the generated code, as the > compiler will most likely inline it anyway, but it makes the logical > structure a bit clearer and easier to modify. > > Signed-off-by: Dave Gordon <david.s.gordon@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem.c | 61 +++++++++++++++++++++++++++-------------- > 1 file changed, 40 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 6ce2c31..fc42be0 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2396,6 +2396,45 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) > return 0; > } > > +/* The 'mapping' part of i915_gem_object_pin_map() below */ > +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) > +{ > + unsigned long n_pages = obj->base.size >> PAGE_SHIFT; > + struct scatterlist *sg = obj->pages->sgl; > + struct sg_page_iter sg_iter; > + struct page **pages; > + unsigned long i = 0; > + void *addr = NULL; > + > + /* A single page can always be kmapped */ > + if (n_pages == 1) > + return kmap(sg_page(sg)); > + > + pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); > + if (pages == NULL) { > + DRM_DEBUG_DRIVER("Failed to get space for pages\n"); > + return NULL; > + } > + > + for_each_sg_page(sg, 
&sg_iter, n_pages, 0) { > + pages[i] = sg_page_iter_page(&sg_iter);

Just pages[i++] = sg_page_iter_page(&sg_iter);

> + if (++i == n_pages) { > + addr = vmap(pages, n_pages, 0, PAGE_KERNEL); > + break; > + } > + } > + > + /* We should have got here via the 'break' above */ > + WARN_ON(i != n_pages); > + if (addr == NULL) > + DRM_DEBUG_DRIVER("Failed to vmap pages\n");

As this is a very, very confused loop. -Chris
On 19/04/16 20:50, Chris Wilson wrote: > On Tue, Apr 19, 2016 at 06:40:07PM +0100, Dave Gordon wrote: >> From: Alex Dai <yu.dai@intel.com> >> >> The recently-added i915_gem_object_pin_map() can be further optimised >> for "small" objects. To facilitate this, and simplify the error paths >> before adding the new code, this patch pulls out the "mapping" part of >> the operation (involving local allocations which must be undone before >> return) into its own subfunction. >> >> The next patch will then insert the new optimisation into the middle of >> the now-separated subfunction. >> >> This reorganisation will probably not affect the generated code, as the >> compiler will most likely inline it anyway, but it makes the logical >> structure a bit clearer and easier to modify. >> >> Signed-off-by: Dave Gordon <david.s.gordon@intel.com> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> --- >> drivers/gpu/drm/i915/i915_gem.c | 61 +++++++++++++++++++++++++++-------------- >> 1 file changed, 40 insertions(+), 21 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >> index 6ce2c31..fc42be0 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -2396,6 +2396,45 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) >> return 0; >> } >> >> +/* The 'mapping' part of i915_gem_object_pin_map() below */ >> +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) >> +{ >> + unsigned long n_pages = obj->base.size >> PAGE_SHIFT; >> + struct scatterlist *sg = obj->pages->sgl; >> + struct sg_page_iter sg_iter; >> + struct page **pages; >> + unsigned long i = 0; >> + void *addr = NULL; >> + >> + /* A single page can always be kmapped */ >> + if (n_pages == 1) >> + return kmap(sg_page(sg)); >> + >> + pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); >> + if (pages == NULL) { >> + 
DRM_DEBUG_DRIVER("Failed to get space for pages\n"); >> + return NULL; >> + } >> + >> + for_each_sg_page(sg, &sg_iter, n_pages, 0) { >> + pages[i] = sg_page_iter_page(&sg_iter); > > Just pages[i++] = sg_page_iter_page(&sg_iter); > >> + if (++i == n_pages) { >> + addr = vmap(pages, n_pages, 0, PAGE_KERNEL); >> + break; >> + } >> + } >> + >> + /* We should have got here via the 'break' above */ >> + WARN_ON(i != n_pages); >> + if (addr == NULL) >> + DRM_DEBUG_DRIVER("Failed to vmap pages\n"); > > As this is a very, very confused loop. > -Chris I tried that approach before, but it was actually more difficult to have tidy error-checking that way (remembering that we must always free the pages array, so don't really want an early return). Here, putting the vmap() inside the final iteration of the loop means that we automatically leave "addr" as NULL if we don't reach the expected count. The subsequent WARN_ON() tells us that this has happened, but we don't then have to base any further branching on this condition (i != n_pages) as "addr" is already right. (Obviously, we don't want to do the vmap() if we have exited the loop with the wrong page count). I'll post the other version, but I think the post-loop checking is messier, to such an extent that this way round is simpler overall. .Dave.
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6ce2c31..fc42be0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2396,6 +2396,45 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) return 0; } +/* The 'mapping' part of i915_gem_object_pin_map() below */ +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) +{ + unsigned long n_pages = obj->base.size >> PAGE_SHIFT; + struct scatterlist *sg = obj->pages->sgl; + struct sg_page_iter sg_iter; + struct page **pages; + unsigned long i = 0; + void *addr = NULL; + + /* A single page can always be kmapped */ + if (n_pages == 1) + return kmap(sg_page(sg)); + + pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); + if (pages == NULL) { + DRM_DEBUG_DRIVER("Failed to get space for pages\n"); + return NULL; + } + + for_each_sg_page(sg, &sg_iter, n_pages, 0) { + pages[i] = sg_page_iter_page(&sg_iter); + if (++i == n_pages) { + addr = vmap(pages, n_pages, 0, PAGE_KERNEL); + break; + } + } + + /* We should have got here via the 'break' above */ + WARN_ON(i != n_pages); + if (addr == NULL) + DRM_DEBUG_DRIVER("Failed to vmap pages\n"); + + drm_free_large(pages); + + return addr; +} + +/* get, pin, and map the pages of the object into kernel space */ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) { int ret; @@ -2409,27 +2448,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) i915_gem_object_pin_pages(obj); if (obj->mapping == NULL) { - struct page **pages; - - pages = NULL; - if (obj->base.size == PAGE_SIZE) - obj->mapping = kmap(sg_page(obj->pages->sgl)); - else - pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT, - sizeof(*pages), - GFP_TEMPORARY); - if (pages != NULL) { - struct sg_page_iter sg_iter; - int n; - - n = 0; - for_each_sg_page(obj->pages->sgl, &sg_iter, - obj->pages->nents, 0) - pages[n++] = sg_page_iter_page(&sg_iter); - - obj->mapping = vmap(pages, n, 
0, PAGE_KERNEL); - drm_free_large(pages); - } + obj->mapping = i915_gem_object_map(obj); if (obj->mapping == NULL) { i915_gem_object_unpin_pages(obj); return ERR_PTR(-ENOMEM);