diff mbox

[1/3] Introduce & use new SG page iterators

Message ID 1462786413-3899-1-git-send-email-david.s.gordon@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dave Gordon May 9, 2016, 9:33 a.m. UTC
The existing for_each_sg_page() iterator is somewhat inconvenient; in
particular, the 'nents' parameters is not expressed in any useful way,
and so there is no way to get a precise (maximum) number of iterations
(and therefore pages) without knowing that the SGL has been constructed
in a specific way.

So here we introduce for_each_sgt_page(), which simplifies the
parameters down to just two: the iterator and an sg_table to iterate
over. This is ideal where we simply need to process *all* the pages,
regardless of how many there might be (e.g. put_pages()).

For more sophisticated uses, for_each_sgt_range() adds a starting (page)
offset and a maximum number of iterations to be performed. This fits the
usage required when processing only a subrange of the pages, or to avoid
any risk of overflow when filling a bounded array with per-page data.

Nearly all uses of for_each_sg_page() are then converted to the new
simpler macros.

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Imre Deak <imre.deak@intel.com>
Cc: linux-kernel@vger.kernel.org
---
 drivers/gpu/drm/i915/i915_cmd_parser.c  |  8 ++++----
 drivers/gpu/drm/i915/i915_gem.c         | 10 ++++------
 drivers/gpu/drm/i915/i915_gem_fence.c   |  5 +++--
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 20 +++++++++-----------
 drivers/gpu/drm/i915/i915_gem_userptr.c |  2 +-
 include/linux/scatterlist.h             | 21 +++++++++++++++++++++
 lib/scatterlist.c                       | 26 ++++++++++++++++++++++++++
 7 files changed, 68 insertions(+), 24 deletions(-)

Comments

Chris Wilson May 9, 2016, 9:51 a.m. UTC | #1
On Mon, May 09, 2016 at 10:33:31AM +0100, Dave Gordon wrote:
> The existing for_each_sg_page() iterator is somewhat inconvenient; in
> particular, the 'nents' parameters is not expressed in any useful way,
> and so there is no way to get a precise (maximum) number of iterations
> (and therefore pages) without knowing that the SGL has been constructed
> in a specific way.
> 
> So here we introduce for_each_sgt_page(), which simplifies the
> parameters down to just two: the iterator and an sg_table to iterate
> over. This is ideal where we simply need to process *all* the pages,
> regardless of how many there might be (e.g. put_pages()).

It seems to be a lot of churn for something that doesn't address my core
complaint with the iterator: it is too slow and *is* one of the
rate-limiting steps in some heavy workloads (games where we have to
rewrite the page tables frequently as their textures do not fit within
the GTT).
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index c3a7603..4f08ab0 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -946,11 +946,11 @@  static u32 *vmap_batch(struct drm_i915_gem_object *obj,
 	}
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) {
+	for_each_sgt_page_range(&sg_iter, obj->pages, first_page, npages)
 		pages[i++] = sg_page_iter_page(&sg_iter);
-		if (i == npages)
-			break;
-	}
+
+	if (WARN_ON(i != npages))
+		goto finish;
 
 	addr = vmap(pages, i, 0, PAGE_KERNEL);
 	if (addr == NULL) {
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a88e6c9..03b2ed3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -611,8 +611,7 @@  int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 
 	offset = args->offset;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
-			 offset >> PAGE_SHIFT) {
+	for_each_sgt_page_range(&sg_iter, obj->pages, offset >> PAGE_SHIFT, ~0u) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 
 		if (remain <= 0)
@@ -935,8 +934,7 @@  int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 	offset = args->offset;
 	obj->dirty = 1;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
-			 offset >> PAGE_SHIFT) {
+	for_each_sgt_page_range(&sg_iter, obj->pages, offset >> PAGE_SHIFT, ~0u) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 		int partial_cacheline_write;
 
@@ -2184,7 +2182,7 @@  static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	if (obj->madv == I915_MADV_DONTNEED)
 		obj->dirty = 0;
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 
 		if (obj->dirty)
@@ -2340,7 +2338,7 @@  static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 
 err_pages:
 	sg_mark_end(sg);
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
+	for_each_sgt_page(&sg_iter, st)
 		put_page(sg_page_iter_page(&sg_iter));
 	sg_free_table(st);
 	kfree(st);
diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
index a2b938e..4fb06f6 100644
--- a/drivers/gpu/drm/i915/i915_gem_fence.c
+++ b/drivers/gpu/drm/i915/i915_gem_fence.c
@@ -752,7 +752,7 @@  void i915_gem_restore_fences(struct drm_device *dev)
 		return;
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 		char new_bit_17 = page_to_phys(page) >> 17;
 		if ((new_bit_17 & 0x1) !=
@@ -790,7 +790,8 @@  void i915_gem_restore_fences(struct drm_device *dev)
 	}
 
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
 			__set_bit(i, obj->bit_17);
 		else
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d284b17..7296d02 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1844,7 +1844,7 @@  static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct sg_page_iter sg_iter;
 
 	pt_vaddr = NULL;
-	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, pages) {
 		if (pt_vaddr == NULL)
 			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
 
@@ -2371,7 +2371,7 @@  static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+	for_each_sgt_page(&sg_iter, st) {
 		addr = sg_dma_address(sg_iter.sg) +
 			(sg_iter.sg_pgoffset << PAGE_SHIFT);
 		gen8_set_pte(&gtt_entries[i],
@@ -2449,7 +2449,7 @@  static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 
 	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
 
-	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+	for_each_sgt_page(&sg_iter, st) {
 		addr = sg_page_iter_dma_address(&sg_iter);
 		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
 		i++;
@@ -3384,6 +3384,7 @@  struct i915_vma *
 intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
 			  struct drm_i915_gem_object *obj)
 {
+	unsigned int n_pages = obj->base.size / PAGE_SIZE;
 	unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height;
 	unsigned int size_pages_uv;
 	struct sg_page_iter sg_iter;
@@ -3395,7 +3396,7 @@  struct i915_vma *
 	int ret = -ENOMEM;
 
 	/* Allocate a temporary list of source pages for random access. */
-	page_addr_list = drm_malloc_gfp(obj->base.size / PAGE_SIZE,
+	page_addr_list = drm_malloc_gfp(n_pages,
 					sizeof(dma_addr_t),
 					GFP_TEMPORARY);
 	if (!page_addr_list)
@@ -3418,11 +3419,10 @@  struct i915_vma *
 
 	/* Populate source page list from the object. */
 	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
-		i++;
-	}
+	for_each_sgt_page_range(&sg_iter, obj->pages, 0, n_pages)
+		page_addr_list[i++] = sg_page_iter_dma_address(&sg_iter);
 
+	WARN_ON(i != n_pages);
 	st->nents = 0;
 	sg = st->sgl;
 
@@ -3488,9 +3488,7 @@  struct i915_vma *
 
 	sg = st->sgl;
 	st->nents = 0;
-	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
-		view->params.partial.offset)
-	{
+	for_each_sgt_page_range(&obj_sg_iter, obj->pages, view->params.partial.offset, ~0u) {
 		if (st->nents >= view->params.partial.size)
 			break;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 32d9726..3d5e4a9 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -716,7 +716,7 @@  struct get_pages_work {
 
 	i915_gem_gtt_finish_object(obj);
 
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
+	for_each_sgt_page(&sg_iter, obj->pages) {
 		struct page *page = sg_page_iter_page(&sg_iter);
 
 		if (obj->dirty)
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 556ec1e..2755f16 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -305,6 +305,7 @@  struct sg_page_iter {
 						 * next step */
 };
 
+bool __sgt_page_iter_next(struct sg_page_iter *piter);
 bool __sg_page_iter_next(struct sg_page_iter *piter);
 void __sg_page_iter_start(struct sg_page_iter *piter,
 			  struct scatterlist *sglist, unsigned int nents,
@@ -339,6 +340,26 @@  static inline dma_addr_t sg_page_iter_dma_address(struct sg_page_iter *piter)
 	for (__sg_page_iter_start((piter), (sglist), (nents), (pgoffset)); \
 	     __sg_page_iter_next(piter);)
 
+/**
+ * for_each_sgt_page - iterate over the pages of the given sg_table
+ * @piter:	page iterator to hold current page, sg, sg_pgoffset
+ * @sgt:	sg_table to iterate over
+ */
+#define for_each_sgt_page(piter, sgt)					\
+	for (__sg_page_iter_start((piter), (sgt)->sgl, (~0u), (0)); \
+	     __sgt_page_iter_next(piter);)
+
+/**
+ * for_each_sgt_page_range - iterate over some pages of the given sg_table
+ * @piter:	page iterator to hold current page, sg, sg_pgoffset
+ * @sgt:	sg_table to iterate over
+ * @first:	starting page offset
+ * @max:	maxiumum number of pages to iterate over
+ */
+#define for_each_sgt_page_range(piter, sgt, first, max)			\
+	for (__sg_page_iter_start((piter), (sgt)->sgl, (max), (first)); \
+	     __sgt_page_iter_next(piter);)
+
 /*
  * Mapping sg iterator
  *
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 004fc70..ceeea63 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -469,6 +469,32 @@  bool __sg_page_iter_next(struct sg_page_iter *piter)
 }
 EXPORT_SYMBOL(__sg_page_iter_next);
 
+/*
+ * Note: we use the same iterator structure as __sg_page_iter_next()
+ * above, but interpret the 'nents' member differently. Here it is
+ * an upper bound on the number of iterations to be performed, not
+ * the number of internal list elements to be traversed.
+ */
+bool __sgt_page_iter_next(struct sg_page_iter *piter)
+{
+	if (!piter->__nents || !piter->sg)
+		return false;
+
+	piter->sg_pgoffset += piter->__pg_advance;
+	piter->__pg_advance = 1;
+
+	while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
+		piter->sg_pgoffset -= sg_page_count(piter->sg);
+		piter->sg = sg_next(piter->sg);
+		if (!piter->sg)
+			return false;
+	}
+
+	--piter->__nents;
+	return true;
+}
+EXPORT_SYMBOL(__sgt_page_iter_next);
+
 /**
  * sg_miter_start - start mapping iteration over a sg list
  * @miter: sg mapping iter to be started