diff mbox series

dm-crypt: allocate compound pages if possible

Message ID alpine.LRH.2.21.2302161245210.18393@file01.intranet.prod.int.rdu2.redhat.com (mailing list archive)
State New, archived
Headers show
Series dm-crypt: allocate compound pages if possible | expand

Commit Message

Mikulas Patocka Feb. 16, 2023, 5:47 p.m. UTC
It was reported that allocating pages for the write buffer in dm-crypt
causes measurable overhead [1].

This patch changes dm-crypt to allocate compound pages if they are
available. If not, we fall back to the mempool.

[1] https://listman.redhat.com/archives/dm-devel/2023-February/053284.html

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

---
 drivers/md/dm-crypt.c |   50 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 8 deletions(-)

Comments

Matthew Wilcox Feb. 16, 2023, 7:04 p.m. UTC | #1
On Thu, Feb 16, 2023 at 12:47:08PM -0500, Mikulas Patocka wrote:
> +		while (order > 0) {
> +			page = alloc_pages(gfp_mask
> +				| __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN, order);

... | __GFP_COMP

>  		page = mempool_alloc(&cc->page_pool, gfp_mask);
>  		if (!page) {
>  			crypt_free_buffer_pages(cc, clone);
>  			bio_put(clone);
>  			gfp_mask |= __GFP_DIRECT_RECLAIM;
> +			order = 0;
>  			goto retry;
>  		}
>  
> -		len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
> -
> -		bio_add_page(clone, page, len, 0);
> +have_pages:
> +		page->compound_order = order;

No.  You'll corrupt the next page if page is order-0, which it is if it
came from the mempool.  Also we've deleted page->compound_order in -next
so you can't make this mistake.  Using __GFP_COMP will set this field
for you, so you can just drop this line.

> -		remaining_size -= len;
> +		for (o = 0; o < 1U << order; o++) {
> +			unsigned len = min((unsigned)PAGE_SIZE, remaining_size);
> +			bio_add_page(clone, page, len, 0);
> +			remaining_size -= len;
> +			page++;

You can add multiple pages at once, whether they're compound or not.  So
replace this entire loop with:

		bio_add_page(clone, page, remaining_size, 0);

> @@ -1711,10 +1732,23 @@ static void crypt_free_buffer_pages(stru
>  {
>  	struct bio_vec *bv;
>  	struct bvec_iter_all iter_all;
> +	unsigned skip_entries = 0;
>  
>  	bio_for_each_segment_all(bv, clone, iter_all) {
> -		BUG_ON(!bv->bv_page);
> -		mempool_free(bv->bv_page, &cc->page_pool);
> +		unsigned order;
> +		struct page *page = bv->bv_page;
> +		BUG_ON(!page);
> +		if (skip_entries) {
> +			skip_entries--;
> +			continue;
> +		}
> +		order = page->compound_order;
> +		if (order) {
> +			__free_pages(page, order);
> +			skip_entries = (1U << order) - 1;
> +		} else {
> +			mempool_free(page, &cc->page_pool);
> +		}

You can simplify this by using the folio code.

	struct folio_iter fi;

	bio_for_each_folio_all(fi, clone) {
		if (folio_test_large(fi.folio))
			folio_put(fi.folio);
		else
			mempool_free(&fi.folio->page, &cc->page_pool);
	}

(further work would actually convert this driver to use folios instead
of pages)
Mikulas Patocka Feb. 16, 2023, 9:19 p.m. UTC | #2
On Thu, 16 Feb 2023, Matthew Wilcox wrote:

> > -		len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
> > -
> > -		bio_add_page(clone, page, len, 0);
> > +have_pages:
> > +		page->compound_order = order;
> 
> No.  You'll corrupt the next page if page is order-0, which it is if it
> came from the mempool.  Also we've deleted page->compound_order in -next
> so you can't make this mistake.  Using __GFP_COMP will set this field
> for you, so you can just drop this line.

OK

> > -		remaining_size -= len;
> > +		for (o = 0; o < 1U << order; o++) {
> > +			unsigned len = min((unsigned)PAGE_SIZE, remaining_size);
> > +			bio_add_page(clone, page, len, 0);
> > +			remaining_size -= len;
> > +			page++;
> 
> You can add multiple pages at once, whether they're compound or not.  So
> replace this entire loop with:
> 
> 		bio_add_page(clone, page, remaining_size, 0);

This should be min((unsigned)PAGE_SIZE << order, remaining_size), because 
we may allocate less than remaining_size.

> > @@ -1711,10 +1732,23 @@ static void crypt_free_buffer_pages(stru
> >  {
> >  	struct bio_vec *bv;
> >  	struct bvec_iter_all iter_all;
> > +	unsigned skip_entries = 0;
> >  
> >  	bio_for_each_segment_all(bv, clone, iter_all) {
> > -		BUG_ON(!bv->bv_page);
> > -		mempool_free(bv->bv_page, &cc->page_pool);
> > +		unsigned order;
> > +		struct page *page = bv->bv_page;
> > +		BUG_ON(!page);
> > +		if (skip_entries) {
> > +			skip_entries--;
> > +			continue;
> > +		}
> > +		order = page->compound_order;
> > +		if (order) {
> > +			__free_pages(page, order);
> > +			skip_entries = (1U << order) - 1;
> > +		} else {
> > +			mempool_free(page, &cc->page_pool);
> > +		}
> 
> You can simplify this by using the folio code.
> 
> 	struct folio_iter fi;
> 
> 	bio_for_each_folio_all(fi, clone) {
> 		if (folio_test_large(fi.folio))
> 			folio_put(fi.folio);
> 		else
> 			mempool_free(&fi.folio->page, &cc->page_pool);
> 	}

OK. I'm sending version 2 of the patch.

> (further work would actually convert this driver to use folios instead
> of pages)

Mikulas
diff mbox series

Patch

Index: linux-2.6/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-crypt.c	2023-01-20 13:22:38.000000000 +0100
+++ linux-2.6/drivers/md/dm-crypt.c	2023-02-16 18:33:42.000000000 +0100
@@ -1657,6 +1657,9 @@  static void crypt_free_buffer_pages(stru
  * In order to not degrade performance with excessive locking, we try
  * non-blocking allocations without a mutex first but on failure we fallback
  * to blocking allocations with a mutex.
+ *
+ * In order to reduce allocation overhead, we try to allocate compound pages in
+ * the first pass. If they are not available, we fall back to the mempool.
  */
 static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size)
 {
@@ -1664,8 +1667,9 @@  static struct bio *crypt_alloc_buffer(st
 	struct bio *clone;
 	unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	gfp_t gfp_mask = GFP_NOWAIT | __GFP_HIGHMEM;
-	unsigned i, len, remaining_size;
+	unsigned remaining_size;
 	struct page *page;
+	unsigned order = MAX_ORDER - 1;
 
 retry:
 	if (unlikely(gfp_mask & __GFP_DIRECT_RECLAIM))
@@ -1678,20 +1682,37 @@  retry:
 
 	remaining_size = size;
 
-	for (i = 0; i < nr_iovecs; i++) {
+	while (remaining_size) {
+		unsigned o;
+		unsigned remaining_order = __fls((remaining_size + PAGE_SIZE - 1) >> PAGE_SHIFT);
+		order = min(order, remaining_order);
+
+		while (order > 0) {
+			page = alloc_pages(gfp_mask
+				| __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN, order);
+			if (likely(page != NULL))
+				goto have_pages;
+			order--;
+		}
+
 		page = mempool_alloc(&cc->page_pool, gfp_mask);
 		if (!page) {
 			crypt_free_buffer_pages(cc, clone);
 			bio_put(clone);
 			gfp_mask |= __GFP_DIRECT_RECLAIM;
+			order = 0;
 			goto retry;
 		}
 
-		len = (remaining_size > PAGE_SIZE) ? PAGE_SIZE : remaining_size;
-
-		bio_add_page(clone, page, len, 0);
+have_pages:
+		page->compound_order = order;
 
-		remaining_size -= len;
+		for (o = 0; o < 1U << order; o++) {
+			unsigned len = min((unsigned)PAGE_SIZE, remaining_size);
+			bio_add_page(clone, page, len, 0);
+			remaining_size -= len;
+			page++;
+		}
 	}
 
 	/* Allocate space for integrity tags */
@@ -1711,10 +1732,23 @@  static void crypt_free_buffer_pages(stru
 {
 	struct bio_vec *bv;
 	struct bvec_iter_all iter_all;
+	unsigned skip_entries = 0;
 
 	bio_for_each_segment_all(bv, clone, iter_all) {
-		BUG_ON(!bv->bv_page);
-		mempool_free(bv->bv_page, &cc->page_pool);
+		unsigned order;
+		struct page *page = bv->bv_page;
+		BUG_ON(!page);
+		if (skip_entries) {
+			skip_entries--;
+			continue;
+		}
+		order = page->compound_order;
+		if (order) {
+			__free_pages(page, order);
+			skip_entries = (1U << order) - 1;
+		} else {
+			mempool_free(page, &cc->page_pool);
+		}
 	}
 }