diff mbox

dm bufio: Reduce dm_bufio_lock contention

Message ID alpine.LRH.2.02.1806251037250.17405@file01.intranet.prod.int.rdu2.redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mikulas Patocka June 25, 2018, 2:42 p.m. UTC
On Mon, 25 Jun 2018, Michal Hocko wrote:

> > And the throttling in dm-bufio prevents kswapd from making forward 
> > progress, causing this situation...
> 
> Which is what we have PF_THROTTLE_LESS for. Geez, do we have to go in
> circles like that? Are you even listening?
> 
> [...]
> 
> > And so what do you want to do to prevent block drivers from sleeping?
> 
> use the existing means we have.
> -- 
> Michal Hocko
> SUSE Labs

So - do you want this patch?

There is no behavior difference between changing the allocator (so that it 
implies PF_THROTTLE_LESS for block drivers) and chaning all the block 
drivers to explicitly set PF_THROTTLE_LESS.

But if you insist that the allocator can't be changed, we have to repeat 
the same code over and over again in the block drivers.

Mikulas



---
 block/bio.c                   |    4 ++++
 drivers/md/dm-bufio.c         |   14 +++++++++++---
 drivers/md/dm-crypt.c         |    8 ++++++++
 drivers/md/dm-integrity.c     |    4 ++++
 drivers/md/dm-kcopyd.c        |    3 +++
 drivers/md/dm-verity-target.c |    4 ++++
 drivers/md/dm-writecache.c    |    4 ++++
 mm/mempool.c                  |    4 ++++
 8 files changed, 42 insertions(+), 3 deletions(-)

Comments

Michal Hocko June 25, 2018, 2:57 p.m. UTC | #1
On Mon 25-06-18 10:42:30, Mikulas Patocka wrote:
> 
> 
> On Mon, 25 Jun 2018, Michal Hocko wrote:
> 
> > > And the throttling in dm-bufio prevents kswapd from making forward 
> > > progress, causing this situation...
> > 
> > Which is what we have PF_THROTTLE_LESS for. Geez, do we have to go in
> > circles like that? Are you even listening?
> > 
> > [...]
> > 
> > > And so what do you want to do to prevent block drivers from sleeping?
> > 
> > use the existing means we have.
> > -- 
> > Michal Hocko
> > SUSE Labs
> 
> So - do you want this patch?
> 
> There is no behavior difference between changing the allocator (so that it 
> implies PF_THROTTLE_LESS for block drivers) and chaning all the block 
> drivers to explicitly set PF_THROTTLE_LESS.

As long as you can reliably detect those users. And using gfp_mask is
about the worst way to achieve that because users tend to be creative
when it comes to using gfp mask. PF_THROTTLE_LESS in general is a
way to tell the allocator that _you_ are the one to help the reclaim by
cleaning data.

> But if you insist that the allocator can't be changed, we have to repeat 
> the same code over and over again in the block drivers.

I am not familiar with the patched code but mempool change at least
makes sense (bvec_alloc seems to fallback to mempool which then makes
sense as well). If others in md/ do the same thing

I would just use current_restore_flags rather than open code it.

Thanks!
diff mbox

Patch

Index: linux-2.6/mm/mempool.c
===================================================================
--- linux-2.6.orig/mm/mempool.c	2018-06-25 16:32:19.210000000 +0200
+++ linux-2.6/mm/mempool.c	2018-06-25 16:32:19.200000000 +0200
@@ -369,6 +369,7 @@  void *mempool_alloc(mempool_t *pool, gfp
 	unsigned long flags;
 	wait_queue_entry_t wait;
 	gfp_t gfp_temp;
+	unsigned old_flags;
 
 	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
 	might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);
@@ -381,7 +382,10 @@  void *mempool_alloc(mempool_t *pool, gfp
 
 repeat_alloc:
 
+	old_flags = current->flags & PF_LESS_THROTTLE;
+	current->flags |= PF_LESS_THROTTLE;
 	element = pool->alloc(gfp_temp, pool->pool_data);
+	current->flags = (current->flags & ~PF_LESS_THROTTLE) | old_flags;
 	if (likely(element != NULL))
 		return element;
 
Index: linux-2.6/block/bio.c
===================================================================
--- linux-2.6.orig/block/bio.c	2018-06-25 16:32:19.210000000 +0200
+++ linux-2.6/block/bio.c	2018-06-25 16:32:19.200000000 +0200
@@ -217,6 +217,7 @@  fallback:
 	} else {
 		struct biovec_slab *bvs = bvec_slabs + *idx;
 		gfp_t __gfp_mask = gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO);
+		unsigned old_flags;
 
 		/*
 		 * Make this allocation restricted and don't dump info on
@@ -229,7 +230,10 @@  fallback:
 		 * Try a slab allocation. If this fails and __GFP_DIRECT_RECLAIM
 		 * is set, retry with the 1-entry mempool
 		 */
+		old_flags = current->flags & PF_LESS_THROTTLE;
+		current->flags |= PF_LESS_THROTTLE;
 		bvl = kmem_cache_alloc(bvs->slab, __gfp_mask);
+		current->flags = (current->flags & ~PF_LESS_THROTTLE) | old_flags;
 		if (unlikely(!bvl && (gfp_mask & __GFP_DIRECT_RECLAIM))) {
 			*idx = BVEC_POOL_MAX;
 			goto fallback;
Index: linux-2.6/drivers/md/dm-bufio.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-bufio.c	2018-06-25 16:32:19.210000000 +0200
+++ linux-2.6/drivers/md/dm-bufio.c	2018-06-25 16:32:19.200000000 +0200
@@ -356,6 +356,7 @@  static void __cache_size_refresh(void)
 static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
 			       unsigned char *data_mode)
 {
+	void *ptr;
 	if (unlikely(c->slab_cache != NULL)) {
 		*data_mode = DATA_MODE_SLAB;
 		return kmem_cache_alloc(c->slab_cache, gfp_mask);
@@ -363,9 +364,14 @@  static void *alloc_buffer_data(struct dm
 
 	if (c->block_size <= KMALLOC_MAX_SIZE &&
 	    gfp_mask & __GFP_NORETRY) {
+		unsigned old_flags;
 		*data_mode = DATA_MODE_GET_FREE_PAGES;
-		return (void *)__get_free_pages(gfp_mask,
+		old_flags = current->flags & PF_LESS_THROTTLE;
+		current->flags |= PF_LESS_THROTTLE;
+		ptr = (void *)__get_free_pages(gfp_mask,
 						c->sectors_per_block_bits - (PAGE_SHIFT - SECTOR_SHIFT));
+		current->flags = (current->flags & ~PF_LESS_THROTTLE) | old_flags;
+		return ptr;
 	}
 
 	*data_mode = DATA_MODE_VMALLOC;
@@ -381,8 +387,10 @@  static void *alloc_buffer_data(struct dm
 	 */
 	if (gfp_mask & __GFP_NORETRY) {
 		unsigned noio_flag = memalloc_noio_save();
-		void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
-
+		unsigned old_flags = current->flags & PF_LESS_THROTTLE;
+		current->flags |= PF_LESS_THROTTLE;
+		ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+		current->flags = (current->flags & ~PF_LESS_THROTTLE) | old_flags;
 		memalloc_noio_restore(noio_flag);
 		return ptr;
 	}
Index: linux-2.6/drivers/md/dm-integrity.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-integrity.c	2018-06-25 16:32:19.210000000 +0200
+++ linux-2.6/drivers/md/dm-integrity.c	2018-06-25 16:32:52.600000000 +0200
@@ -1317,6 +1317,7 @@  static void integrity_metadata(struct wo
 	int r;
 
 	if (ic->internal_hash) {
+		unsigned old_flags;
 		struct bvec_iter iter;
 		struct bio_vec bv;
 		unsigned digest_size = crypto_shash_digestsize(ic->internal_hash);
@@ -1330,8 +1331,11 @@  static void integrity_metadata(struct wo
 		if (unlikely(ic->mode == 'R'))
 			goto skip_io;
 
+		old_flags = current->flags & PF_LESS_THROTTLE;
+		current->flags |= PF_LESS_THROTTLE;
 		checksums = kmalloc((PAGE_SIZE >> SECTOR_SHIFT >> ic->sb->log2_sectors_per_block) * ic->tag_size + extra_space,
 				    GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
+		current->flags = (current->flags & ~PF_LESS_THROTTLE) | old_flags;
 		if (!checksums)
 			checksums = checksums_onstack;
 
Index: linux-2.6/drivers/md/dm-kcopyd.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-kcopyd.c	2018-06-25 16:32:19.210000000 +0200
+++ linux-2.6/drivers/md/dm-kcopyd.c	2018-06-25 16:32:19.200000000 +0200
@@ -245,7 +245,10 @@  static int kcopyd_get_pages(struct dm_kc
 	*pages = NULL;
 
 	do {
+		unsigned old_flags = current->flags & PF_LESS_THROTTLE;
+		current->flags |= PF_LESS_THROTTLE;
 		pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY | __GFP_KSWAPD_RECLAIM);
+		current->flags = (current->flags & ~PF_LESS_THROTTLE) | old_flags;
 		if (unlikely(!pl)) {
 			/* Use reserved pages */
 			pl = kc->pages;
Index: linux-2.6/drivers/md/dm-verity-target.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-verity-target.c	2018-06-25 16:32:19.210000000 +0200
+++ linux-2.6/drivers/md/dm-verity-target.c	2018-06-25 16:32:19.200000000 +0200
@@ -596,9 +596,13 @@  no_prefetch_cluster:
 static void verity_submit_prefetch(struct dm_verity *v, struct dm_verity_io *io)
 {
 	struct dm_verity_prefetch_work *pw;
+	unsigned old_flags;
 
+	old_flags = current->flags & PF_LESS_THROTTLE;
+	current->flags |= PF_LESS_THROTTLE;
 	pw = kmalloc(sizeof(struct dm_verity_prefetch_work),
 		GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
+	current->flags = (current->flags & ~PF_LESS_THROTTLE) | old_flags;
 
 	if (!pw)
 		return;
Index: linux-2.6/drivers/md/dm-writecache.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-writecache.c	2018-06-25 16:32:19.210000000 +0200
+++ linux-2.6/drivers/md/dm-writecache.c	2018-06-25 16:32:19.200000000 +0200
@@ -1467,6 +1467,7 @@  static void __writecache_writeback_pmem(
 	unsigned max_pages;
 
 	while (wbl->size) {
+		unsigned old_flags;
 		wbl->size--;
 		e = container_of(wbl->list.prev, struct wc_entry, lru);
 		list_del(&e->lru);
@@ -1480,6 +1481,8 @@  static void __writecache_writeback_pmem(
 		bio_set_dev(&wb->bio, wc->dev->bdev);
 		wb->bio.bi_iter.bi_sector = read_original_sector(wc, e);
 		wb->page_offset = PAGE_SIZE;
+		old_flags = current->flags & PF_LESS_THROTTLE;
+		current->flags |= PF_LESS_THROTTLE;
 		if (max_pages <= WB_LIST_INLINE ||
 		    unlikely(!(wb->wc_list = kmalloc(max_pages * sizeof(struct wc_entry *),
 						     GFP_NOIO | __GFP_NORETRY |
@@ -1487,6 +1490,7 @@  static void __writecache_writeback_pmem(
 			wb->wc_list = wb->wc_list_inline;
 			max_pages = WB_LIST_INLINE;
 		}
+		current->flags = (current->flags & ~PF_LESS_THROTTLE) | old_flags;
 
 		BUG_ON(!wc_add_block(wb, e, GFP_NOIO));
 
Index: linux-2.6/drivers/md/dm-crypt.c
===================================================================
--- linux-2.6.orig/drivers/md/dm-crypt.c	2018-06-25 16:32:19.210000000 +0200
+++ linux-2.6/drivers/md/dm-crypt.c	2018-06-25 16:33:55.810000000 +0200
@@ -2181,12 +2181,16 @@  static void *crypt_page_alloc(gfp_t gfp_
 {
 	struct crypt_config *cc = pool_data;
 	struct page *page;
+	unsigned old_flags;
 
 	if (unlikely(percpu_counter_compare(&cc->n_allocated_pages, dm_crypt_pages_per_client) >= 0) &&
 	    likely(gfp_mask & __GFP_NORETRY))
 		return NULL;
 
+	old_flags = current->flags & PF_LESS_THROTTLE;
+	current->flags |= PF_LESS_THROTTLE;
 	page = alloc_page(gfp_mask);
+	current->flags = (current->flags & ~PF_LESS_THROTTLE) | old_flags;
 	if (likely(page != NULL))
 		percpu_counter_add(&cc->n_allocated_pages, 1);
 
@@ -2893,7 +2897,10 @@  static int crypt_map(struct dm_target *t
 
 	if (cc->on_disk_tag_size) {
 		unsigned tag_len = cc->on_disk_tag_size * (bio_sectors(bio) >> cc->sector_shift);
+		unsigned old_flags;
 
+		old_flags = current->flags & PF_LESS_THROTTLE;
+		current->flags |= PF_LESS_THROTTLE;
 		if (unlikely(tag_len > KMALLOC_MAX_SIZE) ||
 		    unlikely(!(io->integrity_metadata = kmalloc(tag_len,
 				GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN)))) {
@@ -2902,6 +2909,7 @@  static int crypt_map(struct dm_target *t
 			io->integrity_metadata = mempool_alloc(&cc->tag_pool, GFP_NOIO);
 			io->integrity_metadata_from_pool = true;
 		}
+		current->flags = (current->flags & ~PF_LESS_THROTTLE) | old_flags;
 	}
 
 	if (crypt_integrity_aead(cc))