[4/4] percpu: use reclaim threshold instead of running for every page

Message ID 20210419225047.3415425-5-dennis@kernel.org (mailing list archive)
State New, archived
Series percpu: partial chunk depopulation

Commit Message

Dennis Zhou April 19, 2021, 10:50 p.m. UTC
The previous patch implements reclaim by adding two additional lists,
giving a chunk the following lifecycle:
  active_slot -> to_depopulate_slot -> sidelined_slot

This worked well because we were able to converge the isolation paths
nicely. However, running reclaim for every freed page is a bit
aggressive, so let's accumulate a few free pages before depopulating.
The new lifecycle is:
  active_slot -> sidelined_slot -> to_depopulate_slot -> sidelined_slot

The transition from sidelined_slot -> to_depopulate_slot now occurs
once a threshold of empty populated pages accumulates, whereas before
a chunk went directly to the to_depopulate_slot.
pcpu_nr_isolated_empty_pop_pages[] is introduced to track this (a
simplified sketch follows the diffstat below).

Suggested-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
 mm/percpu-internal.h |  1 +
 mm/percpu-stats.c    |  8 ++++++--
 mm/percpu.c          | 44 +++++++++++++++++++++++++++++++++++++-------
 3 files changed, 44 insertions(+), 9 deletions(-)
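
As a rough illustration of the threshold behavior described above, here is
a minimal, self-contained userspace sketch. The threshold name mirrors the
patch; the single global counters and the isolate_chunk() helper are
stand-ins for the kernel's per-type lists, slots and locking, not actual
kernel code.

	#include <stdio.h>

	#define PCPU_EMPTY_POP_RECLAIM_THRESHOLD	4

	static int nr_sidelined_chunks;		/* chunks parked in sidelined_slot */
	static int nr_isolated_empty_pop_pages;	/* pcpu_nr_isolated_empty_pop_pages */

	/* models free_percpu() isolating a reclaimable chunk */
	static void isolate_chunk(int empty_pop_pages)
	{
		nr_sidelined_chunks++;
		nr_isolated_empty_pop_pages += empty_pop_pages;

		/* promote sidelined chunks only once the threshold is crossed */
		if (nr_isolated_empty_pop_pages >= PCPU_EMPTY_POP_RECLAIM_THRESHOLD) {
			printf("promote %d sidelined chunk(s), schedule reclaim\n",
			       nr_sidelined_chunks);
			nr_sidelined_chunks = 0;
			/* in the kernel the counter drops as the worker depopulates */
			nr_isolated_empty_pop_pages = 0;
		}
	}

	int main(void)
	{
		isolate_chunk(1);	/* 1 page: below threshold, nothing happens */
		isolate_chunk(2);	/* 3 pages: still below */
		isolate_chunk(1);	/* 4 pages: threshold hit, reclaim scheduled */
		return 0;
	}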

Comments

Roman Gushchin April 20, 2021, 9:23 p.m. UTC | #1
On Mon, Apr 19, 2021 at 10:50:47PM +0000, Dennis Zhou wrote:
> The previous patch implements reclaim by adding two additional lists,
> giving a chunk the following lifecycle:
>   active_slot -> to_depopulate_slot -> sidelined_slot
> 
> This worked well because we were able to converge the isolation paths
> nicely. However, running reclaim for every freed page is a bit
> aggressive, so let's accumulate a few free pages before depopulating.
> The new lifecycle is:
>   active_slot -> sidelined_slot -> to_depopulate_slot -> sidelined_slot
> 
> The transition from sidelined_slot -> to_depopulate_slot now occurs
> once a threshold of empty populated pages accumulates, whereas before
> a chunk went directly to the to_depopulate_slot.
> pcpu_nr_isolated_empty_pop_pages[] is introduced to track this.
> 
> Suggested-by: Roman Gushchin <guro@fb.com>
> Signed-off-by: Dennis Zhou <dennis@kernel.org>

Acked-by: Roman Gushchin <guro@fb.com>

Thanks, Dennis!
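
For reference, the counter bookkeeping added by the patch (in
pcpu_isolate_chunk(), pcpu_reintegrate_chunk() and
pcpu_update_empty_pages() below) can be modeled by the following
self-contained sketch. The struct chunk, the two global counters and
main() here are simplified stand-ins; the real code keeps one counter
per chunk type and runs under pcpu_lock.

	#include <stdbool.h>
	#include <stdio.h>

	struct chunk {
		int	nr_empty_pop_pages;
		bool	isolated;
	};

	static int nr_empty_pop_pages;		/* pcpu_nr_empty_pop_pages */
	static int nr_isolated_empty_pop_pages;	/* pcpu_nr_isolated_empty_pop_pages */

	/* models pcpu_isolate_chunk(): move the chunk's pages between counters */
	static void isolate(struct chunk *c)
	{
		if (!c->isolated) {
			c->isolated = true;
			nr_empty_pop_pages -= c->nr_empty_pop_pages;
			nr_isolated_empty_pop_pages += c->nr_empty_pop_pages;
		}
	}

	/* models pcpu_reintegrate_chunk(): the reverse move */
	static void reintegrate(struct chunk *c)
	{
		if (c->isolated) {
			c->isolated = false;
			nr_isolated_empty_pop_pages -= c->nr_empty_pop_pages;
			nr_empty_pop_pages += c->nr_empty_pop_pages;
		}
	}

	/* models pcpu_update_empty_pages(): route the delta by isolation state */
	static void update_empty_pages(struct chunk *c, int nr)
	{
		c->nr_empty_pop_pages += nr;
		if (c->isolated)
			nr_isolated_empty_pop_pages += nr;
		else
			nr_empty_pop_pages += nr;
	}

	int main(void)
	{
		struct chunk c = { .nr_empty_pop_pages = 2 };

		nr_empty_pop_pages = 2;
		isolate(&c);			/* 2 pages move to the isolated counter */
		update_empty_pages(&c, 1);	/* a page emptied while isolated */
		reintegrate(&c);		/* all 3 pages move back */
		printf("%d active, %d isolated\n",
		       nr_empty_pop_pages, nr_isolated_empty_pop_pages);
		return 0;
	}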

Patch

diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 10604dce806f..b3e43b016276 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -92,6 +92,7 @@  extern int pcpu_nr_slots;
 extern int pcpu_sidelined_slot;
 extern int pcpu_to_depopulate_slot;
 extern int pcpu_nr_empty_pop_pages[];
+extern int pcpu_nr_isolated_empty_pop_pages[];
 
 extern struct pcpu_chunk *pcpu_first_chunk;
 extern struct pcpu_chunk *pcpu_reserved_chunk;
diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c
index 2125981acfb9..facc804eb86c 100644
--- a/mm/percpu-stats.c
+++ b/mm/percpu-stats.c
@@ -145,7 +145,7 @@  static int percpu_stats_show(struct seq_file *m, void *v)
 	int slot, max_nr_alloc;
 	int *buffer;
 	enum pcpu_chunk_type type;
-	int nr_empty_pop_pages;
+	int nr_empty_pop_pages, nr_isolated_empty_pop_pages;
 
 alloc_buffer:
 	spin_lock_irq(&pcpu_lock);
@@ -167,8 +167,11 @@  static int percpu_stats_show(struct seq_file *m, void *v)
 	}
 
 	nr_empty_pop_pages = 0;
-	for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++)
+	nr_isolated_empty_pop_pages = 0;
+	for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) {
 		nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type];
+		nr_isolated_empty_pop_pages += pcpu_nr_isolated_empty_pop_pages[type];
+	}
 
 #define PL(X)								\
 	seq_printf(m, "  %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X)
@@ -202,6 +205,7 @@  static int percpu_stats_show(struct seq_file *m, void *v)
 	PU(min_alloc_size);
 	PU(max_alloc_size);
 	P("empty_pop_pages", nr_empty_pop_pages);
+	P("iso_empty_pop_pages", nr_isolated_empty_pop_pages);
 	seq_putc(m, '\n');
 
 #undef PU
diff --git a/mm/percpu.c b/mm/percpu.c
index 79eebc80860d..ba13e683d022 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -110,6 +110,9 @@ 
 #define PCPU_EMPTY_POP_PAGES_LOW	2
 #define PCPU_EMPTY_POP_PAGES_HIGH	4
 
+/* only schedule reclaim if there are at least N empty pop pages sidelined */
+#define PCPU_EMPTY_POP_RECLAIM_THRESHOLD	4
+
 #ifdef CONFIG_SMP
 /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
 #ifndef __addr_to_pcpu_ptr
@@ -183,6 +186,7 @@  static LIST_HEAD(pcpu_map_extend_chunks);
  * The reserved chunk doesn't contribute to the count.
  */
 int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
+int pcpu_nr_isolated_empty_pop_pages[PCPU_NR_CHUNK_TYPES];
 
 /*
  * The number of populated pages in use by the allocator, protected by
@@ -582,8 +586,10 @@  static void pcpu_isolate_chunk(struct pcpu_chunk *chunk)
 	if (!chunk->isolated) {
 		chunk->isolated = true;
 		pcpu_nr_empty_pop_pages[type] -= chunk->nr_empty_pop_pages;
+		pcpu_nr_isolated_empty_pop_pages[type] +=
+			chunk->nr_empty_pop_pages;
+		list_move(&chunk->list, &pcpu_slot[pcpu_sidelined_slot]);
 	}
-	list_move(&chunk->list, &pcpu_slot[pcpu_to_depopulate_slot]);
 }
 
 static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
@@ -595,6 +601,8 @@  static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
 	if (chunk->isolated) {
 		chunk->isolated = false;
 		pcpu_nr_empty_pop_pages[type] += chunk->nr_empty_pop_pages;
+		pcpu_nr_isolated_empty_pop_pages[type] -=
+			chunk->nr_empty_pop_pages;
 		pcpu_chunk_relocate(chunk, -1);
 	}
 }
@@ -610,9 +618,15 @@  static void pcpu_reintegrate_chunk(struct pcpu_chunk *chunk)
  */
 static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr)
 {
+	enum pcpu_chunk_type type = pcpu_chunk_type(chunk);
+
 	chunk->nr_empty_pop_pages += nr;
-	if (chunk != pcpu_reserved_chunk && !chunk->isolated)
-		pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr;
+	if (chunk != pcpu_reserved_chunk) {
+		if (chunk->isolated)
+			pcpu_nr_isolated_empty_pop_pages[type] += nr;
+		else
+			pcpu_nr_empty_pop_pages[type] += nr;
+	}
 }
 
 /*
@@ -2138,10 +2152,13 @@  static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
 	struct list_head *pcpu_slot = pcpu_chunk_list(type);
 	struct pcpu_chunk *chunk;
 	struct pcpu_block_md *block;
+	LIST_HEAD(to_depopulate);
 	int i, end;
 
 	spin_lock_irq(&pcpu_lock);
 
+	list_splice_init(&pcpu_slot[pcpu_to_depopulate_slot], &to_depopulate);
+
 restart:
 	/*
 	 * Once a chunk is isolated to the to_depopulate list, the chunk is no
@@ -2149,9 +2166,9 @@  static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
 	 * other accessor is the free path which only returns area back to the
 	 * allocator not touching the populated bitmap.
 	 */
-	while (!list_empty(&pcpu_slot[pcpu_to_depopulate_slot])) {
-		chunk = list_first_entry(&pcpu_slot[pcpu_to_depopulate_slot],
-					 struct pcpu_chunk, list);
+	while (!list_empty(&to_depopulate)) {
+		chunk = list_first_entry(&to_depopulate, struct pcpu_chunk,
+					 list);
 		WARN_ON(chunk->immutable);
 
 		/*
@@ -2208,6 +2225,13 @@  static void pcpu_reclaim_populated(enum pcpu_chunk_type type)
 				  &pcpu_slot[pcpu_sidelined_slot]);
 	}
 
+	if (pcpu_nr_isolated_empty_pop_pages[type] >=
+	    PCPU_EMPTY_POP_RECLAIM_THRESHOLD) {
+		list_splice_tail_init(&pcpu_slot[pcpu_sidelined_slot],
+				      &pcpu_slot[pcpu_to_depopulate_slot]);
+		pcpu_schedule_balance_work();
+	}
+
 	spin_unlock_irq(&pcpu_lock);
 }
 
@@ -2291,7 +2315,13 @@  void free_percpu(void __percpu *ptr)
 			}
 	} else if (pcpu_should_reclaim_chunk(chunk)) {
 		pcpu_isolate_chunk(chunk);
-		need_balance = true;
+		if (chunk->free_bytes == pcpu_unit_size ||
+		    pcpu_nr_isolated_empty_pop_pages[pcpu_chunk_type(chunk)] >=
+		    PCPU_EMPTY_POP_RECLAIM_THRESHOLD) {
+			list_splice_tail_init(&pcpu_slot[pcpu_sidelined_slot],
+					      &pcpu_slot[pcpu_to_depopulate_slot]);
+			need_balance = true;
+		}
 	}
 
 	trace_percpu_free_percpu(chunk->base_addr, off, ptr);