diff mbox series

[RFC] mm: page_alloc: Add kernel parameter to select maximum PCP batch scale number

Message ID 20241126095138.1832464-1-claudiu.beznea.uj@bp.renesas.com (mailing list archive)
State New
Headers show
Series [RFC] mm: page_alloc: Add kernel parameter to select maximum PCP batch scale number | expand

Commit Message

Claudiu Nov. 26, 2024, 9:51 a.m. UTC
From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>

Commit 52166607ecc9 ("mm: restrict the pcp batch scale factor to avoid
too long latency") introduced a configurable upper limit for the PCP
(Per-CPU Pageset) batch scale factor: CONFIG_PCP_BATCH_SCALE_MAX.

The ARM64 defconfig has CONFIG_PCP_BATCH_SCALE_MAX=5. This defconfig
is used by a wide range of SoCs.

The Renesas RZ/G3S SoC is a single CPU SoC, with L1$ (I-cache 32 Kbytes,
D-cache 32 Kbytes), L3$ (256 Kbytes), but no L2$. It is currently used in
a configuration with 1 GiB RAM size. In this configuration, starting with
commit 52166607ecc9 ("mm: restrict the pcp batch scale factor to avoid too
long latency") the "bonnie++ -d /mnt -u root" benchmark takes ~14 minutes
while previously it took ~10 minutes. The /mnt directory is mounted on an SD
card. The same behavior is reproduced on similar Renesas single core devices
(e.g., Renesas RZ/G2UL).

Add a new kernel parameter to allow systems like Renesas RZ/G3S to
keep the same performance numbers with the default mainline ARM64
config. With pcp_batch_scale_max=5 (the default value) the bonnie++
benchmark takes ~14 minutes while with pcp_batch_scale_max=0 it takes
~10 minutes.

Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
---
 .../admin-guide/kernel-parameters.txt         |  6 +++++
 mm/page_alloc.c                               | 26 ++++++++++++++-----
 2 files changed, 26 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e7bfe1bde49e..ce745ea78470 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4716,6 +4716,12 @@ 
 			for debug and development, but should not be
 			needed on a platform with proper driver support.
 
+	pcp_batch_scale_max=n
+			Format: <integer>
+			Range: 0 - 6
+			Default: CONFIG_PCP_BATCH_SCALE_MAX
+			Maximum scale factor (shift) applied to the PCP batch size.
+
 	pdcchassis=	[PARISC,HW] Disable/Enable PDC Chassis Status codes at
 			boot time.
 			Format: { 0 | 1 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bc55d39eb372..ef1d37cefb43 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -163,6 +163,20 @@  static DEFINE_MUTEX(pcp_batch_high_lock);
 #define pcp_spin_unlock(ptr)						\
 	pcpu_spin_unlock(lock, ptr)
 
+static unsigned int pcp_batch_scale_max = CONFIG_PCP_BATCH_SCALE_MAX;
+#define MAX_PCP_BATCH	6
+
+/* Parse pcp_batch_scale_max=<n>: ignore non-integer input, clamp <n> to [0, MAX_PCP_BATCH]. */
+static int __init setup_pcp_batch_scale_max(char *str)
+{
+	int val;
+
+	if (get_option(&str, &val))
+		pcp_batch_scale_max = clamp(val, 0, MAX_PCP_BATCH);
+	return 1;
+}
+__setup("pcp_batch_scale_max=", setup_pcp_batch_scale_max);
+
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -2362,7 +2376,7 @@  int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp)
 	 * control latency.  This caps pcp->high decrement too.
 	 */
 	if (pcp->high > high_min) {
-		pcp->high = max3(pcp->count - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
+		pcp->high = max3(pcp->count - (batch << pcp_batch_scale_max),
 				 pcp->high - (pcp->high >> 3), high_min);
 		if (pcp->high > high_min)
 			todo++;
@@ -2412,7 +2426,7 @@  static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 		count = pcp->count;
 		if (count) {
 			int to_drain = min(count,
-				pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX);
+				pcp->batch << pcp_batch_scale_max);
 
 			free_pcppages_bulk(zone, to_drain, pcp, 0);
 			count -= to_drain;
@@ -2540,7 +2554,7 @@  static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free
 
 	/* Free as much as possible if batch freeing high-order pages. */
 	if (unlikely(free_high))
-		return min(pcp->count, batch << CONFIG_PCP_BATCH_SCALE_MAX);
+		return min(pcp->count, batch << pcp_batch_scale_max);
 
 	/* Check for PCP disabled or boot pageset */
 	if (unlikely(high < batch))
@@ -2572,7 +2586,7 @@  static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone,
 		return 0;
 
 	if (unlikely(free_high)) {
-		pcp->high = max(high - (batch << CONFIG_PCP_BATCH_SCALE_MAX),
+		pcp->high = max(high - (batch << pcp_batch_scale_max),
 				high_min);
 		return 0;
 	}
@@ -2642,7 +2656,7 @@  static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
 	} else if (pcp->flags & PCPF_PREV_FREE_HIGH_ORDER) {
 		pcp->flags &= ~PCPF_PREV_FREE_HIGH_ORDER;
 	}
-	if (pcp->free_count < (batch << CONFIG_PCP_BATCH_SCALE_MAX))
+	if (pcp->free_count < (batch << pcp_batch_scale_max))
 		pcp->free_count += (1 << order);
 	high = nr_pcp_high(pcp, zone, batch, free_high);
 	if (pcp->count >= high) {
@@ -2984,7 +2998,7 @@  static int nr_pcp_alloc(struct per_cpu_pages *pcp, struct zone *zone, int order)
 		 * subsequent allocation of order-0 pages without any freeing.
 		 */
 		if (batch <= max_nr_alloc &&
-		    pcp->alloc_factor < CONFIG_PCP_BATCH_SCALE_MAX)
+		    pcp->alloc_factor < pcp_batch_scale_max)
 			pcp->alloc_factor++;
 		batch = min(batch, max_nr_alloc);
 	}