@@ -1002,6 +1002,12 @@ that the number of free pages kswapd maintains for latency reasons is
too small for the allocation bursts occurring in the system. This knob
can then be used to tune kswapd aggressiveness accordingly.
+The watermark_scale_factor is an array, so each zone's scale factor can be set
+separately. The current values can be seen by reading this file::
+
+ % cat /proc/sys/vm/watermark_scale_factor
+ 10 10 10 10
+
zone_reclaim_mode
=================
@@ -2525,7 +2525,7 @@ extern void setup_per_cpu_pageset(void);
/* page_alloc.c */
extern int min_free_kbytes;
extern int watermark_boost_factor;
-extern int watermark_scale_factor;
+extern int watermark_scale_factor[MAX_NR_ZONES];
extern bool arch_has_descending_max_zone_pfns(void);
/* nommu.c */
@@ -2251,8 +2251,6 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(watermark_scale_factor),
.mode = 0644,
.proc_handler = watermark_scale_factor_sysctl_handler,
- .extra1 = SYSCTL_ONE,
- .extra2 = SYSCTL_THREE_THOUSAND,
},
{
.procname = "percpu_pagelist_high_fraction",
@@ -421,7 +421,6 @@ compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = {
int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
int watermark_boost_factor __read_mostly = 15000;
-int watermark_scale_factor = 10;
static unsigned long nr_kernel_pages __initdata;
static unsigned long nr_all_pages __initdata;
@@ -449,6 +448,20 @@ EXPORT_SYMBOL(nr_online_nodes);
int page_group_by_mobility_disabled __read_mostly;
+int watermark_scale_factor[MAX_NR_ZONES] = {
+#ifdef CONFIG_ZONE_DMA
+ [ZONE_DMA] = 10,
+#endif
+#ifdef CONFIG_ZONE_DMA32
+ [ZONE_DMA32] = 10,
+#endif
+ [ZONE_NORMAL] = 10,
+#ifdef CONFIG_HIGHMEM
+ [ZONE_HIGHMEM] = 10,
+#endif
+ [ZONE_MOVABLE] = 10,
+};
+
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
* During boot we initialize deferred pages on-demand, as needed, but once
@@ -8643,6 +8656,7 @@ static void __setup_per_zone_wmarks(void)
}
for_each_zone(zone) {
+ int zone_wm_factor;
u64 tmp;
spin_lock_irqsave(&zone->lock, flags);
@@ -8676,9 +8690,10 @@ static void __setup_per_zone_wmarks(void)
* scale factor in proportion to available memory, but
* ensure a minimum size on small systems.
*/
+ zone_wm_factor = watermark_scale_factor[zone_idx(zone)];
tmp = max_t(u64, tmp >> 2,
- mult_frac(zone_managed_pages(zone),
- watermark_scale_factor, 10000));
+ mult_frac(zone_managed_pages(zone), zone_wm_factor,
+ 10000));
zone->watermark_boost = 0;
zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
@@ -8798,11 +8813,19 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
{
- int rc;
+ int i;
- rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
- if (rc)
- return rc;
+ proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+ /*
+ * The unit is in fractions of 10,000. The default value of 10
+ * means the distances between watermarks are 0.1% of the available
+ * memory in the node/system. The maximum value is 3000, or 30% of
+ * memory.
+ */
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ watermark_scale_factor[i] =
+ clamp(watermark_scale_factor[i], 1, 3000);
if (write)
setup_per_zone_wmarks();