@@ -2520,6 +2520,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
[X86]
Set unknown_nmi_panic=1 early on boot.
+ unmapped_page_control
+ [KNL] Available if CONFIG_UNMAPPED_PAGECACHE_CONTROL
+ is enabled. It controls the amount of unmapped page cache
+ that is present in the system. This boot option plus the
+ vm.min_unmapped_ratio and vm.max_unmapped_ratio sysctls
+ provide granular control over how much unmapped page cache
+ can exist before kswapd starts reclaiming it.
+
usbcore.autosuspend=
[USB] The autosuspend time delay (in seconds) used
for newly-detected USB devices (default 2). This
@@ -381,11 +381,14 @@ and may not be fast.
min_unmapped_ratio:
-This is available only on NUMA kernels.
+This is available only on NUMA kernels or when unmapped page cache
+control is enabled.
This is a percentage of the total pages in each zone. Zone reclaim will
only occur if more than this percentage of pages are in a state that
-zone_reclaim_mode allows to be reclaimed.
+zone_reclaim_mode allows to be reclaimed. If unmapped page cache control
+is enabled, this is the minimum level to which the unmapped page cache
+will be shrunk.
If zone_reclaim_mode has the value 4 OR'd, then the percentage is compared
against all file-backed unmapped pages including swapcache pages and tmpfs
@@ -396,6 +399,18 @@ The default is 1 percent.
==============================================================
+max_unmapped_ratio:
+
+This is available only when unmapped page cache control is enabled.
+
+This is a percentage of the total pages in each zone. When unmapped page
+cache control is enabled, kswapd is woken up to reclaim unmapped page cache
+once more than this percentage of a zone's pages are unmapped file pages.
+
+The default is 16 percent.
+
+==============================================================
+
mmap_min_addr
This file indicates the amount of address space which a user process will
@@ -309,7 +309,12 @@ struct zone {
/*
* zone reclaim becomes active if more unmapped pages exist.
*/
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL) || defined(CONFIG_NUMA)
unsigned long min_unmapped_pages;
+#endif
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL)
+ unsigned long max_unmapped_pages;
+#endif
#ifdef CONFIG_NUMA
int node;
unsigned long min_slab_pages;
@@ -776,6 +781,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
+int sysctl_max_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
@@ -264,19 +264,36 @@ extern int vm_swappiness;
extern int remove_mapping(struct address_space *mapping, struct page *page);
extern long vm_total_pages;
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL) || defined(CONFIG_NUMA)
extern int sysctl_min_unmapped_ratio;
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL)
+extern int sysctl_max_unmapped_ratio;
+#endif
+
extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
-#ifdef CONFIG_NUMA
-extern int zone_reclaim_mode;
-extern int sysctl_min_slab_ratio;
#else
-#define zone_reclaim_mode 0
static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
{
return 0;
}
#endif
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL)
+extern bool should_reclaim_unmapped_pages(struct zone *zone);
+#else
+static inline bool should_reclaim_unmapped_pages(struct zone *zone)
+{
+ return false; /* unmapped page cache control is compiled out */
+}
+#endif
+
+#ifdef CONFIG_NUMA
+extern int zone_reclaim_mode;
+extern int sysctl_min_slab_ratio;
+#else
+#define zone_reclaim_mode 0
+#endif
+
extern int page_evictable(struct page *page, struct vm_area_struct *vma);
extern void scan_mapping_unevictable_pages(struct address_space *);
@@ -811,6 +811,18 @@ config SCHED_AUTOGROUP
config MM_OWNER
bool
+config UNMAPPED_PAGECACHE_CONTROL
+ bool "Provide control over unmapped page cache"
+ default n
+ help
+ This option adds support for controlling unmapped page cache
+ via a boot parameter (unmapped_page_control). The boot parameter,
+ together with the vm.min_unmapped_ratio and vm.max_unmapped_ratio
+ sysctls, controls the total number of unmapped pages in the
+ system. This feature is useful if you want to limit the amount
+ of unmapped page cache or want to reduce page cache duplication
+ in a virtualized environment. If unsure, say 'N'.
+
config SYSFS_DEPRECATED
bool "Enable deprecated sysfs features to support old userspace tools"
depends on SYSFS
@@ -1214,6 +1214,7 @@ static struct ctl_table vm_table[] = {
.proc_handler = proc_dointvec_unsigned,
},
#endif
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL) || defined(CONFIG_NUMA)
{
.procname = "min_unmapped_ratio",
.data = &sysctl_min_unmapped_ratio,
@@ -1223,6 +1224,18 @@ static struct ctl_table vm_table[] = {
.extra1 = &zero,
.extra2 = &one_hundred,
},
+#endif
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL)
+ {
+ .procname = "max_unmapped_ratio",
+ .data = &sysctl_max_unmapped_ratio,
+ .maxlen = sizeof(sysctl_max_unmapped_ratio),
+ .mode = 0644,
+ .proc_handler = sysctl_max_unmapped_ratio_sysctl_handler,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
+ },
+#endif
#ifdef CONFIG_NUMA
{
.procname = "zone_reclaim_mode",
@@ -1669,6 +1669,9 @@ zonelist_scan:
unsigned long mark;
int ret;
+ if (should_reclaim_unmapped_pages(zone))
+ wakeup_kswapd(zone, order, classzone_idx);
+
mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
if (zone_watermark_ok(zone, order, mark,
classzone_idx, alloc_flags))
@@ -4249,8 +4252,14 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
zone->spanned_pages = size;
zone->present_pages = realsize;
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL) || defined(CONFIG_NUMA)
zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
/ 100;
+#endif
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL)
+ zone->max_unmapped_pages = (realsize*sysctl_max_unmapped_ratio)
+ / 100;
+#endif
#ifdef CONFIG_NUMA
zone->node = nid;
zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
@@ -5157,6 +5166,7 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
return 0;
}
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL) || defined(CONFIG_NUMA)
int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
@@ -5173,6 +5183,25 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
return 0;
}
+#endif
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL)
+/* Sysctl handler for vm.max_unmapped_ratio: refresh per-zone page limits. */
+int sysctl_max_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
+ void __user *buffer, size_t *length, loff_t *ppos)
+{
+ struct zone *z;
+ int err = proc_dointvec_minmax(table, write, buffer, length, ppos);
+
+ if (err)
+ return err;
+ /* Recompute every zone's limit from the current percentage. */
+ for_each_zone(z)
+ z->max_unmapped_pages =
+ (z->present_pages * sysctl_max_unmapped_ratio) / 100;
+ return 0;
+}
+#endif
+
#ifdef CONFIG_NUMA
int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
@@ -158,6 +158,29 @@ static DECLARE_RWSEM(shrinker_rwsem);
#define scanning_global_lru(sc) (1)
#endif
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL)
+static void reclaim_unmapped_pages(int priority, struct zone *zone,
+ struct scan_control *sc);
+static int unmapped_page_control __read_mostly;
+
+/* Boot parameter handler: seeing "unmapped_page_control" turns it on. */
+static int __init unmapped_page_control_parm(char *str)
+{
+ /* @str is not examined; presence of the option is all that matters. */
+ unmapped_page_control = 1;
+ /* XXX: Should we tweak swappiness here? */
+ return 1;
+}
+__setup("unmapped_page_control", unmapped_page_control_parm);
+
+#else /* !CONFIG_UNMAPPED_PAGECACHE_CONTROL */
+/* Stub for !CONFIG_UNMAPPED_PAGECACHE_CONTROL: nothing to reclaim. */
+static inline void reclaim_unmapped_pages(int priority,
+ struct zone *zone, struct scan_control *sc)
+{
+}
+#endif
+
static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
struct scan_control *sc)
{
@@ -2371,6 +2394,12 @@ loop_again:
shrink_active_list(SWAP_CLUSTER_MAX, zone,
&sc, priority, 0);
+ /*
+ * We do unmapped page reclaim once here and once
+ * below, so that we don't lose out
+ */
+ reclaim_unmapped_pages(priority, zone, &sc);
+
if (!zone_watermark_ok_safe(zone, order,
high_wmark_pages(zone), 0, 0)) {
end_zone = i;
@@ -2408,6 +2437,11 @@ loop_again:
continue;
sc.nr_scanned = 0;
+ /*
+ * Reclaim unmapped pages upfront, this should be
+ * really cheap
+ */
+ reclaim_unmapped_pages(priority, zone, &sc);
/*
* Call soft limit reclaim before calling shrink_zone.
@@ -2721,7 +2755,8 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
}
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
- if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
+ if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0) &&
+ !should_reclaim_unmapped_pages(zone))
return;
trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
@@ -2874,6 +2909,7 @@ static int __init kswapd_init(void)
module_init(kswapd_init)
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL) || defined(CONFIG_NUMA)
/*
* Zone reclaim mode
*
@@ -2900,6 +2936,10 @@ int zone_reclaim_mode __read_mostly;
*/
int sysctl_min_unmapped_ratio = 1;
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL)
+int sysctl_max_unmapped_ratio = 16;
+#endif
+
/*
* If the number of slab pages in a zone grows beyond this percentage then
* slab reclaim needs to occur.
@@ -3094,6 +3134,52 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
return ret;
}
+#endif
+
+#if defined(CONFIG_UNMAPPED_PAGECACHE_CONTROL)
+/*
+ * reclaim_unmapped_pages - trim unmapped page cache in @zone towards its
+ * min_unmapped_pages floor. Inspired by the CONFIG_NUMA zone reclaim code.
+ * Only unmapped pages are reclaimed for now; slab control may follow.
+ * @priority is currently unused; @sc is copied, never modified. The copy
+ * runs with swappiness, may_writepage and may_unmap all cleared.
+ */
+static void reclaim_unmapped_pages(int priority, struct zone *zone,
+ struct scan_control *sc)
+{
+ unsigned long unmapped;
+ struct scan_control nsc;
+
+ if (likely(!unmapped_page_control))
+ return;
+
+ /* Sample once: a second read could drop below the minimum and wrap. */
+ unmapped = zone_unmapped_file_pages(zone);
+ if (unmapped <= zone->min_unmapped_pages)
+ return;
+
+ nsc = *sc;
+ nsc.swappiness = 0;
+ nsc.may_writepage = 0;
+ nsc.may_unmap = 0;
+ nsc.nr_reclaimed = 0;
+
+ /*
+ * Best effort only: reclaim 1/8th of the excess over
+ * min_unmapped_pages per call so we are not too aggressive.
+ */
+ zone_reclaim_pages(zone, &nsc,
+ (unmapped - zone->min_unmapped_pages) >> 3);
+}
+
+/* Boot-option gated: is @zone above its max_unmapped_pages limit? */
+bool should_reclaim_unmapped_pages(struct zone *zone)
+{
+ if (likely(!unmapped_page_control))
+ return false;
+ return zone_unmapped_file_pages(zone) > zone->max_unmapped_pages;
+}
+#endif
/*
* page_evictable - test whether a page is evictable