@@ -300,12 +300,12 @@ struct zone {
*/
unsigned long lowmem_reserve[MAX_NR_ZONES];
+ unsigned long min_unmapped_pages;
#ifdef CONFIG_NUMA
int node;
/*
* zone reclaim becomes active if more unmapped pages exist.
*/
- unsigned long min_unmapped_pages;
unsigned long min_slab_pages;
#endif
struct per_cpu_pageset __percpu *pageset;
@@ -251,10 +251,11 @@ extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
extern int remove_mapping(struct address_space *mapping, struct page *page);
extern long vm_total_pages;
+extern bool should_balance_unmapped_pages(struct zone *zone);
+extern int sysctl_min_unmapped_ratio;
#ifdef CONFIG_NUMA
extern int zone_reclaim_mode;
-extern int sysctl_min_unmapped_ratio;
extern int sysctl_min_slab_ratio;
extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
#else
@@ -1642,6 +1642,9 @@ zonelist_scan:
unsigned long mark;
int ret;
+ if (should_balance_unmapped_pages(zone))
+ wakeup_kswapd(zone, order);
+
mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
if (zone_watermark_ok(zone, order, mark,
classzone_idx, alloc_flags))
@@ -4101,10 +4104,10 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
zone->spanned_pages = size;
zone->present_pages = realsize;
-#ifdef CONFIG_NUMA
- zone->node = nid;
zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
/ 100;
+#ifdef CONFIG_NUMA
+ zone->node = nid;
zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
#endif
zone->name = zone_names[j];
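
To get a feel for the threshold the hunk above sets up, here is a minimal userspace sketch (illustrative only, not part of the patch; the 1 GiB zone size is a made-up example) of the min_unmapped_pages arithmetic: the default sysctl_min_unmapped_ratio of 1 reserves one percent of a zone's present pages as the unmapped page cache floor.

#include <stdio.h>

int main(void)
{
	/* Hypothetical zone: 1 GiB of 4 KiB pages, default ratio of 1% */
	unsigned long realsize = 262144;
	int min_unmapped_ratio = 1;
	unsigned long min_unmapped_pages;

	/* Same arithmetic as in free_area_init_core() above */
	min_unmapped_pages = (realsize * min_unmapped_ratio) / 100;

	/* 262144 * 1 / 100 = 2621 pages, i.e. roughly 10 MiB */
	printf("min_unmapped_pages = %lu\n", min_unmapped_pages);
	return 0;
}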
@@ -5012,7 +5015,6 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
return 0;
}
-#ifdef CONFIG_NUMA
int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
@@ -5029,6 +5031,7 @@ int sysctl_min_unmapped_ratio_sysctl_handler(ctl_table *table, int write,
return 0;
}
+#ifdef CONFIG_NUMA
int sysctl_min_slab_ratio_sysctl_handler(ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
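
With the handler above no longer hidden behind CONFIG_NUMA, min_unmapped_ratio can be tuned at runtime on all configurations. A minimal userspace sketch, assuming the vm.min_unmapped_ratio sysctl table entry is likewise registered unconditionally (that hunk is not shown here):

#include <stdio.h>

int main(void)
{
	/* Raise the unmapped page cache floor from 1% to 5% of each zone */
	FILE *f = fopen("/proc/sys/vm/min_unmapped_ratio", "w");

	if (!f) {
		perror("min_unmapped_ratio");
		return 1;
	}
	fprintf(f, "5\n");
	fclose(f);
	return 0;
}

The same effect is available from the shell with sysctl -w vm.min_unmapped_ratio=5.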
@@ -139,6 +139,18 @@ static DECLARE_RWSEM(shrinker_rwsem);
#define scanning_global_lru(sc) (1)
#endif
+static int unmapped_page_control __read_mostly;
+
+static int __init unmapped_page_control_parm(char *str)
+{
+ unmapped_page_control = 1;
+ /*
+ * XXX: Should we tweak swappiness here?
+ */
+ return 1;
+}
+__setup("unmapped_page_control", unmapped_page_control_parm);
+
static struct zone_reclaim_stat *get_reclaim_stat(struct zone *zone,
struct scan_control *sc)
{
@@ -2091,6 +2103,102 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
}
/*
+ * Percentage of pages in a zone that must be unmapped for zone_reclaim to
+ * occur.
+ */
+int sysctl_min_unmapped_ratio = 1;
+/*
+ * Priority for ZONE_RECLAIM. This determines the fraction of pages
+ * of a node considered for each zone_reclaim. 4 scans 1/16th of
+ * a zone.
+ */
+#define ZONE_RECLAIM_PRIORITY 4
+
+#define RECLAIM_OFF 0
+#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
+#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
+#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */
+
+static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
+{
+ unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
+ unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
+ zone_page_state(zone, NR_ACTIVE_FILE);
+
+ /*
+ * It's possible for there to be more file mapped pages than
+ * accounted for by the pages on the file LRU lists because
+ * tmpfs pages accounted for as ANON can also be FILE_MAPPED
+ */
+ return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
+}
+
+/*
+ * Helper function to reclaim unmapped pages; we might add something
+ * similar for the slab cache as well. Currently this function is
+ * shared with __zone_reclaim().
+ */
+static inline void
+zone_reclaim_unmapped_pages(struct zone *zone, struct scan_control *sc,
+ unsigned long nr_pages)
+{
+ int priority;
+ /*
+ * Free memory by calling shrink zone with increasing
+ * priorities until we have enough memory freed.
+ */
+ priority = ZONE_RECLAIM_PRIORITY;
+ do {
+ shrink_zone(priority, zone, sc);
+ priority--;
+ } while (priority >= 0 && sc->nr_reclaimed < nr_pages);
+}
+
+/*
+ * Routine to balance unmapped pages, inspired by the code under
+ * CONFIG_NUMA that does unmapped page and slab page control by keeping
+ * min_unmapped_pages in the zone. We currently reclaim just unmapped
+ * pages; slab control will come in soon, at which point this routine
+ * should be renamed to balance cached pages.
+ */
+static unsigned long balance_unmapped_pages(int priority, struct zone *zone,
+ struct scan_control *sc)
+{
+ if (unmapped_page_control &&
+ (zone_unmapped_file_pages(zone) > zone->min_unmapped_pages)) {
+ struct scan_control nsc;
+ unsigned long nr_pages;
+
+ nsc = *sc;
+
+ nsc.swappiness = 0;
+ nsc.may_writepage = 0;
+ nsc.may_unmap = 0;
+ nsc.nr_reclaimed = 0;
+
+ nr_pages = zone_unmapped_file_pages(zone) -
+ zone->min_unmapped_pages;
+ /* Try to reclaim an eighth of the excess unmapped cache pages */
+ nr_pages >>= 3;
+
+ zone_reclaim_unmapped_pages(zone, &nsc, nr_pages);
+ return nsc.nr_reclaimed;
+ }
+ return 0;
+}
+
+#define UNMAPPED_PAGE_RATIO 16
+bool should_balance_unmapped_pages(struct zone *zone)
+{
+ if (unmapped_page_control &&
+ (zone_unmapped_file_pages(zone) >
+ UNMAPPED_PAGE_RATIO * zone->min_unmapped_pages))
+ return true;
+ return false;
+}
+
+/*
* For kswapd, balance_pgdat() will work across all this node's zones until
* they are all at high_wmark_pages(zone).
*
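
Continuing the earlier example numbers, the sketch below (illustrative only, hypothetical page counts) shows when the code above kicks in: should_balance_unmapped_pages() asks for kswapd once unmapped file pages exceed UNMAPPED_PAGE_RATIO times min_unmapped_pages, and balance_unmapped_pages() then targets an eighth of the excess over min_unmapped_pages, provided the unmapped_page_control boot parameter was given.

#include <stdio.h>

#define UNMAPPED_PAGE_RATIO 16

int main(void)
{
	/* min_unmapped_pages = 2621 as in the earlier 1 GiB zone example */
	unsigned long min_unmapped_pages = 2621;
	/* hypothetical unmapped file pages on the zone's LRU lists */
	unsigned long unmapped_file_pages = 60000;
	unsigned long nr_pages;

	/* should_balance_unmapped_pages(): 60000 > 16 * 2621 = 41936 */
	if (unmapped_file_pages > UNMAPPED_PAGE_RATIO * min_unmapped_pages)
		printf("wake kswapd for this zone\n");

	/* balance_unmapped_pages(): reclaim an eighth of the excess */
	nr_pages = (unmapped_file_pages - min_unmapped_pages) >> 3;
	printf("reclaim target: %lu pages\n", nr_pages);	/* 7172 */
	return 0;
}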
@@ -2169,6 +2277,12 @@ loop_again:
shrink_active_list(SWAP_CLUSTER_MAX, zone,
&sc, priority, 0);
+ /*
+ * We do unmapped page balancing here as well as in the
+ * main loop below, so that zones which are otherwise
+ * above their watermarks still get balanced
+ */
+ balance_unmapped_pages(priority, zone, &sc);
+
if (!zone_watermark_ok(zone, order,
high_wmark_pages(zone), 0, 0)) {
end_zone = i;
@@ -2206,6 +2320,12 @@ loop_again:
sc.nr_scanned = 0;
/*
+ * Balance unmapped pages up front; this should be
+ * really cheap
+ */
+ balance_unmapped_pages(priority, zone, &sc);
+
+ /*
* Call soft limit reclaim before calling shrink_zone.
* For now we ignore the return value
*/
@@ -2418,7 +2538,8 @@ void wakeup_kswapd(struct zone *zone, int order)
return;
pgdat = zone->zone_pgdat;
- if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
+ if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0) &&
+ !should_balance_unmapped_pages(zone))
return;
if (pgdat->kswapd_max_order < order)
pgdat->kswapd_max_order = order;
@@ -2585,44 +2706,12 @@ module_init(kswapd_init)
*/
int zone_reclaim_mode __read_mostly;
-#define RECLAIM_OFF 0
-#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
-#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
-#define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */
-
-/*
- * Priority for ZONE_RECLAIM. This determines the fraction of pages
- * of a node considered for each zone_reclaim. 4 scans 1/16th of
- * a zone.
- */
-#define ZONE_RECLAIM_PRIORITY 4
-
-/*
- * Percentage of pages in a zone that must be unmapped for zone_reclaim to
- * occur.
- */
-int sysctl_min_unmapped_ratio = 1;
-
/*
* If the number of slab pages in a zone grows beyond this percentage then
* slab reclaim needs to occur.
*/
int sysctl_min_slab_ratio = 5;
-static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
-{
- unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
- unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) +
- zone_page_state(zone, NR_ACTIVE_FILE);
-
- /*
- * It's possible for there to be more file mapped pages than
- * accounted for by the pages on the file LRU lists because
- * tmpfs pages accounted for as ANON can also be FILE_MAPPED
- */
- return (file_lru > file_mapped) ? (file_lru - file_mapped) : 0;
-}
-
/* Work out how many page cache pages we can reclaim in this reclaim_mode */
static long zone_pagecache_reclaimable(struct zone *zone)
{
@@ -2660,7 +2749,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
const unsigned long nr_pages = 1 << order;
struct task_struct *p = current;
struct reclaim_state reclaim_state;
- int priority;
struct scan_control sc = {
.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
.may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
@@ -2689,11 +2777,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
* Free memory by calling shrink zone with increasing
* priorities until we have enough memory freed.
*/
- priority = ZONE_RECLAIM_PRIORITY;
- do {
- shrink_zone(priority, zone, &sc);
- priority--;
- } while (priority >= 0 && sc.nr_reclaimed < nr_pages);
+ zone_reclaim_unmapped_pages(zone, &sc, nr_pages);
}
nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);