@@ -639,6 +639,29 @@ out:
static void add_to_region_allocator(struct zone *z, struct free_list *free_list,
int region_id);
+/*
+ * Tell whether @region of zone @z is worth handing to the region
+ * evacuator, given the @migratetype of the page being freed.
+ * Returns 0 if not, else the verdict of should_evacuate_region().
+ */
+static inline int region_is_evac_candidate(struct zone *z,
+					    struct zone_mem_region *region,
+					    int migratetype)
+{
+	/*
+	 * Hold off until the system is fully up and running, and never
+	 * bother with regions that live in ZONE_DMA.
+	 */
+	if (system_state != SYSTEM_RUNNING || zone_idx(z) == ZONE_DMA)
+		return 0;
+
+	/* Only MOVABLE or RECLAIMABLE allocations qualify for evacuation. */
+	if (migratetype != MIGRATE_MOVABLE &&
+	    migratetype != MIGRATE_RECLAIMABLE)
+		return 0;
+
+	return should_evacuate_region(z, region);
+}
static inline int can_return_region(struct mem_region_list *region, int order,
struct free_list *free_list)
@@ -683,7 +706,9 @@ static void add_to_freelist(struct page *page, struct free_list *free_list,
{
struct list_head *prev_region_list, *lru;
struct mem_region_list *region;
- int region_id, prev_region_id;
+ int region_id, prev_region_id, migratetype;
+ struct zone *zone;
+ struct pglist_data *pgdat;
lru = &page->lru;
region_id = page_zone_region_id(page);
@@ -741,8 +766,17 @@ try_return_region:
* Try to return the freepages of a memory region to the region
* allocator, if possible.
*/
- if (can_return_region(region, order, free_list))
+ if (can_return_region(region, order, free_list)) {
add_to_region_allocator(page_zone(page), free_list, region_id);
+ return;
+ }
+
+ zone = page_zone(page);
+ migratetype = get_pageblock_migratetype(page);
+ pgdat = NODE_DATA(page_to_nid(page));
+
+ if (region_is_evac_candidate(zone, region->zone_region, migratetype))
+ queue_mempower_work(pgdat, zone, region_id);
}
/*
Now that we have the entire infrastructure to perform targeted region evacuation from a dedicated kthread (kmempowerd), modify the page-allocator to invoke the region-evacuator at opportune points.

At a basic level, the most obvious opportunity to try region-evacuation is when a page is freed back to the page-allocator. The rationale behind this is explained below.

The page-allocator already has the intelligence to allocate pages such that they are consolidated within as few regions as possible. That is, due to the sorted-buddy design, it will _not_ spill allocations to a new region as long as there is still memory available in lower-numbered regions to satisfy the allocation request.

So, the fragmentation happens _after_ the pages are allocated, i.e., once their owner starts freeing the memory in a random fashion. This freeing of pages presents an opportunity to the MM subsystem: if the pages freed belong to lower-numbered regions, then there is a chance that pages from higher-numbered regions could be moved to these freshly freed pages, thereby causing further consolidation of regions.

With this in mind, add the region-evacuation trigger in the page-freeing path. Along with that, also add the checks and intelligence necessary to avoid compaction attempts that don't provide any net benefit. For example, we can avoid compacting regions in ZONE_DMA, or regions that contain mostly MIGRATE_UNMOVABLE allocations, etc. These checks are best done on the page-allocator side. Apart from them, also perform the same eligibility checks that the region-evacuator employs, to avoid useless wakeups of kmempowerd.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
---
 mm/page_alloc.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html