@@ -333,6 +333,24 @@ enum zone_watermarks {
NR_WMARK
};
+/*
+ * Number of per-cpu page lists: one per migratetype for each order from 0
+ * up to and including PAGE_ALLOC_COSTLY_ORDER, plus one additional list per
+ * migratetype for pageblock-sized pages for THP if configured.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define NR_PCP_THP 1
+#else
+#define NR_PCP_THP 0
+#endif
+#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
+
+/*
+ * Encoding of migratetype and order in the same integer: the order occupies
+ * the NR_PCP_ORDER_WIDTH least significant bits (extracted with
+ * NR_PCP_ORDER_MASK) and the migratetype is shifted into the bits above it.
+ */
+#define NR_PCP_ORDER_WIDTH 8
+#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1)
+
#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
@@ -349,7 +367,7 @@ struct per_cpu_pages {
#endif
/* Lists of pages, one per migrate type stored on the pcp-lists */
- struct list_head lists[MIGRATE_PCPTYPES];
+ struct list_head lists[NR_PCP_LISTS];
};
struct per_cpu_zonestat {
@@ -198,7 +198,7 @@ extern void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags);
extern int user_min_free_kbytes;
-extern void free_unref_page(struct page *page);
+extern void free_unref_page(struct page *page, unsigned int order);
extern void free_unref_page_list(struct list_head *list);
extern void zone_pcp_update(struct zone *zone, int cpu_online);
@@ -676,10 +676,53 @@ static void bad_page(struct page *page, const char *reason)
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
+/*
+ * Map a (migratetype, order) pair to its index in pcp->lists[]. Lists are
+ * grouped by order, MIGRATE_PCPTYPES entries per group. With
+ * CONFIG_TRANSPARENT_HUGEPAGE any order above PAGE_ALLOC_COSTLY_ORDER must
+ * be pageblock_order and uses the group right after the costly orders.
+ */
+static inline unsigned int order_to_pindex(int migratetype, int order)
+{
+	int slot;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	if (order <= PAGE_ALLOC_COSTLY_ORDER) {
+		slot = order;
+	} else {
+		VM_BUG_ON(order != pageblock_order);
+		slot = PAGE_ALLOC_COSTLY_ORDER + 1;
+	}
+#else
+	VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
+	slot = order;
+#endif
+
+	return migratetype + (MIGRATE_PCPTYPES * slot);
+}
+
+/*
+ * Recover the page order held by the per-cpu list with index @pindex.
+ * Each run of MIGRATE_PCPTYPES consecutive lists shares one order, so the
+ * order is the list index divided by MIGRATE_PCPTYPES. With
+ * CONFIG_TRANSPARENT_HUGEPAGE the group past PAGE_ALLOC_COSTLY_ORDER holds
+ * pageblock_order pages.
+ */
+static inline int pindex_to_order(unsigned int pindex)
+{
+	int order = pindex / MIGRATE_PCPTYPES;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/* The group past the costly orders is the pageblock-sized list */
+	if (order > PAGE_ALLOC_COSTLY_ORDER)
+		order = pageblock_order;
+#else
+	VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
+#endif
+
+	return order;
+}
+
+/*
+ * Report whether pages of @order may be kept on the per-cpu lists: every
+ * order up to PAGE_ALLOC_COSTLY_ORDER qualifies, and with
+ * CONFIG_TRANSPARENT_HUGEPAGE so does pageblock_order.
+ */
+static inline bool pcp_allowed_order(unsigned int order)
+{
+	bool allowed = order <= PAGE_ALLOC_COSTLY_ORDER;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	allowed = allowed || order == pageblock_order;
+#endif
+	return allowed;
+}
+
static inline void free_the_page(struct page *page, unsigned int order)
{
- if (order == 0) /* Via pcp? */
- free_unref_page(page);
+ if (pcp_allowed_order(order)) /* Via pcp? Yes for orders pcp_allowed_order() accepts */
+ free_unref_page(page, order);
else
__free_pages_ok(page, order, FPI_NONE);
}
@@ -702,7 +745,7 @@ static inline void free_the_page(struct page *page, unsigned int order)
void free_compound_page(struct page *page)
{
mem_cgroup_uncharge(page);
- __free_pages_ok(page, compound_order(page), FPI_NONE);
+ free_the_page(page, compound_order(page)); /* via pcp when the order allows */
}
void prep_compound_page(struct page *page, unsigned int order)
@@ -1352,9 +1395,9 @@ static __always_inline bool free_pages_prepare(struct page *page,
* to pcp lists. With debug_pagealloc also enabled, they are also rechecked when
* moved from pcp lists to free lists.
*/
-static bool free_pcp_prepare(struct page *page)
+static bool free_pcp_prepare(struct page *page, unsigned int order)
{
- return free_pages_prepare(page, 0, true, FPI_NONE);
+ return free_pages_prepare(page, order, true, FPI_NONE);
}
static bool bulkfree_pcp_prepare(struct page *page)
@@ -1371,12 +1414,12 @@ static bool bulkfree_pcp_prepare(struct page *page)
* debug_pagealloc enabled, they are checked also immediately when being freed
* to the pcp lists.
*/
-static bool free_pcp_prepare(struct page *page)
+static bool free_pcp_prepare(struct page *page, unsigned int order)
{
if (debug_pagealloc_enabled_static())
- return free_pages_prepare(page, 0, true, FPI_NONE);
+ return free_pages_prepare(page, order, true, FPI_NONE);
else
- return free_pages_prepare(page, 0, false, FPI_NONE);
+ return free_pages_prepare(page, order, false, FPI_NONE);
}
static bool bulkfree_pcp_prepare(struct page *page)
@@ -1408,8 +1451,10 @@ static inline void prefetch_buddy(struct page *page)
static void free_pcppages_bulk(struct zone *zone, int count,
struct per_cpu_pages *pcp)
{
- int migratetype = 0;
+ int pindex = 0;
int batch_free = 0;
+ int nr_freed = 0;
+ unsigned int order;
int prefetch_nr = READ_ONCE(pcp->batch);
bool isolated_pageblocks;
struct page *page, *tmp;
@@ -1420,7 +1465,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
* below while (list_empty(list)) loop.
*/
count = min(pcp->count, count);
- while (count) {
+ while (count > 0) {
struct list_head *list;
/*
@@ -1432,24 +1477,31 @@ static void free_pcppages_bulk(struct zone *zone, int count,
*/
do {
batch_free++;
- if (++migratetype == MIGRATE_PCPTYPES)
- migratetype = 0;
- list = &pcp->lists[migratetype];
+ if (++pindex == NR_PCP_LISTS)
+ pindex = 0;
+ list = &pcp->lists[pindex];
} while (list_empty(list));
/* This is the only non-empty list. Free them all. */
- if (batch_free == MIGRATE_PCPTYPES)
+ if (batch_free == NR_PCP_LISTS)
batch_free = count;
+ order = pindex_to_order(pindex);
+ BUILD_BUG_ON(MAX_ORDER >= (1<<NR_PCP_ORDER_WIDTH));
do {
page = list_last_entry(list, struct page, lru);
/* must delete to avoid corrupting pcp list */
list_del(&page->lru);
- pcp->count--;
+ nr_freed += 1 << order;
+ count -= 1 << order;
if (bulkfree_pcp_prepare(page))
continue;
+ /* Encode order with the migratetype */
+ page->index <<= NR_PCP_ORDER_WIDTH;
+ page->index |= order;
+
list_add_tail(&page->lru, &head);
/*
@@ -1465,8 +1517,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
prefetch_buddy(page);
prefetch_nr--;
}
- } while (--count && --batch_free && !list_empty(list));
+ } while (count > 0 && --batch_free && !list_empty(list));
}
+ pcp->count -= nr_freed;
/*
* local_lock_irq held so equivalent to spin_lock_irqsave for
@@ -1481,14 +1534,19 @@ static void free_pcppages_bulk(struct zone *zone, int count,
*/
list_for_each_entry_safe(page, tmp, &head, lru) {
int mt = get_pcppage_migratetype(page);
+
+ /* mt has been encoded with the order (see above) */
+ order = mt & NR_PCP_ORDER_MASK;
+ mt >>= NR_PCP_ORDER_WIDTH;
+
/* MIGRATE_ISOLATE page should not go to pcplists */
VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
/* Pageblock could have been isolated meanwhile */
if (unlikely(isolated_pageblocks))
mt = get_pageblock_migratetype(page);
- __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
- trace_mm_page_pcpu_drain(page, 0, mt);
+ __free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE);
+ trace_mm_page_pcpu_drain(page, order, mt);
}
spin_unlock(&zone->lock);
}
@@ -3265,11 +3323,12 @@ void mark_free_pages(struct zone *zone)
}
#endif /* CONFIG_PM */
-static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
+static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
+ unsigned int order)
{
int migratetype;
- if (!free_pcp_prepare(page))
+ if (!free_pcp_prepare(page, order))
return false;
migratetype = get_pfnblock_migratetype(page, pfn);
@@ -3319,16 +3378,18 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
}
static void free_unref_page_commit(struct page *page, unsigned long pfn,
- int migratetype)
+ int migratetype, unsigned int order)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
int high;
+ int pindex;
__count_vm_event(PGFREE);
pcp = this_cpu_ptr(zone->per_cpu_pageset);
- list_add(&page->lru, &pcp->lists[migratetype]);
- pcp->count++;
+ pindex = order_to_pindex(migratetype, order);
+ list_add(&page->lru, &pcp->lists[pindex]);
+ pcp->count += 1 << order;
high = nr_pcp_high(pcp, zone);
if (pcp->count >= high) {
int batch = READ_ONCE(pcp->batch);
@@ -3338,15 +3399,15 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
}
/*
- * Free a 0-order page
+ * Free a pcp page
*/
-void free_unref_page(struct page *page)
+void free_unref_page(struct page *page, unsigned int order)
{
unsigned long flags;
unsigned long pfn = page_to_pfn(page);
int migratetype;
- if (!free_unref_page_prepare(page, pfn))
+ if (!free_unref_page_prepare(page, pfn, order))
return;
/*
@@ -3359,14 +3420,14 @@ void free_unref_page(struct page *page)
migratetype = get_pcppage_migratetype(page);
if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
if (unlikely(is_migrate_isolate(migratetype))) {
- free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
+ free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE);
return;
}
migratetype = MIGRATE_MOVABLE;
}
local_lock_irqsave(&pagesets.lock, flags);
- free_unref_page_commit(page, pfn, migratetype);
+ free_unref_page_commit(page, pfn, migratetype, order);
local_unlock_irqrestore(&pagesets.lock, flags);
}
@@ -3383,7 +3444,7 @@ void free_unref_page_list(struct list_head *list)
/* Prepare pages for freeing */
list_for_each_entry_safe(page, next, list, lru) {
pfn = page_to_pfn(page);
- if (!free_unref_page_prepare(page, pfn))
+ if (!free_unref_page_prepare(page, pfn, 0))
list_del(&page->lru);
/*
@@ -3415,7 +3476,7 @@ void free_unref_page_list(struct list_head *list)
set_page_private(page, 0);
migratetype = get_pcppage_migratetype(page);
trace_mm_page_free_batched(page);
- free_unref_page_commit(page, pfn, migratetype);
+ free_unref_page_commit(page, pfn, migratetype, 0);
/*
* Guard against excessive IRQ disabled times when we get
@@ -3551,7 +3612,8 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
/* Remove page from the per-cpu list, caller must protect the list */
static inline
-struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
+struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
+ int migratetype,
unsigned int alloc_flags,
struct per_cpu_pages *pcp,
struct list_head *list)
@@ -3560,16 +3622,30 @@ struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
do {
if (list_empty(list)) {
- pcp->count += rmqueue_bulk(zone, 0,
- READ_ONCE(pcp->batch), list,
+ int batch = READ_ONCE(pcp->batch);
+ int alloced;
+
+ /*
+ * Scale batch relative to order if batch implies
+ * free pages can be stored on the PCP. Batch can
+ * be 1 for small zones or for boot pagesets which
+ * should never store free pages as the pages may
+ * belong to arbitrary zones.
+ */
+ if (batch > 1)
+ batch = max(batch >> order, 2);
+ alloced = rmqueue_bulk(zone, order,
+ batch, list,
migratetype, alloc_flags);
+
+ pcp->count += alloced << order;
if (unlikely(list_empty(list)))
return NULL;
}
page = list_first_entry(list, struct page, lru);
list_del(&page->lru);
- pcp->count--;
+ pcp->count -= 1 << order;
} while (check_new_pcp(page));
return page;
@@ -3577,8 +3653,9 @@ struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
/* Lock and remove page from the per-cpu list */
static struct page *rmqueue_pcplist(struct zone *preferred_zone,
- struct zone *zone, gfp_t gfp_flags,
- int migratetype, unsigned int alloc_flags)
+ struct zone *zone, unsigned int order,
+ gfp_t gfp_flags, int migratetype,
+ unsigned int alloc_flags)
{
struct per_cpu_pages *pcp;
struct list_head *list;
@@ -3594,8 +3671,8 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
*/
pcp = this_cpu_ptr(zone->per_cpu_pageset);
pcp->free_factor >>= 1;
- list = &pcp->lists[migratetype];
- page = __rmqueue_pcplist(zone, migratetype, alloc_flags, pcp, list);
+ list = &pcp->lists[order_to_pindex(migratetype, order)];
+ page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
local_unlock_irqrestore(&pagesets.lock, flags);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1);
@@ -3616,15 +3693,15 @@ struct page *rmqueue(struct zone *preferred_zone,
unsigned long flags;
struct page *page;
- if (likely(order == 0)) {
+ if (likely(pcp_allowed_order(order))) {
/*
* MIGRATE_MOVABLE pcplist could have the pages on CMA area and
* we need to skip it when CMA area isn't allowed.
*/
if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
migratetype != MIGRATE_MOVABLE) {
- page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
- migratetype, alloc_flags);
+ page = rmqueue_pcplist(preferred_zone, zone, order,
+ gfp_flags, migratetype, alloc_flags);
goto out;
}
}
@@ -5196,7 +5273,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
/* Attempt the batch allocation */
local_lock_irqsave(&pagesets.lock, flags);
pcp = this_cpu_ptr(zone->per_cpu_pageset);
- pcp_list = &pcp->lists[ac.migratetype];
+ pcp_list = &pcp->lists[order_to_pindex(ac.migratetype, 0)];
while (nr_populated < nr_pages) {
@@ -5206,7 +5283,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
continue;
}
- page = __rmqueue_pcplist(zone, ac.migratetype, alloc_flags,
+ page = __rmqueue_pcplist(zone, 0, ac.migratetype, alloc_flags,
pcp, pcp_list);
if (unlikely(!page)) {
/* Try and get at least one page */
@@ -6756,13 +6833,13 @@ static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
static void per_cpu_pages_init(struct per_cpu_pages *pcp, struct per_cpu_zonestat *pzstats)
{
- int migratetype;
+ int pindex;
memset(pcp, 0, sizeof(*pcp));
memset(pzstats, 0, sizeof(*pzstats));
- for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
- INIT_LIST_HEAD(&pcp->lists[migratetype]);
+ for (pindex = 0; pindex < NR_PCP_LISTS; pindex++)
+ INIT_LIST_HEAD(&pcp->lists[pindex]);
/*
* Set batch and high values safe for a boot pageset. A true percpu
@@ -95,7 +95,7 @@ static void __put_single_page(struct page *page)
{
__page_cache_release(page);
mem_cgroup_uncharge(page);
- free_unref_page(page);
+ free_unref_page(page, 0);
}
static void __put_compound_page(struct page *page)