@@ -182,6 +182,8 @@ bool is_transparent_hugepage(struct page *page);
bool can_split_huge_page(struct page *page, int *pextra_pins);
int split_huge_page_to_list(struct page *page, struct list_head *list);
+int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+ unsigned int new_order);
static inline int split_huge_page(struct page *page)
{
return split_huge_page_to_list(page, NULL);
@@ -385,6 +387,12 @@ split_huge_page_to_list(struct page *page, struct list_head *list)
{
return 0;
}
+static inline int
+split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+ unsigned int new_order)
+{
+ return 0;
+}
static inline int split_huge_page(struct page *page)
{
return 0;
@@ -2325,12 +2325,14 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void unmap_page(struct page *page)
{
- enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK |
- TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD;
+ enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_RMAP_LOCKED;
bool unmap_success;
VM_BUG_ON_PAGE(!PageHead(page), page);
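+	/* only pages of at least PMD order can be mapped with PMDs */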
+ if (thp_order(page) >= HPAGE_PMD_ORDER)
+ ttu_flags |= TTU_SPLIT_HUGE_PMD;
+
if (PageAnon(page))
ttu_flags |= TTU_SPLIT_FREEZE;
@@ -2338,21 +2340,23 @@ static void unmap_page(struct page *page)
VM_BUG_ON_PAGE(!unmap_success, page);
}
-static void remap_page(struct page *page, unsigned int nr)
+static void remap_page(struct page *page, unsigned int nr, unsigned int new_nr)
{
- int i;
- if (PageTransHuge(page)) {
+ unsigned int i;
+
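+	/*
+	 * If no split happened, @page is still one compound page of @nr pages
+	 * and is remapped as a whole; otherwise remap each after-split page.
+	 */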
+ if (thp_nr_pages(page) == nr) {
remove_migration_ptes(page, page, true);
} else {
- for (i = 0; i < nr; i++)
+ for (i = 0; i < nr; i += new_nr)
remove_migration_ptes(page + i, page + i, true);
}
}
static void __split_huge_page_tail(struct page *head, int tail,
- struct lruvec *lruvec, struct list_head *list)
+ struct lruvec *lruvec, struct list_head *list, unsigned int new_order)
{
struct page *page_tail = head + tail;
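+	/* tails become compound page heads when @new_order is non-zero */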
+ unsigned long compound_head_flag = new_order ? (1L << PG_head) : 0;
VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail);
@@ -2376,6 +2380,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
#ifdef CONFIG_64BIT
(1L << PG_arch_2) |
#endif
+ compound_head_flag |
(1L << PG_dirty)));
/* ->mapping in first tail page is compound_mapcount */
@@ -2384,7 +2389,10 @@ static void __split_huge_page_tail(struct page *head, int tail,
page_tail->mapping = head->mapping;
page_tail->index = head->index + tail;
- /* Page flags must be visible before we make the page non-compound. */
+ /*
+ * Page flags must be visible before we make the page non-compound or
+ * a compound page in new_order.
+ */
smp_wmb();
/*
@@ -2394,10 +2402,15 @@ static void __split_huge_page_tail(struct page *head, int tail,
* which needs correct compound_head().
*/
clear_compound_head(page_tail);
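+	/* re-initialize the tail as the head of a @new_order compound page */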
+ if (new_order) {
+ prep_compound_page(page_tail, new_order);
+ thp_prep(page_tail);
+ }
/* Finally unfreeze refcount. Additional reference from page cache. */
- page_ref_unfreeze(page_tail, 1 + (!PageAnon(head) ||
- PageSwapCache(head)));
+ page_ref_unfreeze(page_tail, 1 + ((!PageAnon(head) ||
+ PageSwapCache(head)) ?
+ thp_nr_pages(page_tail) : 0));
if (page_is_young(head))
set_page_young(page_tail);
@@ -2415,7 +2428,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
}
static void __split_huge_page(struct page *page, struct list_head *list,
- pgoff_t end, unsigned long flags)
+ pgoff_t end, unsigned long flags, unsigned int new_order)
{
struct page *head = compound_head(page);
pg_data_t *pgdat = page_pgdat(head);
@@ -2424,12 +2437,13 @@ static void __split_huge_page(struct page *page, struct list_head *list,
unsigned long offset = 0;
unsigned int order = thp_order(head);
unsigned int nr = thp_nr_pages(head);
+ unsigned int new_nr = 1 << new_order;
int i;
lruvec = mem_cgroup_page_lruvec(head, pgdat);
/* complete memcg works before add pages to LRU */
- mem_cgroup_split_huge_fixup(head, 0);
+ mem_cgroup_split_huge_fixup(head, new_order);
if (PageAnon(head) && PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };
@@ -2439,46 +2453,54 @@ static void __split_huge_page(struct page *page, struct list_head *list,
xa_lock(&swap_cache->i_pages);
}
- for (i = nr - 1; i >= 1; i--) {
- __split_huge_page_tail(head, i, lruvec, list);
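+	/* split out the tail pages in groups of new_nr, from the end */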
+ for (i = nr - new_nr; i >= new_nr; i -= new_nr) {
+ __split_huge_page_tail(head, i, lruvec, list, new_order);
/* Some pages can be beyond i_size: drop them from page cache */
if (head[i].index >= end) {
ClearPageDirty(head + i);
__delete_from_page_cache(head + i, NULL);
if (IS_ENABLED(CONFIG_SHMEM) && PageSwapBacked(head))
- shmem_uncharge(head->mapping->host, 1);
+ shmem_uncharge(head->mapping->host, new_nr);
put_page(head + i);
} else if (!PageAnon(page)) {
__xa_store(&head->mapping->i_pages, head[i].index,
head + i, 0);
} else if (swap_cache) {
+			/*
+			 * Splitting anonymous THPs (including swapped-out
+			 * ones) to a non-zero order is not supported.
+			 */
+ VM_BUG_ON(new_order);
__xa_store(&swap_cache->i_pages, offset + i,
head + i, 0);
}
}
- ClearPageCompound(head);
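+	/* the head becomes a normal page or a @new_order compound page */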
+ if (!new_order)
+ ClearPageCompound(head);
+ else
+ set_compound_order(head, new_order);
- split_page_owner(head, order, 0);
+ split_page_owner(head, order, new_order);
/* See comment in __split_huge_page_tail() */
if (PageAnon(head)) {
/* Additional pin to swap cache */
if (PageSwapCache(head)) {
- page_ref_add(head, 2);
+ page_ref_add(head, 1 + new_nr);
xa_unlock(&swap_cache->i_pages);
} else {
page_ref_inc(head);
}
} else {
/* Additional pin to page cache */
- page_ref_add(head, 2);
+ page_ref_add(head, 1 + new_nr);
xa_unlock(&head->mapping->i_pages);
}
spin_unlock_irqrestore(&pgdat->lru_lock, flags);
- remap_page(head, nr);
+ remap_page(head, nr, new_nr);
if (PageSwapCache(head)) {
swp_entry_t entry = { .val = page_private(head) };
@@ -2486,7 +2508,14 @@ static void __split_huge_page(struct page *page, struct list_head *list,
split_swap_cluster(entry);
}
- for (i = 0; i < nr; i++) {
+	/*
+	 * When splitting to smaller THPs, point @page at the head of the
+	 * after-split compound page it belongs to, so that the GUP pin and
+	 * PG_locked are transferred to the right after-split page.
+	 */
+ if (new_order)
+ page = compound_head(page);
+
+ for (i = 0; i < nr; i += new_nr) {
struct page *subpage = head + i;
if (subpage == page)
continue;
@@ -2604,37 +2633,61 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
* This function splits huge page into normal pages. @page can point to any
* subpage of huge page to split. Split doesn't change the position of @page.
*
+ * See split_huge_page_to_list_to_order() for more details.
+ *
+ * Returns 0 if the hugepage is split successfully.
+ * Returns -EBUSY if the page is pinned or if anon_vma disappeared from under
+ * us.
+ */
+int split_huge_page_to_list(struct page *page, struct list_head *list)
+{
+ return split_huge_page_to_list_to_order(page, list, 0);
+}
+
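+/*
+ * Illustrative usage sketch (not part of this patch): a caller holding a pin
+ * on a locked, file-backed, PMD-sized THP could split it into order-2 pages
+ * with
+ *
+ *	ret = split_huge_page_to_list_to_order(page, NULL, 2);
+ *
+ * where a non-zero return means the page was not split and is still a THP.
+ * Splitting anonymous pages or splitting to order-1 is not supported, see
+ * the VM_BUG_ON()s below.
+ */
+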
+/*
+ * This function splits a huge page into pages of @new_order. @page can point
+ * to any subpage of the huge page to split. Split doesn't change the position
+ * of @page.
+ *
* Only caller must hold pin on the @page, otherwise split fails with -EBUSY.
* The huge page must be locked.
*
* If @list is null, tail pages will be added to LRU list, otherwise, to @list.
*
- * Both head page and tail pages will inherit mapping, flags, and so on from
- * the hugepage.
+ * Pages of @new_order will inherit mapping, flags, and so on from the
+ * hugepage.
*
- * GUP pin and PG_locked transferred to @page. Rest subpages can be freed if
- * they are not mapped.
+ * GUP pin and PG_locked are transferred to @page or the compound page @page
+ * belongs to. The rest of the subpages can be freed if they are not mapped.
*
* Returns 0 if the hugepage is split successfully.
* Returns -EBUSY if the page is pinned or if anon_vma disappeared from under
* us.
*/
-int split_huge_page_to_list(struct page *page, struct list_head *list)
+int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
+ unsigned int new_order)
{
struct page *head = compound_head(page);
struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
struct deferred_split *ds_queue = get_deferred_split_queue(head);
- XA_STATE(xas, &head->mapping->i_pages, head->index);
+ /* reset xarray order to new order after split */
+ XA_STATE_ORDER(xas, &head->mapping->i_pages, head->index, new_order);
struct anon_vma *anon_vma = NULL;
struct address_space *mapping = NULL;
int count, mapcount, extra_pins, ret;
unsigned long flags;
pgoff_t end;
+ VM_BUG_ON(thp_order(head) <= new_order);
VM_BUG_ON_PAGE(is_huge_zero_page(head), head);
VM_BUG_ON_PAGE(!PageLocked(head), head);
VM_BUG_ON_PAGE(!PageCompound(head), head);
+ /* Cannot split THP to order-1 (no order-1 THPs) */
+ VM_BUG_ON(new_order == 1);
+
+	/* Splitting anonymous THP to a non-zero order is not supported */
+ VM_BUG_ON(PageAnon(head) && new_order);
+
if (PageWriteback(head))
return -EBUSY;
@@ -2720,18 +2773,22 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) {
if (!list_empty(page_deferred_list(head))) {
ds_queue->split_queue_len--;
- list_del(page_deferred_list(head));
+ list_del_init(page_deferred_list(head));
}
spin_unlock(&ds_queue->split_queue_lock);
if (mapping) {
if (PageSwapBacked(head))
__dec_lruvec_page_state(head, NR_SHMEM_THPS);
- else
+ else if (!new_order)
+				/*
+				 * Decrease file THP stats only when splitting
+				 * to normal (order-0) pages.
+				 */
__mod_lruvec_page_state(head, NR_FILE_THPS,
-thp_nr_pages(head));
}
- __split_huge_page(page, list, end, flags);
+ __split_huge_page(page, list, end, flags, new_order);
ret = 0;
} else {
if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
@@ -2746,7 +2803,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
fail: if (mapping)
xas_unlock(&xas);
spin_unlock_irqrestore(&pgdata->lru_lock, flags);
- remap_page(head, thp_nr_pages(head));
+ remap_page(head, thp_nr_pages(head), 1);
ret = -EBUSY;
}
@@ -983,7 +983,6 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
struct lruvec *lruvec, struct list_head *list)
{
VM_BUG_ON_PAGE(!PageHead(page), page);
- VM_BUG_ON_PAGE(PageCompound(page_tail), page);
VM_BUG_ON_PAGE(PageLRU(page_tail), page);
lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock);