@@ -99,6 +99,14 @@ struct collapse_control {
int node);
};
+/* Output of one khugepaged_scan_[pmd|file]() request, filled in for the caller */
+struct collapse_result {
+ enum scan_result result; /* final status; caller counts a collapse on SCAN_SUCCEED */
+
+ /* Set when mmap_lock was released while handling the request */
+ bool dropped_mmap_lock;
+};
+
/**
* struct mm_slot - hash lookup from mm to mm_slot
* @hash: hash collision list
@@ -743,13 +751,13 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
result = SCAN_SUCCEED;
trace_mm_collapse_huge_page_isolate(page, none_or_zero,
referenced, writable, result);
- return 1;
+ return SCAN_SUCCEED;
}
out:
release_pte_pages(pte, _pte, compound_pagelist);
trace_mm_collapse_huge_page_isolate(page, none_or_zero,
referenced, writable, result);
- return 0;
+ return result;
}
static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
@@ -1087,7 +1095,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
struct collapse_control *cc, int referenced,
- int unmapped)
+ int unmapped, struct collapse_result *cr)
{
LIST_HEAD(compound_pagelist);
pmd_t *pmd, _pmd;
@@ -1095,7 +1103,6 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
pgtable_t pgtable;
struct page *new_page;
spinlock_t *pmd_ptl, *pte_ptl;
- int isolated = 0, result = 0;
struct vm_area_struct *vma;
struct mmu_notifier_range range;
gfp_t gfp;
@@ -1103,6 +1110,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
int node;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ cr->result = SCAN_FAIL;
/* Only allocate from the target node */
gfp = cc->gfp() | __GFP_THISNODE;
@@ -1114,6 +1122,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
* that. We will recheck the vma after taking it again in write mode.
*/
mmap_read_unlock(mm);
+ cr->dropped_mmap_lock = true;
node = khugepaged_find_target_node(cc);
/* sched to specified node before huage page memory copy */
@@ -1124,26 +1133,26 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
}
new_page = cc->alloc_hpage(cc, gfp, node);
if (!new_page) {
- result = SCAN_ALLOC_HUGE_PAGE_FAIL;
+ cr->result = SCAN_ALLOC_HUGE_PAGE_FAIL;
goto out_nolock;
}
if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) {
- result = SCAN_CGROUP_CHARGE_FAIL;
+ cr->result = SCAN_CGROUP_CHARGE_FAIL;
goto out_nolock;
}
count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
mmap_read_lock(mm);
- result = hugepage_vma_revalidate(mm, address, &vma);
- if (result) {
+ cr->result = hugepage_vma_revalidate(mm, address, &vma);
+ if (cr->result) {
mmap_read_unlock(mm);
goto out_nolock;
}
pmd = mm_find_pmd(mm, address);
if (!pmd) {
- result = SCAN_PMD_NULL;
+ cr->result = SCAN_PMD_NULL;
mmap_read_unlock(mm);
goto out_nolock;
}
@@ -1166,8 +1175,8 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
* handled by the anon_vma lock + PG_lock.
*/
mmap_write_lock(mm);
- result = hugepage_vma_revalidate(mm, address, &vma);
- if (result)
+ cr->result = hugepage_vma_revalidate(mm, address, &vma);
+ if (cr->result)
goto out_up_write;
/* check if the pmd is still valid */
if (mm_find_pmd(mm, address) != pmd)
@@ -1194,11 +1203,11 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
mmu_notifier_invalidate_range_end(&range);
spin_lock(pte_ptl);
- isolated = __collapse_huge_page_isolate(vma, address, pte,
- &compound_pagelist);
+ cr->result = __collapse_huge_page_isolate(vma, address, pte,
+ &compound_pagelist);
spin_unlock(pte_ptl);
- if (unlikely(!isolated)) {
+ if (unlikely(cr->result != SCAN_SUCCEED)) {
pte_unmap(pte);
spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
@@ -1210,7 +1219,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
pmd_populate(mm, pmd, pmd_pgtable(_pmd));
spin_unlock(pmd_ptl);
anon_vma_unlock_write(vma->anon_vma);
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto out_up_write;
}
@@ -1246,25 +1255,25 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
cc->hpage = NULL;
- khugepaged_pages_collapsed++;
- result = SCAN_SUCCEED;
+ cr->result = SCAN_SUCCEED;
out_up_write:
mmap_write_unlock(mm);
out_nolock:
if (!IS_ERR_OR_NULL(cc->hpage))
mem_cgroup_uncharge(page_folio(cc->hpage));
- trace_mm_collapse_huge_page(mm, isolated, result);
+ trace_mm_collapse_huge_page(mm, cr->result == SCAN_SUCCEED, cr->result);
return;
}
-static int khugepaged_scan_pmd(struct mm_struct *mm,
- struct vm_area_struct *vma,
- unsigned long address,
- struct collapse_control *cc)
+static void khugepaged_scan_pmd(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long address,
+ struct collapse_control *cc,
+ struct collapse_result *cr)
{
pmd_t *pmd;
pte_t *pte, *_pte;
- int ret = 0, result = 0, referenced = 0;
+ int referenced = 0;
int none_or_zero = 0, shared = 0;
struct page *page = NULL;
unsigned long _address;
@@ -1273,9 +1282,10 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
bool writable = false;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ cr->result = SCAN_FAIL;
- result = find_pmd_or_thp_or_none(mm, address, &pmd);
- if (result != SCAN_SUCCEED)
+ cr->result = find_pmd_or_thp_or_none(mm, address, &pmd);
+ if (cr->result != SCAN_SUCCEED)
goto out;
memset(cc->node_load, 0, sizeof(cc->node_load));
@@ -1291,12 +1301,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
* comment below for pte_uffd_wp().
*/
if (pte_swp_uffd_wp(pteval)) {
- result = SCAN_PTE_UFFD_WP;
+ cr->result = SCAN_PTE_UFFD_WP;
goto out_unmap;
}
continue;
} else {
- result = SCAN_EXCEED_SWAP_PTE;
+ cr->result = SCAN_EXCEED_SWAP_PTE;
count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
goto out_unmap;
}
@@ -1306,7 +1316,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
++none_or_zero <= khugepaged_max_ptes_none) {
continue;
} else {
- result = SCAN_EXCEED_NONE_PTE;
+ cr->result = SCAN_EXCEED_NONE_PTE;
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
goto out_unmap;
}
@@ -1321,7 +1331,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
* userfault messages that falls outside of
* the registered range. So, just be simple.
*/
- result = SCAN_PTE_UFFD_WP;
+ cr->result = SCAN_PTE_UFFD_WP;
goto out_unmap;
}
if (pte_write(pteval))
@@ -1329,13 +1339,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
page = vm_normal_page(vma, _address, pteval);
if (unlikely(!page)) {
- result = SCAN_PAGE_NULL;
+ cr->result = SCAN_PAGE_NULL;
goto out_unmap;
}
if (page_mapcount(page) > 1 &&
++shared > khugepaged_max_ptes_shared) {
- result = SCAN_EXCEED_SHARED_PTE;
+ cr->result = SCAN_EXCEED_SHARED_PTE;
count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
goto out_unmap;
}
@@ -1350,20 +1360,20 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
*/
node = page_to_nid(page);
if (khugepaged_scan_abort(node, cc)) {
- result = SCAN_SCAN_ABORT;
+ cr->result = SCAN_SCAN_ABORT;
goto out_unmap;
}
cc->node_load[node]++;
if (!PageLRU(page)) {
- result = SCAN_PAGE_LRU;
+ cr->result = SCAN_PAGE_LRU;
goto out_unmap;
}
if (PageLocked(page)) {
- result = SCAN_PAGE_LOCK;
+ cr->result = SCAN_PAGE_LOCK;
goto out_unmap;
}
if (!PageAnon(page)) {
- result = SCAN_PAGE_ANON;
+ cr->result = SCAN_PAGE_ANON;
goto out_unmap;
}
@@ -1385,7 +1395,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
* will be done again later the risk seems low.
*/
if (!is_refcount_suitable(page)) {
- result = SCAN_PAGE_COUNT;
+ cr->result = SCAN_PAGE_COUNT;
goto out_unmap;
}
if (pte_young(pteval) ||
@@ -1394,23 +1404,20 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
referenced++;
}
if (!writable) {
- result = SCAN_PAGE_RO;
+ cr->result = SCAN_PAGE_RO;
} else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
- result = SCAN_LACK_REFERENCED_PAGE;
+ cr->result = SCAN_LACK_REFERENCED_PAGE;
} else {
- result = SCAN_SUCCEED;
- ret = 1;
+ cr->result = SCAN_SUCCEED;
}
out_unmap:
pte_unmap_unlock(pte, ptl);
- if (ret) {
+ if (cr->result == SCAN_SUCCEED)
/* collapse_huge_page will return with the mmap_lock released */
- collapse_huge_page(mm, address, cc, referenced, unmapped);
- }
+ collapse_huge_page(mm, address, cc, referenced, unmapped, cr);
out:
trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
- none_or_zero, result, unmapped);
- return ret;
+ none_or_zero, cr->result, unmapped);
}
static void collect_mm_slot(struct mm_slot *mm_slot)
@@ -1671,6 +1678,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
* @file: file that collapse on
* @start: collapse start address
* @cc: collapse context and scratchpad
+ * @cr: aggregate result information of collapse
*
* Basic scheme is simple, details are more complex:
* - allocate and lock a new huge page;
@@ -1689,7 +1697,9 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
*/
static void collapse_file(struct mm_struct *mm,
struct file *file, pgoff_t start,
- struct collapse_control *cc)
+ struct collapse_control *cc,
+ struct collapse_result *cr)
+
{
struct address_space *mapping = file->f_mapping;
gfp_t gfp;
@@ -1697,25 +1707,27 @@ static void collapse_file(struct mm_struct *mm,
pgoff_t index, end = start + HPAGE_PMD_NR;
LIST_HEAD(pagelist);
XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
- int nr_none = 0, result = SCAN_SUCCEED;
+ int nr_none = 0;
bool is_shmem = shmem_file(file);
int nr, node;
VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
+ cr->result = SCAN_SUCCEED;
+
/* Only allocate from the target node */
gfp = cc->gfp() | __GFP_THISNODE;
node = khugepaged_find_target_node(cc);
new_page = cc->alloc_hpage(cc, gfp, node);
if (!new_page) {
- result = SCAN_ALLOC_HUGE_PAGE_FAIL;
+ cr->result = SCAN_ALLOC_HUGE_PAGE_FAIL;
goto out;
}
if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) {
- result = SCAN_CGROUP_CHARGE_FAIL;
+ cr->result = SCAN_CGROUP_CHARGE_FAIL;
goto out;
}
count_memcg_page_event(new_page, THP_COLLAPSE_ALLOC);
@@ -1731,7 +1743,7 @@ static void collapse_file(struct mm_struct *mm,
break;
xas_unlock_irq(&xas);
if (!xas_nomem(&xas, GFP_KERNEL)) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto out;
}
} while (1);
@@ -1762,13 +1774,13 @@ static void collapse_file(struct mm_struct *mm,
*/
if (index == start) {
if (!xas_next_entry(&xas, end - 1)) {
- result = SCAN_TRUNCATED;
+ cr->result = SCAN_TRUNCATED;
goto xa_locked;
}
xas_set(&xas, index);
}
if (!shmem_charge(mapping->host, 1)) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto xa_locked;
}
xas_store(&xas, new_page);
@@ -1781,14 +1793,14 @@ static void collapse_file(struct mm_struct *mm,
/* swap in or instantiate fallocated page */
if (shmem_getpage(mapping->host, index, &page,
SGP_NOALLOC)) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto xa_unlocked;
}
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
} else {
- result = SCAN_PAGE_LOCK;
+ cr->result = SCAN_PAGE_LOCK;
goto xa_locked;
}
} else { /* !is_shmem */
@@ -1801,7 +1813,7 @@ static void collapse_file(struct mm_struct *mm,
lru_add_drain();
page = find_lock_page(mapping, index);
if (unlikely(page == NULL)) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto xa_unlocked;
}
} else if (PageDirty(page)) {
@@ -1820,17 +1832,17 @@ static void collapse_file(struct mm_struct *mm,
*/
xas_unlock_irq(&xas);
filemap_flush(mapping);
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto xa_unlocked;
} else if (PageWriteback(page)) {
xas_unlock_irq(&xas);
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto xa_unlocked;
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
} else {
- result = SCAN_PAGE_LOCK;
+ cr->result = SCAN_PAGE_LOCK;
goto xa_locked;
}
}
@@ -1843,7 +1855,7 @@ static void collapse_file(struct mm_struct *mm,
/* make sure the page is up to date */
if (unlikely(!PageUptodate(page))) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto out_unlock;
}
@@ -1852,12 +1864,12 @@ static void collapse_file(struct mm_struct *mm,
* we locked the first page, then a THP might be there already.
*/
if (PageTransCompound(page)) {
- result = SCAN_PAGE_COMPOUND;
+ cr->result = SCAN_PAGE_COMPOUND;
goto out_unlock;
}
if (page_mapping(page) != mapping) {
- result = SCAN_TRUNCATED;
+ cr->result = SCAN_TRUNCATED;
goto out_unlock;
}
@@ -1868,18 +1880,18 @@ static void collapse_file(struct mm_struct *mm,
* page is dirty because it hasn't been flushed
* since first write.
*/
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
goto out_unlock;
}
if (isolate_lru_page(page)) {
- result = SCAN_DEL_PAGE_LRU;
+ cr->result = SCAN_DEL_PAGE_LRU;
goto out_unlock;
}
if (page_has_private(page) &&
!try_to_release_page(page, GFP_KERNEL)) {
- result = SCAN_PAGE_HAS_PRIVATE;
+ cr->result = SCAN_PAGE_HAS_PRIVATE;
putback_lru_page(page);
goto out_unlock;
}
@@ -1900,7 +1912,7 @@ static void collapse_file(struct mm_struct *mm,
* - one from isolate_lru_page;
*/
if (!page_ref_freeze(page, 3)) {
- result = SCAN_PAGE_COUNT;
+ cr->result = SCAN_PAGE_COUNT;
xas_unlock_irq(&xas);
putback_lru_page(page);
goto out_unlock;
@@ -1935,7 +1947,7 @@ static void collapse_file(struct mm_struct *mm,
*/
smp_mb();
if (inode_is_open_for_write(mapping->host)) {
- result = SCAN_FAIL;
+ cr->result = SCAN_FAIL;
__mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr);
filemap_nr_thps_dec(mapping);
goto xa_locked;
@@ -1962,7 +1974,7 @@ static void collapse_file(struct mm_struct *mm,
*/
try_to_unmap_flush();
- if (result == SCAN_SUCCEED) {
+ if (cr->result == SCAN_SUCCEED) {
struct page *page, *tmp;
/*
@@ -2002,8 +2014,6 @@ static void collapse_file(struct mm_struct *mm,
*/
retract_page_tables(mapping, start);
cc->hpage = NULL;
-
- khugepaged_pages_collapsed++;
} else {
struct page *page;
@@ -2055,15 +2065,16 @@ static void collapse_file(struct mm_struct *mm,
static void khugepaged_scan_file(struct mm_struct *mm,
struct file *file, pgoff_t start,
- struct collapse_control *cc)
+ struct collapse_control *cc,
+ struct collapse_result *cr)
{
struct page *page = NULL;
struct address_space *mapping = file->f_mapping;
XA_STATE(xas, &mapping->i_pages, start);
int present, swap;
int node = NUMA_NO_NODE;
- int result = SCAN_SUCCEED;
+ cr->result = SCAN_SUCCEED;
present = 0;
swap = 0;
memset(cc->node_load, 0, sizeof(cc->node_load));
@@ -2074,7 +2085,7 @@ static void khugepaged_scan_file(struct mm_struct *mm,
if (xa_is_value(page)) {
if (++swap > khugepaged_max_ptes_swap) {
- result = SCAN_EXCEED_SWAP_PTE;
+ cr->result = SCAN_EXCEED_SWAP_PTE;
count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
break;
}
@@ -2086,25 +2097,25 @@ static void khugepaged_scan_file(struct mm_struct *mm,
* into a PMD sized page
*/
if (PageTransCompound(page)) {
- result = SCAN_PAGE_COMPOUND;
+ cr->result = SCAN_PAGE_COMPOUND;
break;
}
node = page_to_nid(page);
if (khugepaged_scan_abort(node, cc)) {
- result = SCAN_SCAN_ABORT;
+ cr->result = SCAN_SCAN_ABORT;
break;
}
cc->node_load[node]++;
if (!PageLRU(page)) {
- result = SCAN_PAGE_LRU;
+ cr->result = SCAN_PAGE_LRU;
break;
}
if (page_count(page) !=
1 + page_mapcount(page) + page_has_private(page)) {
- result = SCAN_PAGE_COUNT;
+ cr->result = SCAN_PAGE_COUNT;
break;
}
@@ -2123,12 +2134,12 @@ static void khugepaged_scan_file(struct mm_struct *mm,
}
rcu_read_unlock();
- if (result == SCAN_SUCCEED) {
+ if (cr->result == SCAN_SUCCEED) {
if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
- result = SCAN_EXCEED_NONE_PTE;
+ cr->result = SCAN_EXCEED_NONE_PTE;
count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
} else {
- collapse_file(mm, file, start, cc);
+ collapse_file(mm, file, start, cc, cr);
}
}
@@ -2137,7 +2148,8 @@ static void khugepaged_scan_file(struct mm_struct *mm,
#else
static void khugepaged_scan_file(struct mm_struct *mm,
struct file *file, pgoff_t start,
- struct collapse_control *cc)
+ struct collapse_control *cc,
+ struct collapse_result *cr)
{
BUILD_BUG();
}
@@ -2209,7 +2221,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
goto skip;
while (khugepaged_scan.address < hend) {
- int ret;
+ struct collapse_result cr = {0};
cond_resched();
if (unlikely(khugepaged_test_exit(mm)))
goto breakouterloop;
@@ -2223,17 +2235,20 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
khugepaged_scan.address);
mmap_read_unlock(mm);
- ret = 1;
- khugepaged_scan_file(mm, file, pgoff, cc);
+ cr.dropped_mmap_lock = true;
+ khugepaged_scan_file(mm, file, pgoff, cc, &cr);
fput(file);
} else {
- ret = khugepaged_scan_pmd(mm, vma,
- khugepaged_scan.address, cc);
+ khugepaged_scan_pmd(mm, vma,
+ khugepaged_scan.address,
+ cc, &cr);
}
+ if (cr.result == SCAN_SUCCEED)
+ ++khugepaged_pages_collapsed;
/* move to next address */
khugepaged_scan.address += HPAGE_PMD_SIZE;
progress += HPAGE_PMD_NR;
- if (ret)
+ if (cr.dropped_mmap_lock)
/* we released mmap_lock so break loop */
goto breakouterloop_mmap_lock;
if (progress >= pages)
Add struct collapse_result which aggregates data from a single khugepaged_scan_pmd() or khugepaged_scan_file() request. Change khugepaged to take action based on this returned data instead of deep within the collapsing functions themselves. Signed-off-by: Zach O'Keefe <zokeefe@google.com> --- mm/khugepaged.c | 187 ++++++++++++++++++++++++++---------------------- 1 file changed, 101 insertions(+), 86 deletions(-)