Message ID | 20220706235936.2197195-7-zokeefe@google.com (mailing list archive)
---|---
State | New
Series | mm: userspace hugepage collapse
On Wed, Jul 6, 2022 at 5:06 PM Zach O'Keefe <zokeefe@google.com> wrote: > > Add .is_khugepaged flag to struct collapse_control so > khugepaged-specific behavior can be elided by MADV_COLLAPSE context. > > Start by protecting khugepaged-specific heuristics by this flag. In > MADV_COLLAPSE, the user presumably has reason to believe the collapse > will be beneficial and khugepaged heuristics shouldn't prevent the user > from doing so: > > 1) sysfs-controlled knobs khugepaged_max_ptes_[none|swap|shared] > > 2) requirement that some pages in region being collapsed be young or > referenced > > Signed-off-by: Zach O'Keefe <zokeefe@google.com> > --- > > v6 -> v7: There is no functional change here from v6, just a renaming of > flags to explicitly be predicated on khugepaged. Reviewed-by: Yang Shi <shy828301@gmail.com> Just a nit, some conditions check is_khugepaged first, some don't. Why not make them more consistent to check is_khugepaged first? > --- > mm/khugepaged.c | 62 ++++++++++++++++++++++++++++++++++--------------- > 1 file changed, 43 insertions(+), 19 deletions(-) > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index 147f5828f052..d89056d8cbad 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -73,6 +73,8 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait); > * default collapse hugepages if there is at least one pte mapped like > * it would have happened if the vma was large enough during page > * fault. > + * > + * Note that these are only respected if collapse was initiated by khugepaged. > */ > static unsigned int khugepaged_max_ptes_none __read_mostly; > static unsigned int khugepaged_max_ptes_swap __read_mostly; > @@ -86,6 +88,8 @@ static struct kmem_cache *mm_slot_cache __read_mostly; > #define MAX_PTE_MAPPED_THP 8 > > struct collapse_control { > + bool is_khugepaged; > + > /* Num pages scanned per node */ > int node_load[MAX_NUMNODES]; > > @@ -554,6 +558,7 @@ static bool is_refcount_suitable(struct page *page) > static int __collapse_huge_page_isolate(struct vm_area_struct *vma, > unsigned long address, > pte_t *pte, > + struct collapse_control *cc, > struct list_head *compound_pagelist) > { > struct page *page = NULL; > @@ -567,7 +572,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, > if (pte_none(pteval) || (pte_present(pteval) && > is_zero_pfn(pte_pfn(pteval)))) { > if (!userfaultfd_armed(vma) && > - ++none_or_zero <= khugepaged_max_ptes_none) { > + (++none_or_zero <= khugepaged_max_ptes_none || > + !cc->is_khugepaged)) { > continue; > } else { > result = SCAN_EXCEED_NONE_PTE; > @@ -587,8 +593,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, > > VM_BUG_ON_PAGE(!PageAnon(page), page); > > - if (page_mapcount(page) > 1 && > - ++shared > khugepaged_max_ptes_shared) { > + if (cc->is_khugepaged && page_mapcount(page) > 1 && > + ++shared > khugepaged_max_ptes_shared) { > result = SCAN_EXCEED_SHARED_PTE; > count_vm_event(THP_SCAN_EXCEED_SHARED_PTE); > goto out; > @@ -654,10 +660,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, > if (PageCompound(page)) > list_add_tail(&page->lru, compound_pagelist); > next: > - /* There should be enough young pte to collapse the page */ > - if (pte_young(pteval) || > - page_is_young(page) || PageReferenced(page) || > - mmu_notifier_test_young(vma->vm_mm, address)) > + /* > + * If collapse was initiated by khugepaged, check that there is > + * enough young pte to justify collapsing the page > + */ > + if (cc->is_khugepaged && > + (pte_young(pteval) || 
page_is_young(page) || > + PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, > + address))) > referenced++; > > if (pte_write(pteval)) > @@ -666,7 +676,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, > > if (unlikely(!writable)) { > result = SCAN_PAGE_RO; > - } else if (unlikely(!referenced)) { > + } else if (unlikely(cc->is_khugepaged && !referenced)) { > result = SCAN_LACK_REFERENCED_PAGE; > } else { > result = SCAN_SUCCEED; > @@ -745,6 +755,7 @@ static void khugepaged_alloc_sleep(void) > > > struct collapse_control khugepaged_collapse_control = { > + .is_khugepaged = true, > .last_target_node = NUMA_NO_NODE, > }; > > @@ -1023,7 +1034,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, > mmu_notifier_invalidate_range_end(&range); > > spin_lock(pte_ptl); > - result = __collapse_huge_page_isolate(vma, address, pte, > + result = __collapse_huge_page_isolate(vma, address, pte, cc, > &compound_pagelist); > spin_unlock(pte_ptl); > > @@ -1114,7 +1125,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma, > _pte++, _address += PAGE_SIZE) { > pte_t pteval = *_pte; > if (is_swap_pte(pteval)) { > - if (++unmapped <= khugepaged_max_ptes_swap) { > + if (++unmapped <= khugepaged_max_ptes_swap || > + !cc->is_khugepaged) { > /* > * Always be strict with uffd-wp > * enabled swap entries. Please see > @@ -1133,7 +1145,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma, > } > if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { > if (!userfaultfd_armed(vma) && > - ++none_or_zero <= khugepaged_max_ptes_none) { > + (++none_or_zero <= khugepaged_max_ptes_none || > + !cc->is_khugepaged)) { > continue; > } else { > result = SCAN_EXCEED_NONE_PTE; > @@ -1163,8 +1176,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma, > goto out_unmap; > } > > - if (page_mapcount(page) > 1 && > - ++shared > khugepaged_max_ptes_shared) { > + if (cc->is_khugepaged && > + page_mapcount(page) > 1 && > + ++shared > khugepaged_max_ptes_shared) { > result = SCAN_EXCEED_SHARED_PTE; > count_vm_event(THP_SCAN_EXCEED_SHARED_PTE); > goto out_unmap; > @@ -1218,14 +1232,22 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma, > result = SCAN_PAGE_COUNT; > goto out_unmap; > } > - if (pte_young(pteval) || > - page_is_young(page) || PageReferenced(page) || > - mmu_notifier_test_young(vma->vm_mm, address)) > + > + /* > + * If collapse was initiated by khugepaged, check that there is > + * enough young pte to justify collapsing the page > + */ > + if (cc->is_khugepaged && > + (pte_young(pteval) || page_is_young(page) || > + PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, > + address))) > referenced++; > } > if (!writable) { > result = SCAN_PAGE_RO; > - } else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) { > + } else if (cc->is_khugepaged && > + (!referenced || > + (unmapped && referenced < HPAGE_PMD_NR / 2))) { > result = SCAN_LACK_REFERENCED_PAGE; > } else { > result = SCAN_SUCCEED; > @@ -1894,7 +1916,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file, > continue; > > if (xa_is_value(page)) { > - if (++swap > khugepaged_max_ptes_swap) { > + if (cc->is_khugepaged && > + ++swap > khugepaged_max_ptes_swap) { > result = SCAN_EXCEED_SWAP_PTE; > count_vm_event(THP_SCAN_EXCEED_SWAP_PTE); > break; > @@ -1945,7 +1968,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file, > 
rcu_read_unlock(); > > if (result == SCAN_SUCCEED) { > - if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) { > + if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none && > + cc->is_khugepaged) { > result = SCAN_EXCEED_NONE_PTE; > count_vm_event(THP_SCAN_EXCEED_NONE_PTE); > } else { > -- > 2.37.0.rc0.161.g10f37bed90-goog >
On Jul 11 13:43, Yang Shi wrote: > On Wed, Jul 6, 2022 at 5:06 PM Zach O'Keefe <zokeefe@google.com> wrote: > > > > Add .is_khugepaged flag to struct collapse_control so > > khugepaged-specific behavior can be elided by MADV_COLLAPSE context. > > > > Start by protecting khugepaged-specific heuristics by this flag. In > > MADV_COLLAPSE, the user presumably has reason to believe the collapse > > will be beneficial and khugepaged heuristics shouldn't prevent the user > > from doing so: > > > > 1) sysfs-controlled knobs khugepaged_max_ptes_[none|swap|shared] > > > > 2) requirement that some pages in region being collapsed be young or > > referenced > > > > Signed-off-by: Zach O'Keefe <zokeefe@google.com> > > --- > > > > v6 -> v7: There is no functional change here from v6, just a renaming of > > flags to explicitly be predicated on khugepaged. > > Reviewed-by: Yang Shi <shy828301@gmail.com> > > Just a nit, some conditions check is_khugepaged first, some don't. Why > not make them more consistent to check is_khugepaged first? > Again, thank you for taking the time to review. Agreed the inconsistency is ugly, and have updated to check is_khugepaged consistently first. Thanks for the suggestion. Zach > > --- > > mm/khugepaged.c | 62 ++++++++++++++++++++++++++++++++++--------------- > > 1 file changed, 43 insertions(+), 19 deletions(-) > > > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > > index 147f5828f052..d89056d8cbad 100644 > > --- a/mm/khugepaged.c > > +++ b/mm/khugepaged.c > > @@ -73,6 +73,8 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait); > > * default collapse hugepages if there is at least one pte mapped like > > * it would have happened if the vma was large enough during page > > * fault. > > + * > > + * Note that these are only respected if collapse was initiated by khugepaged. 
> > */ > > static unsigned int khugepaged_max_ptes_none __read_mostly; > > static unsigned int khugepaged_max_ptes_swap __read_mostly; > > @@ -86,6 +88,8 @@ static struct kmem_cache *mm_slot_cache __read_mostly; > > #define MAX_PTE_MAPPED_THP 8 > > > > struct collapse_control { > > + bool is_khugepaged; > > + > > /* Num pages scanned per node */ > > int node_load[MAX_NUMNODES]; > > > > @@ -554,6 +558,7 @@ static bool is_refcount_suitable(struct page *page) > > static int __collapse_huge_page_isolate(struct vm_area_struct *vma, > > unsigned long address, > > pte_t *pte, > > + struct collapse_control *cc, > > struct list_head *compound_pagelist) > > { > > struct page *page = NULL; > > @@ -567,7 +572,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, > > if (pte_none(pteval) || (pte_present(pteval) && > > is_zero_pfn(pte_pfn(pteval)))) { > > if (!userfaultfd_armed(vma) && > > - ++none_or_zero <= khugepaged_max_ptes_none) { > > + (++none_or_zero <= khugepaged_max_ptes_none || > > + !cc->is_khugepaged)) { > > continue; > > } else { > > result = SCAN_EXCEED_NONE_PTE; > > @@ -587,8 +593,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, > > > > VM_BUG_ON_PAGE(!PageAnon(page), page); > > > > - if (page_mapcount(page) > 1 && > > - ++shared > khugepaged_max_ptes_shared) { > > + if (cc->is_khugepaged && page_mapcount(page) > 1 && > > + ++shared > khugepaged_max_ptes_shared) { > > result = SCAN_EXCEED_SHARED_PTE; > > count_vm_event(THP_SCAN_EXCEED_SHARED_PTE); > > goto out; > > @@ -654,10 +660,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, > > if (PageCompound(page)) > > list_add_tail(&page->lru, compound_pagelist); > > next: > > - /* There should be enough young pte to collapse the page */ > > - if (pte_young(pteval) || > > - page_is_young(page) || PageReferenced(page) || > > - mmu_notifier_test_young(vma->vm_mm, address)) > > + /* > > + * If collapse was initiated by khugepaged, check that there is > > + * enough young pte to justify collapsing the page > > + */ > > + if (cc->is_khugepaged && > > + (pte_young(pteval) || page_is_young(page) || > > + PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, > > + address))) > > referenced++; > > > > if (pte_write(pteval)) > > @@ -666,7 +676,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, > > > > if (unlikely(!writable)) { > > result = SCAN_PAGE_RO; > > - } else if (unlikely(!referenced)) { > > + } else if (unlikely(cc->is_khugepaged && !referenced)) { > > result = SCAN_LACK_REFERENCED_PAGE; > > } else { > > result = SCAN_SUCCEED; > > @@ -745,6 +755,7 @@ static void khugepaged_alloc_sleep(void) > > > > > > struct collapse_control khugepaged_collapse_control = { > > + .is_khugepaged = true, > > .last_target_node = NUMA_NO_NODE, > > }; > > > > @@ -1023,7 +1034,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, > > mmu_notifier_invalidate_range_end(&range); > > > > spin_lock(pte_ptl); > > - result = __collapse_huge_page_isolate(vma, address, pte, > > + result = __collapse_huge_page_isolate(vma, address, pte, cc, > > &compound_pagelist); > > spin_unlock(pte_ptl); > > > > @@ -1114,7 +1125,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma, > > _pte++, _address += PAGE_SIZE) { > > pte_t pteval = *_pte; > > if (is_swap_pte(pteval)) { > > - if (++unmapped <= khugepaged_max_ptes_swap) { > > + if (++unmapped <= khugepaged_max_ptes_swap || > > + !cc->is_khugepaged) { > > /* > > * Always be strict with 
uffd-wp > > * enabled swap entries. Please see > > @@ -1133,7 +1145,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma, > > } > > if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { > > if (!userfaultfd_armed(vma) && > > - ++none_or_zero <= khugepaged_max_ptes_none) { > > + (++none_or_zero <= khugepaged_max_ptes_none || > > + !cc->is_khugepaged)) { > > continue; > > } else { > > result = SCAN_EXCEED_NONE_PTE; > > @@ -1163,8 +1176,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma, > > goto out_unmap; > > } > > > > - if (page_mapcount(page) > 1 && > > - ++shared > khugepaged_max_ptes_shared) { > > + if (cc->is_khugepaged && > > + page_mapcount(page) > 1 && > > + ++shared > khugepaged_max_ptes_shared) { > > result = SCAN_EXCEED_SHARED_PTE; > > count_vm_event(THP_SCAN_EXCEED_SHARED_PTE); > > goto out_unmap; > > @@ -1218,14 +1232,22 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma, > > result = SCAN_PAGE_COUNT; > > goto out_unmap; > > } > > - if (pte_young(pteval) || > > - page_is_young(page) || PageReferenced(page) || > > - mmu_notifier_test_young(vma->vm_mm, address)) > > + > > + /* > > + * If collapse was initiated by khugepaged, check that there is > > + * enough young pte to justify collapsing the page > > + */ > > + if (cc->is_khugepaged && > > + (pte_young(pteval) || page_is_young(page) || > > + PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, > > + address))) > > referenced++; > > } > > if (!writable) { > > result = SCAN_PAGE_RO; > > - } else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) { > > + } else if (cc->is_khugepaged && > > + (!referenced || > > + (unmapped && referenced < HPAGE_PMD_NR / 2))) { > > result = SCAN_LACK_REFERENCED_PAGE; > > } else { > > result = SCAN_SUCCEED; > > @@ -1894,7 +1916,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file, > > continue; > > > > if (xa_is_value(page)) { > > - if (++swap > khugepaged_max_ptes_swap) { > > + if (cc->is_khugepaged && > > + ++swap > khugepaged_max_ptes_swap) { > > result = SCAN_EXCEED_SWAP_PTE; > > count_vm_event(THP_SCAN_EXCEED_SWAP_PTE); > > break; > > @@ -1945,7 +1968,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file, > > rcu_read_unlock(); > > > > if (result == SCAN_SUCCEED) { > > - if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) { > > + if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none && > > + cc->is_khugepaged) { > > result = SCAN_EXCEED_NONE_PTE; > > count_vm_event(THP_SCAN_EXCEED_NONE_PTE); > > } else { > > -- > > 2.37.0.rc0.161.g10f37bed90-goog > >
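[Editorial note: as an illustration of the "check is_khugepaged first" cleanup Zach describes above, a minimal sketch of how the max_ptes_swap test in khugepaged_scan_pmd() might be reordered is shown below. This is a hypothetical example, not the actual v8 hunk; note that the increment has to be hoisted out of the condition, because putting !cc->is_khugepaged first would otherwise short-circuit the side-effecting ++unmapped.]

	if (is_swap_pte(pteval)) {
		++unmapped;
		if (!cc->is_khugepaged ||
		    unmapped <= khugepaged_max_ptes_swap) {
			/*
			 * Always be strict with uffd-wp enabled swap
			 * entries, as in the original hunk.
			 */
			if (pte_swp_uffd_wp(pteval)) {
				result = SCAN_PTE_UFFD_WP;
				goto out_unmap;
			}
			continue;
		}
		result = SCAN_EXCEED_SWAP_PTE;
		count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
		goto out_unmap;
	}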
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 147f5828f052..d89056d8cbad 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -73,6 +73,8 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
  * default collapse hugepages if there is at least one pte mapped like
  * it would have happened if the vma was large enough during page
  * fault.
+ *
+ * Note that these are only respected if collapse was initiated by khugepaged.
  */
 static unsigned int khugepaged_max_ptes_none __read_mostly;
 static unsigned int khugepaged_max_ptes_swap __read_mostly;
@@ -86,6 +88,8 @@ static struct kmem_cache *mm_slot_cache __read_mostly;
 #define MAX_PTE_MAPPED_THP 8
 
 struct collapse_control {
+	bool is_khugepaged;
+
 	/* Num pages scanned per node */
 	int node_load[MAX_NUMNODES];
 
@@ -554,6 +558,7 @@ static bool is_refcount_suitable(struct page *page)
 static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 					unsigned long address,
 					pte_t *pte,
+					struct collapse_control *cc,
 					struct list_head *compound_pagelist)
 {
 	struct page *page = NULL;
@@ -567,7 +572,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		if (pte_none(pteval) || (pte_present(pteval) &&
 				is_zero_pfn(pte_pfn(pteval)))) {
 			if (!userfaultfd_armed(vma) &&
-			    ++none_or_zero <= khugepaged_max_ptes_none) {
+			    (++none_or_zero <= khugepaged_max_ptes_none ||
+			     !cc->is_khugepaged)) {
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
@@ -587,8 +593,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 
 		VM_BUG_ON_PAGE(!PageAnon(page), page);
 
-		if (page_mapcount(page) > 1 &&
-				++shared > khugepaged_max_ptes_shared) {
+		if (cc->is_khugepaged && page_mapcount(page) > 1 &&
+		    ++shared > khugepaged_max_ptes_shared) {
 			result = SCAN_EXCEED_SHARED_PTE;
 			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
 			goto out;
@@ -654,10 +660,14 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		if (PageCompound(page))
 			list_add_tail(&page->lru, compound_pagelist);
 next:
-		/* There should be enough young pte to collapse the page */
-		if (pte_young(pteval) ||
-		    page_is_young(page) || PageReferenced(page) ||
-		    mmu_notifier_test_young(vma->vm_mm, address))
+		/*
+		 * If collapse was initiated by khugepaged, check that there is
+		 * enough young pte to justify collapsing the page
+		 */
+		if (cc->is_khugepaged &&
+		    (pte_young(pteval) || page_is_young(page) ||
+		     PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
+								     address)))
 			referenced++;
 
 		if (pte_write(pteval))
@@ -666,7 +676,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 
 	if (unlikely(!writable)) {
 		result = SCAN_PAGE_RO;
-	} else if (unlikely(!referenced)) {
+	} else if (unlikely(cc->is_khugepaged && !referenced)) {
 		result = SCAN_LACK_REFERENCED_PAGE;
 	} else {
 		result = SCAN_SUCCEED;
@@ -745,6 +755,7 @@ static void khugepaged_alloc_sleep(void)
 
 
 struct collapse_control khugepaged_collapse_control = {
+	.is_khugepaged = true,
 	.last_target_node = NUMA_NO_NODE,
 };
 
@@ -1023,7 +1034,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 	mmu_notifier_invalidate_range_end(&range);
 
 	spin_lock(pte_ptl);
-	result = __collapse_huge_page_isolate(vma, address, pte,
+	result = __collapse_huge_page_isolate(vma, address, pte, cc,
 					      &compound_pagelist);
 	spin_unlock(pte_ptl);
 
@@ -1114,7 +1125,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 	     _pte++, _address += PAGE_SIZE) {
 		pte_t pteval = *_pte;
 		if (is_swap_pte(pteval)) {
-			if (++unmapped <= khugepaged_max_ptes_swap) {
+			if (++unmapped <= khugepaged_max_ptes_swap ||
+			    !cc->is_khugepaged) {
 				/*
 				 * Always be strict with uffd-wp
 				 * enabled swap entries. Please see
@@ -1133,7 +1145,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
 			if (!userfaultfd_armed(vma) &&
-			    ++none_or_zero <= khugepaged_max_ptes_none) {
+			    (++none_or_zero <= khugepaged_max_ptes_none ||
+			     !cc->is_khugepaged)) {
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
@@ -1163,8 +1176,9 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 			goto out_unmap;
 		}
 
-		if (page_mapcount(page) > 1 &&
-				++shared > khugepaged_max_ptes_shared) {
+		if (cc->is_khugepaged &&
+		    page_mapcount(page) > 1 &&
+		    ++shared > khugepaged_max_ptes_shared) {
 			result = SCAN_EXCEED_SHARED_PTE;
 			count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
 			goto out_unmap;
@@ -1218,14 +1232,22 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 			result = SCAN_PAGE_COUNT;
 			goto out_unmap;
 		}
-		if (pte_young(pteval) ||
-		    page_is_young(page) || PageReferenced(page) ||
-		    mmu_notifier_test_young(vma->vm_mm, address))
+
+		/*
+		 * If collapse was initiated by khugepaged, check that there is
+		 * enough young pte to justify collapsing the page
+		 */
+		if (cc->is_khugepaged &&
+		    (pte_young(pteval) || page_is_young(page) ||
+		     PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
+								     address)))
 			referenced++;
 	}
 	if (!writable) {
 		result = SCAN_PAGE_RO;
-	} else if (!referenced || (unmapped && referenced < HPAGE_PMD_NR/2)) {
+	} else if (cc->is_khugepaged &&
+		   (!referenced ||
+		    (unmapped && referenced < HPAGE_PMD_NR / 2))) {
 		result = SCAN_LACK_REFERENCED_PAGE;
 	} else {
 		result = SCAN_SUCCEED;
@@ -1894,7 +1916,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
 			continue;
 
 		if (xa_is_value(page)) {
-			if (++swap > khugepaged_max_ptes_swap) {
+			if (cc->is_khugepaged &&
+			    ++swap > khugepaged_max_ptes_swap) {
 				result = SCAN_EXCEED_SWAP_PTE;
 				count_vm_event(THP_SCAN_EXCEED_SWAP_PTE);
 				break;
@@ -1945,7 +1968,8 @@ static int khugepaged_scan_file(struct mm_struct *mm, struct file *file,
 	rcu_read_unlock();
 
 	if (result == SCAN_SUCCEED) {
-		if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none) {
+		if (present < HPAGE_PMD_NR - khugepaged_max_ptes_none &&
+		    cc->is_khugepaged) {
 			result = SCAN_EXCEED_NONE_PTE;
 			count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
 		} else {
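[Editorial note: this patch only defines khugepaged's own collapse_control with .is_khugepaged = true; the MADV_COLLAPSE caller is introduced later in the series. As a rough sketch of how that side would use the flag (the function name, signature, and body below are assumptions for illustration, not taken from this patch), the madvise path would supply its own control with the flag cleared, so every cc->is_khugepaged test above elides the heuristics:]

	/* Hypothetical sketch of the MADV_COLLAPSE caller, not this patch. */
	static int madvise_collapse_sketch(struct vm_area_struct *vma,
					   unsigned long start, unsigned long end)
	{
		struct collapse_control cc = {
			.is_khugepaged = false,		/* skip khugepaged heuristics */
			.last_target_node = NUMA_NO_NODE,
		};

		/* ... walk [start, end) in PMD-sized steps and attempt collapse,
		 *     passing &cc down through the scan/collapse helpers ... */
		return 0;
	}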
Add .is_khugepaged flag to struct collapse_control so khugepaged-specific
behavior can be elided by MADV_COLLAPSE context.

Start by protecting khugepaged-specific heuristics by this flag. In
MADV_COLLAPSE, the user presumably has reason to believe the collapse
will be beneficial and khugepaged heuristics shouldn't prevent the user
from doing so:

1) sysfs-controlled knobs khugepaged_max_ptes_[none|swap|shared]

2) requirement that some pages in region being collapsed be young or
   referenced

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---

v6 -> v7: There is no functional change here from v6, just a renaming of
flags to explicitly be predicated on khugepaged.

---
 mm/khugepaged.c | 62 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 43 insertions(+), 19 deletions(-)
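[Editorial note: since the rationale above is that a MADV_COLLAPSE caller already believes the collapse is worthwhile, here is a minimal userspace sketch of exercising that path. It assumes a kernel with this series applied and THP enabled; the fallback value 25 for MADV_COLLAPSE matches the uapi definition, and error handling is reduced to perror.]

	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>

	#ifndef MADV_COLLAPSE
	#define MADV_COLLAPSE 25	/* uapi asm-generic/mman-common.h */
	#endif

	int main(void)
	{
		size_t huge = 2UL << 20;	/* PMD size on x86-64 */
		size_t len = 2 * huge;		/* over-allocate so we can align */
		char *map, *p;

		map = mmap(NULL, len, PROT_READ | PROT_WRITE,
			   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (map == MAP_FAILED) {
			perror("mmap");
			return 1;
		}

		/* Pick a PMD-aligned 2M window inside the mapping. */
		p = (char *)(((unsigned long)map + huge - 1) & ~(huge - 1));

		/*
		 * Fault in only two pages. A region this sparse could trip
		 * khugepaged's max_ptes_none limit if the sysfs knob were
		 * lowered; MADV_COLLAPSE ignores that heuristic with this
		 * series applied.
		 */
		memset(p, 1, 4096);
		memset(p + huge - 4096, 1, 4096);

		if (madvise(p, huge, MADV_COLLAPSE))
			perror("madvise(MADV_COLLAPSE)");	/* e.g. EINVAL on older kernels */
		else
			printf("collapse requested for %zu bytes at %p\n", huge, (void *)p);

		munmap(map, len);
		return 0;
	}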