
[RFC,v2,5/9] khugepaged: generalize __collapse_huge_page_* for mTHP support

Message ID 20250211003028.213461-6-npache@redhat.com (mailing list archive)
State New
Series khugepaged: mTHP support

Commit Message

Nico Pache Feb. 11, 2025, 12:30 a.m. UTC
Generalize the order of the __collapse_huge_page_* functions
to support future mTHP collapse.

mTHP collapse can suffer from inconsistent behavior and memory-waste
"creep". Disable swapin and shared support for mTHP collapse.

No functional changes in this patch.

Signed-off-by: Nico Pache <npache@redhat.com>
---
 mm/khugepaged.c | 48 ++++++++++++++++++++++++++++--------------------
 1 file changed, 28 insertions(+), 20 deletions(-)

Comments

Ryan Roberts Feb. 19, 2025, 3:39 p.m. UTC | #1
On 11/02/2025 00:30, Nico Pache wrote:
> Generalize the order of the __collapse_huge_page_* functions
> to support future mTHP collapse.
> 
> mTHP collapse can suffer from inconsistent behavior and memory-waste
> "creep". Disable swapin and shared support for mTHP collapse.
> 
> No functional changes in this patch.
> 
> Signed-off-by: Nico Pache <npache@redhat.com>
> ---
>  mm/khugepaged.c | 48 ++++++++++++++++++++++++++++--------------------
>  1 file changed, 28 insertions(+), 20 deletions(-)
> 
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 0cfcdc11cabd..3776055bd477 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -565,15 +565,17 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>  					unsigned long address,
>  					pte_t *pte,
>  					struct collapse_control *cc,
> -					struct list_head *compound_pagelist)
> +					struct list_head *compound_pagelist,
> +					u8 order)

nit: I think we are mostly standardised on order being int. Is there any reason
to make it u8 here?

>  {
>  	struct page *page = NULL;
>  	struct folio *folio = NULL;
>  	pte_t *_pte;
>  	int none_or_zero = 0, shared = 0, result = SCAN_FAIL, referenced = 0;
>  	bool writable = false;
> +	int scaled_none = khugepaged_max_ptes_none >> (HPAGE_PMD_ORDER - order);
>  
> -	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
> +	for (_pte = pte; _pte < pte + (1 << order);
>  	     _pte++, address += PAGE_SIZE) {
>  		pte_t pteval = ptep_get(_pte);
>  		if (pte_none(pteval) || (pte_present(pteval) &&
> @@ -581,7 +583,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>  			++none_or_zero;
>  			if (!userfaultfd_armed(vma) &&
>  			    (!cc->is_khugepaged ||
> -			     none_or_zero <= khugepaged_max_ptes_none)) {
> +			     none_or_zero <= scaled_none)) {
>  				continue;
>  			} else {
>  				result = SCAN_EXCEED_NONE_PTE;
> @@ -609,8 +611,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
>  		/* See khugepaged_scan_pmd(). */
>  		if (folio_likely_mapped_shared(folio)) {
>  			++shared;
> -			if (cc->is_khugepaged &&
> -			    shared > khugepaged_max_ptes_shared) {
> +			if (order != HPAGE_PMD_ORDER || (cc->is_khugepaged &&
> +			    shared > khugepaged_max_ptes_shared)) {
>  				result = SCAN_EXCEED_SHARED_PTE;
>  				count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);

Same comment about events; I think you will want to be careful to only count
events for PMD-sized THP using count_vm_event() and introduce equivalent MTHP
events to cover all sizes.
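
For example, something along these lines (a sketch only:
count_mthp_stat() already exists for per-order mTHP stats, but
MTHP_STAT_COLLAPSE_EXCEED_SHARED is an assumed new enum entry, not an
existing counter):

	/* Keep the vmstat event PMD-only; record an order-aware stat too. */
	if (order == HPAGE_PMD_ORDER)
		count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
	count_mthp_stat(order, MTHP_STAT_COLLAPSE_EXCEED_SHARED);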

>  				goto out;
> @@ -711,14 +713,15 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
>  						struct vm_area_struct *vma,
>  						unsigned long address,
>  						spinlock_t *ptl,
> -						struct list_head *compound_pagelist)
> +						struct list_head *compound_pagelist,
> +						u8 order)
>  {
>  	struct folio *src, *tmp;
>  	pte_t *_pte;
>  	pte_t pteval;
>  
> -	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
> -	     _pte++, address += PAGE_SIZE) {
> +	for (_pte = pte; _pte < pte + (1 << order);
> +		_pte++, address += PAGE_SIZE) {

nit: you changed the indentation here.

>  		pteval = ptep_get(_pte);
>  		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
>  			add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1);
> @@ -764,7 +767,8 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
>  					     pmd_t *pmd,
>  					     pmd_t orig_pmd,
>  					     struct vm_area_struct *vma,
> -					     struct list_head *compound_pagelist)
> +					     struct list_head *compound_pagelist,
> +					     u8 order)
>  {
>  	spinlock_t *pmd_ptl;
>  
> @@ -781,7 +785,7 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
>  	 * Release both raw and compound pages isolated
>  	 * in __collapse_huge_page_isolate.
>  	 */
> -	release_pte_pages(pte, pte + HPAGE_PMD_NR, compound_pagelist);
> +	release_pte_pages(pte, pte + (1 << order), compound_pagelist);
>  }
>  
>  /*
> @@ -802,7 +806,7 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
>  static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
>  		pmd_t *pmd, pmd_t orig_pmd, struct vm_area_struct *vma,
>  		unsigned long address, spinlock_t *ptl,
> -		struct list_head *compound_pagelist)
> +		struct list_head *compound_pagelist, u8 order)
>  {
>  	unsigned int i;
>  	int result = SCAN_SUCCEED;
> @@ -810,7 +814,7 @@ static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
>  	/*
>  	 * Copying pages' contents is subject to memory poison at any iteration.
>  	 */
> -	for (i = 0; i < HPAGE_PMD_NR; i++) {
> +	for (i = 0; i < (1 << order); i++) {
>  		pte_t pteval = ptep_get(pte + i);
>  		struct page *page = folio_page(folio, i);
>  		unsigned long src_addr = address + i * PAGE_SIZE;
> @@ -829,10 +833,10 @@ static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
>  
>  	if (likely(result == SCAN_SUCCEED))
>  		__collapse_huge_page_copy_succeeded(pte, vma, address, ptl,
> -						    compound_pagelist);
> +						    compound_pagelist, order);
>  	else
>  		__collapse_huge_page_copy_failed(pte, pmd, orig_pmd, vma,
> -						 compound_pagelist);
> +						 compound_pagelist, order);
>  
>  	return result;
>  }
> @@ -1000,11 +1004,11 @@ static int check_pmd_still_valid(struct mm_struct *mm,
>  static int __collapse_huge_page_swapin(struct mm_struct *mm,
>  				       struct vm_area_struct *vma,
>  				       unsigned long haddr, pmd_t *pmd,
> -				       int referenced)
> +				       int referenced, u8 order)
>  {
>  	int swapped_in = 0;
>  	vm_fault_t ret = 0;
> -	unsigned long address, end = haddr + (HPAGE_PMD_NR * PAGE_SIZE);
> +	unsigned long address, end = haddr + (PAGE_SIZE << order);
>  	int result;
>  	pte_t *pte = NULL;
>  	spinlock_t *ptl;
> @@ -1035,6 +1039,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
>  		if (!is_swap_pte(vmf.orig_pte))
>  			continue;
>  
> +		if (order != HPAGE_PMD_ORDER) {
> +			result = SCAN_EXCEED_SWAP_PTE;
> +			goto out;
> +		}

A comment to explain the rationale for this divergent behaviour based on order
would be helpful.
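
For example (the wording is only a suggestion, based on the rationale in
the commit message about inconsistent behaviour and memory-waste
"creep"):

		/*
		 * mTHP collapse (order != HPAGE_PMD_ORDER) does not swap
		 * pages back in: faulting in swapped-out memory for a
		 * smaller-than-PMD collapse risks inconsistent behaviour
		 * and memory-waste "creep", so treat any swap PTE as
		 * exceeding the limit.
		 */
		if (order != HPAGE_PMD_ORDER) {
			result = SCAN_EXCEED_SWAP_PTE;
			goto out;
		}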

> +
>  		vmf.pte = pte;
>  		vmf.ptl = ptl;
>  		ret = do_swap_page(&vmf);
> @@ -1114,7 +1123,6 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>  	int result = SCAN_FAIL;
>  	struct vm_area_struct *vma;
>  	struct mmu_notifier_range range;
> -

nit: no need for this whitespace change?

>  	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
>  
>  	/*
> @@ -1149,7 +1157,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>  		 * that case.  Continuing to collapse causes inconsistency.
>  		 */
>  		result = __collapse_huge_page_swapin(mm, vma, address, pmd,
> -						     referenced);
> +				referenced, HPAGE_PMD_ORDER);
>  		if (result != SCAN_SUCCEED)
>  			goto out_nolock;
>  	}
> @@ -1196,7 +1204,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>  	pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
>  	if (pte) {
>  		result = __collapse_huge_page_isolate(vma, address, pte, cc,
> -						      &compound_pagelist);
> +					&compound_pagelist, HPAGE_PMD_ORDER);
>  		spin_unlock(pte_ptl);
>  	} else {
>  		result = SCAN_PMD_NULL;
> @@ -1226,7 +1234,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>  
>  	result = __collapse_huge_page_copy(pte, folio, pmd, _pmd,
>  					   vma, address, pte_ptl,
> -					   &compound_pagelist);
> +					   &compound_pagelist, HPAGE_PMD_ORDER);
>  	pte_unmap(pte);
>  	if (unlikely(result != SCAN_SUCCEED))
>  		goto out_up_write;
Nico Pache Feb. 19, 2025, 4:02 p.m. UTC | #2
On Wed, Feb 19, 2025 at 8:39 AM Ryan Roberts <ryan.roberts@arm.com> wrote:
>
> On 11/02/2025 00:30, Nico Pache wrote:
> > Generalize the order of the __collapse_huge_page_* functions
> > to support future mTHP collapse.
> >
> > mTHP collapse can suffer from inconsistent behavior and memory-waste
> > "creep". Disable swapin and shared support for mTHP collapse.
> >
> > No functional changes in this patch.
> >
> > Signed-off-by: Nico Pache <npache@redhat.com>
> > ---
> >  mm/khugepaged.c | 48 ++++++++++++++++++++++++++++--------------------
> >  1 file changed, 28 insertions(+), 20 deletions(-)
> >
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index 0cfcdc11cabd..3776055bd477 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -565,15 +565,17 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >                                       unsigned long address,
> >                                       pte_t *pte,
> >                                       struct collapse_control *cc,
> > -                                     struct list_head *compound_pagelist)
> > +                                     struct list_head *compound_pagelist,
> > +                                     u8 order)
>
> nit: I think we are mostly standardised on order being int. Is there any reason
> to make it u8 here?

The reasoning was that I didn't want to consume a lot of memory for the
mthp_bitmap_stack.
Originally the order and offset were both u8s, but I had to convert the
offset to a u16 to fit the maximum offset on 64K kernels,
so 64 * (8 + 16) bits = 192 bytes, as opposed to 512 bytes if they were ints.

Not sure if using u8/u16 here is frowned upon. Let me know if I need to
convert these back to int or if they can stay!
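
A minimal sketch of the entry layout that implies (the struct and field
names are placeholders, not taken from the series):

	/* Hypothetical stack entry: u8 covers any order up to
	 * HPAGE_PMD_ORDER, and u16 covers the largest PTE offset within
	 * a PMD range on 64K-page kernels (up to 8191). */
	struct mthp_stack_entry {
		u8  order;
		u16 offset;
	};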


>
> >  {
> >       struct page *page = NULL;
> >       struct folio *folio = NULL;
> >       pte_t *_pte;
> >       int none_or_zero = 0, shared = 0, result = SCAN_FAIL, referenced = 0;
> >       bool writable = false;
> > +     int scaled_none = khugepaged_max_ptes_none >> (HPAGE_PMD_ORDER - order);
> >
> > -     for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
> > +     for (_pte = pte; _pte < pte + (1 << order);
> >            _pte++, address += PAGE_SIZE) {
> >               pte_t pteval = ptep_get(_pte);
> >               if (pte_none(pteval) || (pte_present(pteval) &&
> > @@ -581,7 +583,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >                       ++none_or_zero;
> >                       if (!userfaultfd_armed(vma) &&
> >                           (!cc->is_khugepaged ||
> > -                          none_or_zero <= khugepaged_max_ptes_none)) {
> > +                          none_or_zero <= scaled_none)) {
> >                               continue;
> >                       } else {
> >                               result = SCAN_EXCEED_NONE_PTE;
> > @@ -609,8 +611,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
> >               /* See khugepaged_scan_pmd(). */
> >               if (folio_likely_mapped_shared(folio)) {
> >                       ++shared;
> > -                     if (cc->is_khugepaged &&
> > -                         shared > khugepaged_max_ptes_shared) {
> > +                     if (order != HPAGE_PMD_ORDER || (cc->is_khugepaged &&
> > +                         shared > khugepaged_max_ptes_shared)) {
> >                               result = SCAN_EXCEED_SHARED_PTE;
> >                               count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
>
> Same comment about events; I think you will want to be careful to only count
> events for PMD-sized THP using count_vm_event() and introduce equivalent MTHP
> events to cover all sizes.

Makes sense. I'll work on adding the new counters for
THP_SCAN_EXCEED_(SWAP_PTE|NONE_PTE|SHARED_PTE). Thanks!

>
> >                               goto out;
> > @@ -711,14 +713,15 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
> >                                               struct vm_area_struct *vma,
> >                                               unsigned long address,
> >                                               spinlock_t *ptl,
> > -                                             struct list_head *compound_pagelist)
> > +                                             struct list_head *compound_pagelist,
> > +                                             u8 order)
> >  {
> >       struct folio *src, *tmp;
> >       pte_t *_pte;
> >       pte_t pteval;
> >
> > -     for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
> > -          _pte++, address += PAGE_SIZE) {
> > +     for (_pte = pte; _pte < pte + (1 << order);
> > +             _pte++, address += PAGE_SIZE) {
>
> nit: you changed the indentation here.
>
> >               pteval = ptep_get(_pte);
> >               if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
> >                       add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1);
> > @@ -764,7 +767,8 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
> >                                            pmd_t *pmd,
> >                                            pmd_t orig_pmd,
> >                                            struct vm_area_struct *vma,
> > -                                          struct list_head *compound_pagelist)
> > +                                          struct list_head *compound_pagelist,
> > +                                          u8 order)
> >  {
> >       spinlock_t *pmd_ptl;
> >
> > @@ -781,7 +785,7 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
> >        * Release both raw and compound pages isolated
> >        * in __collapse_huge_page_isolate.
> >        */
> > -     release_pte_pages(pte, pte + HPAGE_PMD_NR, compound_pagelist);
> > +     release_pte_pages(pte, pte + (1 << order), compound_pagelist);
> >  }
> >
> >  /*
> > @@ -802,7 +806,7 @@ static void __collapse_huge_page_copy_failed(pte_t *pte,
> >  static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
> >               pmd_t *pmd, pmd_t orig_pmd, struct vm_area_struct *vma,
> >               unsigned long address, spinlock_t *ptl,
> > -             struct list_head *compound_pagelist)
> > +             struct list_head *compound_pagelist, u8 order)
> >  {
> >       unsigned int i;
> >       int result = SCAN_SUCCEED;
> > @@ -810,7 +814,7 @@ static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
> >       /*
> >        * Copying pages' contents is subject to memory poison at any iteration.
> >        */
> > -     for (i = 0; i < HPAGE_PMD_NR; i++) {
> > +     for (i = 0; i < (1 << order); i++) {
> >               pte_t pteval = ptep_get(pte + i);
> >               struct page *page = folio_page(folio, i);
> >               unsigned long src_addr = address + i * PAGE_SIZE;
> > @@ -829,10 +833,10 @@ static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
> >
> >       if (likely(result == SCAN_SUCCEED))
> >               __collapse_huge_page_copy_succeeded(pte, vma, address, ptl,
> > -                                                 compound_pagelist);
> > +                                                 compound_pagelist, order);
> >       else
> >               __collapse_huge_page_copy_failed(pte, pmd, orig_pmd, vma,
> > -                                              compound_pagelist);
> > +                                              compound_pagelist, order);
> >
> >       return result;
> >  }
> > @@ -1000,11 +1004,11 @@ static int check_pmd_still_valid(struct mm_struct *mm,
> >  static int __collapse_huge_page_swapin(struct mm_struct *mm,
> >                                      struct vm_area_struct *vma,
> >                                      unsigned long haddr, pmd_t *pmd,
> > -                                    int referenced)
> > +                                    int referenced, u8 order)
> >  {
> >       int swapped_in = 0;
> >       vm_fault_t ret = 0;
> > -     unsigned long address, end = haddr + (HPAGE_PMD_NR * PAGE_SIZE);
> > +     unsigned long address, end = haddr + (PAGE_SIZE << order);
> >       int result;
> >       pte_t *pte = NULL;
> >       spinlock_t *ptl;
> > @@ -1035,6 +1039,11 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
> >               if (!is_swap_pte(vmf.orig_pte))
> >                       continue;
> >
> > +             if (order != HPAGE_PMD_ORDER) {
> > +                     result = SCAN_EXCEED_SWAP_PTE;
> > +                     goto out;
> > +             }
>
> A comment to explain the rationale for this divergent behaviour based on order
> would be helpful.
>
> > +
> >               vmf.pte = pte;
> >               vmf.ptl = ptl;
> >               ret = do_swap_page(&vmf);
> > @@ -1114,7 +1123,6 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
> >       int result = SCAN_FAIL;
> >       struct vm_area_struct *vma;
> >       struct mmu_notifier_range range;
> > -
>
> nit: no need for this whitespace change?

Thanks! I'll clean up the nits and add a comment to the swapin function
to describe skipping mTHP swapin.
>
> >       VM_BUG_ON(address & ~HPAGE_PMD_MASK);
> >
> >       /*
> > @@ -1149,7 +1157,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
> >                * that case.  Continuing to collapse causes inconsistency.
> >                */
> >               result = __collapse_huge_page_swapin(mm, vma, address, pmd,
> > -                                                  referenced);
> > +                             referenced, HPAGE_PMD_ORDER);
> >               if (result != SCAN_SUCCEED)
> >                       goto out_nolock;
> >       }
> > @@ -1196,7 +1204,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
> >       pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
> >       if (pte) {
> >               result = __collapse_huge_page_isolate(vma, address, pte, cc,
> > -                                                   &compound_pagelist);
> > +                                     &compound_pagelist, HPAGE_PMD_ORDER);
> >               spin_unlock(pte_ptl);
> >       } else {
> >               result = SCAN_PMD_NULL;
> > @@ -1226,7 +1234,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
> >
> >       result = __collapse_huge_page_copy(pte, folio, pmd, _pmd,
> >                                          vma, address, pte_ptl,
> > -                                        &compound_pagelist);
> > +                                        &compound_pagelist, HPAGE_PMD_ORDER);
> >       pte_unmap(pte);
> >       if (unlikely(result != SCAN_SUCCEED))
> >               goto out_up_write;
>

Patch

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 0cfcdc11cabd..3776055bd477 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -565,15 +565,17 @@  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 					unsigned long address,
 					pte_t *pte,
 					struct collapse_control *cc,
-					struct list_head *compound_pagelist)
+					struct list_head *compound_pagelist,
+					u8 order)
 {
 	struct page *page = NULL;
 	struct folio *folio = NULL;
 	pte_t *_pte;
 	int none_or_zero = 0, shared = 0, result = SCAN_FAIL, referenced = 0;
 	bool writable = false;
+	int scaled_none = khugepaged_max_ptes_none >> (HPAGE_PMD_ORDER - order);
 
-	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
+	for (_pte = pte; _pte < pte + (1 << order);
 	     _pte++, address += PAGE_SIZE) {
 		pte_t pteval = ptep_get(_pte);
 		if (pte_none(pteval) || (pte_present(pteval) &&
@@ -581,7 +583,7 @@  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 			++none_or_zero;
 			if (!userfaultfd_armed(vma) &&
 			    (!cc->is_khugepaged ||
-			     none_or_zero <= khugepaged_max_ptes_none)) {
+			     none_or_zero <= scaled_none)) {
 				continue;
 			} else {
 				result = SCAN_EXCEED_NONE_PTE;
@@ -609,8 +611,8 @@  static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		/* See khugepaged_scan_pmd(). */
 		if (folio_likely_mapped_shared(folio)) {
 			++shared;
-			if (cc->is_khugepaged &&
-			    shared > khugepaged_max_ptes_shared) {
+			if (order != HPAGE_PMD_ORDER || (cc->is_khugepaged &&
+			    shared > khugepaged_max_ptes_shared)) {
 				result = SCAN_EXCEED_SHARED_PTE;
 				count_vm_event(THP_SCAN_EXCEED_SHARED_PTE);
 				goto out;
@@ -711,14 +713,15 @@  static void __collapse_huge_page_copy_succeeded(pte_t *pte,
 						struct vm_area_struct *vma,
 						unsigned long address,
 						spinlock_t *ptl,
-						struct list_head *compound_pagelist)
+						struct list_head *compound_pagelist,
+						u8 order)
 {
 	struct folio *src, *tmp;
 	pte_t *_pte;
 	pte_t pteval;
 
-	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
-	     _pte++, address += PAGE_SIZE) {
+	for (_pte = pte; _pte < pte + (1 << order);
+		_pte++, address += PAGE_SIZE) {
 		pteval = ptep_get(_pte);
 		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
 			add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1);
@@ -764,7 +767,8 @@  static void __collapse_huge_page_copy_failed(pte_t *pte,
 					     pmd_t *pmd,
 					     pmd_t orig_pmd,
 					     struct vm_area_struct *vma,
-					     struct list_head *compound_pagelist)
+					     struct list_head *compound_pagelist,
+					     u8 order)
 {
 	spinlock_t *pmd_ptl;
 
@@ -781,7 +785,7 @@  static void __collapse_huge_page_copy_failed(pte_t *pte,
 	 * Release both raw and compound pages isolated
 	 * in __collapse_huge_page_isolate.
 	 */
-	release_pte_pages(pte, pte + HPAGE_PMD_NR, compound_pagelist);
+	release_pte_pages(pte, pte + (1 << order), compound_pagelist);
 }
 
 /*
@@ -802,7 +806,7 @@  static void __collapse_huge_page_copy_failed(pte_t *pte,
 static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
 		pmd_t *pmd, pmd_t orig_pmd, struct vm_area_struct *vma,
 		unsigned long address, spinlock_t *ptl,
-		struct list_head *compound_pagelist)
+		struct list_head *compound_pagelist, u8 order)
 {
 	unsigned int i;
 	int result = SCAN_SUCCEED;
@@ -810,7 +814,7 @@  static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
 	/*
 	 * Copying pages' contents is subject to memory poison at any iteration.
 	 */
-	for (i = 0; i < HPAGE_PMD_NR; i++) {
+	for (i = 0; i < (1 << order); i++) {
 		pte_t pteval = ptep_get(pte + i);
 		struct page *page = folio_page(folio, i);
 		unsigned long src_addr = address + i * PAGE_SIZE;
@@ -829,10 +833,10 @@  static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio,
 
 	if (likely(result == SCAN_SUCCEED))
 		__collapse_huge_page_copy_succeeded(pte, vma, address, ptl,
-						    compound_pagelist);
+						    compound_pagelist, order);
 	else
 		__collapse_huge_page_copy_failed(pte, pmd, orig_pmd, vma,
-						 compound_pagelist);
+						 compound_pagelist, order);
 
 	return result;
 }
@@ -1000,11 +1004,11 @@  static int check_pmd_still_valid(struct mm_struct *mm,
 static int __collapse_huge_page_swapin(struct mm_struct *mm,
 				       struct vm_area_struct *vma,
 				       unsigned long haddr, pmd_t *pmd,
-				       int referenced)
+				       int referenced, u8 order)
 {
 	int swapped_in = 0;
 	vm_fault_t ret = 0;
-	unsigned long address, end = haddr + (HPAGE_PMD_NR * PAGE_SIZE);
+	unsigned long address, end = haddr + (PAGE_SIZE << order);
 	int result;
 	pte_t *pte = NULL;
 	spinlock_t *ptl;
@@ -1035,6 +1039,11 @@  static int __collapse_huge_page_swapin(struct mm_struct *mm,
 		if (!is_swap_pte(vmf.orig_pte))
 			continue;
 
+		if (order != HPAGE_PMD_ORDER) {
+			result = SCAN_EXCEED_SWAP_PTE;
+			goto out;
+		}
+
 		vmf.pte = pte;
 		vmf.ptl = ptl;
 		ret = do_swap_page(&vmf);
@@ -1114,7 +1123,6 @@  static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 	int result = SCAN_FAIL;
 	struct vm_area_struct *vma;
 	struct mmu_notifier_range range;
-
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
 	/*
@@ -1149,7 +1157,7 @@  static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 		 * that case.  Continuing to collapse causes inconsistency.
 		 */
 		result = __collapse_huge_page_swapin(mm, vma, address, pmd,
-						     referenced);
+				referenced, HPAGE_PMD_ORDER);
 		if (result != SCAN_SUCCEED)
 			goto out_nolock;
 	}
@@ -1196,7 +1204,7 @@  static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 	pte = pte_offset_map_lock(mm, &_pmd, address, &pte_ptl);
 	if (pte) {
 		result = __collapse_huge_page_isolate(vma, address, pte, cc,
-						      &compound_pagelist);
+					&compound_pagelist, HPAGE_PMD_ORDER);
 		spin_unlock(pte_ptl);
 	} else {
 		result = SCAN_PMD_NULL;
@@ -1226,7 +1234,7 @@  static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 
 	result = __collapse_huge_page_copy(pte, folio, pmd, _pmd,
 					   vma, address, pte_ptl,
-					   &compound_pagelist);
+					   &compound_pagelist, HPAGE_PMD_ORDER);
 	pte_unmap(pte);
 	if (unlikely(result != SCAN_SUCCEED))
 		goto out_up_write;