
[v4,1/3] mm/khugepaged: refactor collapse_file control flow

Message ID: 20230217085439.2826375-2-stevensd@google.com
State: New
Series: mm/khugepaged: fix khugepaged+shmem races

Commit Message

David Stevens Feb. 17, 2023, 8:54 a.m. UTC
From: David Stevens <stevensd@chromium.org>

Add a rollback label to deal with failure, instead of repeatedly
checking for SCAN_SUCCEED, to make it easier to add more failure
cases. The refactoring also allows the collapse_file tracepoint to
include hpage on success (instead of NULL).

Signed-off-by: David Stevens <stevensd@chromium.org>
---
 mm/khugepaged.c | 223 ++++++++++++++++++++++++------------------------
 1 file changed, 110 insertions(+), 113 deletions(-)
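
For readers skimming the archive, the control-flow change can be summarized
with a small standalone sketch (hypothetical step_one()/step_two() helpers,
not the mm/khugepaged.c code itself): nested "result == SCAN_SUCCEED" checks
are replaced by failure paths that jump to a single rollback label.

enum scan_result { SCAN_SUCCEED, SCAN_FAIL };

static enum scan_result step_one(void) { return SCAN_SUCCEED; }	/* stub */
static enum scan_result step_two(void) { return SCAN_FAIL; }	/* stub */

/* Before: every later step re-checks the result. */
static enum scan_result collapse_nested(void)
{
	enum scan_result result = step_one();

	if (result == SCAN_SUCCEED)
		result = step_two();
	if (result == SCAN_SUCCEED) {
		/* success-only work */
	} else {
		/* roll back everything done so far */
	}
	return result;
}

/* After: failures jump to one rollback label; success falls straight through. */
static enum scan_result collapse_rollback(void)
{
	enum scan_result result = step_one();

	if (result != SCAN_SUCCEED)
		goto rollback;
	result = step_two();
	if (result != SCAN_SUCCEED)
		goto rollback;
	/* success-only work */
	goto out;
rollback:
	/* roll back everything done so far */
out:
	return result;
}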

Comments

Yang Shi Feb. 17, 2023, 11:44 p.m. UTC | #1
On Fri, Feb 17, 2023 at 12:55 AM David Stevens <stevensd@chromium.org> wrote:
>
> From: David Stevens <stevensd@chromium.org>
>
> Add a rollback label to deal with failure, instead of continuously
> checking for RESULT_SUCCESS, to make it easier to add more failure
> cases. The refactoring also allows the collapse_file tracepoint to
> include hpage on success (instead of NULL).
>
> Signed-off-by: David Stevens <stevensd@chromium.org>

The refactor looks good to me.

Reviewed-by: Yang Shi <shy828301@gmail.com>

Peter Xu Feb. 21, 2023, 9:54 p.m. UTC | #2
On Fri, Feb 17, 2023 at 05:54:37PM +0900, David Stevens wrote:
> From: David Stevens <stevensd@chromium.org>
> 
> Add a rollback label to deal with failure, instead of continuously
> checking for RESULT_SUCCESS, to make it easier to add more failure
> cases. The refactoring also allows the collapse_file tracepoint to
> include hpage on success (instead of NULL).
> 
> Signed-off-by: David Stevens <stevensd@chromium.org>
> [...]
> -	if (hpage)
> -		unlock_page(hpage);
>  out:
>  	VM_BUG_ON(!list_empty(&pagelist));
> -	if (hpage) {
> -		mem_cgroup_uncharge(page_folio(hpage));
> -		put_page(hpage);
> -	}

Moving this chunk seems wrong, as it can leak the huge page if
alloc_charge_hpage() failed on mem charging, iiuc.

And I found that keeping it seems wrong too, because if the mem charge
failed we'll uncharge even though it was never charged.  But that's a
pre-existing issue, not something this patch introduces.
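
To spell the concern out, a simplified sketch of the two paths involved
(reconstructed from the diff above, not the exact kernel code; the
alloc_charge_hpage() behaviour is assumed to be as described in this thread):

	result = alloc_charge_hpage(&hpage, mm, cc);
	if (result != SCAN_SUCCEED)
		goto out;		/* charge failure skips rollback: below */

	/* ... collapse work; later failures use "goto rollback" ... */

rollback:
	/* with this patch, the only put_page(hpage) lives here */
	mem_cgroup_uncharge(page_folio(hpage));
	put_page(hpage);
out:
	/*
	 * A charge failure that still set *hpage never reaches put_page(),
	 * so the huge page leaks; keeping the old unconditional uncharge
	 * at out: instead would uncharge a page that was never charged.
	 */
	return result;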

Posted a patch for this:

https://lore.kernel.org/all/20230221214344.609226-1-peterx@redhat.com/

I _think_ this patch will make sense after rebasing to that fix if that's
correct, but please review it and double check.

Thanks,

Yang Shi Feb. 21, 2023, 10:28 p.m. UTC | #3
On Tue, Feb 21, 2023 at 1:54 PM Peter Xu <peterx@redhat.com> wrote:
>
> On Fri, Feb 17, 2023 at 05:54:37PM +0900, David Stevens wrote:
> > From: David Stevens <stevensd@chromium.org>
> >
> > Add a rollback label to deal with failure, instead of continuously
> > checking for RESULT_SUCCESS, to make it easier to add more failure
> > cases. The refactoring also allows the collapse_file tracepoint to
> > include hpage on success (instead of NULL).
> >
> > Signed-off-by: David Stevens <stevensd@chromium.org>
> > [...]
> > -     if (hpage)
> > -             unlock_page(hpage);
> >  out:
> >       VM_BUG_ON(!list_empty(&pagelist));
> > -     if (hpage) {
> > -             mem_cgroup_uncharge(page_folio(hpage));
> > -             put_page(hpage);
> > -     }
>
> Moving this chunk seems wrong, as it can leak the huge page if
> alloc_charge_hpage() failed on mem charging, iiuc.

Yeah, good catch.

>
> And I found that keeping it seems wrong either, because if mem charge
> failed we'll uncharge even without charging it before.  But that's nothing
> about this patch alone.

We should be able to simply check the return value. For example:

if (result == SCAN_CGROUP_CHARGE_FAIL)
    put_page(hpage);
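
A minimal sketch of one possible placement for that check, assuming *hpage
can still be set when the charge fails (illustrative only, not the actual
patch):

out:
	VM_BUG_ON(!list_empty(&pagelist));
	/* hypothetical: free the page that was allocated but never charged */
	if (result == SCAN_CGROUP_CHARGE_FAIL && hpage)
		put_page(hpage);
	trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr,
					  file, nr, result);
	return result;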

>
> Posted a patch for this:
>
> https://lore.kernel.org/all/20230221214344.609226-1-peterx@redhat.com/
>
> I _think_ this patch will make sense after rebasing to that fix if that's
> correct, but please review it and double check.

It is ok too.


>
> Thanks,
>
> > -
> >       trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result);
> >       return result;
> >  }
> > --
> > 2.39.2.637.g21b0678d19-goog
> >
>
> --
> Peter Xu
>
David Stevens Feb. 22, 2023, 4:08 a.m. UTC | #4
On Wed, Feb 22, 2023 at 6:54 AM Peter Xu <peterx@redhat.com> wrote:
>
> On Fri, Feb 17, 2023 at 05:54:37PM +0900, David Stevens wrote:
> > From: David Stevens <stevensd@chromium.org>
> >
> > Add a rollback label to deal with failure, instead of continuously
> > checking for RESULT_SUCCESS, to make it easier to add more failure
> > cases. The refactoring also allows the collapse_file tracepoint to
> > include hpage on success (instead of NULL).
> >
> > Signed-off-by: David Stevens <stevensd@chromium.org>
> > [...]
> > -     if (hpage)
> > -             unlock_page(hpage);
> >  out:
> >       VM_BUG_ON(!list_empty(&pagelist));
> > -     if (hpage) {
> > -             mem_cgroup_uncharge(page_folio(hpage));
> > -             put_page(hpage);
> > -     }
>
> Moving this chunk seems wrong, as it can leak the huge page if
> alloc_charge_hpage() failed on mem charging, iiuc.
>
> And I found that keeping it seems wrong either, because if mem charge
> failed we'll uncharge even without charging it before.  But that's nothing
> about this patch alone.
>
> Posted a patch for this:
>
> https://lore.kernel.org/all/20230221214344.609226-1-peterx@redhat.com/
>
> I _think_ this patch will make sense after rebasing to that fix if that's
> correct, but please review it and double check.
>

Ah, good catch. I didn't notice that alloc_charge_hpage could allocate
*hpage even on failure. The failure path should work properly with
your fix.
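
For reference, a rough sketch of that contract with hypothetical helper names
(allocate_huge_page()/charge_to_memcg() are stand-ins, not kernel APIs):

static int alloc_charge_hpage_sketch(struct page **hpage, struct mm_struct *mm)
{
	*hpage = allocate_huge_page();
	if (!*hpage)
		return SCAN_ALLOC_HUGE_PAGE_FAIL;	/* nothing for the caller to free */

	if (charge_to_memcg(*hpage, mm))
		return SCAN_CGROUP_CHARGE_FAIL;		/* *hpage is still set; the caller
							 * owns an uncharged page and must
							 * put_page() it on this path */
	return SCAN_SUCCEED;
}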

-David
Peter Xu Feb. 22, 2023, 4:24 p.m. UTC | #5
On Wed, Feb 22, 2023 at 01:08:19PM +0900, David Stevens wrote:
> > >  out:
> > >       VM_BUG_ON(!list_empty(&pagelist));
> > > -     if (hpage) {
> > > -             mem_cgroup_uncharge(page_folio(hpage));
> > > -             put_page(hpage);
> > > -     }
> >
> > Moving this chunk seems wrong, as it can leak the huge page if
> > alloc_charge_hpage() failed on mem charging, iiuc.
> >
> > And I found that keeping it seems wrong either, because if mem charge
> > failed we'll uncharge even without charging it before.  But that's nothing
> > about this patch alone.
> >
> > Posted a patch for this:
> >
> > https://lore.kernel.org/all/20230221214344.609226-1-peterx@redhat.com/

[1]

> >
> > I _think_ this patch will make sense after rebasing to that fix if that's
> > correct, but please review it and double check.
> >
> 
> Ah, good catch. I didn't notice that alloc_charge_hpage could allocate
> *hpage even on failure. The failure path should work properly with
> your fix.

Thanks for confirming.

If we can merge [1] above before this patch, then I think this patch is
correct.  Only if so:

Acked-by: Peter Xu <peterx@redhat.com>

Patch

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 8dbc39896811..6a3d6d2e25e0 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1885,6 +1885,12 @@  static int collapse_file(struct mm_struct *mm, unsigned long addr,
 	if (result != SCAN_SUCCEED)
 		goto out;
 
+	__SetPageLocked(hpage);
+	if (is_shmem)
+		__SetPageSwapBacked(hpage);
+	hpage->index = start;
+	hpage->mapping = mapping;
+
 	/*
 	 * Ensure we have slots for all the pages in the range.  This is
 	 * almost certainly a no-op because most of the pages must be present
@@ -1897,16 +1903,10 @@  static int collapse_file(struct mm_struct *mm, unsigned long addr,
 		xas_unlock_irq(&xas);
 		if (!xas_nomem(&xas, GFP_KERNEL)) {
 			result = SCAN_FAIL;
-			goto out;
+			goto rollback;
 		}
 	} while (1);
 
-	__SetPageLocked(hpage);
-	if (is_shmem)
-		__SetPageSwapBacked(hpage);
-	hpage->index = start;
-	hpage->mapping = mapping;
-
 	/*
 	 * At this point the hpage is locked and not up-to-date.
 	 * It's safe to insert it into the page cache, because nobody would
@@ -2123,131 +2123,128 @@  static int collapse_file(struct mm_struct *mm, unsigned long addr,
 	 */
 	try_to_unmap_flush();
 
-	if (result == SCAN_SUCCEED) {
-		/*
-		 * Replacing old pages with new one has succeeded, now we
-		 * attempt to copy the contents.
-		 */
-		index = start;
-		list_for_each_entry(page, &pagelist, lru) {
-			while (index < page->index) {
-				clear_highpage(hpage + (index % HPAGE_PMD_NR));
-				index++;
-			}
-			if (copy_mc_page(hpage + (page->index % HPAGE_PMD_NR),
-					 page) > 0) {
-				result = SCAN_COPY_MC;
-				break;
-			}
-			index++;
-		}
-		while (result == SCAN_SUCCEED && index < end) {
+	if (result != SCAN_SUCCEED)
+		goto rollback;
+
+	/*
+	 * Replacing old pages with new one has succeeded, now we
+	 * attempt to copy the contents.
+	 */
+	index = start;
+	list_for_each_entry(page, &pagelist, lru) {
+		while (index < page->index) {
 			clear_highpage(hpage + (index % HPAGE_PMD_NR));
 			index++;
 		}
+		if (copy_mc_page(hpage + (page->index % HPAGE_PMD_NR),
+				 page) > 0) {
+			result = SCAN_COPY_MC;
+			goto rollback;
+		}
+		index++;
+	}
+	while (index < end) {
+		clear_highpage(hpage + (index % HPAGE_PMD_NR));
+		index++;
 	}
 
-	if (result == SCAN_SUCCEED) {
-		/*
-		 * Copying old pages to huge one has succeeded, now we
-		 * need to free the old pages.
-		 */
-		list_for_each_entry_safe(page, tmp, &pagelist, lru) {
-			list_del(&page->lru);
-			page->mapping = NULL;
-			page_ref_unfreeze(page, 1);
-			ClearPageActive(page);
-			ClearPageUnevictable(page);
-			unlock_page(page);
-			put_page(page);
-		}
+	/*
+	 * Copying old pages to huge one has succeeded, now we
+	 * need to free the old pages.
+	 */
+	list_for_each_entry_safe(page, tmp, &pagelist, lru) {
+		list_del(&page->lru);
+		page->mapping = NULL;
+		page_ref_unfreeze(page, 1);
+		ClearPageActive(page);
+		ClearPageUnevictable(page);
+		unlock_page(page);
+		put_page(page);
+	}
 
-		xas_lock_irq(&xas);
-		if (is_shmem)
-			__mod_lruvec_page_state(hpage, NR_SHMEM_THPS, nr);
-		else
-			__mod_lruvec_page_state(hpage, NR_FILE_THPS, nr);
+	xas_lock_irq(&xas);
+	if (is_shmem)
+		__mod_lruvec_page_state(hpage, NR_SHMEM_THPS, nr);
+	else
+		__mod_lruvec_page_state(hpage, NR_FILE_THPS, nr);
+
+	if (nr_none) {
+		__mod_lruvec_page_state(hpage, NR_FILE_PAGES, nr_none);
+		/* nr_none is always 0 for non-shmem. */
+		__mod_lruvec_page_state(hpage, NR_SHMEM, nr_none);
+	}
+	/* Join all the small entries into a single multi-index entry. */
+	xas_set_order(&xas, start, HPAGE_PMD_ORDER);
+	xas_store(&xas, hpage);
+	xas_unlock_irq(&xas);
 
-		if (nr_none) {
-			__mod_lruvec_page_state(hpage, NR_FILE_PAGES, nr_none);
-			/* nr_none is always 0 for non-shmem. */
-			__mod_lruvec_page_state(hpage, NR_SHMEM, nr_none);
-		}
-		/* Join all the small entries into a single multi-index entry. */
-		xas_set_order(&xas, start, HPAGE_PMD_ORDER);
-		xas_store(&xas, hpage);
-		xas_unlock_irq(&xas);
+	folio = page_folio(hpage);
+	folio_mark_uptodate(folio);
+	folio_ref_add(folio, HPAGE_PMD_NR - 1);
 
-		folio = page_folio(hpage);
-		folio_mark_uptodate(folio);
-		folio_ref_add(folio, HPAGE_PMD_NR - 1);
+	if (is_shmem)
+		folio_mark_dirty(folio);
+	folio_add_lru(folio);
 
-		if (is_shmem)
-			folio_mark_dirty(folio);
-		folio_add_lru(folio);
+	/*
+	 * Remove pte page tables, so we can re-fault the page as huge.
+	 */
+	result = retract_page_tables(mapping, start, mm, addr, hpage,
+				     cc);
+	unlock_page(hpage);
+	goto out;
+
+rollback:
+	/* Something went wrong: roll back page cache changes */
+	xas_lock_irq(&xas);
+	if (nr_none) {
+		mapping->nrpages -= nr_none;
+		shmem_uncharge(mapping->host, nr_none);
+	}
 
-		/*
-		 * Remove pte page tables, so we can re-fault the page as huge.
-		 */
-		result = retract_page_tables(mapping, start, mm, addr, hpage,
-					     cc);
-		unlock_page(hpage);
-		hpage = NULL;
-	} else {
-		/* Something went wrong: roll back page cache changes */
-		xas_lock_irq(&xas);
-		if (nr_none) {
-			mapping->nrpages -= nr_none;
-			shmem_uncharge(mapping->host, nr_none);
+	xas_set(&xas, start);
+	xas_for_each(&xas, page, end - 1) {
+		page = list_first_entry_or_null(&pagelist,
+				struct page, lru);
+		if (!page || xas.xa_index < page->index) {
+			if (!nr_none)
+				break;
+			nr_none--;
+			/* Put holes back where they were */
+			xas_store(&xas, NULL);
+			continue;
 		}
 
-		xas_set(&xas, start);
-		xas_for_each(&xas, page, end - 1) {
-			page = list_first_entry_or_null(&pagelist,
-					struct page, lru);
-			if (!page || xas.xa_index < page->index) {
-				if (!nr_none)
-					break;
-				nr_none--;
-				/* Put holes back where they were */
-				xas_store(&xas, NULL);
-				continue;
-			}
+		VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
 
-			VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
+		/* Unfreeze the page. */
+		list_del(&page->lru);
+		page_ref_unfreeze(page, 2);
+		xas_store(&xas, page);
+		xas_pause(&xas);
+		xas_unlock_irq(&xas);
+		unlock_page(page);
+		putback_lru_page(page);
+		xas_lock_irq(&xas);
+	}
+	VM_BUG_ON(nr_none);
+	/*
+	 * Undo the updates of filemap_nr_thps_inc for non-SHMEM file only.
+	 * This undo is not needed unless failure is due to SCAN_COPY_MC.
+	 */
+	if (!is_shmem && result == SCAN_COPY_MC)
+		filemap_nr_thps_dec(mapping);
 
-			/* Unfreeze the page. */
-			list_del(&page->lru);
-			page_ref_unfreeze(page, 2);
-			xas_store(&xas, page);
-			xas_pause(&xas);
-			xas_unlock_irq(&xas);
-			unlock_page(page);
-			putback_lru_page(page);
-			xas_lock_irq(&xas);
-		}
-		VM_BUG_ON(nr_none);
-		/*
-		 * Undo the updates of filemap_nr_thps_inc for non-SHMEM file only.
-		 * This undo is not needed unless failure is due to SCAN_COPY_MC.
-		 */
-		if (!is_shmem && result == SCAN_COPY_MC)
-			filemap_nr_thps_dec(mapping);
+	xas_unlock_irq(&xas);
 
-		xas_unlock_irq(&xas);
+	hpage->mapping = NULL;
 
-		hpage->mapping = NULL;
-	}
+	unlock_page(hpage);
+	mem_cgroup_uncharge(page_folio(hpage));
+	put_page(hpage);
 
-	if (hpage)
-		unlock_page(hpage);
 out:
 	VM_BUG_ON(!list_empty(&pagelist));
-	if (hpage) {
-		mem_cgroup_uncharge(page_folio(hpage));
-		put_page(hpage);
-	}
-
 	trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result);
 	return result;
 }