| Message ID | 20220323232929.3035443-3-jiaqiyan@google.com (mailing list archive) |
| --- | --- |
| State | New |
| Headers | show |
| Series | Memory poison recovery in khugepaged | expand |
On Wed, Mar 23, 2022 at 4:29 PM Jiaqi Yan <jiaqiyan@google.com> wrote: > > Make collapse_file roll back when copying pages failed. > More concretely: > * extract copy operations into a separate loop > * postpone the updates for nr_none until both scan and copy succeeded > * postpone joining small xarray entries until both scan and copy > succeeded > * as for update operations to NR_XXX_THPS > * for SHMEM file, postpone until both scan and copy succeeded > * for other file, roll back if scan succeeded but copy failed > > Signed-off-by: Jiaqi Yan <jiaqiyan@google.com> > --- > include/linux/highmem.h | 18 ++++++++++ > mm/khugepaged.c | 75 +++++++++++++++++++++++++++-------------- > 2 files changed, 67 insertions(+), 26 deletions(-) > > diff --git a/include/linux/highmem.h b/include/linux/highmem.h > index 15d0aa4d349c..fc5aa221bdb5 100644 > --- a/include/linux/highmem.h > +++ b/include/linux/highmem.h > @@ -315,6 +315,24 @@ static inline void copy_highpage(struct page *to, struct page *from) > kunmap_local(vfrom); > } > > +/* > + * Machine check exception handled version of copy_highpage. > + * Return true if copying page content failed; otherwise false. 
> + */ > +static inline bool copy_highpage_mc(struct page *to, struct page *from) > +{ > + char *vfrom, *vto; > + unsigned long ret; > + > + vfrom = kmap_local_page(from); > + vto = kmap_local_page(to); > + ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); > + kunmap_local(vto); > + kunmap_local(vfrom); > + > + return ret > 0; > +} > + > #endif > > static inline void memcpy_page(struct page *dst_page, size_t dst_off, > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index 84ed177f56ff..ed2b1cd4bbc6 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -1708,12 +1708,13 @@ static void collapse_file(struct mm_struct *mm, > { > struct address_space *mapping = file->f_mapping; > gfp_t gfp; > - struct page *new_page; > + struct page *new_page, *page, *tmp; It seems you removed the "struct page *page" from " if (result == SCAN_SUCCEED)", but keep the "struct page *page" under "for (index = start; index < end; index++)". I think the "struct page *page" in the for loop could be removed too. 
> pgoff_t index, end = start + HPAGE_PMD_NR; > LIST_HEAD(pagelist); > XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); > int nr_none = 0, result = SCAN_SUCCEED; > bool is_shmem = shmem_file(file); > + bool copy_failed = false; > int nr; > > VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); > @@ -1936,9 +1937,7 @@ static void collapse_file(struct mm_struct *mm, > } > nr = thp_nr_pages(new_page); > > - if (is_shmem) > - __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); > - else { > + if (!is_shmem) { > __mod_lruvec_page_state(new_page, NR_FILE_THPS, nr); > filemap_nr_thps_inc(mapping); > /* > @@ -1956,34 +1955,39 @@ static void collapse_file(struct mm_struct *mm, > } > } > > - if (nr_none) { > - __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none); > - if (is_shmem) > - __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); > - } > - > - /* Join all the small entries into a single multi-index entry */ > - xas_set_order(&xas, start, HPAGE_PMD_ORDER); > - xas_store(&xas, new_page); > xa_locked: > xas_unlock_irq(&xas); > xa_unlocked: > > if (result == SCAN_SUCCEED) { > - struct page *page, *tmp; > - > /* > * Replacing old pages with new one has succeeded, now we > - * need to copy the content and free the old pages. > + * attempt to copy the contents. 
> */ > index = start; > - list_for_each_entry_safe(page, tmp, &pagelist, lru) { > + list_for_each_entry(page, &pagelist, lru) { > while (index < page->index) { > clear_highpage(new_page + (index % HPAGE_PMD_NR)); > index++; > } > - copy_highpage(new_page + (page->index % HPAGE_PMD_NR), > - page); > + if (copy_highpage_mc(new_page + (page->index % HPAGE_PMD_NR), page)) { > + copy_failed = true; > + break; > + } > + index++; > + } > + while (!copy_failed && index < end) { > + clear_highpage(new_page + (page->index % HPAGE_PMD_NR)); > + index++; > + } > + } > + > + if (result == SCAN_SUCCEED && !copy_failed) { I think you could set "result" to SCAN_COPY_MC (as same as the anonymous one), then you could drop !copy_failed and use "result" alone afterwards. > + /* > + * Copying old pages to huge one has succeeded, now we > + * need to free the old pages. > + */ > + list_for_each_entry_safe(page, tmp, &pagelist, lru) { > list_del(&page->lru); > page->mapping = NULL; > page_ref_unfreeze(page, 1); > @@ -1991,12 +1995,20 @@ static void collapse_file(struct mm_struct *mm, > ClearPageUnevictable(page); > unlock_page(page); > put_page(page); > - index++; > } > - while (index < end) { > - clear_highpage(new_page + (index % HPAGE_PMD_NR)); > - index++; > + > + xas_lock_irq(&xas); > + if (is_shmem) > + __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); > + if (nr_none) { > + __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none); > + if (is_shmem) > + __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); > } > + /* Join all the small entries into a single multi-index entry. 
*/ > + xas_set_order(&xas, start, HPAGE_PMD_ORDER); > + xas_store(&xas, new_page); > + xas_unlock_irq(&xas); > > SetPageUptodate(new_page); > page_ref_add(new_page, HPAGE_PMD_NR - 1); > @@ -2012,9 +2024,11 @@ static void collapse_file(struct mm_struct *mm, > > khugepaged_pages_collapsed++; > } else { > - struct page *page; > - > - /* Something went wrong: roll back page cache changes */ > + /* > + * Something went wrong: > + * either result != SCAN_SUCCEED or copy_failed, > + * roll back page cache changes > + */ > xas_lock_irq(&xas); > mapping->nrpages -= nr_none; > > @@ -2047,6 +2061,15 @@ static void collapse_file(struct mm_struct *mm, > xas_lock_irq(&xas); > } > VM_BUG_ON(nr_none); > + /* > + * Undo the updates of thp_nr_pages(new_page) for non-SHMEM file, > + * which is not updated yet for SHMEM file. > + * These undos are not needed if result is not SCAN_SUCCEED. > + */ > + if (!is_shmem && result == SCAN_SUCCEED) { Handling the error case when "result == SCAN_SUCCEED" looks awkward. With the above fixed (set result to SCAN_COPY_MC) we could avoid the awkwardness. > + __mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr); I'm wondering whether we may defer NR_FILE_THPS update just like SHMEM because it has not to be updated in advance so that we have the user visible counters update in the single place. Just filemap_nr_thps needs to be updated in advance since it is used to sync with file open path to truncate huge pages. > + filemap_nr_thps_dec(mapping); > + } > xas_unlock_irq(&xas); > > new_page->mapping = NULL; > -- > 2.35.1.894.gb6a874cedc-goog >
在 2022/3/24 7:29, Jiaqi Yan 写道: > Make collapse_file roll back when copying pages failed. > More concretely: > * extract copy operations into a separate loop > * postpone the updates for nr_none until both scan and copy succeeded > * postpone joining small xarray entries until both scan and copy > succeeded > * as for update operations to NR_XXX_THPS > * for SHMEM file, postpone until both scan and copy succeeded > * for other file, roll back if scan succeeded but copy failed > > Signed-off-by: Jiaqi Yan <jiaqiyan@google.com> > --- > include/linux/highmem.h | 18 ++++++++++ > mm/khugepaged.c | 75 +++++++++++++++++++++++++++-------------- > 2 files changed, 67 insertions(+), 26 deletions(-) > > diff --git a/include/linux/highmem.h b/include/linux/highmem.h > index 15d0aa4d349c..fc5aa221bdb5 100644 > --- a/include/linux/highmem.h > +++ b/include/linux/highmem.h > @@ -315,6 +315,24 @@ static inline void copy_highpage(struct page *to, struct page *from) > kunmap_local(vfrom); > } > > +/* > + * Machine check exception handled version of copy_highpage. > + * Return true if copying page content failed; otherwise false. 
> + */ > +static inline bool copy_highpage_mc(struct page *to, struct page *from) > +{ > + char *vfrom, *vto; > + unsigned long ret; > + > + vfrom = kmap_local_page(from); > + vto = kmap_local_page(to); > + ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); > + kunmap_local(vto); > + kunmap_local(vfrom); > + > + return ret > 0; > +} > + > #endif > > static inline void memcpy_page(struct page *dst_page, size_t dst_off, > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index 84ed177f56ff..ed2b1cd4bbc6 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -1708,12 +1708,13 @@ static void collapse_file(struct mm_struct *mm, > { > struct address_space *mapping = file->f_mapping; > gfp_t gfp; > - struct page *new_page; > + struct page *new_page, *page, *tmp; > pgoff_t index, end = start + HPAGE_PMD_NR; > LIST_HEAD(pagelist); > XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); > int nr_none = 0, result = SCAN_SUCCEED; > bool is_shmem = shmem_file(file); > + bool copy_failed = false; > int nr; > > VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); > @@ -1936,9 +1937,7 @@ static void collapse_file(struct mm_struct *mm, > } > nr = thp_nr_pages(new_page); > > - if (is_shmem) > - __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); > - else { > + if (!is_shmem) { > __mod_lruvec_page_state(new_page, NR_FILE_THPS, nr); > filemap_nr_thps_inc(mapping); > /* > @@ -1956,34 +1955,39 @@ static void collapse_file(struct mm_struct *mm, > } > } > > - if (nr_none) { > - __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none); > - if (is_shmem) > - __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); > - } > - > - /* Join all the small entries into a single multi-index entry */ > - xas_set_order(&xas, start, HPAGE_PMD_ORDER); > - xas_store(&xas, new_page); > xa_locked: > xas_unlock_irq(&xas); > xa_unlocked: > > if (result == SCAN_SUCCEED) { > - struct page *page, *tmp; > - > /* > * Replacing old pages with new one has succeeded, now we > - * need to 
copy the content and free the old pages. > + * attempt to copy the contents. > */ > index = start; > - list_for_each_entry_safe(page, tmp, &pagelist, lru) { > + list_for_each_entry(page, &pagelist, lru) { > while (index < page->index) { > clear_highpage(new_page + (index % HPAGE_PMD_NR)); > index++; > } > - copy_highpage(new_page + (page->index % HPAGE_PMD_NR), > - page); > + if (copy_highpage_mc(new_page + (page->index % HPAGE_PMD_NR), page)) { > + copy_failed = true; The 1st patch here used "copy_succeed = false", It is best that the logic of the two positions can be unified. > + break; > + } > + index++; > + } > + while (!copy_failed && index < end) { > + clear_highpage(new_page + (page->index % HPAGE_PMD_NR)); > + index++; > + } > + } > + > + if (result == SCAN_SUCCEED && !copy_failed) { > + /* > + * Copying old pages to huge one has succeeded, now we > + * need to free the old pages. > + */ > + list_for_each_entry_safe(page, tmp, &pagelist, lru) { > list_del(&page->lru); > page->mapping = NULL; > page_ref_unfreeze(page, 1); > @@ -1991,12 +1995,20 @@ static void collapse_file(struct mm_struct *mm, > ClearPageUnevictable(page); > unlock_page(page); > put_page(page); > - index++; > } > - while (index < end) { > - clear_highpage(new_page + (index % HPAGE_PMD_NR)); > - index++; > + > + xas_lock_irq(&xas); > + if (is_shmem) > + __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); > + if (nr_none) { > + __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none); > + if (is_shmem) > + __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); > } > + /* Join all the small entries into a single multi-index entry. 
*/ > + xas_set_order(&xas, start, HPAGE_PMD_ORDER); > + xas_store(&xas, new_page); > + xas_unlock_irq(&xas); > > SetPageUptodate(new_page); > page_ref_add(new_page, HPAGE_PMD_NR - 1); > @@ -2012,9 +2024,11 @@ static void collapse_file(struct mm_struct *mm, > > khugepaged_pages_collapsed++; > } else { > - struct page *page; > - > - /* Something went wrong: roll back page cache changes */ > + /* > + * Something went wrong: > + * either result != SCAN_SUCCEED or copy_failed, > + * roll back page cache changes > + */ > xas_lock_irq(&xas); > mapping->nrpages -= nr_none; > > @@ -2047,6 +2061,15 @@ static void collapse_file(struct mm_struct *mm, > xas_lock_irq(&xas); > } > VM_BUG_ON(nr_none); > + /* > + * Undo the updates of thp_nr_pages(new_page) for non-SHMEM file, > + * which is not updated yet for SHMEM file. > + * These undos are not needed if result is not SCAN_SUCCEED. > + */ > + if (!is_shmem && result == SCAN_SUCCEED) { > + __mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr); > + filemap_nr_thps_dec(mapping); > + } > xas_unlock_irq(&xas); > > new_page->mapping = NULL;
On Mon, Mar 28, 2022 at 9:02 PM Tong Tiangen <tongtiangen@huawei.com> wrote: > > > > 在 2022/3/24 7:29, Jiaqi Yan 写道: > > Make collapse_file roll back when copying pages failed. > > More concretely: > > * extract copy operations into a separate loop > > * postpone the updates for nr_none until both scan and copy succeeded > > * postpone joining small xarray entries until both scan and copy > > succeeded > > * as for update operations to NR_XXX_THPS > > * for SHMEM file, postpone until both scan and copy succeeded > > * for other file, roll back if scan succeeded but copy failed > > > > Signed-off-by: Jiaqi Yan <jiaqiyan@google.com> > > --- > > include/linux/highmem.h | 18 ++++++++++ > > mm/khugepaged.c | 75 +++++++++++++++++++++++++++-------------- > > 2 files changed, 67 insertions(+), 26 deletions(-) > > > > diff --git a/include/linux/highmem.h b/include/linux/highmem.h > > index 15d0aa4d349c..fc5aa221bdb5 100644 > > --- a/include/linux/highmem.h > > +++ b/include/linux/highmem.h > > @@ -315,6 +315,24 @@ static inline void copy_highpage(struct page *to, struct page *from) > > kunmap_local(vfrom); > > } > > > > +/* > > + * Machine check exception handled version of copy_highpage. > > + * Return true if copying page content failed; otherwise false. 
> > + */ > > +static inline bool copy_highpage_mc(struct page *to, struct page *from) > > +{ > > + char *vfrom, *vto; > > + unsigned long ret; > > + > > + vfrom = kmap_local_page(from); > > + vto = kmap_local_page(to); > > + ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); > > + kunmap_local(vto); > > + kunmap_local(vfrom); > > + > > + return ret > 0; > > +} > > + > > #endif > > > > static inline void memcpy_page(struct page *dst_page, size_t dst_off, > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > > index 84ed177f56ff..ed2b1cd4bbc6 100644 > > --- a/mm/khugepaged.c > > +++ b/mm/khugepaged.c > > @@ -1708,12 +1708,13 @@ static void collapse_file(struct mm_struct *mm, > > { > > struct address_space *mapping = file->f_mapping; > > gfp_t gfp; > > - struct page *new_page; > > + struct page *new_page, *page, *tmp; > > pgoff_t index, end = start + HPAGE_PMD_NR; > > LIST_HEAD(pagelist); > > XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); > > int nr_none = 0, result = SCAN_SUCCEED; > > bool is_shmem = shmem_file(file); > > + bool copy_failed = false; > > int nr; > > > > VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); > > @@ -1936,9 +1937,7 @@ static void collapse_file(struct mm_struct *mm, > > } > > nr = thp_nr_pages(new_page); > > > > - if (is_shmem) > > - __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); > > - else { > > + if (!is_shmem) { > > __mod_lruvec_page_state(new_page, NR_FILE_THPS, nr); > > filemap_nr_thps_inc(mapping); > > /* > > @@ -1956,34 +1955,39 @@ static void collapse_file(struct mm_struct *mm, > > } > > } > > > > - if (nr_none) { > > - __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none); > > - if (is_shmem) > > - __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); > > - } > > - > > - /* Join all the small entries into a single multi-index entry */ > > - xas_set_order(&xas, start, HPAGE_PMD_ORDER); > > - xas_store(&xas, new_page); > > xa_locked: > > xas_unlock_irq(&xas); > > xa_unlocked: > > > > if (result 
== SCAN_SUCCEED) { > > - struct page *page, *tmp; > > - > > /* > > * Replacing old pages with new one has succeeded, now we > > - * need to copy the content and free the old pages. > > + * attempt to copy the contents. > > */ > > index = start; > > - list_for_each_entry_safe(page, tmp, &pagelist, lru) { > > + list_for_each_entry(page, &pagelist, lru) { > > while (index < page->index) { > > clear_highpage(new_page + (index % HPAGE_PMD_NR)); > > index++; > > } > > - copy_highpage(new_page + (page->index % HPAGE_PMD_NR), > > - page); > > + if (copy_highpage_mc(new_page + (page->index % HPAGE_PMD_NR), page)) { > > + copy_failed = true; > > The 1st patch here used "copy_succeed = false", It is best that the > logic of the two positions can be unified. copy_failed here will be eliminated once we have SCAN_COPY_MC defined in version 2. Version 2 also renames "copy_succeeded" in collapse_huge_page() to "copied", mimicking the "isolated" variable for __collapse_huge_page_isolate(). > > > + break; > > + } > > + index++; > > + } > > + while (!copy_failed && index < end) { > > + clear_highpage(new_page + (page->index % HPAGE_PMD_NR)); > > + index++; > > + } > > + } > > + > > + if (result == SCAN_SUCCEED && !copy_failed) { > > + /* > > + * Copying old pages to huge one has succeeded, now we > > + * need to free the old pages. 
> > + */ > > + list_for_each_entry_safe(page, tmp, &pagelist, lru) { > > list_del(&page->lru); > > page->mapping = NULL; > > page_ref_unfreeze(page, 1); > > @@ -1991,12 +1995,20 @@ static void collapse_file(struct mm_struct *mm, > > ClearPageUnevictable(page); > > unlock_page(page); > > put_page(page); > > - index++; > > } > > - while (index < end) { > > - clear_highpage(new_page + (index % HPAGE_PMD_NR)); > > - index++; > > + > > + xas_lock_irq(&xas); > > + if (is_shmem) > > + __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); > > + if (nr_none) { > > + __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none); > > + if (is_shmem) > > + __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); > > } > > + /* Join all the small entries into a single multi-index entry. */ > > + xas_set_order(&xas, start, HPAGE_PMD_ORDER); > > + xas_store(&xas, new_page); > > + xas_unlock_irq(&xas); > > > > SetPageUptodate(new_page); > > page_ref_add(new_page, HPAGE_PMD_NR - 1); > > @@ -2012,9 +2024,11 @@ static void collapse_file(struct mm_struct *mm, > > > > khugepaged_pages_collapsed++; > > } else { > > - struct page *page; > > - > > - /* Something went wrong: roll back page cache changes */ > > + /* > > + * Something went wrong: > > + * either result != SCAN_SUCCEED or copy_failed, > > + * roll back page cache changes > > + */ > > xas_lock_irq(&xas); > > mapping->nrpages -= nr_none; > > > > @@ -2047,6 +2061,15 @@ static void collapse_file(struct mm_struct *mm, > > xas_lock_irq(&xas); > > } > > VM_BUG_ON(nr_none); > > + /* > > + * Undo the updates of thp_nr_pages(new_page) for non-SHMEM file, > > + * which is not updated yet for SHMEM file. > > + * These undos are not needed if result is not SCAN_SUCCEED. > > + */ > > + if (!is_shmem && result == SCAN_SUCCEED) { > > + __mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr); > > + filemap_nr_thps_dec(mapping); > > + } > > xas_unlock_irq(&xas); > > > > new_page->mapping = NULL;
On Mon, Mar 28, 2022 at 4:37 PM Yang Shi <shy828301@gmail.com> wrote: > > On Wed, Mar 23, 2022 at 4:29 PM Jiaqi Yan <jiaqiyan@google.com> wrote: > > > > Make collapse_file roll back when copying pages failed. > > More concretely: > > * extract copy operations into a separate loop > > * postpone the updates for nr_none until both scan and copy succeeded > > * postpone joining small xarray entries until both scan and copy > > succeeded > > * as for update operations to NR_XXX_THPS > > * for SHMEM file, postpone until both scan and copy succeeded > > * for other file, roll back if scan succeeded but copy failed > > > > Signed-off-by: Jiaqi Yan <jiaqiyan@google.com> > > --- > > include/linux/highmem.h | 18 ++++++++++ > > mm/khugepaged.c | 75 +++++++++++++++++++++++++++-------------- > > 2 files changed, 67 insertions(+), 26 deletions(-) > > > > diff --git a/include/linux/highmem.h b/include/linux/highmem.h > > index 15d0aa4d349c..fc5aa221bdb5 100644 > > --- a/include/linux/highmem.h > > +++ b/include/linux/highmem.h > > @@ -315,6 +315,24 @@ static inline void copy_highpage(struct page *to, struct page *from) > > kunmap_local(vfrom); > > } > > > > +/* > > + * Machine check exception handled version of copy_highpage. > > + * Return true if copying page content failed; otherwise false. 
> > + */ > > +static inline bool copy_highpage_mc(struct page *to, struct page *from) > > +{ > > + char *vfrom, *vto; > > + unsigned long ret; > > + > > + vfrom = kmap_local_page(from); > > + vto = kmap_local_page(to); > > + ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); > > + kunmap_local(vto); > > + kunmap_local(vfrom); > > + > > + return ret > 0; > > +} > > + > > #endif > > > > static inline void memcpy_page(struct page *dst_page, size_t dst_off, > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > > index 84ed177f56ff..ed2b1cd4bbc6 100644 > > --- a/mm/khugepaged.c > > +++ b/mm/khugepaged.c > > @@ -1708,12 +1708,13 @@ static void collapse_file(struct mm_struct *mm, > > { > > struct address_space *mapping = file->f_mapping; > > gfp_t gfp; > > - struct page *new_page; > > + struct page *new_page, *page, *tmp; > > It seems you removed the "struct page *page" from " if (result == > SCAN_SUCCEED)", but keep the "struct page *page" under "for (index = > start; index < end; index++)". I think the "struct page *page" in the > for loop could be removed too. I agree. Removed in the next version. 
> > > pgoff_t index, end = start + HPAGE_PMD_NR; > > LIST_HEAD(pagelist); > > XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); > > int nr_none = 0, result = SCAN_SUCCEED; > > bool is_shmem = shmem_file(file); > > + bool copy_failed = false; > > int nr; > > > > VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); > > @@ -1936,9 +1937,7 @@ static void collapse_file(struct mm_struct *mm, > > } > > nr = thp_nr_pages(new_page); > > > > - if (is_shmem) > > - __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); > > - else { > > + if (!is_shmem) { > > __mod_lruvec_page_state(new_page, NR_FILE_THPS, nr); > > filemap_nr_thps_inc(mapping); > > /* > > @@ -1956,34 +1955,39 @@ static void collapse_file(struct mm_struct *mm, > > } > > } > > > > - if (nr_none) { > > - __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none); > > - if (is_shmem) > > - __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); > > - } > > - > > - /* Join all the small entries into a single multi-index entry */ > > - xas_set_order(&xas, start, HPAGE_PMD_ORDER); > > - xas_store(&xas, new_page); > > xa_locked: > > xas_unlock_irq(&xas); > > xa_unlocked: > > > > if (result == SCAN_SUCCEED) { > > - struct page *page, *tmp; > > - > > /* > > * Replacing old pages with new one has succeeded, now we > > - * need to copy the content and free the old pages. > > + * attempt to copy the contents. 
> > */ > > index = start; > > - list_for_each_entry_safe(page, tmp, &pagelist, lru) { > > + list_for_each_entry(page, &pagelist, lru) { > > while (index < page->index) { > > clear_highpage(new_page + (index % HPAGE_PMD_NR)); > > index++; > > } > > - copy_highpage(new_page + (page->index % HPAGE_PMD_NR), > > - page); > > + if (copy_highpage_mc(new_page + (page->index % HPAGE_PMD_NR), page)) { > > + copy_failed = true; > > + break; > > + } > > + index++; > > + } > > + while (!copy_failed && index < end) { > > + clear_highpage(new_page + (page->index % HPAGE_PMD_NR)); > > + index++; > > + } > > + } > > + > > + if (result == SCAN_SUCCEED && !copy_failed) { > > I think you could set "result" to SCAN_COPY_MC (as same as the > anonymous one), then you could drop !copy_failed and use "result" > alone afterwards. Yes, copy_failed variable will be eliminated in the next version. > > > + /* > > + * Copying old pages to huge one has succeeded, now we > > + * need to free the old pages. > > + */ > > + list_for_each_entry_safe(page, tmp, &pagelist, lru) { > > list_del(&page->lru); > > page->mapping = NULL; > > page_ref_unfreeze(page, 1); > > @@ -1991,12 +1995,20 @@ static void collapse_file(struct mm_struct *mm, > > ClearPageUnevictable(page); > > unlock_page(page); > > put_page(page); > > - index++; > > } > > - while (index < end) { > > - clear_highpage(new_page + (index % HPAGE_PMD_NR)); > > - index++; > > + > > + xas_lock_irq(&xas); > > + if (is_shmem) > > + __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); > > + if (nr_none) { > > + __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none); > > + if (is_shmem) > > + __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); > > } > > + /* Join all the small entries into a single multi-index entry. 
*/ > > + xas_set_order(&xas, start, HPAGE_PMD_ORDER); > > + xas_store(&xas, new_page); > > + xas_unlock_irq(&xas); > > > > SetPageUptodate(new_page); > > page_ref_add(new_page, HPAGE_PMD_NR - 1); > > @@ -2012,9 +2024,11 @@ static void collapse_file(struct mm_struct *mm, > > > > khugepaged_pages_collapsed++; > > } else { > > - struct page *page; > > - > > - /* Something went wrong: roll back page cache changes */ > > + /* > > + * Something went wrong: > > + * either result != SCAN_SUCCEED or copy_failed, > > + * roll back page cache changes > > + */ > > xas_lock_irq(&xas); > > mapping->nrpages -= nr_none; > > > > @@ -2047,6 +2061,15 @@ static void collapse_file(struct mm_struct *mm, > > xas_lock_irq(&xas); > > } > > VM_BUG_ON(nr_none); > > + /* > > + * Undo the updates of thp_nr_pages(new_page) for non-SHMEM file, > > + * which is not updated yet for SHMEM file. > > + * These undos are not needed if result is not SCAN_SUCCEED. > > + */ > > + if (!is_shmem && result == SCAN_SUCCEED) { > > Handling the error case when "result == SCAN_SUCCEED" looks awkward. > With the above fixed (set result to SCAN_COPY_MC) we could avoid the > awkwardness. > Will look less awkward in the next version :) > > + __mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr); > > I'm wondering whether we may defer NR_FILE_THPS update just like SHMEM > because it has not to be updated in advance so that we have the user > visible counters update in the single place. Just filemap_nr_thps > needs to be updated in advance since it is used to sync with file open > path to truncate huge pages. In version 2, I will postpone __mod_lruvec_page_state() until copying succeeded, and undo filemap_nr_thps_inc if (!is_shmem && result == SCAN_COPY_MC). > > > + filemap_nr_thps_dec(mapping); > > + } > > xas_unlock_irq(&xas); > > > > new_page->mapping = NULL; > > -- > > 2.35.1.894.gb6a874cedc-goog > > Thanks for the valuable feedback. I will send out the v2 RFC separately, and please leave/reply there. Thanks!
diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 15d0aa4d349c..fc5aa221bdb5 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -315,6 +315,24 @@ static inline void copy_highpage(struct page *to, struct page *from) kunmap_local(vfrom); } +/* + * Machine check exception handled version of copy_highpage. + * Return true if copying page content failed; otherwise false. + */ +static inline bool copy_highpage_mc(struct page *to, struct page *from) +{ + char *vfrom, *vto; + unsigned long ret; + + vfrom = kmap_local_page(from); + vto = kmap_local_page(to); + ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE); + kunmap_local(vto); + kunmap_local(vfrom); + + return ret > 0; +} + #endif static inline void memcpy_page(struct page *dst_page, size_t dst_off, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 84ed177f56ff..ed2b1cd4bbc6 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1708,12 +1708,13 @@ static void collapse_file(struct mm_struct *mm, { struct address_space *mapping = file->f_mapping; gfp_t gfp; - struct page *new_page; + struct page *new_page, *page, *tmp; pgoff_t index, end = start + HPAGE_PMD_NR; LIST_HEAD(pagelist); XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); int nr_none = 0, result = SCAN_SUCCEED; bool is_shmem = shmem_file(file); + bool copy_failed = false; int nr; VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); @@ -1936,9 +1937,7 @@ static void collapse_file(struct mm_struct *mm, } nr = thp_nr_pages(new_page); - if (is_shmem) - __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); - else { + if (!is_shmem) { __mod_lruvec_page_state(new_page, NR_FILE_THPS, nr); filemap_nr_thps_inc(mapping); /* @@ -1956,34 +1955,39 @@ static void collapse_file(struct mm_struct *mm, } } - if (nr_none) { - __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none); - if (is_shmem) - __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); - } - - /* Join all the small entries into a single multi-index 
entry */ - xas_set_order(&xas, start, HPAGE_PMD_ORDER); - xas_store(&xas, new_page); xa_locked: xas_unlock_irq(&xas); xa_unlocked: if (result == SCAN_SUCCEED) { - struct page *page, *tmp; - /* * Replacing old pages with new one has succeeded, now we - * need to copy the content and free the old pages. + * attempt to copy the contents. */ index = start; - list_for_each_entry_safe(page, tmp, &pagelist, lru) { + list_for_each_entry(page, &pagelist, lru) { while (index < page->index) { clear_highpage(new_page + (index % HPAGE_PMD_NR)); index++; } - copy_highpage(new_page + (page->index % HPAGE_PMD_NR), - page); + if (copy_highpage_mc(new_page + (page->index % HPAGE_PMD_NR), page)) { + copy_failed = true; + break; + } + index++; + } + while (!copy_failed && index < end) { + clear_highpage(new_page + (page->index % HPAGE_PMD_NR)); + index++; + } + } + + if (result == SCAN_SUCCEED && !copy_failed) { + /* + * Copying old pages to huge one has succeeded, now we + * need to free the old pages. + */ + list_for_each_entry_safe(page, tmp, &pagelist, lru) { list_del(&page->lru); page->mapping = NULL; page_ref_unfreeze(page, 1); @@ -1991,12 +1995,20 @@ static void collapse_file(struct mm_struct *mm, ClearPageUnevictable(page); unlock_page(page); put_page(page); - index++; } - while (index < end) { - clear_highpage(new_page + (index % HPAGE_PMD_NR)); - index++; + + xas_lock_irq(&xas); + if (is_shmem) + __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr); + if (nr_none) { + __mod_lruvec_page_state(new_page, NR_FILE_PAGES, nr_none); + if (is_shmem) + __mod_lruvec_page_state(new_page, NR_SHMEM, nr_none); } + /* Join all the small entries into a single multi-index entry. 
*/ + xas_set_order(&xas, start, HPAGE_PMD_ORDER); + xas_store(&xas, new_page); + xas_unlock_irq(&xas); SetPageUptodate(new_page); page_ref_add(new_page, HPAGE_PMD_NR - 1); @@ -2012,9 +2024,11 @@ static void collapse_file(struct mm_struct *mm, khugepaged_pages_collapsed++; } else { - struct page *page; - - /* Something went wrong: roll back page cache changes */ + /* + * Something went wrong: + * either result != SCAN_SUCCEED or copy_failed, + * roll back page cache changes + */ xas_lock_irq(&xas); mapping->nrpages -= nr_none; @@ -2047,6 +2061,15 @@ static void collapse_file(struct mm_struct *mm, xas_lock_irq(&xas); } VM_BUG_ON(nr_none); + /* + * Undo the updates of thp_nr_pages(new_page) for non-SHMEM file, + * which is not updated yet for SHMEM file. + * These undos are not needed if result is not SCAN_SUCCEED. + */ + if (!is_shmem && result == SCAN_SUCCEED) { + __mod_lruvec_page_state(new_page, NR_FILE_THPS, -nr); + filemap_nr_thps_dec(mapping); + } xas_unlock_irq(&xas); new_page->mapping = NULL;
Make collapse_file roll back when copying pages failed.
More concretely:
* extract copy operations into a separate loop
* postpone the updates for nr_none until both scan and copy succeeded
* postpone joining small xarray entries until both scan and copy
  succeeded
* as for update operations to NR_XXX_THPS
  * for SHMEM file, postpone until both scan and copy succeeded
  * for other file, roll back if scan succeeded but copy failed

Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
---
 include/linux/highmem.h | 18 ++++++++++
 mm/khugepaged.c         | 75 +++++++++++++++++++++++++++--------------
 2 files changed, 67 insertions(+), 26 deletions(-)