Message ID | 20240503005023.174597-3-21cnbao@gmail.com (mailing list archive)
---|---
State | New
Series | large folios swap-in: handle refault cases first
On 03/05/2024 01:50, Barry Song wrote: > From: Barry Song <v-songbaohua@oppo.com> > > To streamline maintenance efforts, we propose discontinuing the use of > swap_free(). Instead, we can simply invoke swap_free_nr() with nr set > to 1. This adjustment offers the advantage of enabling batch processing > within kernel/power/swap.c. Furthermore, swap_free_nr() is designed with > a bitmap consisting of only one long, resulting in overhead that can be > ignored for cases where nr equals 1. > > Suggested-by: "Huang, Ying" <ying.huang@intel.com> > Signed-off-by: Barry Song <v-songbaohua@oppo.com> > Cc: "Rafael J. Wysocki" <rafael@kernel.org> > Cc: Pavel Machek <pavel@ucw.cz> > Cc: Len Brown <len.brown@intel.com> > Cc: Hugh Dickins <hughd@google.com> > --- > include/linux/swap.h | 5 ----- > kernel/power/swap.c | 7 +++---- > mm/memory.c | 2 +- > mm/rmap.c | 4 ++-- > mm/shmem.c | 4 ++-- > mm/swapfile.c | 19 +++++-------------- > 6 files changed, 13 insertions(+), 28 deletions(-) > > diff --git a/include/linux/swap.h b/include/linux/swap.h > index d1d35e92d7e9..f03cb446124e 100644 > --- a/include/linux/swap.h > +++ b/include/linux/swap.h > @@ -482,7 +482,6 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t); > extern void swap_shmem_alloc(swp_entry_t); > extern int swap_duplicate(swp_entry_t); > extern int swapcache_prepare(swp_entry_t); > -extern void swap_free(swp_entry_t); I wonder if it would be cleaner to: #define swap_free(entry) swap_free_nr((entry), 1) To save all the churn for the callsites that just want to pass a single entry? > extern void swap_free_nr(swp_entry_t entry, int nr_pages); > extern void swapcache_free_entries(swp_entry_t *entries, int n); > extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); > @@ -561,10 +560,6 @@ static inline int swapcache_prepare(swp_entry_t swp) > return 0; > } > > -static inline void swap_free(swp_entry_t swp) > -{ > -} > - > static inline void swap_free_nr(swp_entry_t entry, int nr_pages) > { > } > diff --git a/kernel/power/swap.c b/kernel/power/swap.c > index 5bc04bfe2db1..6befaa88a342 100644 > --- a/kernel/power/swap.c > +++ b/kernel/power/swap.c > @@ -181,7 +181,7 @@ sector_t alloc_swapdev_block(int swap) > offset = swp_offset(get_swap_page_of_type(swap)); > if (offset) { > if (swsusp_extents_insert(offset)) > - swap_free(swp_entry(swap, offset)); > + swap_free_nr(swp_entry(swap, offset), 1); > else > return swapdev_block(swap, offset); > } > @@ -200,12 +200,11 @@ void free_all_swap_pages(int swap) > > while ((node = swsusp_extents.rb_node)) { > struct swsusp_extent *ext; > - unsigned long offset; > > ext = rb_entry(node, struct swsusp_extent, node); > rb_erase(node, &swsusp_extents); > - for (offset = ext->start; offset <= ext->end; offset++) > - swap_free(swp_entry(swap, offset)); > + swap_free_nr(swp_entry(swap, ext->start), > + ext->end - ext->start + 1); > > kfree(ext); > } > diff --git a/mm/memory.c b/mm/memory.c > index eea6e4984eae..f033eb3528ba 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -4225,7 +4225,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) > * We're already holding a reference on the page but haven't mapped it > * yet. 
> */ > - swap_free(entry); > + swap_free_nr(entry, 1); > if (should_try_to_free_swap(folio, vma, vmf->flags)) > folio_free_swap(folio); > > diff --git a/mm/rmap.c b/mm/rmap.c > index 087a79f1f611..39ec7742acec 100644 > --- a/mm/rmap.c > +++ b/mm/rmap.c > @@ -1865,7 +1865,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, > goto walk_done_err; > } > if (arch_unmap_one(mm, vma, address, pteval) < 0) { > - swap_free(entry); > + swap_free_nr(entry, 1); > set_pte_at(mm, address, pvmw.pte, pteval); > goto walk_done_err; > } > @@ -1873,7 +1873,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, > /* See folio_try_share_anon_rmap(): clear PTE first. */ > if (anon_exclusive && > folio_try_share_anon_rmap_pte(folio, subpage)) { > - swap_free(entry); > + swap_free_nr(entry, 1); > set_pte_at(mm, address, pvmw.pte, pteval); > goto walk_done_err; > } > diff --git a/mm/shmem.c b/mm/shmem.c > index fa2a0ed97507..bfc8a2beb24f 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -1836,7 +1836,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, > * in shmem_evict_inode(). > */ > shmem_recalc_inode(inode, -1, -1); > - swap_free(swap); > + swap_free_nr(swap, 1); > } > > /* > @@ -1927,7 +1927,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, > > delete_from_swap_cache(folio); > folio_mark_dirty(folio); > - swap_free(swap); > + swap_free_nr(swap, 1); > put_swap_device(si); > > *foliop = folio; > diff --git a/mm/swapfile.c b/mm/swapfile.c > index ec12f2b9d229..ddcd0f24b9a1 100644 > --- a/mm/swapfile.c > +++ b/mm/swapfile.c > @@ -1343,19 +1343,6 @@ static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) > swap_range_free(p, offset, 1); > } > > -/* > - * Caller has made sure that the swap device corresponding to entry > - * is still around or has not been recycled. > - */ > -void swap_free(swp_entry_t entry) > -{ > - struct swap_info_struct *p; > - > - p = _swap_info_get(entry); > - if (p) > - __swap_entry_free(p, entry); > -} > - > static void cluster_swap_free_nr(struct swap_info_struct *sis, > unsigned long offset, int nr_pages) > { > @@ -1385,6 +1372,10 @@ static void cluster_swap_free_nr(struct swap_info_struct *sis, > unlock_cluster_or_swap_info(sis, ci); > } > > +/* > + * Caller has made sure that the swap device corresponding to entry > + * is still around or has not been recycled. > + */ > void swap_free_nr(swp_entry_t entry, int nr_pages) > { > int nr; > @@ -1930,7 +1921,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, > new_pte = pte_mkuffd_wp(new_pte); > setpte: > set_pte_at(vma->vm_mm, addr, pte, new_pte); > - swap_free(entry); > + swap_free_nr(entry, 1); > out: > if (pte) > pte_unmap_unlock(pte, ptl);
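To make the suggestion above concrete, here is a minimal sketch of the alias Ryan is describing (illustrative only; it is not part of the posted patch, and it assumes the swap_free_nr() prototype from the include/linux/swap.h hunk above):

/* Sketch of the proposed alias -- not what the posted patch does. */
#define swap_free(entry)	swap_free_nr((entry), 1)

/* Existing single-entry callers, e.g. in do_swap_page(), stay as-is: */
swap_free(entry);		/* expands to swap_free_nr(entry, 1) */

With that alias in place, the callsites touched by the patch would not need any churn.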
On Fri, May 3, 2024 at 2:31 AM Ryan Roberts <ryan.roberts@arm.com> wrote: > > On 03/05/2024 01:50, Barry Song wrote: > > From: Barry Song <v-songbaohua@oppo.com> > > > > To streamline maintenance efforts, we propose discontinuing the use of > > swap_free(). Instead, we can simply invoke swap_free_nr() with nr set > > to 1. This adjustment offers the advantage of enabling batch processing > > within kernel/power/swap.c. Furthermore, swap_free_nr() is designed with > > a bitmap consisting of only one long, resulting in overhead that can be > > ignored for cases where nr equals 1. > > > > Suggested-by: "Huang, Ying" <ying.huang@intel.com> > > Signed-off-by: Barry Song <v-songbaohua@oppo.com> > > Cc: "Rafael J. Wysocki" <rafael@kernel.org> > > Cc: Pavel Machek <pavel@ucw.cz> > > Cc: Len Brown <len.brown@intel.com> > > Cc: Hugh Dickins <hughd@google.com> > > --- > > include/linux/swap.h | 5 ----- > > kernel/power/swap.c | 7 +++---- > > mm/memory.c | 2 +- > > mm/rmap.c | 4 ++-- > > mm/shmem.c | 4 ++-- > > mm/swapfile.c | 19 +++++-------------- > > 6 files changed, 13 insertions(+), 28 deletions(-) > > > > diff --git a/include/linux/swap.h b/include/linux/swap.h > > index d1d35e92d7e9..f03cb446124e 100644 > > --- a/include/linux/swap.h > > +++ b/include/linux/swap.h > > @@ -482,7 +482,6 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t); > > extern void swap_shmem_alloc(swp_entry_t); > > extern int swap_duplicate(swp_entry_t); > > extern int swapcache_prepare(swp_entry_t); > > -extern void swap_free(swp_entry_t); > > I wonder if it would be cleaner to: > > #define swap_free(entry) swap_free_nr((entry), 1) > > To save all the churn for the callsites that just want to pass a single entry? > Either way works. It will produce the same machine code. I have a slight inclination to just drop swap_free(entry) API so that it discourages the caller to do a for loop over swap_free(). Acked-by: Chris Li <chrisl@kernel.org> Chris
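The pattern Chris wants to discourage is the open-coded loop over single-entry frees. A rough sketch of the contrast (the variables here are hypothetical, not taken from the patch):

/* Discouraged: one swap-info lookup and free per entry. */
for (i = 0; i < nr_pages; i++)
	swap_free(swp_entry(type, offset + i));

/* Preferred: hand the whole contiguous range to the swap layer. */
swap_free_nr(swp_entry(type, offset), nr_pages);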
On Fri, May 03, 2024 at 01:37:06PM -0700, Chris Li wrote:
> Either way works. It will produce the same machine code. I have a
> slight inclination to just drop swap_free(entry) API so that it
> discourages the caller to do a for loop over swap_free().

Then just add the number of entries parameter to swap_free and do away
with the separate swap_free_nr.
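A sketch of the shape Christoph is suggesting (hypothetical signature; the posted patch keeps the swap_free_nr() name instead):

/* One function, with the count as an explicit parameter. */
extern void swap_free(swp_entry_t entry, int nr_pages);

/* Callers pass 1 for a single entry ... */
swap_free(entry, 1);

/* ... or a whole extent, as in free_all_swap_pages() above: */
swap_free(swp_entry(swap, ext->start), ext->end - ext->start + 1);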
On Sat, May 4, 2024 at 12:03 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Fri, May 03, 2024 at 01:37:06PM -0700, Chris Li wrote:
> > Either way works. It will produce the same machine code. I have a
> > slight inclination to just drop swap_free(entry) API so that it
> > discourages the caller to do a for loop over swap_free().
>
> Then just add the number of entries parameter to swap_free and do away
> with the separate swap_free_nr.

swap_free_nr() isn't separate; after this patch it is the only one left,
and there won't be a swap_free() any more. It seems you want to directly
"rename" it to swap_free()?
On Sat, May 04, 2024 at 12:27:11PM +0800, Barry Song wrote:
> swap_free_nr() isn't separate; after this patch it is the only one left,
> and there won't be a swap_free() any more. It seems you want to directly
> "rename" it to swap_free()?

Yes. Avoid the pointless suffix if it is the only variant.
On Sat, May 4, 2024 at 12:29 PM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Sat, May 04, 2024 at 12:27:11PM +0800, Barry Song wrote:
> > swap_free_nr() isn't separate; after this patch it is the only one left,
> > and there won't be a swap_free() any more. It seems you want to directly
> > "rename" it to swap_free()?
>
> Yes. Avoid the pointless suffix if it is the only variant.

Well, it seems you are right. We usually use a suffix to differentiate two
or more cases, but now that only one case is left, the suffix is no longer
useful.

One more problem is that free_swap_and_cache_nr() and swap_free_nr() are
not quite aligned:

extern void free_swap_and_cache_nr(swp_entry_t entry, int nr);

static inline void free_swap_and_cache(swp_entry_t entry)
{
	free_swap_and_cache_nr(entry, 1);
}

The problem space is the same. I feel like, in that case, we can also drop
free_swap_and_cache_nr() and simply add the nr parameter?
Ryan Roberts <ryan.roberts@arm.com> writes: > On 03/05/2024 01:50, Barry Song wrote: >> From: Barry Song <v-songbaohua@oppo.com> >> >> To streamline maintenance efforts, we propose discontinuing the use of >> swap_free(). Instead, we can simply invoke swap_free_nr() with nr set >> to 1. This adjustment offers the advantage of enabling batch processing >> within kernel/power/swap.c. Furthermore, swap_free_nr() is designed with >> a bitmap consisting of only one long, resulting in overhead that can be >> ignored for cases where nr equals 1. >> >> Suggested-by: "Huang, Ying" <ying.huang@intel.com> >> Signed-off-by: Barry Song <v-songbaohua@oppo.com> >> Cc: "Rafael J. Wysocki" <rafael@kernel.org> >> Cc: Pavel Machek <pavel@ucw.cz> >> Cc: Len Brown <len.brown@intel.com> >> Cc: Hugh Dickins <hughd@google.com> >> --- >> include/linux/swap.h | 5 ----- >> kernel/power/swap.c | 7 +++---- >> mm/memory.c | 2 +- >> mm/rmap.c | 4 ++-- >> mm/shmem.c | 4 ++-- >> mm/swapfile.c | 19 +++++-------------- >> 6 files changed, 13 insertions(+), 28 deletions(-) >> >> diff --git a/include/linux/swap.h b/include/linux/swap.h >> index d1d35e92d7e9..f03cb446124e 100644 >> --- a/include/linux/swap.h >> +++ b/include/linux/swap.h >> @@ -482,7 +482,6 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t); >> extern void swap_shmem_alloc(swp_entry_t); >> extern int swap_duplicate(swp_entry_t); >> extern int swapcache_prepare(swp_entry_t); >> -extern void swap_free(swp_entry_t); > > I wonder if it would be cleaner to: > > #define swap_free(entry) swap_free_nr((entry), 1) > > To save all the churn for the callsites that just want to pass a single entry? I prefer this way. Although I prefer inline functions. Otherwise, LGTM. Feel free to add Reviewed-by: "Huang, Ying" <ying.huang@intel.com> in the future version. >> extern void swap_free_nr(swp_entry_t entry, int nr_pages); >> extern void swapcache_free_entries(swp_entry_t *entries, int n); >> extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); >> @@ -561,10 +560,6 @@ static inline int swapcache_prepare(swp_entry_t swp) >> return 0; >> } >> >> -static inline void swap_free(swp_entry_t swp) >> -{ >> -} >> - >> static inline void swap_free_nr(swp_entry_t entry, int nr_pages) >> { >> } >> diff --git a/kernel/power/swap.c b/kernel/power/swap.c >> index 5bc04bfe2db1..6befaa88a342 100644 >> --- a/kernel/power/swap.c >> +++ b/kernel/power/swap.c >> @@ -181,7 +181,7 @@ sector_t alloc_swapdev_block(int swap) >> offset = swp_offset(get_swap_page_of_type(swap)); >> if (offset) { >> if (swsusp_extents_insert(offset)) >> - swap_free(swp_entry(swap, offset)); >> + swap_free_nr(swp_entry(swap, offset), 1); >> else >> return swapdev_block(swap, offset); >> } >> @@ -200,12 +200,11 @@ void free_all_swap_pages(int swap) >> >> while ((node = swsusp_extents.rb_node)) { >> struct swsusp_extent *ext; >> - unsigned long offset; >> >> ext = rb_entry(node, struct swsusp_extent, node); >> rb_erase(node, &swsusp_extents); >> - for (offset = ext->start; offset <= ext->end; offset++) >> - swap_free(swp_entry(swap, offset)); >> + swap_free_nr(swp_entry(swap, ext->start), >> + ext->end - ext->start + 1); >> >> kfree(ext); >> } >> diff --git a/mm/memory.c b/mm/memory.c >> index eea6e4984eae..f033eb3528ba 100644 >> --- a/mm/memory.c >> +++ b/mm/memory.c >> @@ -4225,7 +4225,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) >> * We're already holding a reference on the page but haven't mapped it >> * yet. 
>> */ >> - swap_free(entry); >> + swap_free_nr(entry, 1); >> if (should_try_to_free_swap(folio, vma, vmf->flags)) >> folio_free_swap(folio); >> >> diff --git a/mm/rmap.c b/mm/rmap.c >> index 087a79f1f611..39ec7742acec 100644 >> --- a/mm/rmap.c >> +++ b/mm/rmap.c >> @@ -1865,7 +1865,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, >> goto walk_done_err; >> } >> if (arch_unmap_one(mm, vma, address, pteval) < 0) { >> - swap_free(entry); >> + swap_free_nr(entry, 1); >> set_pte_at(mm, address, pvmw.pte, pteval); >> goto walk_done_err; >> } >> @@ -1873,7 +1873,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, >> /* See folio_try_share_anon_rmap(): clear PTE first. */ >> if (anon_exclusive && >> folio_try_share_anon_rmap_pte(folio, subpage)) { >> - swap_free(entry); >> + swap_free_nr(entry, 1); >> set_pte_at(mm, address, pvmw.pte, pteval); >> goto walk_done_err; >> } >> diff --git a/mm/shmem.c b/mm/shmem.c >> index fa2a0ed97507..bfc8a2beb24f 100644 >> --- a/mm/shmem.c >> +++ b/mm/shmem.c >> @@ -1836,7 +1836,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, >> * in shmem_evict_inode(). >> */ >> shmem_recalc_inode(inode, -1, -1); >> - swap_free(swap); >> + swap_free_nr(swap, 1); >> } >> >> /* >> @@ -1927,7 +1927,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, >> >> delete_from_swap_cache(folio); >> folio_mark_dirty(folio); >> - swap_free(swap); >> + swap_free_nr(swap, 1); >> put_swap_device(si); >> >> *foliop = folio; >> diff --git a/mm/swapfile.c b/mm/swapfile.c >> index ec12f2b9d229..ddcd0f24b9a1 100644 >> --- a/mm/swapfile.c >> +++ b/mm/swapfile.c >> @@ -1343,19 +1343,6 @@ static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) >> swap_range_free(p, offset, 1); >> } >> >> -/* >> - * Caller has made sure that the swap device corresponding to entry >> - * is still around or has not been recycled. >> - */ >> -void swap_free(swp_entry_t entry) >> -{ >> - struct swap_info_struct *p; >> - >> - p = _swap_info_get(entry); >> - if (p) >> - __swap_entry_free(p, entry); >> -} >> - >> static void cluster_swap_free_nr(struct swap_info_struct *sis, >> unsigned long offset, int nr_pages) >> { >> @@ -1385,6 +1372,10 @@ static void cluster_swap_free_nr(struct swap_info_struct *sis, >> unlock_cluster_or_swap_info(sis, ci); >> } >> >> +/* >> + * Caller has made sure that the swap device corresponding to entry >> + * is still around or has not been recycled. >> + */ >> void swap_free_nr(swp_entry_t entry, int nr_pages) >> { >> int nr; >> @@ -1930,7 +1921,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, >> new_pte = pte_mkuffd_wp(new_pte); >> setpte: >> set_pte_at(vma->vm_mm, addr, pte, new_pte); >> - swap_free(entry); >> + swap_free_nr(entry, 1); >> out: >> if (pte) >> pte_unmap_unlock(pte, ptl); -- Best Regards, Huang, Ying
On Wed, May 8, 2024 at 7:58 PM Huang, Ying <ying.huang@intel.com> wrote: > > Ryan Roberts <ryan.roberts@arm.com> writes: > > > On 03/05/2024 01:50, Barry Song wrote: > >> From: Barry Song <v-songbaohua@oppo.com> > >> > >> To streamline maintenance efforts, we propose discontinuing the use of > >> swap_free(). Instead, we can simply invoke swap_free_nr() with nr set > >> to 1. This adjustment offers the advantage of enabling batch processing > >> within kernel/power/swap.c. Furthermore, swap_free_nr() is designed with > >> a bitmap consisting of only one long, resulting in overhead that can be > >> ignored for cases where nr equals 1. > >> > >> Suggested-by: "Huang, Ying" <ying.huang@intel.com> > >> Signed-off-by: Barry Song <v-songbaohua@oppo.com> > >> Cc: "Rafael J. Wysocki" <rafael@kernel.org> > >> Cc: Pavel Machek <pavel@ucw.cz> > >> Cc: Len Brown <len.brown@intel.com> > >> Cc: Hugh Dickins <hughd@google.com> > >> --- > >> include/linux/swap.h | 5 ----- > >> kernel/power/swap.c | 7 +++---- > >> mm/memory.c | 2 +- > >> mm/rmap.c | 4 ++-- > >> mm/shmem.c | 4 ++-- > >> mm/swapfile.c | 19 +++++-------------- > >> 6 files changed, 13 insertions(+), 28 deletions(-) > >> > >> diff --git a/include/linux/swap.h b/include/linux/swap.h > >> index d1d35e92d7e9..f03cb446124e 100644 > >> --- a/include/linux/swap.h > >> +++ b/include/linux/swap.h > >> @@ -482,7 +482,6 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t); > >> extern void swap_shmem_alloc(swp_entry_t); > >> extern int swap_duplicate(swp_entry_t); > >> extern int swapcache_prepare(swp_entry_t); > >> -extern void swap_free(swp_entry_t); > > > > I wonder if it would be cleaner to: > > > > #define swap_free(entry) swap_free_nr((entry), 1) > > > > To save all the churn for the callsites that just want to pass a single entry? > > I prefer this way. Although I prefer inline functions. Yes, using static inline is preferable. I've recently submitted a checkpatch/codestyle for this, which can be found at: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git/commit/?h=mm-everything&id=39c58d5ed036 https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git/commit/?h=mm-everything&id=8379bf0b0e1f5 Using static inline aligns with the established rule. > > Otherwise, LGTM. Feel free to add > > Reviewed-by: "Huang, Ying" <ying.huang@intel.com> Thanks! > > in the future version. I believe Christoph's vote leans towards simply removing swap_free_nr and renaming it to swap_free, while adding a new parameter as follows. void swap_free(swp_entry_t entry, int nr); { } now I see Ryan and you prefer static inline swap_free() { swap_free_nr(...., 1) } Chris slightly favors discouraging the use of swap_free() without the new parameter. Removing swap_free() can address this concern. It seems that maintaining swap_free() and having it call swap_free_nr() with a default value of 1 received the most support. To align with free_swap_and_cache() and free_swap_and_cache_nr(), I'll proceed with the "static inline" approach in the new version. Please voice any objections you may have, Christoph, Chris. 
> > >> extern void swap_free_nr(swp_entry_t entry, int nr_pages); > >> extern void swapcache_free_entries(swp_entry_t *entries, int n); > >> extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); > >> @@ -561,10 +560,6 @@ static inline int swapcache_prepare(swp_entry_t swp) > >> return 0; > >> } > >> > >> -static inline void swap_free(swp_entry_t swp) > >> -{ > >> -} > >> - > >> static inline void swap_free_nr(swp_entry_t entry, int nr_pages) > >> { > >> } > >> diff --git a/kernel/power/swap.c b/kernel/power/swap.c > >> index 5bc04bfe2db1..6befaa88a342 100644 > >> --- a/kernel/power/swap.c > >> +++ b/kernel/power/swap.c > >> @@ -181,7 +181,7 @@ sector_t alloc_swapdev_block(int swap) > >> offset = swp_offset(get_swap_page_of_type(swap)); > >> if (offset) { > >> if (swsusp_extents_insert(offset)) > >> - swap_free(swp_entry(swap, offset)); > >> + swap_free_nr(swp_entry(swap, offset), 1); > >> else > >> return swapdev_block(swap, offset); > >> } > >> @@ -200,12 +200,11 @@ void free_all_swap_pages(int swap) > >> > >> while ((node = swsusp_extents.rb_node)) { > >> struct swsusp_extent *ext; > >> - unsigned long offset; > >> > >> ext = rb_entry(node, struct swsusp_extent, node); > >> rb_erase(node, &swsusp_extents); > >> - for (offset = ext->start; offset <= ext->end; offset++) > >> - swap_free(swp_entry(swap, offset)); > >> + swap_free_nr(swp_entry(swap, ext->start), > >> + ext->end - ext->start + 1); > >> > >> kfree(ext); > >> } > >> diff --git a/mm/memory.c b/mm/memory.c > >> index eea6e4984eae..f033eb3528ba 100644 > >> --- a/mm/memory.c > >> +++ b/mm/memory.c > >> @@ -4225,7 +4225,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) > >> * We're already holding a reference on the page but haven't mapped it > >> * yet. > >> */ > >> - swap_free(entry); > >> + swap_free_nr(entry, 1); > >> if (should_try_to_free_swap(folio, vma, vmf->flags)) > >> folio_free_swap(folio); > >> > >> diff --git a/mm/rmap.c b/mm/rmap.c > >> index 087a79f1f611..39ec7742acec 100644 > >> --- a/mm/rmap.c > >> +++ b/mm/rmap.c > >> @@ -1865,7 +1865,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, > >> goto walk_done_err; > >> } > >> if (arch_unmap_one(mm, vma, address, pteval) < 0) { > >> - swap_free(entry); > >> + swap_free_nr(entry, 1); > >> set_pte_at(mm, address, pvmw.pte, pteval); > >> goto walk_done_err; > >> } > >> @@ -1873,7 +1873,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, > >> /* See folio_try_share_anon_rmap(): clear PTE first. */ > >> if (anon_exclusive && > >> folio_try_share_anon_rmap_pte(folio, subpage)) { > >> - swap_free(entry); > >> + swap_free_nr(entry, 1); > >> set_pte_at(mm, address, pvmw.pte, pteval); > >> goto walk_done_err; > >> } > >> diff --git a/mm/shmem.c b/mm/shmem.c > >> index fa2a0ed97507..bfc8a2beb24f 100644 > >> --- a/mm/shmem.c > >> +++ b/mm/shmem.c > >> @@ -1836,7 +1836,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, > >> * in shmem_evict_inode(). 
> >> */ > >> shmem_recalc_inode(inode, -1, -1); > >> - swap_free(swap); > >> + swap_free_nr(swap, 1); > >> } > >> > >> /* > >> @@ -1927,7 +1927,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, > >> > >> delete_from_swap_cache(folio); > >> folio_mark_dirty(folio); > >> - swap_free(swap); > >> + swap_free_nr(swap, 1); > >> put_swap_device(si); > >> > >> *foliop = folio; > >> diff --git a/mm/swapfile.c b/mm/swapfile.c > >> index ec12f2b9d229..ddcd0f24b9a1 100644 > >> --- a/mm/swapfile.c > >> +++ b/mm/swapfile.c > >> @@ -1343,19 +1343,6 @@ static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) > >> swap_range_free(p, offset, 1); > >> } > >> > >> -/* > >> - * Caller has made sure that the swap device corresponding to entry > >> - * is still around or has not been recycled. > >> - */ > >> -void swap_free(swp_entry_t entry) > >> -{ > >> - struct swap_info_struct *p; > >> - > >> - p = _swap_info_get(entry); > >> - if (p) > >> - __swap_entry_free(p, entry); > >> -} > >> - > >> static void cluster_swap_free_nr(struct swap_info_struct *sis, > >> unsigned long offset, int nr_pages) > >> { > >> @@ -1385,6 +1372,10 @@ static void cluster_swap_free_nr(struct swap_info_struct *sis, > >> unlock_cluster_or_swap_info(sis, ci); > >> } > >> > >> +/* > >> + * Caller has made sure that the swap device corresponding to entry > >> + * is still around or has not been recycled. > >> + */ > >> void swap_free_nr(swp_entry_t entry, int nr_pages) > >> { > >> int nr; > >> @@ -1930,7 +1921,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, > >> new_pte = pte_mkuffd_wp(new_pte); > >> setpte: > >> set_pte_at(vma->vm_mm, addr, pte, new_pte); > >> - swap_free(entry); > >> + swap_free_nr(entry, 1); > >> out: > >> if (pte) > >> pte_unmap_unlock(pte, ptl); > > -- > Best Regards, > Huang, Ying Thanks Barry
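For clarity, the shape Barry says he will adopt in the next version would look roughly like this, mirroring the free_swap_and_cache() wrapper quoted earlier in the thread (a sketch, not the final posted code):

extern void swap_free_nr(swp_entry_t entry, int nr_pages);

static inline void swap_free(swp_entry_t entry)
{
	swap_free_nr(entry, 1);
}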
On 08/05/2024 09:30, Barry Song wrote: > On Wed, May 8, 2024 at 7:58 PM Huang, Ying <ying.huang@intel.com> wrote: >> >> Ryan Roberts <ryan.roberts@arm.com> writes: >> >>> On 03/05/2024 01:50, Barry Song wrote: >>>> From: Barry Song <v-songbaohua@oppo.com> >>>> >>>> To streamline maintenance efforts, we propose discontinuing the use of >>>> swap_free(). Instead, we can simply invoke swap_free_nr() with nr set >>>> to 1. This adjustment offers the advantage of enabling batch processing >>>> within kernel/power/swap.c. Furthermore, swap_free_nr() is designed with >>>> a bitmap consisting of only one long, resulting in overhead that can be >>>> ignored for cases where nr equals 1. >>>> >>>> Suggested-by: "Huang, Ying" <ying.huang@intel.com> >>>> Signed-off-by: Barry Song <v-songbaohua@oppo.com> >>>> Cc: "Rafael J. Wysocki" <rafael@kernel.org> >>>> Cc: Pavel Machek <pavel@ucw.cz> >>>> Cc: Len Brown <len.brown@intel.com> >>>> Cc: Hugh Dickins <hughd@google.com> >>>> --- >>>> include/linux/swap.h | 5 ----- >>>> kernel/power/swap.c | 7 +++---- >>>> mm/memory.c | 2 +- >>>> mm/rmap.c | 4 ++-- >>>> mm/shmem.c | 4 ++-- >>>> mm/swapfile.c | 19 +++++-------------- >>>> 6 files changed, 13 insertions(+), 28 deletions(-) >>>> >>>> diff --git a/include/linux/swap.h b/include/linux/swap.h >>>> index d1d35e92d7e9..f03cb446124e 100644 >>>> --- a/include/linux/swap.h >>>> +++ b/include/linux/swap.h >>>> @@ -482,7 +482,6 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t); >>>> extern void swap_shmem_alloc(swp_entry_t); >>>> extern int swap_duplicate(swp_entry_t); >>>> extern int swapcache_prepare(swp_entry_t); >>>> -extern void swap_free(swp_entry_t); >>> >>> I wonder if it would be cleaner to: >>> >>> #define swap_free(entry) swap_free_nr((entry), 1) >>> >>> To save all the churn for the callsites that just want to pass a single entry? >> >> I prefer this way. Although I prefer inline functions. Yes, I agree inline function is the better approach. > > Yes, using static inline is preferable. I've recently submitted > a checkpatch/codestyle for this, which can be found at: > https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git/commit/?h=mm-everything&id=39c58d5ed036 > https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git/commit/?h=mm-everything&id=8379bf0b0e1f5 > > Using static inline aligns with the established rule. > >> >> Otherwise, LGTM. Feel free to add >> >> Reviewed-by: "Huang, Ying" <ying.huang@intel.com> > > Thanks! > >> >> in the future version. > > I believe Christoph's vote leans towards simply removing swap_free_nr > and renaming it to swap_free, while adding a new parameter as follows. > > void swap_free(swp_entry_t entry, int nr); > { > } > > now I see Ryan and you prefer > > static inline swap_free() > { > swap_free_nr(...., 1) > } > > Chris slightly favors discouraging the use of swap_free() without the > new parameter. Removing swap_free() can address this concern. > > It seems that maintaining swap_free() and having it call swap_free_nr() with > a default value of 1 received the most support. > > To align with free_swap_and_cache() and free_swap_and_cache_nr(), > I'll proceed with the "static inline" approach in the new version. Please > voice any objections you may have, Christoph, Chris. I'm happy with either route. If you end up adding a nr param to swap_free() then it would also be good to give free_swap_and_cache_nr() the same treatment. 
> >> >>>> extern void swap_free_nr(swp_entry_t entry, int nr_pages); >>>> extern void swapcache_free_entries(swp_entry_t *entries, int n); >>>> extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); >>>> @@ -561,10 +560,6 @@ static inline int swapcache_prepare(swp_entry_t swp) >>>> return 0; >>>> } >>>> >>>> -static inline void swap_free(swp_entry_t swp) >>>> -{ >>>> -} >>>> - >>>> static inline void swap_free_nr(swp_entry_t entry, int nr_pages) >>>> { >>>> } >>>> diff --git a/kernel/power/swap.c b/kernel/power/swap.c >>>> index 5bc04bfe2db1..6befaa88a342 100644 >>>> --- a/kernel/power/swap.c >>>> +++ b/kernel/power/swap.c >>>> @@ -181,7 +181,7 @@ sector_t alloc_swapdev_block(int swap) >>>> offset = swp_offset(get_swap_page_of_type(swap)); >>>> if (offset) { >>>> if (swsusp_extents_insert(offset)) >>>> - swap_free(swp_entry(swap, offset)); >>>> + swap_free_nr(swp_entry(swap, offset), 1); >>>> else >>>> return swapdev_block(swap, offset); >>>> } >>>> @@ -200,12 +200,11 @@ void free_all_swap_pages(int swap) >>>> >>>> while ((node = swsusp_extents.rb_node)) { >>>> struct swsusp_extent *ext; >>>> - unsigned long offset; >>>> >>>> ext = rb_entry(node, struct swsusp_extent, node); >>>> rb_erase(node, &swsusp_extents); >>>> - for (offset = ext->start; offset <= ext->end; offset++) >>>> - swap_free(swp_entry(swap, offset)); >>>> + swap_free_nr(swp_entry(swap, ext->start), >>>> + ext->end - ext->start + 1); >>>> >>>> kfree(ext); >>>> } >>>> diff --git a/mm/memory.c b/mm/memory.c >>>> index eea6e4984eae..f033eb3528ba 100644 >>>> --- a/mm/memory.c >>>> +++ b/mm/memory.c >>>> @@ -4225,7 +4225,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) >>>> * We're already holding a reference on the page but haven't mapped it >>>> * yet. >>>> */ >>>> - swap_free(entry); >>>> + swap_free_nr(entry, 1); >>>> if (should_try_to_free_swap(folio, vma, vmf->flags)) >>>> folio_free_swap(folio); >>>> >>>> diff --git a/mm/rmap.c b/mm/rmap.c >>>> index 087a79f1f611..39ec7742acec 100644 >>>> --- a/mm/rmap.c >>>> +++ b/mm/rmap.c >>>> @@ -1865,7 +1865,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, >>>> goto walk_done_err; >>>> } >>>> if (arch_unmap_one(mm, vma, address, pteval) < 0) { >>>> - swap_free(entry); >>>> + swap_free_nr(entry, 1); >>>> set_pte_at(mm, address, pvmw.pte, pteval); >>>> goto walk_done_err; >>>> } >>>> @@ -1873,7 +1873,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, >>>> /* See folio_try_share_anon_rmap(): clear PTE first. */ >>>> if (anon_exclusive && >>>> folio_try_share_anon_rmap_pte(folio, subpage)) { >>>> - swap_free(entry); >>>> + swap_free_nr(entry, 1); >>>> set_pte_at(mm, address, pvmw.pte, pteval); >>>> goto walk_done_err; >>>> } >>>> diff --git a/mm/shmem.c b/mm/shmem.c >>>> index fa2a0ed97507..bfc8a2beb24f 100644 >>>> --- a/mm/shmem.c >>>> +++ b/mm/shmem.c >>>> @@ -1836,7 +1836,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, >>>> * in shmem_evict_inode(). 
>>>> */ >>>> shmem_recalc_inode(inode, -1, -1); >>>> - swap_free(swap); >>>> + swap_free_nr(swap, 1); >>>> } >>>> >>>> /* >>>> @@ -1927,7 +1927,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, >>>> >>>> delete_from_swap_cache(folio); >>>> folio_mark_dirty(folio); >>>> - swap_free(swap); >>>> + swap_free_nr(swap, 1); >>>> put_swap_device(si); >>>> >>>> *foliop = folio; >>>> diff --git a/mm/swapfile.c b/mm/swapfile.c >>>> index ec12f2b9d229..ddcd0f24b9a1 100644 >>>> --- a/mm/swapfile.c >>>> +++ b/mm/swapfile.c >>>> @@ -1343,19 +1343,6 @@ static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) >>>> swap_range_free(p, offset, 1); >>>> } >>>> >>>> -/* >>>> - * Caller has made sure that the swap device corresponding to entry >>>> - * is still around or has not been recycled. >>>> - */ >>>> -void swap_free(swp_entry_t entry) >>>> -{ >>>> - struct swap_info_struct *p; >>>> - >>>> - p = _swap_info_get(entry); >>>> - if (p) >>>> - __swap_entry_free(p, entry); >>>> -} >>>> - >>>> static void cluster_swap_free_nr(struct swap_info_struct *sis, >>>> unsigned long offset, int nr_pages) >>>> { >>>> @@ -1385,6 +1372,10 @@ static void cluster_swap_free_nr(struct swap_info_struct *sis, >>>> unlock_cluster_or_swap_info(sis, ci); >>>> } >>>> >>>> +/* >>>> + * Caller has made sure that the swap device corresponding to entry >>>> + * is still around or has not been recycled. >>>> + */ >>>> void swap_free_nr(swp_entry_t entry, int nr_pages) >>>> { >>>> int nr; >>>> @@ -1930,7 +1921,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, >>>> new_pte = pte_mkuffd_wp(new_pte); >>>> setpte: >>>> set_pte_at(vma->vm_mm, addr, pte, new_pte); >>>> - swap_free(entry); >>>> + swap_free_nr(entry, 1); >>>> out: >>>> if (pte) >>>> pte_unmap_unlock(pte, ptl); >> >> -- >> Best Regards, >> Huang, Ying > > Thanks > Barry
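If the discussion instead settles on folding the count into the base names, the "same treatment" Ryan mentions would pair the two APIs like this (hypothetical signatures, shown only to illustrate the symmetry; neither is in the posted patch):

extern void swap_free(swp_entry_t entry, int nr);
extern void free_swap_and_cache(swp_entry_t entry, int nr);

/* Callers then free a contiguous run of entries in one call, e.g.: */
free_swap_and_cache(entry, nr);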
diff --git a/include/linux/swap.h b/include/linux/swap.h index d1d35e92d7e9..f03cb446124e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -482,7 +482,6 @@ extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); -extern void swap_free(swp_entry_t); extern void swap_free_nr(swp_entry_t entry, int nr_pages); extern void swapcache_free_entries(swp_entry_t *entries, int n); extern void free_swap_and_cache_nr(swp_entry_t entry, int nr); @@ -561,10 +560,6 @@ static inline int swapcache_prepare(swp_entry_t swp) return 0; } -static inline void swap_free(swp_entry_t swp) -{ -} - static inline void swap_free_nr(swp_entry_t entry, int nr_pages) { } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 5bc04bfe2db1..6befaa88a342 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -181,7 +181,7 @@ sector_t alloc_swapdev_block(int swap) offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { if (swsusp_extents_insert(offset)) - swap_free(swp_entry(swap, offset)); + swap_free_nr(swp_entry(swap, offset), 1); else return swapdev_block(swap, offset); } @@ -200,12 +200,11 @@ void free_all_swap_pages(int swap) while ((node = swsusp_extents.rb_node)) { struct swsusp_extent *ext; - unsigned long offset; ext = rb_entry(node, struct swsusp_extent, node); rb_erase(node, &swsusp_extents); - for (offset = ext->start; offset <= ext->end; offset++) - swap_free(swp_entry(swap, offset)); + swap_free_nr(swp_entry(swap, ext->start), + ext->end - ext->start + 1); kfree(ext); } diff --git a/mm/memory.c b/mm/memory.c index eea6e4984eae..f033eb3528ba 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4225,7 +4225,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) * We're already holding a reference on the page but haven't mapped it * yet. */ - swap_free(entry); + swap_free_nr(entry, 1); if (should_try_to_free_swap(folio, vma, vmf->flags)) folio_free_swap(folio); diff --git a/mm/rmap.c b/mm/rmap.c index 087a79f1f611..39ec7742acec 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1865,7 +1865,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, goto walk_done_err; } if (arch_unmap_one(mm, vma, address, pteval) < 0) { - swap_free(entry); + swap_free_nr(entry, 1); set_pte_at(mm, address, pvmw.pte, pteval); goto walk_done_err; } @@ -1873,7 +1873,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, /* See folio_try_share_anon_rmap(): clear PTE first. */ if (anon_exclusive && folio_try_share_anon_rmap_pte(folio, subpage)) { - swap_free(entry); + swap_free_nr(entry, 1); set_pte_at(mm, address, pvmw.pte, pteval); goto walk_done_err; } diff --git a/mm/shmem.c b/mm/shmem.c index fa2a0ed97507..bfc8a2beb24f 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1836,7 +1836,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index, * in shmem_evict_inode(). 
*/ shmem_recalc_inode(inode, -1, -1); - swap_free(swap); + swap_free_nr(swap, 1); } /* @@ -1927,7 +1927,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, delete_from_swap_cache(folio); folio_mark_dirty(folio); - swap_free(swap); + swap_free_nr(swap, 1); put_swap_device(si); *foliop = folio; diff --git a/mm/swapfile.c b/mm/swapfile.c index ec12f2b9d229..ddcd0f24b9a1 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1343,19 +1343,6 @@ static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) swap_range_free(p, offset, 1); } -/* - * Caller has made sure that the swap device corresponding to entry - * is still around or has not been recycled. - */ -void swap_free(swp_entry_t entry) -{ - struct swap_info_struct *p; - - p = _swap_info_get(entry); - if (p) - __swap_entry_free(p, entry); -} - static void cluster_swap_free_nr(struct swap_info_struct *sis, unsigned long offset, int nr_pages) { @@ -1385,6 +1372,10 @@ static void cluster_swap_free_nr(struct swap_info_struct *sis, unlock_cluster_or_swap_info(sis, ci); } +/* + * Caller has made sure that the swap device corresponding to entry + * is still around or has not been recycled. + */ void swap_free_nr(swp_entry_t entry, int nr_pages) { int nr; @@ -1930,7 +1921,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, new_pte = pte_mkuffd_wp(new_pte); setpte: set_pte_at(vma->vm_mm, addr, pte, new_pte); - swap_free(entry); + swap_free_nr(entry, 1); out: if (pte) pte_unmap_unlock(pte, ptl);