[3/3] mm/hugetlb: Refactor __unmap_hugepage_range() to take folio instead of page

Message ID 20250417155530.124073-3-nifan.cxl@gmail.com (mailing list archive)
State New
Series [1/3] mm/hugetlb: Refactor unmap_ref_private() to take folio instead of page

Commit Message

Fan Ni April 17, 2025, 3:43 p.m. UTC
From: Fan Ni <fan.ni@samsung.com>

The function __unmap_hugepage_range() has two kinds of users:
1) unmap_hugepage_range(), which passes in the head page of a folio.
   Since unmap_hugepage_range() already takes folio and there are no other
   uses of the folio struct in the function, it is natural for
   __unmap_hugepage_range() to take folio also.
2) All other uses, which pass in NULL pointer.

In both cases, we can pass in folio. Refactor __unmap_hugepage_range() to
take folio.

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---

Question: If the change in the patch makes sense, should we try to convert all
"page" uses in __unmap_hugepage_range() to folio?

---
 include/linux/hugetlb.h |  2 +-
 mm/hugetlb.c            | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

Comments

Sidhartha Kumar April 17, 2025, 4:21 p.m. UTC | #1
On 4/17/25 11:43 AM, nifan.cxl@gmail.com wrote:
> From: Fan Ni <fan.ni@samsung.com>
> 
> The function __unmap_hugepage_range() has two kinds of users:
> 1) unmap_hugepage_range(), which passes in the head page of a folio.
>     Since unmap_hugepage_range() already takes folio and there are no other
>     uses of the folio struct in the function, it is natural for
>     __unmap_hugepage_range() to take folio also.
> 2) All other uses, which pass in NULL pointer.
> 
> In both cases, we can pass in folio. Refactor __unmap_hugepage_range() to
> take folio.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> ---
> 
> Question: If the change in the patch makes sense, should we try to convert all
> "page" uses in __unmap_hugepage_range() to folio?
> 

For this to be correct, we have to ensure that the pte in:

	page = pte_page(pte);

only maps a head page. pte comes from:

	pte = huge_ptep_get(mm, address, ptep);

and in the for loop above:
		
	for (; address < end; address += sz)

address is incremented by the huge page size so I think address here 
only points to head pages of hugetlb folios and it would make sense to 
convert page to folio here.
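
For reference, the loop in question looks roughly like this (condensed from my
reading of __unmap_hugepage_range(); error handling and the unmap itself are
elided):

	for (; address < end; address += sz) {
		ptep = hugetlb_walk(vma, address, sz);
		if (!ptep)
			continue;
		ptl = huge_pte_lock(h, mm, ptep);
		/* pmd-unshare and non-present pte handling elided */
		pte = huge_ptep_get(mm, address, ptep);
		/*
		 * start/end are huge-page aligned and address advances by sz,
		 * so pte_page(pte) should always give a head page here.
		 */
		page = pte_page(pte);
		/* ref_page comparison and the actual unmap follow */
	}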

> ---
>   include/linux/hugetlb.h |  2 +-
>   mm/hugetlb.c            | 10 +++++-----
>   2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index b7699f35c87f..d6c503dd2f7d 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -133,7 +133,7 @@ void unmap_hugepage_range(struct vm_area_struct *,
>   void __unmap_hugepage_range(struct mmu_gather *tlb,
>   			  struct vm_area_struct *vma,
>   			  unsigned long start, unsigned long end,
> -			  struct page *ref_page, zap_flags_t zap_flags);
> +			  struct folio *ref_folio, zap_flags_t zap_flags);
>   void hugetlb_report_meminfo(struct seq_file *);
>   int hugetlb_report_node_meminfo(char *buf, int len, int nid);
>   void hugetlb_show_meminfo_node(int nid);
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 3181dbe0c4bb..7d280ab23784 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -5833,7 +5833,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
>   
>   void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   			    unsigned long start, unsigned long end,
> -			    struct page *ref_page, zap_flags_t zap_flags)
> +			    struct folio *ref_folio, zap_flags_t zap_flags)
>   {
>   	struct mm_struct *mm = vma->vm_mm;
>   	unsigned long address;
> @@ -5910,8 +5910,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   		 * page is being unmapped, not a range. Ensure the page we
>   		 * are about to unmap is the actual page of interest.
>   		 */
> -		if (ref_page) {
> -			if (page != ref_page) {
> +		if (ref_folio) {
> +			if (page != folio_page(ref_folio, 0)) {
>   				spin_unlock(ptl);
>   				continue;
>   			}
> @@ -5977,7 +5977,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
>   		/*
>   		 * Bail out after unmapping reference page if supplied
>   		 */
> -		if (ref_page)
> +		if (ref_folio)
>   			break;
>   	}
>   	tlb_end_vma(tlb, vma);
> @@ -6052,7 +6052,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
>   	tlb_gather_mmu(&tlb, vma->vm_mm);
>   
>   	__unmap_hugepage_range(&tlb, vma, start, end,
> -			       folio_page(ref_folio, 0), zap_flags);
> +			       ref_folio, zap_flags);
>   
>   	mmu_notifier_invalidate_range_end(&range);
>   	tlb_finish_mmu(&tlb);
Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Fan Ni April 17, 2025, 4:34 p.m. UTC | #2
On Thu, Apr 17, 2025 at 12:21:55PM -0400, Sidhartha Kumar wrote:
> On 4/17/25 11:43 AM, nifan.cxl@gmail.com wrote:
> > From: Fan Ni <fan.ni@samsung.com>
> > 
> > The function __unmap_hugepage_range() has two kinds of users:
> > 1) unmap_hugepage_range(), which passes in the head page of a folio.
> >     Since unmap_hugepage_range() already takes folio and there are no other
> >     uses of the folio struct in the function, it is natural for
> >     __unmap_hugepage_range() to take folio also.
> > 2) All other uses, which pass in NULL pointer.
> > 
> > In both cases, we can pass in folio. Refactor __unmap_hugepage_range() to
> > take folio.
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> > ---
> > 
> > Question: If the change in the patch makes sense, should we try to convert all
> > "page" uses in __unmap_hugepage_range() to folio?
> > 
> 
> For this to be correct, we have to ensure that the pte in:
> 
> 	page = pte_page(pte);
> 
> only maps a head page. pte comes from:
> 
> 	pte = huge_ptep_get(mm, address, ptep);
> 
> and in the for loop above:
> 		
> 	for (; address < end; address += sz)
> 
> address is incremented by the huge page size so I think address here only
> points to head pages of hugetlb folios and it would make sense to convert
> page to folio here.
> 

Thanks, Sidhartha, for reviewing the series. I have a similar understanding and
wanted to get confirmation from experts in this area.

Thanks.
Fan

> > ---
> >   include/linux/hugetlb.h |  2 +-
> >   mm/hugetlb.c            | 10 +++++-----
> >   2 files changed, 6 insertions(+), 6 deletions(-)
> > 
> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> > index b7699f35c87f..d6c503dd2f7d 100644
> > --- a/include/linux/hugetlb.h
> > +++ b/include/linux/hugetlb.h
> > @@ -133,7 +133,7 @@ void unmap_hugepage_range(struct vm_area_struct *,
> >   void __unmap_hugepage_range(struct mmu_gather *tlb,
> >   			  struct vm_area_struct *vma,
> >   			  unsigned long start, unsigned long end,
> > -			  struct page *ref_page, zap_flags_t zap_flags);
> > +			  struct folio *ref_folio, zap_flags_t zap_flags);
> >   void hugetlb_report_meminfo(struct seq_file *);
> >   int hugetlb_report_node_meminfo(char *buf, int len, int nid);
> >   void hugetlb_show_meminfo_node(int nid);
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 3181dbe0c4bb..7d280ab23784 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -5833,7 +5833,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
> >   void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
> >   			    unsigned long start, unsigned long end,
> > -			    struct page *ref_page, zap_flags_t zap_flags)
> > +			    struct folio *ref_folio, zap_flags_t zap_flags)
> >   {
> >   	struct mm_struct *mm = vma->vm_mm;
> >   	unsigned long address;
> > @@ -5910,8 +5910,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
> >   		 * page is being unmapped, not a range. Ensure the page we
> >   		 * are about to unmap is the actual page of interest.
> >   		 */
> > -		if (ref_page) {
> > -			if (page != ref_page) {
> > +		if (ref_folio) {
> > +			if (page != folio_page(ref_folio, 0)) {
> >   				spin_unlock(ptl);
> >   				continue;
> >   			}
> > @@ -5977,7 +5977,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
> >   		/*
> >   		 * Bail out after unmapping reference page if supplied
> >   		 */
> > -		if (ref_page)
> > +		if (ref_folio)
> >   			break;
> >   	}
> >   	tlb_end_vma(tlb, vma);
> > @@ -6052,7 +6052,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
> >   	tlb_gather_mmu(&tlb, vma->vm_mm);
> >   	__unmap_hugepage_range(&tlb, vma, start, end,
> > -			       folio_page(ref_folio, 0), zap_flags);
> > +			       ref_folio, zap_flags);
> >   	mmu_notifier_invalidate_range_end(&range);
> >   	tlb_finish_mmu(&tlb);
> Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Muchun Song April 18, 2025, 3:03 a.m. UTC | #3
> On Apr 18, 2025, at 00:34, Fan Ni <nifan.cxl@gmail.com> wrote:
> 
> On Thu, Apr 17, 2025 at 12:21:55PM -0400, Sidhartha Kumar wrote:
>> On 4/17/25 11:43 AM, nifan.cxl@gmail.com wrote:
>>> From: Fan Ni <fan.ni@samsung.com>
>>> 
>>> The function __unmap_hugepage_range() has two kinds of users:
>>> 1) unmap_hugepage_range(), which passes in the head page of a folio.
>>>    Since unmap_hugepage_range() already takes folio and there are no other
>>>    uses of the folio struct in the function, it is natural for
>>>    __unmap_hugepage_range() to take folio also.
>>> 2) All other uses, which pass in NULL pointer.
>>> 
>>> In both cases, we can pass in folio. Refactor __unmap_hugepage_range() to
>>> take folio.
>>> 
>>> Signed-off-by: Fan Ni <fan.ni@samsung.com>
>>> ---
>>> 
>>> Question: If the change in the patch makes sense, should we try to convert all
>>> "page" uses in __unmap_hugepage_range() to folio?
>>> 
>> 
>> For this to be correct, we have to ensure that the pte in:
>> 
>> 	page = pte_page(pte);
>> 
>> only maps a head page. pte comes from:
>> 
>> 	pte = huge_ptep_get(mm, address, ptep);
>> 
>> and in the for loop above:
>> 
>> 	for (; address < end; address += sz)
>> 
>> address is incremented by the huge page size so I think address here only
>> points to head pages of hugetlb folios and it would make sense to convert
>> page to folio here.
>> 
> 
> Thanks, Sidhartha, for reviewing the series. I have a similar understanding and
> wanted to get confirmation from experts in this area.

I think your understanding is right. BTW, you forgot to update the definition of
__unmap_hugepage_range() for the !CONFIG_HUGETLB_PAGE case.

> 
> Thanks.
> Fan
Fan Ni April 18, 2025, 4:27 a.m. UTC | #4
On Fri, Apr 18, 2025 at 11:03:59AM +0800, Muchun Song wrote:
> 
> 
> > On Apr 18, 2025, at 00:34, Fan Ni <nifan.cxl@gmail.com> wrote:
> > 
> > On Thu, Apr 17, 2025 at 12:21:55PM -0400, Sidhartha Kumar wrote:
> >> On 4/17/25 11:43 AM, nifan.cxl@gmail.com wrote:
> >>> From: Fan Ni <fan.ni@samsung.com>
> >>> 
> >>> The function __unmap_hugepage_range() has two kinds of users:
> >>> 1) unmap_hugepage_range(), which passes in the head page of a folio.
> >>>    Since unmap_hugepage_range() already takes folio and there are no other
> >>>    uses of the folio struct in the function, it is natural for
> >>>    __unmap_hugepage_range() to take folio also.
> >>> 2) All other uses, which pass in NULL pointer.
> >>> 
> >>> In both cases, we can pass in folio. Refactor __unmap_hugepage_range() to
> >>> take folio.
> >>> 
> >>> Signed-off-by: Fan Ni <fan.ni@samsung.com>
> >>> ---
> >>> 
> >>> Question: If the change in the patch makes sense, should we try to convert all
> >>> "page" uses in __unmap_hugepage_range() to folio?
> >>> 
> >> 
> >> For this to be correct, we have to ensure that the pte in:
> >> 
> >> 	page = pte_page(pte);
> >> 
> >> only maps a head page. pte comes from:
> >> 
> >> 	pte = huge_ptep_get(mm, address, ptep);
> >> 
> >> and in the for loop above:
> >> 
> >> 	for (; address < end; address += sz)
> >> 
> >> address is incremented by the huge page size so I think address here only
> >> points to head pages of hugetlb folios and it would make sense to convert
> >> page to folio here.
> >> 
> > 
> > Thanks, Sidhartha, for reviewing the series. I have a similar understanding and
> > wanted to get confirmation from experts in this area.
> 
> I think your understanding is right. BTW, you forgot to update definition of
> __unmap_hugepage_range() under !CONFIG_HUGETLB_PAGE case.
> 

Thanks Muchun. You are right, we need to update that.

Hi Andrew,

I see you picked this patch up; should I send a v2 of the series to fix the
issue mentioned above?
The fix is simple, as shown below.

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d6c503dd2f7d..ebaf95231934 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -452,7 +452,7 @@ static inline long hugetlb_change_protection(
 
 static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
                        struct vm_area_struct *vma, unsigned long start,
-                       unsigned long end, struct page *ref_page,
+                       unsigned long end, struct folio *ref_folio,
                        zap_flags_t zap_flags)
 {
        BUG();



Fan
> > 
> > Thanks.
> > Fan
>

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b7699f35c87f..d6c503dd2f7d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -133,7 +133,7 @@  void unmap_hugepage_range(struct vm_area_struct *,
 void __unmap_hugepage_range(struct mmu_gather *tlb,
 			  struct vm_area_struct *vma,
 			  unsigned long start, unsigned long end,
-			  struct page *ref_page, zap_flags_t zap_flags);
+			  struct folio *ref_folio, zap_flags_t zap_flags);
 void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(char *buf, int len, int nid);
 void hugetlb_show_meminfo_node(int nid);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3181dbe0c4bb..7d280ab23784 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5833,7 +5833,7 @@  int move_hugetlb_page_tables(struct vm_area_struct *vma,
 
 void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			    unsigned long start, unsigned long end,
-			    struct page *ref_page, zap_flags_t zap_flags)
+			    struct folio *ref_folio, zap_flags_t zap_flags)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -5910,8 +5910,8 @@  void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 * page is being unmapped, not a range. Ensure the page we
 		 * are about to unmap is the actual page of interest.
 		 */
-		if (ref_page) {
-			if (page != ref_page) {
+		if (ref_folio) {
+			if (page != folio_page(ref_folio, 0)) {
 				spin_unlock(ptl);
 				continue;
 			}
@@ -5977,7 +5977,7 @@  void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		/*
 		 * Bail out after unmapping reference page if supplied
 		 */
-		if (ref_page)
+		if (ref_folio)
 			break;
 	}
 	tlb_end_vma(tlb, vma);
@@ -6052,7 +6052,7 @@  void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	tlb_gather_mmu(&tlb, vma->vm_mm);
 
 	__unmap_hugepage_range(&tlb, vma, start, end,
-			       folio_page(ref_folio, 0), zap_flags);
+			       ref_folio, zap_flags);
 
 	mmu_notifier_invalidate_range_end(&range);
 	tlb_finish_mmu(&tlb);