diff mbox series

[RFC,11/26] hugetlb: add hugetlb_walk_to to do PT walks

Message ID 20220624173656.2033256-12-jthoughton@google.com (mailing list archive)
State New
Headers show
Series hugetlb: Introduce HugeTLB high-granularity mapping | expand

Commit Message

James Houghton June 24, 2022, 5:36 p.m. UTC
This adds it for architectures that use GENERAL_HUGETLB, including x86.

Signed-off-by: James Houghton <jthoughton@google.com>
---
 include/linux/hugetlb.h |  2 ++
 mm/hugetlb.c            | 45 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

Comments

Manish June 27, 2022, 1:07 p.m. UTC | #1
On 24/06/22 11:06 pm, James Houghton wrote:
> This adds it for architectures that use GENERAL_HUGETLB, including x86.
>
> Signed-off-by: James Houghton <jthoughton@google.com>
> ---
>   include/linux/hugetlb.h |  2 ++
>   mm/hugetlb.c            | 45 +++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 47 insertions(+)
>
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index e7a6b944d0cc..605aa19d8572 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -258,6 +258,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
>   			unsigned long addr, unsigned long sz);
>   pte_t *huge_pte_offset(struct mm_struct *mm,
>   		       unsigned long addr, unsigned long sz);
> +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte,
> +		    unsigned long addr, unsigned long sz, bool stop_at_none);
>   int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
>   				unsigned long *addr, pte_t *ptep);
>   void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 557b0afdb503..3ec2a921ee6f 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -6981,6 +6981,51 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
>   	return (pte_t *)pmd;
>   }


not strong feeling but this name looks confusing to me as it does

not only walk over page-tables but can also alloc.

> +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte,
> +		    unsigned long addr, unsigned long sz, bool stop_at_none)
> +{
> +	pte_t *ptep;
> +
> +	if (!hpte->ptep) {
> +		pgd_t *pgd = pgd_offset(mm, addr);
> +
> +		if (!pgd)
> +			return -ENOMEM;
> +		ptep = (pte_t *)p4d_alloc(mm, pgd, addr);
> +		if (!ptep)
> +			return -ENOMEM;
> +		hugetlb_pte_populate(hpte, ptep, P4D_SHIFT);
> +	}
> +
> +	while (hugetlb_pte_size(hpte) > sz &&
> +			!hugetlb_pte_present_leaf(hpte) &&
> +			!(stop_at_none && hugetlb_pte_none(hpte))) {

Should this ordering of if-else condition be in reverse, i mean it will look

more natural and possibly less condition checks as we go from top to bottom.

> +		if (hpte->shift == PMD_SHIFT) {
> +			ptep = pte_alloc_map(mm, (pmd_t *)hpte->ptep, addr);
> +			if (!ptep)
> +				return -ENOMEM;
> +			hpte->shift = PAGE_SHIFT;
> +			hpte->ptep = ptep;
> +		} else if (hpte->shift == PUD_SHIFT) {
> +			ptep = (pte_t *)pmd_alloc(mm, (pud_t *)hpte->ptep,
> +						  addr);
> +			if (!ptep)
> +				return -ENOMEM;
> +			hpte->shift = PMD_SHIFT;
> +			hpte->ptep = ptep;
> +		} else if (hpte->shift == P4D_SHIFT) {
> +			ptep = (pte_t *)pud_alloc(mm, (p4d_t *)hpte->ptep,
> +						  addr);
> +			if (!ptep)
> +				return -ENOMEM;
> +			hpte->shift = PUD_SHIFT;
> +			hpte->ptep = ptep;
> +		} else
> +			BUG();
> +	}
> +	return 0;
> +}
> +
>   #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
>   
>   #ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
Mike Kravetz July 7, 2022, 11:03 p.m. UTC | #2
On 06/27/22 18:37, manish.mishra wrote:
> 
> On 24/06/22 11:06 pm, James Houghton wrote:
> > This adds it for architectures that use GENERAL_HUGETLB, including x86.

I expect this will be used in arch independent code and there will need to
be at least a stub for all architectures?

> > 
> > Signed-off-by: James Houghton <jthoughton@google.com>
> > ---
> >   include/linux/hugetlb.h |  2 ++
> >   mm/hugetlb.c            | 45 +++++++++++++++++++++++++++++++++++++++++
> >   2 files changed, 47 insertions(+)
> > 
> > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> > index e7a6b944d0cc..605aa19d8572 100644
> > --- a/include/linux/hugetlb.h
> > +++ b/include/linux/hugetlb.h
> > @@ -258,6 +258,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
> >   			unsigned long addr, unsigned long sz);
> >   pte_t *huge_pte_offset(struct mm_struct *mm,
> >   		       unsigned long addr, unsigned long sz);
> > +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte,
> > +		    unsigned long addr, unsigned long sz, bool stop_at_none);
> >   int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
> >   				unsigned long *addr, pte_t *ptep);
> >   void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
> > diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> > index 557b0afdb503..3ec2a921ee6f 100644
> > --- a/mm/hugetlb.c
> > +++ b/mm/hugetlb.c
> > @@ -6981,6 +6981,51 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
> >   	return (pte_t *)pmd;
> >   }
> 
> 
> not strong feeling but this name looks confusing to me as it does
> 
> not only walk over page-tables but can also alloc.
> 

Somewhat agree.  With this we have:
- huge_pte_offset to walk/lookup a pte
- huge_pte_alloc to allocate ptes
- hugetlb_walk_to which does some/all of both

Do not see anything obviously wrong with the routine, but future
direction would be to combine/clean up these routines with similar
purpose.
Peter Xu Sept. 8, 2022, 6:20 p.m. UTC | #3
On Fri, Jun 24, 2022 at 05:36:41PM +0000, James Houghton wrote:
> This adds it for architectures that use GENERAL_HUGETLB, including x86.
> 
> Signed-off-by: James Houghton <jthoughton@google.com>
> ---
>  include/linux/hugetlb.h |  2 ++
>  mm/hugetlb.c            | 45 +++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 47 insertions(+)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index e7a6b944d0cc..605aa19d8572 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -258,6 +258,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
>  			unsigned long addr, unsigned long sz);
>  pte_t *huge_pte_offset(struct mm_struct *mm,
>  		       unsigned long addr, unsigned long sz);
> +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte,
> +		    unsigned long addr, unsigned long sz, bool stop_at_none);
>  int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
>  				unsigned long *addr, pte_t *ptep);
>  void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 557b0afdb503..3ec2a921ee6f 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -6981,6 +6981,51 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
>  	return (pte_t *)pmd;
>  }
>  
> +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte,
> +		    unsigned long addr, unsigned long sz, bool stop_at_none)
> +{
> +	pte_t *ptep;
> +
> +	if (!hpte->ptep) {
> +		pgd_t *pgd = pgd_offset(mm, addr);
> +
> +		if (!pgd)
> +			return -ENOMEM;
> +		ptep = (pte_t *)p4d_alloc(mm, pgd, addr);
> +		if (!ptep)
> +			return -ENOMEM;
> +		hugetlb_pte_populate(hpte, ptep, P4D_SHIFT);
> +	}
> +
> +	while (hugetlb_pte_size(hpte) > sz &&
> +			!hugetlb_pte_present_leaf(hpte) &&
> +			!(stop_at_none && hugetlb_pte_none(hpte))) {
> +		if (hpte->shift == PMD_SHIFT) {
> +			ptep = pte_alloc_map(mm, (pmd_t *)hpte->ptep, addr);

I had a feeling that the pairing pte_unmap() was lost.

I think most distros are not with CONFIG_HIGHPTE at all, but still..

> +			if (!ptep)
> +				return -ENOMEM;
> +			hpte->shift = PAGE_SHIFT;
> +			hpte->ptep = ptep;
> +		} else if (hpte->shift == PUD_SHIFT) {
> +			ptep = (pte_t *)pmd_alloc(mm, (pud_t *)hpte->ptep,
> +						  addr);
> +			if (!ptep)
> +				return -ENOMEM;
> +			hpte->shift = PMD_SHIFT;
> +			hpte->ptep = ptep;
> +		} else if (hpte->shift == P4D_SHIFT) {
> +			ptep = (pte_t *)pud_alloc(mm, (p4d_t *)hpte->ptep,
> +						  addr);
> +			if (!ptep)
> +				return -ENOMEM;
> +			hpte->shift = PUD_SHIFT;
> +			hpte->ptep = ptep;
> +		} else
> +			BUG();
> +	}
> +	return 0;
> +}
> +
>  #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
>  
>  #ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
> -- 
> 2.37.0.rc0.161.g10f37bed90-goog
>
diff mbox series

Patch

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index e7a6b944d0cc..605aa19d8572 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -258,6 +258,8 @@  pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long addr, unsigned long sz);
 pte_t *huge_pte_offset(struct mm_struct *mm,
 		       unsigned long addr, unsigned long sz);
+int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte,
+		    unsigned long addr, unsigned long sz, bool stop_at_none);
 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long *addr, pte_t *ptep);
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 557b0afdb503..3ec2a921ee6f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6981,6 +6981,51 @@  pte_t *huge_pte_offset(struct mm_struct *mm,
 	return (pte_t *)pmd;
 }
 
+int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte,
+		    unsigned long addr, unsigned long sz, bool stop_at_none)
+{
+	pte_t *ptep;
+
+	if (!hpte->ptep) {
+		pgd_t *pgd = pgd_offset(mm, addr);
+
+		if (!pgd)
+			return -ENOMEM;
+		ptep = (pte_t *)p4d_alloc(mm, pgd, addr);
+		if (!ptep)
+			return -ENOMEM;
+		hugetlb_pte_populate(hpte, ptep, P4D_SHIFT);
+	}
+
+	while (hugetlb_pte_size(hpte) > sz &&
+			!hugetlb_pte_present_leaf(hpte) &&
+			!(stop_at_none && hugetlb_pte_none(hpte))) {
+		if (hpte->shift == PMD_SHIFT) {
+			ptep = pte_alloc_map(mm, (pmd_t *)hpte->ptep, addr);
+			if (!ptep)
+				return -ENOMEM;
+			hpte->shift = PAGE_SHIFT;
+			hpte->ptep = ptep;
+		} else if (hpte->shift == PUD_SHIFT) {
+			ptep = (pte_t *)pmd_alloc(mm, (pud_t *)hpte->ptep,
+						  addr);
+			if (!ptep)
+				return -ENOMEM;
+			hpte->shift = PMD_SHIFT;
+			hpte->ptep = ptep;
+		} else if (hpte->shift == P4D_SHIFT) {
+			ptep = (pte_t *)pud_alloc(mm, (p4d_t *)hpte->ptep,
+						  addr);
+			if (!ptep)
+				return -ENOMEM;
+			hpte->shift = PUD_SHIFT;
+			hpte->ptep = ptep;
+		} else
+			BUG();
+	}
+	return 0;
+}
+
 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
 
 #ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING