Message ID | 20220624173656.2033256-12-jthoughton@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | hugetlb: Introduce HugeTLB high-granularity mapping | expand |
On 24/06/22 11:06 pm, James Houghton wrote: > This adds it for architectures that use GENERAL_HUGETLB, including x86. > > Signed-off-by: James Houghton <jthoughton@google.com> > --- > include/linux/hugetlb.h | 2 ++ > mm/hugetlb.c | 45 +++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 47 insertions(+) > > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h > index e7a6b944d0cc..605aa19d8572 100644 > --- a/include/linux/hugetlb.h > +++ b/include/linux/hugetlb.h > @@ -258,6 +258,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, > unsigned long addr, unsigned long sz); > pte_t *huge_pte_offset(struct mm_struct *mm, > unsigned long addr, unsigned long sz); > +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte, > + unsigned long addr, unsigned long sz, bool stop_at_none); > int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, > unsigned long *addr, pte_t *ptep); > void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 557b0afdb503..3ec2a921ee6f 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -6981,6 +6981,51 @@ pte_t *huge_pte_offset(struct mm_struct *mm, > return (pte_t *)pmd; > } not strong feeling but this name looks confusing to me as it does not only walk over page-tables but can also alloc. > +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte, > + unsigned long addr, unsigned long sz, bool stop_at_none) > +{ > + pte_t *ptep; > + > + if (!hpte->ptep) { > + pgd_t *pgd = pgd_offset(mm, addr); > + > + if (!pgd) > + return -ENOMEM; > + ptep = (pte_t *)p4d_alloc(mm, pgd, addr); > + if (!ptep) > + return -ENOMEM; > + hugetlb_pte_populate(hpte, ptep, P4D_SHIFT); > + } > + > + while (hugetlb_pte_size(hpte) > sz && > + !hugetlb_pte_present_leaf(hpte) && > + !(stop_at_none && hugetlb_pte_none(hpte))) { Should this ordering of if-else condition be in reverse, i mean it will look more natural and possibly less condition checks as we go from top to bottom. > + if (hpte->shift == PMD_SHIFT) { > + ptep = pte_alloc_map(mm, (pmd_t *)hpte->ptep, addr); > + if (!ptep) > + return -ENOMEM; > + hpte->shift = PAGE_SHIFT; > + hpte->ptep = ptep; > + } else if (hpte->shift == PUD_SHIFT) { > + ptep = (pte_t *)pmd_alloc(mm, (pud_t *)hpte->ptep, > + addr); > + if (!ptep) > + return -ENOMEM; > + hpte->shift = PMD_SHIFT; > + hpte->ptep = ptep; > + } else if (hpte->shift == P4D_SHIFT) { > + ptep = (pte_t *)pud_alloc(mm, (p4d_t *)hpte->ptep, > + addr); > + if (!ptep) > + return -ENOMEM; > + hpte->shift = PUD_SHIFT; > + hpte->ptep = ptep; > + } else > + BUG(); > + } > + return 0; > +} > + > #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ > > #ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
On 06/27/22 18:37, manish.mishra wrote: > > On 24/06/22 11:06 pm, James Houghton wrote: > > This adds it for architectures that use GENERAL_HUGETLB, including x86. I expect this will be used in arch independent code and there will need to be at least a stub for all architectures? > > > > Signed-off-by: James Houghton <jthoughton@google.com> > > --- > > include/linux/hugetlb.h | 2 ++ > > mm/hugetlb.c | 45 +++++++++++++++++++++++++++++++++++++++++ > > 2 files changed, 47 insertions(+) > > > > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h > > index e7a6b944d0cc..605aa19d8572 100644 > > --- a/include/linux/hugetlb.h > > +++ b/include/linux/hugetlb.h > > @@ -258,6 +258,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, > > unsigned long addr, unsigned long sz); > > pte_t *huge_pte_offset(struct mm_struct *mm, > > unsigned long addr, unsigned long sz); > > +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte, > > + unsigned long addr, unsigned long sz, bool stop_at_none); > > int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, > > unsigned long *addr, pte_t *ptep); > > void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > > index 557b0afdb503..3ec2a921ee6f 100644 > > --- a/mm/hugetlb.c > > +++ b/mm/hugetlb.c > > @@ -6981,6 +6981,51 @@ pte_t *huge_pte_offset(struct mm_struct *mm, > > return (pte_t *)pmd; > > } > > > not strong feeling but this name looks confusing to me as it does > > not only walk over page-tables but can also alloc. > Somewhat agree. With this we have: - huge_pte_offset to walk/lookup a pte - huge_pte_alloc to allocate ptes - hugetlb_walk_to which does some/all of both Do not see anything obviously wrong with the routine, but future direction would be to combine/clean up these routines with similar purpose.
On Fri, Jun 24, 2022 at 05:36:41PM +0000, James Houghton wrote: > This adds it for architectures that use GENERAL_HUGETLB, including x86. > > Signed-off-by: James Houghton <jthoughton@google.com> > --- > include/linux/hugetlb.h | 2 ++ > mm/hugetlb.c | 45 +++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 47 insertions(+) > > diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h > index e7a6b944d0cc..605aa19d8572 100644 > --- a/include/linux/hugetlb.h > +++ b/include/linux/hugetlb.h > @@ -258,6 +258,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, > unsigned long addr, unsigned long sz); > pte_t *huge_pte_offset(struct mm_struct *mm, > unsigned long addr, unsigned long sz); > +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte, > + unsigned long addr, unsigned long sz, bool stop_at_none); > int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, > unsigned long *addr, pte_t *ptep); > void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index 557b0afdb503..3ec2a921ee6f 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -6981,6 +6981,51 @@ pte_t *huge_pte_offset(struct mm_struct *mm, > return (pte_t *)pmd; > } > > +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte, > + unsigned long addr, unsigned long sz, bool stop_at_none) > +{ > + pte_t *ptep; > + > + if (!hpte->ptep) { > + pgd_t *pgd = pgd_offset(mm, addr); > + > + if (!pgd) > + return -ENOMEM; > + ptep = (pte_t *)p4d_alloc(mm, pgd, addr); > + if (!ptep) > + return -ENOMEM; > + hugetlb_pte_populate(hpte, ptep, P4D_SHIFT); > + } > + > + while (hugetlb_pte_size(hpte) > sz && > + !hugetlb_pte_present_leaf(hpte) && > + !(stop_at_none && hugetlb_pte_none(hpte))) { > + if (hpte->shift == PMD_SHIFT) { > + ptep = pte_alloc_map(mm, (pmd_t *)hpte->ptep, addr); I had a feeling that the pairing pte_unmap() was lost. I think most distros are not with CONFIG_HIGHPTE at all, but still.. > + if (!ptep) > + return -ENOMEM; > + hpte->shift = PAGE_SHIFT; > + hpte->ptep = ptep; > + } else if (hpte->shift == PUD_SHIFT) { > + ptep = (pte_t *)pmd_alloc(mm, (pud_t *)hpte->ptep, > + addr); > + if (!ptep) > + return -ENOMEM; > + hpte->shift = PMD_SHIFT; > + hpte->ptep = ptep; > + } else if (hpte->shift == P4D_SHIFT) { > + ptep = (pte_t *)pud_alloc(mm, (p4d_t *)hpte->ptep, > + addr); > + if (!ptep) > + return -ENOMEM; > + hpte->shift = PUD_SHIFT; > + hpte->ptep = ptep; > + } else > + BUG(); > + } > + return 0; > +} > + > #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ > > #ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING > -- > 2.37.0.rc0.161.g10f37bed90-goog >
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e7a6b944d0cc..605aa19d8572 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -258,6 +258,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte, + unsigned long addr, unsigned long sz, bool stop_at_none); int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long *addr, pte_t *ptep); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 557b0afdb503..3ec2a921ee6f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6981,6 +6981,51 @@ pte_t *huge_pte_offset(struct mm_struct *mm, return (pte_t *)pmd; } +int hugetlb_walk_to(struct mm_struct *mm, struct hugetlb_pte *hpte, + unsigned long addr, unsigned long sz, bool stop_at_none) +{ + pte_t *ptep; + + if (!hpte->ptep) { + pgd_t *pgd = pgd_offset(mm, addr); + + if (!pgd) + return -ENOMEM; + ptep = (pte_t *)p4d_alloc(mm, pgd, addr); + if (!ptep) + return -ENOMEM; + hugetlb_pte_populate(hpte, ptep, P4D_SHIFT); + } + + while (hugetlb_pte_size(hpte) > sz && + !hugetlb_pte_present_leaf(hpte) && + !(stop_at_none && hugetlb_pte_none(hpte))) { + if (hpte->shift == PMD_SHIFT) { + ptep = pte_alloc_map(mm, (pmd_t *)hpte->ptep, addr); + if (!ptep) + return -ENOMEM; + hpte->shift = PAGE_SHIFT; + hpte->ptep = ptep; + } else if (hpte->shift == PUD_SHIFT) { + ptep = (pte_t *)pmd_alloc(mm, (pud_t *)hpte->ptep, + addr); + if (!ptep) + return -ENOMEM; + hpte->shift = PMD_SHIFT; + hpte->ptep = ptep; + } else if (hpte->shift == P4D_SHIFT) { + ptep = (pte_t *)pud_alloc(mm, (p4d_t *)hpte->ptep, + addr); + if (!ptep) + return -ENOMEM; + hpte->shift = PUD_SHIFT; + hpte->ptep = ptep; + } else + BUG(); + } + return 0; +} + #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ #ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
This adds it for architectures that use GENERAL_HUGETLB, including x86. Signed-off-by: James Houghton <jthoughton@google.com> --- include/linux/hugetlb.h | 2 ++ mm/hugetlb.c | 45 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+)