diff mbox series

[v5,13/21] mm/hugetlb: Use PG_slab to indicate split pmd

Message ID 20201120064325.34492-14-songmuchun@bytedance.com (mailing list archive)
State New, archived
Headers show
Series Free some vmemmap pages of hugetlb page | expand

Commit Message

Muchun Song Nov. 20, 2020, 6:43 a.m. UTC
When we allocate hugetlb page from buddy, we may need split huge pmd
to pte. When we free the hugetlb page, we can merge pte to pmd. So
we need to distinguish whether the previous pmd has been split. The
page table is not allocated from slab. So we can reuse the PG_slab
to indicate that the pmd has been split.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
 mm/hugetlb_vmemmap.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

Comments

Michal Hocko Nov. 20, 2020, 8:16 a.m. UTC | #1
On Fri 20-11-20 14:43:17, Muchun Song wrote:
> When we allocate hugetlb page from buddy, we may need split huge pmd
> to pte. When we free the hugetlb page, we can merge pte to pmd. So
> we need to distinguish whether the previous pmd has been split. The
> page table is not allocated from slab. So we can reuse the PG_slab
> to indicate that the pmd has been split.

PageSlab is used outside of the slab allocator proper and that code
might get confused by this AFAICS.

From the above description it is not really clear why this is needed
though. Who is supposed to use this? Say you are allocating a fresh
hugetlb page. Once you have it, nobody else can be interfering. It is
exclusive to the caller. The later machinery can check the vmemmap page
tables to find out whether a split is needed or not. Or do I miss
something?

> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---
>  mm/hugetlb_vmemmap.c | 26 ++++++++++++++++++++++++--
>  1 file changed, 24 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index 06e2b8a7b7c8..e2ddc73ce25f 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -293,6 +293,25 @@ static void remap_huge_page_pmd_vmemmap(struct hstate *h, pmd_t *pmd,
>  	flush_tlb_kernel_range(start, end);
>  }
>  
> +static inline bool pmd_split(pmd_t *pmd)
> +{
> +	return PageSlab(pmd_page(*pmd));
> +}
> +
> +static inline void set_pmd_split(pmd_t *pmd)
> +{
> +	/*
> +	 * We should not use slab for page table allocation. So we can set
> +	 * PG_slab to indicate that the pmd has been split.
> +	 */
> +	__SetPageSlab(pmd_page(*pmd));
> +}
> +
> +static inline void clear_pmd_split(pmd_t *pmd)
> +{
> +	__ClearPageSlab(pmd_page(*pmd));
> +}
> +
>  static void __remap_huge_page_pte_vmemmap(struct page *reuse, pte_t *ptep,
>  					  unsigned long start,
>  					  unsigned long end,
> @@ -357,11 +376,12 @@ void alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
>  	ptl = vmemmap_pmd_lock(pmd);
>  	remap_huge_page_pmd_vmemmap(h, pmd, (unsigned long)head, &remap_pages,
>  				    __remap_huge_page_pte_vmemmap);
> -	if (!freed_vmemmap_hpage_dec(pmd_page(*pmd))) {
> +	if (!freed_vmemmap_hpage_dec(pmd_page(*pmd)) && pmd_split(pmd)) {
>  		/*
>  		 * Todo:
>  		 * Merge pte to huge pmd if it has ever been split.
>  		 */
> +		clear_pmd_split(pmd);
>  	}
>  	spin_unlock(ptl);
>  }
> @@ -443,8 +463,10 @@ void free_huge_page_vmemmap(struct hstate *h, struct page *head)
>  	BUG_ON(!pmd);
>  
>  	ptl = vmemmap_pmd_lock(pmd);
> -	if (vmemmap_pmd_huge(pmd))
> +	if (vmemmap_pmd_huge(pmd)) {
>  		split_vmemmap_huge_page(head, pmd);
> +		set_pmd_split(pmd);
> +	}
>  
>  	remap_huge_page_pmd_vmemmap(h, pmd, (unsigned long)head, &free_pages,
>  				    __free_huge_page_pte_vmemmap);
> -- 
> 2.11.0
>
Muchun Song Nov. 20, 2020, 9:30 a.m. UTC | #2
On Fri, Nov 20, 2020 at 4:16 PM Michal Hocko <mhocko@suse.com> wrote:
>
> On Fri 20-11-20 14:43:17, Muchun Song wrote:
> > When we allocate hugetlb page from buddy, we may need split huge pmd
> > to pte. When we free the hugetlb page, we can merge pte to pmd. So
> > we need to distinguish whether the previous pmd has been split. The
> > page table is not allocated from slab. So we can reuse the PG_slab
> > to indicate that the pmd has been split.
>
> PageSlab is used outside of the slab allocator proper and that code
> might get confused by this AFAICS.

I got your concerns. Maybe we can use PG_private instead of the
PG_slab.

>
> From the above description it is not really clear why this is needed
> though. Who is supposed to use this? Say you are allocating a fresh
> hugetlb page. Once you have it, nobody else can be interfering. It is
> exclusive to the caller. The later machinery can check the vmemmap page
> tables to find out whether a split is needed or not. Or do I miss
> something?

Yeah, the commit log needs some improvement. The vmemmap pages
can use huge page mapping or basepage(e.g. 4KB) mapping. These two
cases may exist at the same time. I want to know which page size the
vmemmap pages mapping to. If we have split a PMD page table then
we set the flag, when we free the HugeTLB and the flag is set, we want
to merge the PTE page table to PMD. If the flag is not set, we do nothing
about the PTE page table.

Thanks.

>
> > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > ---
> >  mm/hugetlb_vmemmap.c | 26 ++++++++++++++++++++++++--
> >  1 file changed, 24 insertions(+), 2 deletions(-)
> >
> > diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> > index 06e2b8a7b7c8..e2ddc73ce25f 100644
> > --- a/mm/hugetlb_vmemmap.c
> > +++ b/mm/hugetlb_vmemmap.c
> > @@ -293,6 +293,25 @@ static void remap_huge_page_pmd_vmemmap(struct hstate *h, pmd_t *pmd,
> >       flush_tlb_kernel_range(start, end);
> >  }
> >
> > +static inline bool pmd_split(pmd_t *pmd)
> > +{
> > +     return PageSlab(pmd_page(*pmd));
> > +}
> > +
> > +static inline void set_pmd_split(pmd_t *pmd)
> > +{
> > +     /*
> > +      * We should not use slab for page table allocation. So we can set
> > +      * PG_slab to indicate that the pmd has been split.
> > +      */
> > +     __SetPageSlab(pmd_page(*pmd));
> > +}
> > +
> > +static inline void clear_pmd_split(pmd_t *pmd)
> > +{
> > +     __ClearPageSlab(pmd_page(*pmd));
> > +}
> > +
> >  static void __remap_huge_page_pte_vmemmap(struct page *reuse, pte_t *ptep,
> >                                         unsigned long start,
> >                                         unsigned long end,
> > @@ -357,11 +376,12 @@ void alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
> >       ptl = vmemmap_pmd_lock(pmd);
> >       remap_huge_page_pmd_vmemmap(h, pmd, (unsigned long)head, &remap_pages,
> >                                   __remap_huge_page_pte_vmemmap);
> > -     if (!freed_vmemmap_hpage_dec(pmd_page(*pmd))) {
> > +     if (!freed_vmemmap_hpage_dec(pmd_page(*pmd)) && pmd_split(pmd)) {
> >               /*
> >                * Todo:
> >                * Merge pte to huge pmd if it has ever been split.
> >                */
> > +             clear_pmd_split(pmd);
> >       }
> >       spin_unlock(ptl);
> >  }
> > @@ -443,8 +463,10 @@ void free_huge_page_vmemmap(struct hstate *h, struct page *head)
> >       BUG_ON(!pmd);
> >
> >       ptl = vmemmap_pmd_lock(pmd);
> > -     if (vmemmap_pmd_huge(pmd))
> > +     if (vmemmap_pmd_huge(pmd)) {
> >               split_vmemmap_huge_page(head, pmd);
> > +             set_pmd_split(pmd);
> > +     }
> >
> >       remap_huge_page_pmd_vmemmap(h, pmd, (unsigned long)head, &free_pages,
> >                                   __free_huge_page_pte_vmemmap);
> > --
> > 2.11.0
> >
>
> --
> Michal Hocko
> SUSE Labs
Michal Hocko Nov. 23, 2020, 7:48 a.m. UTC | #3
On Fri 20-11-20 17:30:27, Muchun Song wrote:
> On Fri, Nov 20, 2020 at 4:16 PM Michal Hocko <mhocko@suse.com> wrote:
> >
> > On Fri 20-11-20 14:43:17, Muchun Song wrote:
> > > When we allocate hugetlb page from buddy, we may need split huge pmd
> > > to pte. When we free the hugetlb page, we can merge pte to pmd. So
> > > we need to distinguish whether the previous pmd has been split. The
> > > page table is not allocated from slab. So we can reuse the PG_slab
> > > to indicate that the pmd has been split.
> >
> > PageSlab is used outside of the slab allocator proper and that code
> > might get confused by this AFAICS.
> 
> I got your concerns. Maybe we can use PG_private instead of the
> PG_slab.

Reusing a page flag arbitrarily is not that easy. Hugetlb pages have a
lot of spare room in struct page so I would rather use something else.
Muchun Song Nov. 23, 2020, 8:01 a.m. UTC | #4
On Mon, Nov 23, 2020 at 3:48 PM Michal Hocko <mhocko@suse.com> wrote:
>
> On Fri 20-11-20 17:30:27, Muchun Song wrote:
> > On Fri, Nov 20, 2020 at 4:16 PM Michal Hocko <mhocko@suse.com> wrote:
> > >
> > > On Fri 20-11-20 14:43:17, Muchun Song wrote:
> > > > When we allocate hugetlb page from buddy, we may need split huge pmd
> > > > to pte. When we free the hugetlb page, we can merge pte to pmd. So
> > > > we need to distinguish whether the previous pmd has been split. The
> > > > page table is not allocated from slab. So we can reuse the PG_slab
> > > > to indicate that the pmd has been split.
> > >
> > > PageSlab is used outside of the slab allocator proper and that code
> > > might get confused by this AFAICS.
> >
> > I got your concerns. Maybe we can use PG_private instead of the
> > PG_slab.
>
> Reusing a page flag arbitrarily is not that easy. Hugetlb pages have a
> lot of spare room in struct page so I would rather use something else.

This page is the PMD page table of vmemmap, not the vmemmap page
of HugeTLB. And the page table does not use PG_private. Maybe it is
enough. Thanks.

> --
> Michal Hocko
> SUSE Labs
diff mbox series

Patch

diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 06e2b8a7b7c8..e2ddc73ce25f 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -293,6 +293,25 @@  static void remap_huge_page_pmd_vmemmap(struct hstate *h, pmd_t *pmd,
 	flush_tlb_kernel_range(start, end);
 }
 
+static inline bool pmd_split(pmd_t *pmd)
+{
+	return PageSlab(pmd_page(*pmd));
+}
+
+static inline void set_pmd_split(pmd_t *pmd)
+{
+	/*
+	 * We should not use slab for page table allocation. So we can set
+	 * PG_slab to indicate that the pmd has been split.
+	 */
+	__SetPageSlab(pmd_page(*pmd));
+}
+
+static inline void clear_pmd_split(pmd_t *pmd)
+{
+	__ClearPageSlab(pmd_page(*pmd));
+}
+
 static void __remap_huge_page_pte_vmemmap(struct page *reuse, pte_t *ptep,
 					  unsigned long start,
 					  unsigned long end,
@@ -357,11 +376,12 @@  void alloc_huge_page_vmemmap(struct hstate *h, struct page *head)
 	ptl = vmemmap_pmd_lock(pmd);
 	remap_huge_page_pmd_vmemmap(h, pmd, (unsigned long)head, &remap_pages,
 				    __remap_huge_page_pte_vmemmap);
-	if (!freed_vmemmap_hpage_dec(pmd_page(*pmd))) {
+	if (!freed_vmemmap_hpage_dec(pmd_page(*pmd)) && pmd_split(pmd)) {
 		/*
 		 * Todo:
 		 * Merge pte to huge pmd if it has ever been split.
 		 */
+		clear_pmd_split(pmd);
 	}
 	spin_unlock(ptl);
 }
@@ -443,8 +463,10 @@  void free_huge_page_vmemmap(struct hstate *h, struct page *head)
 	BUG_ON(!pmd);
 
 	ptl = vmemmap_pmd_lock(pmd);
-	if (vmemmap_pmd_huge(pmd))
+	if (vmemmap_pmd_huge(pmd)) {
 		split_vmemmap_huge_page(head, pmd);
+		set_pmd_split(pmd);
+	}
 
 	remap_huge_page_pmd_vmemmap(h, pmd, (unsigned long)head, &free_pages,
 				    __free_huge_page_pte_vmemmap);