Message ID | f7b2a6f6f5dfecbcac07fa3e187f10860c3a39ee.1655887440.git.baolin.wang@linux.alibaba.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add PUD and kernel PTE level pagetable account | expand |
On Wed, Jun 22, 2022 at 04:58:53PM +0800, Baolin Wang wrote: > Now the PUD level ptes are always protected by mm->page_table_lock, > which means no split pagetable lock needed. So the generic PUD level > pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(), > that means we will miss to account PUD level pagetable pages. > > Adding pagetable account by calling pgtable_set_and_inc() or > pgtable_clear_and_dec() when allocating or freeing PUD level pagetable > pages to help to get an accurate pagetable accounting. > > Moreover this patch will also mark the PUD level pagetable with PG_table > flag, which will help to do sanity validation in unpoison_memory() and > get more accurate pagetable accounting by /proc/kpageflags interface. > > Meanwhile converting the architectures with using generic PUD pagatable > allocation to add corresponding pgtable_set_and_inc() or pgtable_clear_and_dec() > to account PUD level pagetable. > > Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> > --- > arch/arm64/include/asm/tlb.h | 5 ++++- > arch/loongarch/include/asm/pgalloc.h | 11 ++++++++--- > arch/mips/include/asm/pgalloc.h | 11 ++++++++--- > arch/s390/include/asm/tlb.h | 1 + > arch/x86/mm/pgtable.c | 5 ++++- > include/asm-generic/pgalloc.h | 12 ++++++++++-- > 6 files changed, 35 insertions(+), 10 deletions(-) > > diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h > index c995d1f..47e0623 100644 > --- a/arch/arm64/include/asm/tlb.h > +++ b/arch/arm64/include/asm/tlb.h > @@ -94,7 +94,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, > static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, > unsigned long addr) > { > - tlb_remove_table(tlb, virt_to_page(pudp)); > + struct page *page = virt_to_page(pudp); > + > + pgtable_clear_and_dec(page); > + tlb_remove_table(tlb, page); > } > #endif > > diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h > index b0a57b2..50a896f 100644 > --- a/arch/loongarch/include/asm/pgalloc.h > +++ b/arch/loongarch/include/asm/pgalloc.h > @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) > { > pud_t *pud; > + struct page *pg; struct page *page; looks better IMO. > + > + pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER); > + if (!pg) > + return NULL; > > - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); > - if (pud) > - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > + pgtable_set_and_inc(pg); > + pud = (pud_t *)page_address(pg); I don't think __get_free_pages() should be replaced with alloc_pages() here, just call pgtable_set_and_inc() with virt_to_page(pud). The same applies for the cases below. > + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > return pud; > } > > diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h > index 867e9c3..0950f5f 100644 > --- a/arch/mips/include/asm/pgalloc.h > +++ b/arch/mips/include/asm/pgalloc.h > @@ -89,11 +89,16 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) > > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) > { > + struct page *pg; > pud_t *pud; > > - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); > - if (pud) > - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > + pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER); > + if (!pg) > + return NULL; > + > + pgtable_set_and_inc(pg); > + pud = (pud_t *)page_address(pg); > + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > return pud; > } > > diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h > index fe6407f..45f9541 100644 > --- a/arch/s390/include/asm/tlb.h > +++ b/arch/s390/include/asm/tlb.h > @@ -125,6 +125,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, > { > if (mm_pud_folded(tlb->mm)) > return; > + pgtable_clear_and_dec(virt_to_page(pud)); > tlb->mm->context.flush_mm = 1; > tlb->freed_tables = 1; > tlb->cleared_p4ds = 1; > diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c > index a932d77..a8ab3f9 100644 > --- a/arch/x86/mm/pgtable.c > +++ b/arch/x86/mm/pgtable.c > @@ -76,8 +76,11 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) > #if CONFIG_PGTABLE_LEVELS > 3 > void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) > { > + struct page *page = virt_to_page(pud); > + > + pgtable_clear_and_dec(page); > paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); > - paravirt_tlb_remove_table(tlb, virt_to_page(pud)); > + paravirt_tlb_remove_table(tlb, page); > } > > #if CONFIG_PGTABLE_LEVELS > 4 > diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h > index 977bea1..328a714 100644 > --- a/include/asm-generic/pgalloc.h > +++ b/include/asm-generic/pgalloc.h > @@ -149,11 +149,16 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) > > static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) > { > + struct page *page; > gfp_t gfp = GFP_PGTABLE_USER; > > if (mm == &init_mm) > gfp = GFP_PGTABLE_KERNEL; > - return (pud_t *)get_zeroed_page(gfp); > + page = alloc_pages((gfp | __GFP_ZERO) & ~__GFP_HIGHMEM, 0); > + if (!page) > + return NULL; > + pgtable_set_and_inc(page); > + return (pud_t *)page_address(page); > } > > #ifndef __HAVE_ARCH_PUD_ALLOC_ONE > @@ -174,8 +179,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) > > static inline void __pud_free(struct mm_struct *mm, pud_t *pud) > { > + struct page *page = virt_to_page(pud); > + > BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); > - free_page((unsigned long)pud); > + pgtable_clear_and_dec(page); > + __free_page(page); > } > > #ifndef __HAVE_ARCH_PUD_FREE > -- > 1.8.3.1 >
On 6/22/2022 10:38 PM, Mike Rapoport wrote: > On Wed, Jun 22, 2022 at 04:58:53PM +0800, Baolin Wang wrote: >> Now the PUD level ptes are always protected by mm->page_table_lock, >> which means no split pagetable lock needed. So the generic PUD level >> pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(), >> that means we will miss to account PUD level pagetable pages. >> >> Adding pagetable account by calling pgtable_set_and_inc() or >> pgtable_clear_and_dec() when allocating or freeing PUD level pagetable >> pages to help to get an accurate pagetable accounting. >> >> Moreover this patch will also mark the PUD level pagetable with PG_table >> flag, which will help to do sanity validation in unpoison_memory() and >> get more accurate pagetable accounting by /proc/kpageflags interface. >> >> Meanwhile converting the architectures with using generic PUD pagatable >> allocation to add corresponding pgtable_set_and_inc() or pgtable_clear_and_dec() >> to account PUD level pagetable. >> >> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> >> --- >> arch/arm64/include/asm/tlb.h | 5 ++++- >> arch/loongarch/include/asm/pgalloc.h | 11 ++++++++--- >> arch/mips/include/asm/pgalloc.h | 11 ++++++++--- >> arch/s390/include/asm/tlb.h | 1 + >> arch/x86/mm/pgtable.c | 5 ++++- >> include/asm-generic/pgalloc.h | 12 ++++++++++-- >> 6 files changed, 35 insertions(+), 10 deletions(-) >> >> diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h >> index c995d1f..47e0623 100644 >> --- a/arch/arm64/include/asm/tlb.h >> +++ b/arch/arm64/include/asm/tlb.h >> @@ -94,7 +94,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, >> static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, >> unsigned long addr) >> { >> - tlb_remove_table(tlb, virt_to_page(pudp)); >> + struct page *page = virt_to_page(pudp); >> + >> + pgtable_clear_and_dec(page); >> + tlb_remove_table(tlb, page); >> } >> #endif >> >> diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h >> index b0a57b2..50a896f 100644 >> --- a/arch/loongarch/include/asm/pgalloc.h >> +++ b/arch/loongarch/include/asm/pgalloc.h >> @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) >> static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) >> { >> pud_t *pud; >> + struct page *pg; > > struct page *page; > > looks better IMO. Sure. > >> + >> + pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER); >> + if (!pg) >> + return NULL; >> >> - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); >> - if (pud) >> - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); >> + pgtable_set_and_inc(pg); >> + pud = (pud_t *)page_address(pg); > > I don't think __get_free_pages() should be replaced with alloc_pages() > here, just call pgtable_set_and_inc() with virt_to_page(pud). > > The same applies for the cases below. Sure. Will do in next version. Thanks.
On Wed, Jun 22, 2022 at 09:38:30AM -0500, Mike Rapoport wrote: > On Wed, Jun 22, 2022 at 04:58:53PM +0800, Baolin Wang wrote: > > +++ b/arch/loongarch/include/asm/pgalloc.h > > @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) > > static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) > > { > > pud_t *pud; > > + struct page *pg; > > struct page *page; > > looks better IMO. > > > + > > + pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER); GFP_KERNEL does not include __GFP_HIGHMEM, so you can just use GFP_KERNEL here. > > + if (!pg) > > + return NULL; > > > > - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); > > - if (pud) > > - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); > > + pgtable_set_and_inc(pg); > > + pud = (pud_t *)page_address(pg); > > I don't think __get_free_pages() should be replaced with alloc_pages() > here, just call pgtable_set_and_inc() with virt_to_page(pud). I don't understand why you want that. Take a look at the implementation of __get_free_pages(). Converting back to a struct page after calling that seems like a real waste of time to me.
On 6/23/2022 9:28 PM, Matthew Wilcox wrote: > On Wed, Jun 22, 2022 at 09:38:30AM -0500, Mike Rapoport wrote: >> On Wed, Jun 22, 2022 at 04:58:53PM +0800, Baolin Wang wrote: >>> +++ b/arch/loongarch/include/asm/pgalloc.h >>> @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) >>> static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) >>> { >>> pud_t *pud; >>> + struct page *pg; >> >> struct page *page; >> >> looks better IMO. >> >>> + >>> + pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER); > > GFP_KERNEL does not include __GFP_HIGHMEM, so you can just use > GFP_KERNEL here. Yes. Thanks. > >>> + if (!pg) >>> + return NULL; >>> >>> - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); >>> - if (pud) >>> - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); >>> + pgtable_set_and_inc(pg); >>> + pud = (pud_t *)page_address(pg); >> >> I don't think __get_free_pages() should be replaced with alloc_pages() >> here, just call pgtable_set_and_inc() with virt_to_page(pud). > > I don't understand why you want that. Take a look at the implementation > of __get_free_pages(). Converting back to a struct page after calling > that seems like a real waste of time to me. IMO I have no strong preference. The code can be simpler with using __get_free_pages(), however like Matthew said it will add more conversion.
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index c995d1f..47e0623 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -94,7 +94,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { - tlb_remove_table(tlb, virt_to_page(pudp)); + struct page *page = virt_to_page(pudp); + + pgtable_clear_and_dec(page); + tlb_remove_table(tlb, page); } #endif diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index b0a57b2..50a896f 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { pud_t *pud; + struct page *pg; + + pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER); + if (!pg) + return NULL; - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); - if (pud) - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); + pgtable_set_and_inc(pg); + pud = (pud_t *)page_address(pg); + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); return pud; } diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 867e9c3..0950f5f 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -89,11 +89,16 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { + struct page *pg; pud_t *pud; - pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER); - if (pud) - pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); + pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER); + if (!pg) + return NULL; + + pgtable_set_and_inc(pg); + pud = (pud_t *)page_address(pg); + pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table); return pud; } diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index fe6407f..45f9541 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -125,6 +125,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, { if (mm_pud_folded(tlb->mm)) return; + pgtable_clear_and_dec(virt_to_page(pud)); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_p4ds = 1; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index a932d77..a8ab3f9 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -76,8 +76,11 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) #if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { + struct page *page = virt_to_page(pud); + + pgtable_clear_and_dec(page); paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); - paravirt_tlb_remove_table(tlb, virt_to_page(pud)); + paravirt_tlb_remove_table(tlb, page); } #if CONFIG_PGTABLE_LEVELS > 4 diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 977bea1..328a714 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -149,11 +149,16 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) { + struct page *page; gfp_t gfp = GFP_PGTABLE_USER; if (mm == &init_mm) gfp = GFP_PGTABLE_KERNEL; - return (pud_t *)get_zeroed_page(gfp); + page = alloc_pages((gfp | __GFP_ZERO) & ~__GFP_HIGHMEM, 0); + if (!page) + return NULL; + pgtable_set_and_inc(page); + return (pud_t *)page_address(page); } #ifndef __HAVE_ARCH_PUD_ALLOC_ONE @@ -174,8 +179,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) static inline void __pud_free(struct mm_struct *mm, pud_t *pud) { + struct page *page = virt_to_page(pud); + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); - free_page((unsigned long)pud); + pgtable_clear_and_dec(page); + __free_page(page); } #ifndef __HAVE_ARCH_PUD_FREE
Now the PUD level ptes are always protected by mm->page_table_lock, which means no split pagetable lock needed. So the generic PUD level pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(), that means we will miss to account PUD level pagetable pages. Adding pagetable account by calling pgtable_set_and_inc() or pgtable_clear_and_dec() when allocating or freeing PUD level pagetable pages to help to get an accurate pagetable accounting. Moreover this patch will also mark the PUD level pagetable with PG_table flag, which will help to do sanity validation in unpoison_memory() and get more accurate pagetable accounting by /proc/kpageflags interface. Meanwhile converting the architectures with using generic PUD pagatable allocation to add corresponding pgtable_set_and_inc() or pgtable_clear_and_dec() to account PUD level pagetable. Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com> --- arch/arm64/include/asm/tlb.h | 5 ++++- arch/loongarch/include/asm/pgalloc.h | 11 ++++++++--- arch/mips/include/asm/pgalloc.h | 11 ++++++++--- arch/s390/include/asm/tlb.h | 1 + arch/x86/mm/pgtable.c | 5 ++++- include/asm-generic/pgalloc.h | 12 ++++++++++-- 6 files changed, 35 insertions(+), 10 deletions(-)