[RFC,v2,2/3] mm: Add PUD level pagetable account

Message ID f7b2a6f6f5dfecbcac07fa3e187f10860c3a39ee.1655887440.git.baolin.wang@linux.alibaba.com (mailing list archive)
State New
Series Add PUD and kernel PTE level pagetable account

Commit Message

Baolin Wang June 22, 2022, 8:58 a.m. UTC
Now PUD level page table entries are always protected by
mm->page_table_lock, which means no split pagetable lock is needed at
this level. As a result, the generic PUD level pagetable page
allocation does not call pgtable_pte_page_ctor/dtor(), so PUD level
pagetable pages are never accounted.

Add the accounting by calling pgtable_set_and_inc() or
pgtable_clear_and_dec() when allocating or freeing PUD level pagetable
pages, so that the pagetable statistics become accurate.

Moreover, this patch marks PUD level pagetable pages with the PG_table
flag, which helps the sanity check in unpoison_memory() and makes the
pagetable accounting reported through the /proc/kpageflags interface
more accurate.

Meanwhile, convert the architectures as well as the generic PUD
pagetable allocation to call the corresponding pgtable_set_and_inc() or
pgtable_clear_and_dec() to account PUD level pagetable pages.

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 arch/arm64/include/asm/tlb.h         |  5 ++++-
 arch/loongarch/include/asm/pgalloc.h | 11 ++++++++---
 arch/mips/include/asm/pgalloc.h      | 11 ++++++++---
 arch/s390/include/asm/tlb.h          |  1 +
 arch/x86/mm/pgtable.c                |  5 ++++-
 include/asm-generic/pgalloc.h        | 12 ++++++++++--
 6 files changed, 35 insertions(+), 10 deletions(-)

Comments

Mike Rapoport June 22, 2022, 2:38 p.m. UTC | #1
On Wed, Jun 22, 2022 at 04:58:53PM +0800, Baolin Wang wrote:
> Now the PUD level ptes are always protected by mm->page_table_lock,
> which means no split pagetable lock needed. So the generic PUD level
> pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(),
> that means we will miss to account PUD level pagetable pages.
> 
> Adding pagetable account by calling pgtable_set_and_inc() or
> pgtable_clear_and_dec() when allocating or freeing PUD level pagetable
> pages to help to get an accurate pagetable accounting.
> 
> Moreover this patch will also mark the PUD level pagetable with PG_table
> flag, which will help to do sanity validation in unpoison_memory() and
> get more accurate pagetable accounting by /proc/kpageflags interface.
> 
> Meanwhile converting the architectures with using generic PUD pagatable
> allocation to add corresponding pgtable_set_and_inc() or pgtable_clear_and_dec()
> to account PUD level pagetable.
> 
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> ---
>  arch/arm64/include/asm/tlb.h         |  5 ++++-
>  arch/loongarch/include/asm/pgalloc.h | 11 ++++++++---
>  arch/mips/include/asm/pgalloc.h      | 11 ++++++++---
>  arch/s390/include/asm/tlb.h          |  1 +
>  arch/x86/mm/pgtable.c                |  5 ++++-
>  include/asm-generic/pgalloc.h        | 12 ++++++++++--
>  6 files changed, 35 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
> index c995d1f..47e0623 100644
> --- a/arch/arm64/include/asm/tlb.h
> +++ b/arch/arm64/include/asm/tlb.h
> @@ -94,7 +94,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
>  static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
>  				  unsigned long addr)
>  {
> -	tlb_remove_table(tlb, virt_to_page(pudp));
> +	struct page *page = virt_to_page(pudp);
> +
> +	pgtable_clear_and_dec(page);
> +	tlb_remove_table(tlb, page);
>  }
>  #endif
>  
> diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
> index b0a57b2..50a896f 100644
> --- a/arch/loongarch/include/asm/pgalloc.h
> +++ b/arch/loongarch/include/asm/pgalloc.h
> @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
>  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
>  {
>  	pud_t *pud;
> +	struct page *pg;

	struct page *page;

looks better IMO.

> +
> +	pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER);
> +	if (!pg)
> +		return NULL;
>  
> -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
> -	if (pud)
> -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
> +	pgtable_set_and_inc(pg);
> +	pud = (pud_t *)page_address(pg);

I don't think __get_free_pages() should be replaced with alloc_pages()
here, just call pgtable_set_and_inc() with virt_to_page(pud). 

The same applies for the cases below.
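
For illustration, the shape being suggested here would keep the
__get_free_pages() call and only add the accounting (a sketch of the
suggestion, not code from the series):

static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
	pud_t *pud;

	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
	if (!pud)
		return NULL;

	/* Account the PUD table page without open-coding alloc_pages(). */
	pgtable_set_and_inc(virt_to_page(pud));
	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
	return pud;
}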

> +	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
>  	return pud;
>  }
>  
> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
> index 867e9c3..0950f5f 100644
> --- a/arch/mips/include/asm/pgalloc.h
> +++ b/arch/mips/include/asm/pgalloc.h
> @@ -89,11 +89,16 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
>  
>  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
>  {
> +	struct page *pg;
>  	pud_t *pud;
>  
> -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
> -	if (pud)
> -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
> +	pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER);
> +	if (!pg)
> +		return NULL;
> +
> +	pgtable_set_and_inc(pg);
> +	pud = (pud_t *)page_address(pg);
> +	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
>  	return pud;
>  }
>  
> diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
> index fe6407f..45f9541 100644
> --- a/arch/s390/include/asm/tlb.h
> +++ b/arch/s390/include/asm/tlb.h
> @@ -125,6 +125,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
>  {
>  	if (mm_pud_folded(tlb->mm))
>  		return;
> +	pgtable_clear_and_dec(virt_to_page(pud));
>  	tlb->mm->context.flush_mm = 1;
>  	tlb->freed_tables = 1;
>  	tlb->cleared_p4ds = 1;
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index a932d77..a8ab3f9 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -76,8 +76,11 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
>  #if CONFIG_PGTABLE_LEVELS > 3
>  void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
>  {
> +	struct page *page = virt_to_page(pud);
> +
> +	pgtable_clear_and_dec(page);
>  	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
> -	paravirt_tlb_remove_table(tlb, virt_to_page(pud));
> +	paravirt_tlb_remove_table(tlb, page);
>  }
>  
>  #if CONFIG_PGTABLE_LEVELS > 4
> diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
> index 977bea1..328a714 100644
> --- a/include/asm-generic/pgalloc.h
> +++ b/include/asm-generic/pgalloc.h
> @@ -149,11 +149,16 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>  
>  static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
>  {
> +	struct page *page;
>  	gfp_t gfp = GFP_PGTABLE_USER;
>  
>  	if (mm == &init_mm)
>  		gfp = GFP_PGTABLE_KERNEL;
> -	return (pud_t *)get_zeroed_page(gfp);
> +	page = alloc_pages((gfp | __GFP_ZERO) & ~__GFP_HIGHMEM, 0);
> +	if (!page)
> +		return NULL;
> +	pgtable_set_and_inc(page);
> +	return (pud_t *)page_address(page);
>  }
>  
>  #ifndef __HAVE_ARCH_PUD_ALLOC_ONE
> @@ -174,8 +179,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
>  
>  static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
>  {
> +	struct page *page = virt_to_page(pud);
> +
>  	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
> -	free_page((unsigned long)pud);
> +	pgtable_clear_and_dec(page);
> +	__free_page(page);
>  }
>  
>  #ifndef __HAVE_ARCH_PUD_FREE
> -- 
> 1.8.3.1
>
Baolin Wang June 23, 2022, 3:32 a.m. UTC | #2
On 6/22/2022 10:38 PM, Mike Rapoport wrote:
> On Wed, Jun 22, 2022 at 04:58:53PM +0800, Baolin Wang wrote:
>> Now the PUD level ptes are always protected by mm->page_table_lock,
>> which means no split pagetable lock needed. So the generic PUD level
>> pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(),
>> that means we will miss to account PUD level pagetable pages.
>>
>> Adding pagetable account by calling pgtable_set_and_inc() or
>> pgtable_clear_and_dec() when allocating or freeing PUD level pagetable
>> pages to help to get an accurate pagetable accounting.
>>
>> Moreover this patch will also mark the PUD level pagetable with PG_table
>> flag, which will help to do sanity validation in unpoison_memory() and
>> get more accurate pagetable accounting by /proc/kpageflags interface.
>>
>> Meanwhile converting the architectures with using generic PUD pagatable
>> allocation to add corresponding pgtable_set_and_inc() or pgtable_clear_and_dec()
>> to account PUD level pagetable.
>>
>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>> ---
>>   arch/arm64/include/asm/tlb.h         |  5 ++++-
>>   arch/loongarch/include/asm/pgalloc.h | 11 ++++++++---
>>   arch/mips/include/asm/pgalloc.h      | 11 ++++++++---
>>   arch/s390/include/asm/tlb.h          |  1 +
>>   arch/x86/mm/pgtable.c                |  5 ++++-
>>   include/asm-generic/pgalloc.h        | 12 ++++++++++--
>>   6 files changed, 35 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
>> index c995d1f..47e0623 100644
>> --- a/arch/arm64/include/asm/tlb.h
>> +++ b/arch/arm64/include/asm/tlb.h
>> @@ -94,7 +94,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
>>   static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
>>   				  unsigned long addr)
>>   {
>> -	tlb_remove_table(tlb, virt_to_page(pudp));
>> +	struct page *page = virt_to_page(pudp);
>> +
>> +	pgtable_clear_and_dec(page);
>> +	tlb_remove_table(tlb, page);
>>   }
>>   #endif
>>   
>> diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
>> index b0a57b2..50a896f 100644
>> --- a/arch/loongarch/include/asm/pgalloc.h
>> +++ b/arch/loongarch/include/asm/pgalloc.h
>> @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
>>   static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
>>   {
>>   	pud_t *pud;
>> +	struct page *pg;
> 
> 	struct page *page;
> 
> looks better IMO.

Sure.

> 
>> +
>> +	pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER);
>> +	if (!pg)
>> +		return NULL;
>>   
>> -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
>> -	if (pud)
>> -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
>> +	pgtable_set_and_inc(pg);
>> +	pud = (pud_t *)page_address(pg);
> 
> I don't think __get_free_pages() should be replaced with alloc_pages()
> here, just call pgtable_set_and_inc() with virt_to_page(pud).
> 
> The same applies for the cases below.

Sure. Will do in next version. Thanks.
Matthew Wilcox June 23, 2022, 1:28 p.m. UTC | #3
On Wed, Jun 22, 2022 at 09:38:30AM -0500, Mike Rapoport wrote:
> On Wed, Jun 22, 2022 at 04:58:53PM +0800, Baolin Wang wrote:
> > +++ b/arch/loongarch/include/asm/pgalloc.h
> > @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
> >  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
> >  {
> >  	pud_t *pud;
> > +	struct page *pg;
> 
> 	struct page *page;
> 
> looks better IMO.
> 
> > +
> > +	pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER);

GFP_KERNEL does not include __GFP_HIGHMEM, so you can just use
GFP_KERNEL here.

> > +	if (!pg)
> > +		return NULL;
> >  
> > -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
> > -	if (pud)
> > -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
> > +	pgtable_set_and_inc(pg);
> > +	pud = (pud_t *)page_address(pg);
> 
> I don't think __get_free_pages() should be replaced with alloc_pages()
> here, just call pgtable_set_and_inc() with virt_to_page(pud). 

I don't understand why you want that.  Take a look at the implementation
of __get_free_pages().  Converting back to a struct page after calling
that seems like a real waste of time to me.
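
For context, __get_free_pages() is essentially the following wrapper (a
rough sketch of the mm/page_alloc.c code), so the alloc_pages() +
page_address() form just open-codes it, whereas keeping
__get_free_pages() would mean an extra virt_to_page() only to reach the
accounting helper:

unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *page;

	/* Returns a kernel virtual address, so highmem pages are excluded. */
	page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order);
	if (!page)
		return 0;
	return (unsigned long) page_address(page);
}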
Baolin Wang June 24, 2022, 8:52 a.m. UTC | #4
On 6/23/2022 9:28 PM, Matthew Wilcox wrote:
> On Wed, Jun 22, 2022 at 09:38:30AM -0500, Mike Rapoport wrote:
>> On Wed, Jun 22, 2022 at 04:58:53PM +0800, Baolin Wang wrote:
>>> +++ b/arch/loongarch/include/asm/pgalloc.h
>>> @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
>>>   static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
>>>   {
>>>   	pud_t *pud;
>>> +	struct page *pg;
>>
>> 	struct page *page;
>>
>> looks better IMO.
>>
>>> +
>>> +	pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER);
> 
> GFP_KERNEL does not include __GFP_HIGHMEM, so you can just use
> GFP_KERNEL here.

Yes. Thanks.

> 
>>> +	if (!pg)
>>> +		return NULL;
>>>   
>>> -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
>>> -	if (pud)
>>> -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
>>> +	pgtable_set_and_inc(pg);
>>> +	pud = (pud_t *)page_address(pg);
>>
>> I don't think __get_free_pages() should be replaced with alloc_pages()
>> here, just call pgtable_set_and_inc() with virt_to_page(pud).
> 
> I don't understand why you want that.  Take a look at the implementation
> of __get_free_pages().  Converting back to a struct page after calling
> that seems like a real waste of time to me.

IMO I have no strong preference. The code can be simpler with
__get_free_pages(), however as Matthew said it would add an extra
conversion back to struct page.
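
If the alloc_pages() form is kept, the loongarch hunk with both review
comments applied might end up roughly like this (a sketch only, not the
actual v3):

static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
	struct page *page;
	pud_t *pud;

	/* GFP_KERNEL never includes __GFP_HIGHMEM, so no masking is needed. */
	page = alloc_pages(GFP_KERNEL, PUD_ORDER);
	if (!page)
		return NULL;

	pgtable_set_and_inc(page);
	pud = (pud_t *)page_address(page);
	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
	return pud;
}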

Patch

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index c995d1f..47e0623 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -94,7 +94,10 @@  static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
 				  unsigned long addr)
 {
-	tlb_remove_table(tlb, virt_to_page(pudp));
+	struct page *page = virt_to_page(pudp);
+
+	pgtable_clear_and_dec(page);
+	tlb_remove_table(tlb, page);
 }
 #endif
 
diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index b0a57b2..50a896f 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -89,10 +89,15 @@  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	pud_t *pud;
+	struct page *pg;
+
+	pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER);
+	if (!pg)
+		return NULL;
 
-	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
-	if (pud)
-		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+	pgtable_set_and_inc(pg);
+	pud = (pud_t *)page_address(pg);
+	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
 	return pud;
 }
 
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 867e9c3..0950f5f 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -89,11 +89,16 @@  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
+	struct page *pg;
 	pud_t *pud;
 
-	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
-	if (pud)
-		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+	pg = alloc_pages(GFP_KERNEL & ~__GFP_HIGHMEM, PUD_ORDER);
+	if (!pg)
+		return NULL;
+
+	pgtable_set_and_inc(pg);
+	pud = (pud_t *)page_address(pg);
+	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
 	return pud;
 }
 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index fe6407f..45f9541 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -125,6 +125,7 @@  static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 {
 	if (mm_pud_folded(tlb->mm))
 		return;
+	pgtable_clear_and_dec(virt_to_page(pud));
 	tlb->mm->context.flush_mm = 1;
 	tlb->freed_tables = 1;
 	tlb->cleared_p4ds = 1;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index a932d77..a8ab3f9 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -76,8 +76,11 @@  void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 #if CONFIG_PGTABLE_LEVELS > 3
 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
+	struct page *page = virt_to_page(pud);
+
+	pgtable_clear_and_dec(page);
 	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
-	paravirt_tlb_remove_table(tlb, virt_to_page(pud));
+	paravirt_tlb_remove_table(tlb, page);
 }
 
 #if CONFIG_PGTABLE_LEVELS > 4
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 977bea1..328a714 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -149,11 +149,16 @@  static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
+	struct page *page;
 	gfp_t gfp = GFP_PGTABLE_USER;
 
 	if (mm == &init_mm)
 		gfp = GFP_PGTABLE_KERNEL;
-	return (pud_t *)get_zeroed_page(gfp);
+	page = alloc_pages((gfp | __GFP_ZERO) & ~__GFP_HIGHMEM, 0);
+	if (!page)
+		return NULL;
+	pgtable_set_and_inc(page);
+	return (pud_t *)page_address(page);
 }
 
 #ifndef __HAVE_ARCH_PUD_ALLOC_ONE
@@ -174,8 +179,11 @@  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 
 static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
 {
+	struct page *page = virt_to_page(pud);
+
 	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-	free_page((unsigned long)pud);
+	pgtable_clear_and_dec(page);
+	__free_page(page);
 }
 
 #ifndef __HAVE_ARCH_PUD_FREE