diff mbox series

[RFC,v3,2/3] mm: Add PUD level pagetable account

Message ID 6a6a768634b9ce8537154264e35e6a66a79b6ca8.1656586863.git.baolin.wang@linux.alibaba.com (mailing list archive)
State New
Headers show
Series Add PUD and kernel PTE level pagetable account | expand

Commit Message

Baolin Wang June 30, 2022, 11:11 a.m. UTC
Now the PUD level ptes are always protected by mm->page_table_lock,
which means no split pagetable lock needed. So the generic PUD level
pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(),
that means we will miss to account PUD level pagetable pages.

Adding pagetable account by calling pgtable_page_inc() or
pgtable_page_dec() when allocating or freeing PUD level pagetable
pages to help to get an accurate pagetable accounting.

Moreover this patch will also mark the PUD level pagetable with PG_table
flag, which will help to do sanity validation in unpoison_memory() and
get more accurate pagetable accounting by /proc/kpageflags interface.

Meanwhile converting the architectures with using generic PUD pagatable
allocation to add corresponding pgtable_page_inc() or pgtable_page_dec()
to account PUD level pagetable.

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 arch/arm64/include/asm/tlb.h         |  5 ++++-
 arch/loongarch/include/asm/pgalloc.h | 11 ++++++++---
 arch/mips/include/asm/pgalloc.h      | 11 ++++++++---
 arch/s390/include/asm/tlb.h          |  1 +
 arch/x86/mm/pgtable.c                |  5 ++++-
 include/asm-generic/pgalloc.h        | 12 ++++++++++--
 6 files changed, 35 insertions(+), 10 deletions(-)

Comments

Mike Rapoport June 30, 2022, 2:17 p.m. UTC | #1
On Thu, Jun 30, 2022 at 07:11:15PM +0800, Baolin Wang wrote:
> Now the PUD level ptes are always protected by mm->page_table_lock,
> which means no split pagetable lock needed. So the generic PUD level
> pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(),
> that means we will miss to account PUD level pagetable pages.
> 
> Adding pagetable account by calling pgtable_page_inc() or
> pgtable_page_dec() when allocating or freeing PUD level pagetable
> pages to help to get an accurate pagetable accounting.
> 
> Moreover this patch will also mark the PUD level pagetable with PG_table
> flag, which will help to do sanity validation in unpoison_memory() and
> get more accurate pagetable accounting by /proc/kpageflags interface.
> 
> Meanwhile converting the architectures with using generic PUD pagatable
> allocation to add corresponding pgtable_page_inc() or pgtable_page_dec()
> to account PUD level pagetable.
> 
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> ---
>  arch/arm64/include/asm/tlb.h         |  5 ++++-
>  arch/loongarch/include/asm/pgalloc.h | 11 ++++++++---
>  arch/mips/include/asm/pgalloc.h      | 11 ++++++++---
>  arch/s390/include/asm/tlb.h          |  1 +
>  arch/x86/mm/pgtable.c                |  5 ++++-
>  include/asm-generic/pgalloc.h        | 12 ++++++++++--
>  6 files changed, 35 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
> index c995d1f..1772df9 100644
> --- a/arch/arm64/include/asm/tlb.h
> +++ b/arch/arm64/include/asm/tlb.h
> @@ -94,7 +94,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
>  static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
>  				  unsigned long addr)
>  {
> -	tlb_remove_table(tlb, virt_to_page(pudp));
> +	struct page *page = virt_to_page(pudp);
> +
> +	pgtable_page_dec(page);

Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be
consistent with what we currently have for PTEs and PMDs.

This applies to all the additions of pgtable_page_dec() and
pgtable_page_inc().

> +	tlb_remove_table(tlb, page);
>  }
>  #endif
>  
> diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
> index b0a57b2..19bfe14 100644
> --- a/arch/loongarch/include/asm/pgalloc.h
> +++ b/arch/loongarch/include/asm/pgalloc.h
> @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
>  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
>  {
>  	pud_t *pud;
> +	struct page *page;
>  
> -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
> -	if (pud)
> -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
> +	page = alloc_pages(GFP_KERNEL, PUD_ORDER);
> +	if (!page)
> +		return NULL;
> +
> +	pgtable_page_inc(page);
> +	pud = (pud_t *)page_address(page);
> +	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
>  	return pud;
>  }
>  
> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
> index 867e9c3..990f614 100644
> --- a/arch/mips/include/asm/pgalloc.h
> +++ b/arch/mips/include/asm/pgalloc.h
> @@ -89,11 +89,16 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
>  
>  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
>  {
> +	struct page *page;
>  	pud_t *pud;
>  
> -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
> -	if (pud)
> -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
> +	page = alloc_pages(GFP_KERNEL, PUD_ORDER);
> +	if (!page)
> +		return NULL;
> +
> +	pgtable_page_inc(page);
> +	pud = (pud_t *)page_address(page);
> +	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
>  	return pud;
>  }
>  
> diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
> index fe6407f..744e2d7 100644
> --- a/arch/s390/include/asm/tlb.h
> +++ b/arch/s390/include/asm/tlb.h
> @@ -125,6 +125,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
>  {
>  	if (mm_pud_folded(tlb->mm))
>  		return;
> +	pgtable_page_dec(virt_to_page(pud));
>  	tlb->mm->context.flush_mm = 1;
>  	tlb->freed_tables = 1;
>  	tlb->cleared_p4ds = 1;
> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
> index a932d77..5e46e31 100644
> --- a/arch/x86/mm/pgtable.c
> +++ b/arch/x86/mm/pgtable.c
> @@ -76,8 +76,11 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
>  #if CONFIG_PGTABLE_LEVELS > 3
>  void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
>  {
> +	struct page *page = virt_to_page(pud);
> +
> +	pgtable_page_dec(page);
>  	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
> -	paravirt_tlb_remove_table(tlb, virt_to_page(pud));
> +	paravirt_tlb_remove_table(tlb, page);
>  }
>  
>  #if CONFIG_PGTABLE_LEVELS > 4
> diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
> index 977bea1..11350f7 100644
> --- a/include/asm-generic/pgalloc.h
> +++ b/include/asm-generic/pgalloc.h
> @@ -149,11 +149,16 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>  
>  static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
>  {
> +	struct page *page;
>  	gfp_t gfp = GFP_PGTABLE_USER;
>  
>  	if (mm == &init_mm)
>  		gfp = GFP_PGTABLE_KERNEL;
> -	return (pud_t *)get_zeroed_page(gfp);
> +	page = alloc_pages(gfp, 0);
> +	if (!page)
> +		return NULL;
> +	pgtable_page_inc(page);
> +	return (pud_t *)page_address(page);
>  }
>  
>  #ifndef __HAVE_ARCH_PUD_ALLOC_ONE
> @@ -174,8 +179,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
>  
>  static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
>  {
> +	struct page *page = virt_to_page(pud);
> +
>  	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
> -	free_page((unsigned long)pud);
> +	pgtable_page_dec(page);
> +	__free_page(page);
>  }
>  
>  #ifndef __HAVE_ARCH_PUD_FREE
> -- 
> 1.8.3.1
>
Baolin Wang July 1, 2022, 8:04 a.m. UTC | #2
On 6/30/2022 10:17 PM, Mike Rapoport wrote:
> On Thu, Jun 30, 2022 at 07:11:15PM +0800, Baolin Wang wrote:
>> Now the PUD level ptes are always protected by mm->page_table_lock,
>> which means no split pagetable lock needed. So the generic PUD level
>> pagetable pages allocation will not call pgtable_pte_page_ctor/dtor(),
>> that means we will miss to account PUD level pagetable pages.
>>
>> Adding pagetable account by calling pgtable_page_inc() or
>> pgtable_page_dec() when allocating or freeing PUD level pagetable
>> pages to help to get an accurate pagetable accounting.
>>
>> Moreover this patch will also mark the PUD level pagetable with PG_table
>> flag, which will help to do sanity validation in unpoison_memory() and
>> get more accurate pagetable accounting by /proc/kpageflags interface.
>>
>> Meanwhile converting the architectures with using generic PUD pagatable
>> allocation to add corresponding pgtable_page_inc() or pgtable_page_dec()
>> to account PUD level pagetable.
>>
>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>> ---
>>   arch/arm64/include/asm/tlb.h         |  5 ++++-
>>   arch/loongarch/include/asm/pgalloc.h | 11 ++++++++---
>>   arch/mips/include/asm/pgalloc.h      | 11 ++++++++---
>>   arch/s390/include/asm/tlb.h          |  1 +
>>   arch/x86/mm/pgtable.c                |  5 ++++-
>>   include/asm-generic/pgalloc.h        | 12 ++++++++++--
>>   6 files changed, 35 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
>> index c995d1f..1772df9 100644
>> --- a/arch/arm64/include/asm/tlb.h
>> +++ b/arch/arm64/include/asm/tlb.h
>> @@ -94,7 +94,10 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
>>   static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
>>   				  unsigned long addr)
>>   {
>> -	tlb_remove_table(tlb, virt_to_page(pudp));
>> +	struct page *page = virt_to_page(pudp);
>> +
>> +	pgtable_page_dec(page);
> 
> Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be
> consistent with what we currently have for PTEs and PMDs.
> 
> This applies to all the additions of pgtable_page_dec() and
> pgtable_page_inc().

OK. I can add pgtable_pud_page_ctor() and pgtable_pud_page_dtor() 
helpers to keep consistent, which are just wrappers of 
pgtable_page_inc() and pgtable_page_dec().

> 
>> +	tlb_remove_table(tlb, page);
>>   }
>>   #endif
>>   
>> diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
>> index b0a57b2..19bfe14 100644
>> --- a/arch/loongarch/include/asm/pgalloc.h
>> +++ b/arch/loongarch/include/asm/pgalloc.h
>> @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
>>   static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
>>   {
>>   	pud_t *pud;
>> +	struct page *page;
>>   
>> -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
>> -	if (pud)
>> -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
>> +	page = alloc_pages(GFP_KERNEL, PUD_ORDER);
>> +	if (!page)
>> +		return NULL;
>> +
>> +	pgtable_page_inc(page);
>> +	pud = (pud_t *)page_address(page);
>> +	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
>>   	return pud;
>>   }
>>   
>> diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
>> index 867e9c3..990f614 100644
>> --- a/arch/mips/include/asm/pgalloc.h
>> +++ b/arch/mips/include/asm/pgalloc.h
>> @@ -89,11 +89,16 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
>>   
>>   static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
>>   {
>> +	struct page *page;
>>   	pud_t *pud;
>>   
>> -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
>> -	if (pud)
>> -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
>> +	page = alloc_pages(GFP_KERNEL, PUD_ORDER);
>> +	if (!page)
>> +		return NULL;
>> +
>> +	pgtable_page_inc(page);
>> +	pud = (pud_t *)page_address(page);
>> +	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
>>   	return pud;
>>   }
>>   
>> diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
>> index fe6407f..744e2d7 100644
>> --- a/arch/s390/include/asm/tlb.h
>> +++ b/arch/s390/include/asm/tlb.h
>> @@ -125,6 +125,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
>>   {
>>   	if (mm_pud_folded(tlb->mm))
>>   		return;
>> +	pgtable_page_dec(virt_to_page(pud));
>>   	tlb->mm->context.flush_mm = 1;
>>   	tlb->freed_tables = 1;
>>   	tlb->cleared_p4ds = 1;
>> diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
>> index a932d77..5e46e31 100644
>> --- a/arch/x86/mm/pgtable.c
>> +++ b/arch/x86/mm/pgtable.c
>> @@ -76,8 +76,11 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
>>   #if CONFIG_PGTABLE_LEVELS > 3
>>   void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
>>   {
>> +	struct page *page = virt_to_page(pud);
>> +
>> +	pgtable_page_dec(page);
>>   	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
>> -	paravirt_tlb_remove_table(tlb, virt_to_page(pud));
>> +	paravirt_tlb_remove_table(tlb, page);
>>   }
>>   
>>   #if CONFIG_PGTABLE_LEVELS > 4
>> diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
>> index 977bea1..11350f7 100644
>> --- a/include/asm-generic/pgalloc.h
>> +++ b/include/asm-generic/pgalloc.h
>> @@ -149,11 +149,16 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
>>   
>>   static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
>>   {
>> +	struct page *page;
>>   	gfp_t gfp = GFP_PGTABLE_USER;
>>   
>>   	if (mm == &init_mm)
>>   		gfp = GFP_PGTABLE_KERNEL;
>> -	return (pud_t *)get_zeroed_page(gfp);
>> +	page = alloc_pages(gfp, 0);
>> +	if (!page)
>> +		return NULL;
>> +	pgtable_page_inc(page);
>> +	return (pud_t *)page_address(page);
>>   }
>>   
>>   #ifndef __HAVE_ARCH_PUD_ALLOC_ONE
>> @@ -174,8 +179,11 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
>>   
>>   static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
>>   {
>> +	struct page *page = virt_to_page(pud);
>> +
>>   	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
>> -	free_page((unsigned long)pud);
>> +	pgtable_page_dec(page);
>> +	__free_page(page);
>>   }
>>   
>>   #ifndef __HAVE_ARCH_PUD_FREE
>> -- 
>> 1.8.3.1
>>
>
Matthew Wilcox (Oracle) July 3, 2022, 3:40 a.m. UTC | #3
On Fri, Jul 01, 2022 at 04:04:21PM +0800, Baolin Wang wrote:
> > Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be
> > consistent with what we currently have for PTEs and PMDs.
> > 
> > This applies to all the additions of pgtable_page_dec() and
> > pgtable_page_inc().
> 
> OK. I can add pgtable_pud_page_ctor() and pgtable_pud_page_dtor() helpers to
> keep consistent, which are just wrappers of pgtable_page_inc() and
> pgtable_page_dec().

I think you misunderstand Mike.

Don't add pgtable_page_inc() and pgtable_page_dec().  Just add
pgtable_pud_page_ctor() and pgtable_pud_page_dtor().  At least, that
was what I said last time yo uposted these patches.
Matthew Wilcox (Oracle) July 3, 2022, 3:47 a.m. UTC | #4
On Thu, Jun 30, 2022 at 07:11:15PM +0800, Baolin Wang wrote:
> +++ b/arch/loongarch/include/asm/pgalloc.h
> @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
>  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
>  {
>  	pud_t *pud;
> +	struct page *page;
>  
> -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
> -	if (pud)
> -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
> +	page = alloc_pages(GFP_KERNEL, PUD_ORDER);

Argh.  I just got rid of PMD_ORDER from PA-RISC.  Now Loongstupid has
reintroduced it, and worse introduced PUD_ORDER.  See commit
7bf82eb3873f.
Baolin Wang July 3, 2022, 2:06 p.m. UTC | #5
On 7/3/2022 11:40 AM, Matthew Wilcox wrote:
> On Fri, Jul 01, 2022 at 04:04:21PM +0800, Baolin Wang wrote:
>>> Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be
>>> consistent with what we currently have for PTEs and PMDs.
>>>
>>> This applies to all the additions of pgtable_page_dec() and
>>> pgtable_page_inc().
>>
>> OK. I can add pgtable_pud_page_ctor() and pgtable_pud_page_dtor() helpers to
>> keep consistent, which are just wrappers of pgtable_page_inc() and
>> pgtable_page_dec().
> 
> I think you misunderstand Mike.
> 
> Don't add pgtable_page_inc() and pgtable_page_dec().  Just add
> pgtable_pud_page_ctor() and pgtable_pud_page_dtor().  At least, that
> was what I said last time yo uposted these patches.

My concern is that I need another helpers for kernel page table 
allocation helpers, if only adding pgtable_pud_page_ctor() and 
pgtable_pud_page_dtor() like below:

static inline void pgtable_pud_page_ctor(struct page *page)
{
	__SetPageTable(page);
	inc_lruvec_page_state(page, NR_PAGETABLE);
}

static inline void pgtable_pud_page_dtor(struct page *page)
{
	__ClearPageTable(page);
	dec_lruvec_page_state(page, NR_PAGETABLE);
}

So for kernel pte page table allocation, I need another similar helpers 
like below. However they do the samething with 
pgtable_pud_page_ctor/pgtable_pud_page_dtor, so I am not sure this is 
good for adding these duplicate code.

static inline void pgtable_kernel_pte_page_ctor(struct page *page)
{
	__SetPageTable(page);
	inc_lruvec_page_state(page, NR_PAGETABLE);
}

static inline void pgtable_kernel_pte_page_dtor(struct page *page)
{
	__ClearPageTable(page);
	dec_lruvec_page_state(page, NR_PAGETABLE);
}

Instead adding a common helpers seems more readable to me, which can 
also simplify original pgtable_pmd_page_dtor()/pgtable_pmd_page_ctor(). 
Something like below.

static inline void pgtable_page_inc(struct page *page)
{
	__SetPageTable(page);
	inc_lruvec_page_state(page, NR_PAGETABLE);
}

static inline void pgtable_page_dec(struct page *page)
{
	__ClearPageTable(page);
	dec_lruvec_page_state(page, NR_PAGETABLE);
}

static inline void pgtable_pud_page_ctor(struct page *page)
{
	pgtable_page_inc(page);
}

static inline void pgtable_pud_page_dtor(struct page *page)
{
	pgtable_page_dec(page);
}

For kernel pte page table, we can just use 
pgtable_page_inc/pgtable_page_dec(), or adding 
pgtable_kernel_pte_page_ctor/pgtable_kernel_pte_page_dtor, which just 
wrappers of pgtable_page_inc() and pgtable_page_dec().

Matthew and Mike, how do you think? Thanks.
Mike Rapoport July 3, 2022, 2:18 p.m. UTC | #6
On Sun, Jul 03, 2022 at 04:47:07AM +0100, Matthew Wilcox wrote:
> On Thu, Jun 30, 2022 at 07:11:15PM +0800, Baolin Wang wrote:
> > +++ b/arch/loongarch/include/asm/pgalloc.h
> > @@ -89,10 +89,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
> >  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
> >  {
> >  	pud_t *pud;
> > +	struct page *page;
> >  
> > -	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
> > -	if (pud)
> > -		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
> > +	page = alloc_pages(GFP_KERNEL, PUD_ORDER);
> 
> Argh.  I just got rid of PMD_ORDER from PA-RISC.  Now Loongstupid has
> reintroduced it, and worse introduced PUD_ORDER.  See commit
> 7bf82eb3873f.

Let's try and deal with all those PxD_ORDERs for once.

https://lore.kernel.org/linux-arch/20220703141203.147893-1-rppt@kernel.org/
Mike Rapoport July 3, 2022, 2:28 p.m. UTC | #7
On Sun, Jul 03, 2022 at 10:06:32PM +0800, Baolin Wang wrote:
> 
> 
> On 7/3/2022 11:40 AM, Matthew Wilcox wrote:
> > On Fri, Jul 01, 2022 at 04:04:21PM +0800, Baolin Wang wrote:
> > > > Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be
> > > > consistent with what we currently have for PTEs and PMDs.
> > > > 
> > > > This applies to all the additions of pgtable_page_dec() and
> > > > pgtable_page_inc().
> > > 
> > > OK. I can add pgtable_pud_page_ctor() and pgtable_pud_page_dtor() helpers to
> > > keep consistent, which are just wrappers of pgtable_page_inc() and
> > > pgtable_page_dec().
> > 
> > I think you misunderstand Mike.
> > 
> > Don't add pgtable_page_inc() and pgtable_page_dec().  Just add
> > pgtable_pud_page_ctor() and pgtable_pud_page_dtor().  At least, that
> > was what I said last time you posted these patches.
> 
> My concern is that I need another helpers for kernel page table allocation
> helpers, if only adding pgtable_pud_page_ctor() and pgtable_pud_page_dtor()
> like below:
> 
> static inline void pgtable_pud_page_ctor(struct page *page)
> {
> 	__SetPageTable(page);
> 	inc_lruvec_page_state(page, NR_PAGETABLE);
> }
> 
> static inline void pgtable_pud_page_dtor(struct page *page)
> {
> 	__ClearPageTable(page);
> 	dec_lruvec_page_state(page, NR_PAGETABLE);
> }
> 
> So for kernel pte page table allocation, I need another similar helpers like
> below. However they do the samething with
> pgtable_pud_page_ctor/pgtable_pud_page_dtor, so I am not sure this is good
> for adding these duplicate code.
> 
> static inline void pgtable_kernel_pte_page_ctor(struct page *page)
> {
> 	__SetPageTable(page);
> 	inc_lruvec_page_state(page, NR_PAGETABLE);
> }
> 
> static inline void pgtable_kernel_pte_page_dtor(struct page *page)
> {
> 	__ClearPageTable(page);
> 	dec_lruvec_page_state(page, NR_PAGETABLE);
> }
> 
> Instead adding a common helpers seems more readable to me, which can also
> simplify original pgtable_pmd_page_dtor()/pgtable_pmd_page_ctor(). Something
> like below.
> 
> static inline void pgtable_page_inc(struct page *page)
> {
> 	__SetPageTable(page);
> 	inc_lruvec_page_state(page, NR_PAGETABLE);
> }
> 
> static inline void pgtable_page_dec(struct page *page)
> {
> 	__ClearPageTable(page);
> 	dec_lruvec_page_state(page, NR_PAGETABLE);
> }
> 
> static inline void pgtable_pud_page_ctor(struct page *page)
> {
> 	pgtable_page_inc(page);
> }
> 
> static inline void pgtable_pud_page_dtor(struct page *page)
> {
> 	pgtable_page_dec(page);
> }
> 
> For kernel pte page table, we can just use
> pgtable_page_inc/pgtable_page_dec(), or adding
> pgtable_kernel_pte_page_ctor/pgtable_kernel_pte_page_dtor, which just
> wrappers of pgtable_page_inc() and pgtable_page_dec().
> 
> Matthew and Mike, how do you think? Thanks.

I actually meant to add pgtable_pud_page_ctor/dtor() as a wrapper for the
new helper to keep pud tables allocation consistent with pmd and pte and
as a provision for the time we'll have per-page pud locks.

For the accounting of the kernel page tables a new helper does make sense
because there are no locks to initialize for the kernel page tables.

I can't say that I'm happy with the pgtable_page_inc/dec names, though.

Maybe page_{set,clear}_pgtable()?
Baolin Wang July 3, 2022, 2:49 p.m. UTC | #8
On 7/3/2022 10:28 PM, Mike Rapoport wrote:
> On Sun, Jul 03, 2022 at 10:06:32PM +0800, Baolin Wang wrote:
>>
>>
>> On 7/3/2022 11:40 AM, Matthew Wilcox wrote:
>>> On Fri, Jul 01, 2022 at 04:04:21PM +0800, Baolin Wang wrote:
>>>>> Using pgtable_pud_page_ctor() and pgtable_pud_page_dtor() would be
>>>>> consistent with what we currently have for PTEs and PMDs.
>>>>>
>>>>> This applies to all the additions of pgtable_page_dec() and
>>>>> pgtable_page_inc().
>>>>
>>>> OK. I can add pgtable_pud_page_ctor() and pgtable_pud_page_dtor() helpers to
>>>> keep consistent, which are just wrappers of pgtable_page_inc() and
>>>> pgtable_page_dec().
>>>
>>> I think you misunderstand Mike.
>>>
>>> Don't add pgtable_page_inc() and pgtable_page_dec().  Just add
>>> pgtable_pud_page_ctor() and pgtable_pud_page_dtor().  At least, that
>>> was what I said last time you posted these patches.
>>
>> My concern is that I need another helpers for kernel page table allocation
>> helpers, if only adding pgtable_pud_page_ctor() and pgtable_pud_page_dtor()
>> like below:
>>
>> static inline void pgtable_pud_page_ctor(struct page *page)
>> {
>> 	__SetPageTable(page);
>> 	inc_lruvec_page_state(page, NR_PAGETABLE);
>> }
>>
>> static inline void pgtable_pud_page_dtor(struct page *page)
>> {
>> 	__ClearPageTable(page);
>> 	dec_lruvec_page_state(page, NR_PAGETABLE);
>> }
>>
>> So for kernel pte page table allocation, I need another similar helpers like
>> below. However they do the samething with
>> pgtable_pud_page_ctor/pgtable_pud_page_dtor, so I am not sure this is good
>> for adding these duplicate code.
>>
>> static inline void pgtable_kernel_pte_page_ctor(struct page *page)
>> {
>> 	__SetPageTable(page);
>> 	inc_lruvec_page_state(page, NR_PAGETABLE);
>> }
>>
>> static inline void pgtable_kernel_pte_page_dtor(struct page *page)
>> {
>> 	__ClearPageTable(page);
>> 	dec_lruvec_page_state(page, NR_PAGETABLE);
>> }
>>
>> Instead adding a common helpers seems more readable to me, which can also
>> simplify original pgtable_pmd_page_dtor()/pgtable_pmd_page_ctor(). Something
>> like below.
>>
>> static inline void pgtable_page_inc(struct page *page)
>> {
>> 	__SetPageTable(page);
>> 	inc_lruvec_page_state(page, NR_PAGETABLE);
>> }
>>
>> static inline void pgtable_page_dec(struct page *page)
>> {
>> 	__ClearPageTable(page);
>> 	dec_lruvec_page_state(page, NR_PAGETABLE);
>> }
>>
>> static inline void pgtable_pud_page_ctor(struct page *page)
>> {
>> 	pgtable_page_inc(page);
>> }
>>
>> static inline void pgtable_pud_page_dtor(struct page *page)
>> {
>> 	pgtable_page_dec(page);
>> }
>>
>> For kernel pte page table, we can just use
>> pgtable_page_inc/pgtable_page_dec(), or adding
>> pgtable_kernel_pte_page_ctor/pgtable_kernel_pte_page_dtor, which just
>> wrappers of pgtable_page_inc() and pgtable_page_dec().
>>
>> Matthew and Mike, how do you think? Thanks.
> 
> I actually meant to add pgtable_pud_page_ctor/dtor() as a wrapper for the
> new helper to keep pud tables allocation consistent with pmd and pte and
> as a provision for the time we'll have per-page pud locks.
> 
> For the accounting of the kernel page tables a new helper does make sense
> because there are no locks to initialize for the kernel page tables.

Thanks for clarification. That is also my thought.

> 
> I can't say that I'm happy with the pgtable_page_inc/dec names, though.
> 
> Maybe page_{set,clear}_pgtable()?

Sounds better than pgtable_page_inc/dec() for me. I will use them in 
next version if no other objections. Thanks.
Matthew Wilcox (Oracle) July 3, 2022, 2:52 p.m. UTC | #9
On Sun, Jul 03, 2022 at 10:06:32PM +0800, Baolin Wang wrote:
> So for kernel pte page table allocation, I need another similar helpers like
> below. However they do the samething with
> pgtable_pud_page_ctor/pgtable_pud_page_dtor, so I am not sure this is good
> for adding these duplicate code.

Why do we want to account kernel PTE page tables in NR_PAGETABLE?
I think that's confusing.
Baolin Wang July 3, 2022, 3:07 p.m. UTC | #10
On 7/3/2022 10:52 PM, Matthew Wilcox wrote:
> On Sun, Jul 03, 2022 at 10:06:32PM +0800, Baolin Wang wrote:
>> So for kernel pte page table allocation, I need another similar helpers like
>> below. However they do the samething with
>> pgtable_pud_page_ctor/pgtable_pud_page_dtor, so I am not sure this is good
>> for adding these duplicate code.
> 
> Why do we want to account kernel PTE page tables in NR_PAGETABLE?
> I think that's confusing.

Why this will confuse you? I think it is inconsistent that kernel PTE 
page tables are not accounted, because we will account PMD/PUD level 
page tables no matter they are userspace pagetable pages or kernel 
pagetable pages.

Moreover the the vmalloc()/vmap() can consume some kernel pagetable 
pages, which should be accounted.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index c995d1f..1772df9 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -94,7 +94,10 @@  static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
 				  unsigned long addr)
 {
-	tlb_remove_table(tlb, virt_to_page(pudp));
+	struct page *page = virt_to_page(pudp);
+
+	pgtable_page_dec(page);
+	tlb_remove_table(tlb, page);
 }
 #endif
 
diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index b0a57b2..19bfe14 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -89,10 +89,15 @@  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
 	pud_t *pud;
+	struct page *page;
 
-	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
-	if (pud)
-		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+	page = alloc_pages(GFP_KERNEL, PUD_ORDER);
+	if (!page)
+		return NULL;
+
+	pgtable_page_inc(page);
+	pud = (pud_t *)page_address(page);
+	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
 	return pud;
 }
 
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 867e9c3..990f614 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -89,11 +89,16 @@  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 {
+	struct page *page;
 	pud_t *pud;
 
-	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_ORDER);
-	if (pud)
-		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+	page = alloc_pages(GFP_KERNEL, PUD_ORDER);
+	if (!page)
+		return NULL;
+
+	pgtable_page_inc(page);
+	pud = (pud_t *)page_address(page);
+	pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
 	return pud;
 }
 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index fe6407f..744e2d7 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -125,6 +125,7 @@  static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
 {
 	if (mm_pud_folded(tlb->mm))
 		return;
+	pgtable_page_dec(virt_to_page(pud));
 	tlb->mm->context.flush_mm = 1;
 	tlb->freed_tables = 1;
 	tlb->cleared_p4ds = 1;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index a932d77..5e46e31 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -76,8 +76,11 @@  void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 #if CONFIG_PGTABLE_LEVELS > 3
 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
+	struct page *page = virt_to_page(pud);
+
+	pgtable_page_dec(page);
 	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
-	paravirt_tlb_remove_table(tlb, virt_to_page(pud));
+	paravirt_tlb_remove_table(tlb, page);
 }
 
 #if CONFIG_PGTABLE_LEVELS > 4
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 977bea1..11350f7 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -149,11 +149,16 @@  static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 
 static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
+	struct page *page;
 	gfp_t gfp = GFP_PGTABLE_USER;
 
 	if (mm == &init_mm)
 		gfp = GFP_PGTABLE_KERNEL;
-	return (pud_t *)get_zeroed_page(gfp);
+	page = alloc_pages(gfp, 0);
+	if (!page)
+		return NULL;
+	pgtable_page_inc(page);
+	return (pud_t *)page_address(page);
 }
 
 #ifndef __HAVE_ARCH_PUD_ALLOC_ONE
@@ -174,8 +179,11 @@  static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 
 static inline void __pud_free(struct mm_struct *mm, pud_t *pud)
 {
+	struct page *page = virt_to_page(pud);
+
 	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-	free_page((unsigned long)pud);
+	pgtable_page_dec(page);
+	__free_page(page);
 }
 
 #ifndef __HAVE_ARCH_PUD_FREE