diff mbox series

[v4,6/9] mm/mremap: Use range flush that does TLB and page walk cache flush

Message ID 20210414085915.301189-7-aneesh.kumar@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series Speedup mremap on ppc64 | expand

Commit Message

Aneesh Kumar K.V April 14, 2021, 8:59 a.m. UTC
Some architectures have the concept of a page walk cache, which needs
to be flushed when updating higher levels of page tables. A fast mremap
that involves moving page table pages instead of copying pte entries
should flush the page walk cache, since the old translation cache is no
longer valid.

Add a new helper, flush_pte_tlb_pwc_range(), which invalidates both the TLB
and the page walk cache for ranges whose TLB entries are mapped with page
size PAGE_SIZE.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/tlbflush.h | 11 +++++++++++
 mm/mremap.c                                   | 15 +++++++++++++--
 2 files changed, 24 insertions(+), 2 deletions(-)

Comments

Michael Ellerman April 20, 2021, 3:47 a.m. UTC | #1
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
> Some architectures do have the concept of page walk cache which need
> to be flush when updating higher levels of page tables. A fast mremap
> that involves moving page table pages instead of copying pte entries
> should flush page walk cache since the old translation cache is no more
> valid.
>
> Add new helper flush_pte_tlb_pwc_range() which invalidates both TLB and
> page walk cache where TLB entries are mapped with page size PAGE_SIZE.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/tlbflush.h | 11 +++++++++++
>  mm/mremap.c                                   | 15 +++++++++++++--
>  2 files changed, 24 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
> index f9f8a3a264f7..c236b66f490b 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
> @@ -80,6 +80,17 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
>  	return flush_hugetlb_tlb_pwc_range(vma, start, end, false);
>  }
>  
> +#define flush_pte_tlb_pwc_range flush_tlb_pwc_range
> +static inline void flush_pte_tlb_pwc_range(struct vm_area_struct *vma,
> +					   unsigned long start, unsigned long end,
> +					   bool also_pwc)

This still uses the also_pwc name, which is a bit inconsistent with the
previous patch.

But, does it even need to be a parameter? AFAICS you always pass true,
and pwc=true is sort of implied by the name isn't it?

cheers

> +{
> +	if (radix_enabled())
> +		return radix__flush_tlb_pwc_range_psize(vma->vm_mm, start,
> +							end, mmu_virtual_psize, also_pwc);
> +	return hash__flush_tlb_range(vma, start, end);
> +}
> +
>  static inline void flush_tlb_range(struct vm_area_struct *vma,
>  				   unsigned long start, unsigned long end)
>  {
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 574287f9bb39..0e7b11daafee 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -210,6 +210,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
>  		drop_rmap_locks(vma);
>  }
>  
> +#ifndef flush_pte_tlb_pwc_range
> +#define flush_pte_tlb_pwc_range flush_pte_tlb_pwc_range
> +static inline void flush_pte_tlb_pwc_range(struct vm_area_struct *vma,
> +					   unsigned long start,
> +					   unsigned long end,
> +					   bool also_pwc)
> +{
> +	return flush_tlb_range(vma, start, end);
> +}
> +#endif
> +
>  #ifdef CONFIG_HAVE_MOVE_PMD
>  static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
>  		  unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
> @@ -260,7 +271,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
>  	VM_BUG_ON(!pmd_none(*new_pmd));
>  	pmd_populate(mm, new_pmd, (pgtable_t)pmd_page_vaddr(pmd));
>  
> -	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
> +	flush_pte_tlb_pwc_range(vma, old_addr, old_addr + PMD_SIZE, true);
>  	if (new_ptl != old_ptl)
>  		spin_unlock(new_ptl);
>  	spin_unlock(old_ptl);
> @@ -307,7 +318,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
>  	VM_BUG_ON(!pud_none(*new_pud));
>  
>  	pud_populate(mm, new_pud, (pmd_t *)pud_page_vaddr(pud));
> -	flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
> +	flush_pte_tlb_pwc_range(vma, old_addr, old_addr + PUD_SIZE, true);
>  	if (new_ptl != old_ptl)
>  		spin_unlock(new_ptl);
>  	spin_unlock(old_ptl);
> -- 
> 2.30.2
Aneesh Kumar K.V April 20, 2021, 4:17 a.m. UTC | #2
On 4/20/21 9:17 AM, Michael Ellerman wrote:
> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>> Some architectures do have the concept of page walk cache which need
>> to be flush when updating higher levels of page tables. A fast mremap
>> that involves moving page table pages instead of copying pte entries
>> should flush page walk cache since the old translation cache is no more
>> valid.
>>
>> Add new helper flush_pte_tlb_pwc_range() which invalidates both TLB and
>> page walk cache where TLB entries are mapped with page size PAGE_SIZE.
>>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>   arch/powerpc/include/asm/book3s/64/tlbflush.h | 11 +++++++++++
>>   mm/mremap.c                                   | 15 +++++++++++++--
>>   2 files changed, 24 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
>> index f9f8a3a264f7..c236b66f490b 100644
>> --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
>> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
>> @@ -80,6 +80,17 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
>>   	return flush_hugetlb_tlb_pwc_range(vma, start, end, false);
>>   }
>>   
>> +#define flush_pte_tlb_pwc_range flush_tlb_pwc_range
>> +static inline void flush_pte_tlb_pwc_range(struct vm_area_struct *vma,
>> +					   unsigned long start, unsigned long end,
>> +					   bool also_pwc)
> 
> This still uses the also_pwc name, which is a bit inconsistent with the
> previous patch.
> 

will fix that.

> But, does it even need to be a parameter? AFAICS you always pass true,
> and pwc=true is sort of implied by the name isn't it?
> 

I don't have a strong opinion about that. I was wondering whether having
flush_pwc explicitly called out is a better indication that we are flushing
the page walk cache. Will drop that in the next update.


-aneesh
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index f9f8a3a264f7..c236b66f490b 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -80,6 +80,17 @@  static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
 	return flush_hugetlb_tlb_pwc_range(vma, start, end, false);
 }
 
+#define flush_pte_tlb_pwc_range flush_tlb_pwc_range
+static inline void flush_pte_tlb_pwc_range(struct vm_area_struct *vma,
+					   unsigned long start, unsigned long end,
+					   bool also_pwc)
+{
+	if (radix_enabled())
+		return radix__flush_tlb_pwc_range_psize(vma->vm_mm, start,
+							end, mmu_virtual_psize, also_pwc);
+	return hash__flush_tlb_range(vma, start, end);
+}
+
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end)
 {
diff --git a/mm/mremap.c b/mm/mremap.c
index 574287f9bb39..0e7b11daafee 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -210,6 +210,17 @@  static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		drop_rmap_locks(vma);
 }
 
+#ifndef flush_pte_tlb_pwc_range
+#define flush_pte_tlb_pwc_range flush_pte_tlb_pwc_range
+static inline void flush_pte_tlb_pwc_range(struct vm_area_struct *vma,
+					   unsigned long start,
+					   unsigned long end,
+					   bool also_pwc)
+{
+	return flush_tlb_range(vma, start, end);
+}
+#endif
+
 #ifdef CONFIG_HAVE_MOVE_PMD
 static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
@@ -260,7 +271,7 @@  static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	VM_BUG_ON(!pmd_none(*new_pmd));
 	pmd_populate(mm, new_pmd, (pgtable_t)pmd_page_vaddr(pmd));
 
-	flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+	flush_pte_tlb_pwc_range(vma, old_addr, old_addr + PMD_SIZE, true);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);
@@ -307,7 +318,7 @@  static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	VM_BUG_ON(!pud_none(*new_pud));
 
 	pud_populate(mm, new_pud, (pmd_t *)pud_page_vaddr(pud));
-	flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE);
+	flush_pte_tlb_pwc_range(vma, old_addr, old_addr + PUD_SIZE, true);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	spin_unlock(old_ptl);