Message ID | 20210414085915.301189-7-aneesh.kumar@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Speedup mremap on ppc64 | expand |
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > Some architectures do have the concept of page walk cache which need > to be flush when updating higher levels of page tables. A fast mremap > that involves moving page table pages instead of copying pte entries > should flush page walk cache since the old translation cache is no more > valid. > > Add new helper flush_pte_tlb_pwc_range() which invalidates both TLB and > page walk cache where TLB entries are mapped with page size PAGE_SIZE. > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > --- > arch/powerpc/include/asm/book3s/64/tlbflush.h | 11 +++++++++++ > mm/mremap.c | 15 +++++++++++++-- > 2 files changed, 24 insertions(+), 2 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h > index f9f8a3a264f7..c236b66f490b 100644 > --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h > +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h > @@ -80,6 +80,17 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, > return flush_hugetlb_tlb_pwc_range(vma, start, end, false); > } > > +#define flush_pte_tlb_pwc_range flush_tlb_pwc_range > +static inline void flush_pte_tlb_pwc_range(struct vm_area_struct *vma, > + unsigned long start, unsigned long end, > + bool also_pwc) This still uses the also_pwc name, which is a bit inconsistent with the previous patch. But, does it even need to be a parameter? AFAICS you always pass true, and pwc=true is sort of implied by the name isn't it? 
cheers > +{ > + if (radix_enabled()) > + return radix__flush_tlb_pwc_range_psize(vma->vm_mm, start, > + end, mmu_virtual_psize, also_pwc); > + return hash__flush_tlb_range(vma, start, end); > +} > + > static inline void flush_tlb_range(struct vm_area_struct *vma, > unsigned long start, unsigned long end) > { > diff --git a/mm/mremap.c b/mm/mremap.c > index 574287f9bb39..0e7b11daafee 100644 > --- a/mm/mremap.c > +++ b/mm/mremap.c > @@ -210,6 +210,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, > drop_rmap_locks(vma); > } > > +#ifndef flush_pte_tlb_pwc_range > +#define flush_pte_tlb_pwc_range flush_pte_tlb_pwc_range > +static inline void flush_pte_tlb_pwc_range(struct vm_area_struct *vma, > + unsigned long start, > + unsigned long end, > + bool also_pwc) > +{ > + return flush_tlb_range(vma, start, end); > +} > +#endif > + > #ifdef CONFIG_HAVE_MOVE_PMD > static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, > unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd) > @@ -260,7 +271,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, > VM_BUG_ON(!pmd_none(*new_pmd)); > pmd_populate(mm, new_pmd, (pgtable_t)pmd_page_vaddr(pmd)); > > - flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); > + flush_pte_tlb_pwc_range(vma, old_addr, old_addr + PMD_SIZE, true); > if (new_ptl != old_ptl) > spin_unlock(new_ptl); > spin_unlock(old_ptl); > @@ -307,7 +318,7 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr, > VM_BUG_ON(!pud_none(*new_pud)); > > pud_populate(mm, new_pud, (pmd_t *)pud_page_vaddr(pud)); > - flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE); > + flush_pte_tlb_pwc_range(vma, old_addr, old_addr + PUD_SIZE, true); > if (new_ptl != old_ptl) > spin_unlock(new_ptl); > spin_unlock(old_ptl); > -- > 2.30.2
On 4/20/21 9:17 AM, Michael Ellerman wrote: > "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: >> Some architectures do have the concept of page walk cache which need >> to be flush when updating higher levels of page tables. A fast mremap >> that involves moving page table pages instead of copying pte entries >> should flush page walk cache since the old translation cache is no more >> valid. >> >> Add new helper flush_pte_tlb_pwc_range() which invalidates both TLB and >> page walk cache where TLB entries are mapped with page size PAGE_SIZE. >> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >> --- >> arch/powerpc/include/asm/book3s/64/tlbflush.h | 11 +++++++++++ >> mm/mremap.c | 15 +++++++++++++-- >> 2 files changed, 24 insertions(+), 2 deletions(-) >> >> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h >> index f9f8a3a264f7..c236b66f490b 100644 >> --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h >> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h >> @@ -80,6 +80,17 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, >> return flush_hugetlb_tlb_pwc_range(vma, start, end, false); >> } >> >> +#define flush_pte_tlb_pwc_range flush_tlb_pwc_range >> +static inline void flush_pte_tlb_pwc_range(struct vm_area_struct *vma, >> + unsigned long start, unsigned long end, >> + bool also_pwc) > > This still uses the also_pwc name, which is a bit inconsistent with the > previous patch. > will fix that. > But, does it even need to be a parameter? AFAICS you always pass true, > and pwc=true is sort of implied by the name isn't it? > I don't have strong opinion about that. I was wondering having flush_pwc explicitly called out is a better indication of we are flushing page walk cache. Will drop that in the next update. -aneesh
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index f9f8a3a264f7..c236b66f490b 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -80,6 +80,17 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, return flush_hugetlb_tlb_pwc_range(vma, start, end, false); } +#define flush_pte_tlb_pwc_range flush_tlb_pwc_range +static inline void flush_pte_tlb_pwc_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + bool also_pwc) +{ + if (radix_enabled()) + return radix__flush_tlb_pwc_range_psize(vma->vm_mm, start, + end, mmu_virtual_psize, also_pwc); + return hash__flush_tlb_range(vma, start, end); +} + static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { diff --git a/mm/mremap.c b/mm/mremap.c index 574287f9bb39..0e7b11daafee 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -210,6 +210,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, drop_rmap_locks(vma); } +#ifndef flush_pte_tlb_pwc_range +#define flush_pte_tlb_pwc_range flush_pte_tlb_pwc_range +static inline void flush_pte_tlb_pwc_range(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + bool also_pwc) +{ + return flush_tlb_range(vma, start, end); +} +#endif + #ifdef CONFIG_HAVE_MOVE_PMD static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd) @@ -260,7 +271,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, VM_BUG_ON(!pmd_none(*new_pmd)); pmd_populate(mm, new_pmd, (pgtable_t)pmd_page_vaddr(pmd)); - flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); + flush_pte_tlb_pwc_range(vma, old_addr, old_addr + PMD_SIZE, true); if (new_ptl != old_ptl) spin_unlock(new_ptl); spin_unlock(old_ptl); @@ -307,7 +318,7 @@ static bool move_normal_pud(struct vm_area_struct 
*vma, unsigned long old_addr, VM_BUG_ON(!pud_none(*new_pud)); pud_populate(mm, new_pud, (pmd_t *)pud_page_vaddr(pud)); - flush_tlb_range(vma, old_addr, old_addr + PUD_SIZE); + flush_pte_tlb_pwc_range(vma, old_addr, old_addr + PUD_SIZE, true); if (new_ptl != old_ptl) spin_unlock(new_ptl); spin_unlock(old_ptl);
Some architectures have the concept of a page walk cache, which needs to be flushed when updating higher levels of page tables. A fast mremap that involves moving page table pages instead of copying pte entries should flush the page walk cache, since the old translation cache is no longer valid. Add a new helper, flush_pte_tlb_pwc_range(), which invalidates both the TLB and the page walk cache where TLB entries are mapped with page size PAGE_SIZE. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> --- arch/powerpc/include/asm/book3s/64/tlbflush.h | 11 +++++++++++ mm/mremap.c | 15 +++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-)