Message ID: 20240125062044.63344-1-cuiyunhui@bytedance.com (mailing list archive)
State: Superseded, archived
Series: RISC-V: add uniprocessor flush_tlb_range() support
On Thu, Jan 25, 2024 at 6:23 AM Yunhui Cui <cuiyunhui@bytedance.com> wrote: > > Add support for flush_tlb_range() to improve TLB performance for > UP systems. In order to avoid the mutual inclusion of tlbflush.h > and hugetlb.h, the UP part is also implemented in tlbflush.c. > > Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com> > --- > arch/riscv/include/asm/tlbflush.h | 61 ++++++---- > arch/riscv/mm/Makefile | 2 +- > arch/riscv/mm/tlbflush.c | 195 ++++++++++++++++++------------ > 3 files changed, 156 insertions(+), 102 deletions(-) > Boot tested with defconfig + rz/five enabled, no issues seen on RZ/Five SMARC EVK. Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> Cheers, Prabhakar > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h > index 928f096dca21..426f043fb450 100644 > --- a/arch/riscv/include/asm/tlbflush.h > +++ b/arch/riscv/include/asm/tlbflush.h > @@ -10,12 +10,21 @@ > #include <linux/mm_types.h> > #include <asm/smp.h> > #include <asm/errata_list.h> > +#include <asm/tlbbatch.h> > > #define FLUSH_TLB_MAX_SIZE ((unsigned long)-1) > #define FLUSH_TLB_NO_ASID ((unsigned long)-1) > > #ifdef CONFIG_MMU > extern unsigned long asid_mask; > +DECLARE_STATIC_KEY_FALSE(use_asid_allocator); > + > +struct flush_tlb_range_data { > + unsigned long asid; > + unsigned long start; > + unsigned long size; > + unsigned long stride; > +}; > > static inline void local_flush_tlb_all(void) > { > @@ -27,12 +36,40 @@ static inline void local_flush_tlb_page(unsigned long addr) > { > ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); > } > + > +static inline void local_flush_tlb_all_asid(unsigned long asid) > +{ > + if (asid != FLUSH_TLB_NO_ASID) > + __asm__ __volatile__ ("sfence.vma x0, %0" > + : > + : "r" (asid) > + : "memory"); > + else > + local_flush_tlb_all(); > +} > + > +static inline void local_flush_tlb_page_asid(unsigned long addr, > + unsigned long asid) > +{ > + if (asid != FLUSH_TLB_NO_ASID) > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : > + : "r" (addr), "r" (asid) > + : "memory"); > + else > + local_flush_tlb_page(addr); > +} > + > +static inline unsigned long get_mm_asid(struct mm_struct *mm) > +{ > + return static_branch_unlikely(&use_asid_allocator) ? 
> + atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; > +} > #else /* CONFIG_MMU */ > #define local_flush_tlb_all() do { } while (0) > #define local_flush_tlb_page(addr) do { } while (0) > #endif /* CONFIG_MMU */ > > -#if defined(CONFIG_SMP) && defined(CONFIG_MMU) > void flush_tlb_all(void); > void flush_tlb_mm(struct mm_struct *mm); > void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, > @@ -55,26 +92,4 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, > void arch_flush_tlb_batched_pending(struct mm_struct *mm); > void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); > > -#else /* CONFIG_SMP && CONFIG_MMU */ > - > -#define flush_tlb_all() local_flush_tlb_all() > -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr) > - > -static inline void flush_tlb_range(struct vm_area_struct *vma, > - unsigned long start, unsigned long end) > -{ > - local_flush_tlb_all(); > -} > - > -/* Flush a range of kernel pages */ > -static inline void flush_tlb_kernel_range(unsigned long start, > - unsigned long end) > -{ > - local_flush_tlb_all(); > -} > - > -#define flush_tlb_mm(mm) flush_tlb_all() > -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() > -#endif /* !CONFIG_SMP || !CONFIG_MMU */ > - > #endif /* _ASM_RISCV_TLBFLUSH_H */ > diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile > index 2c869f8026a8..7c6c4c858a6b 100644 > --- a/arch/riscv/mm/Makefile > +++ b/arch/riscv/mm/Makefile > @@ -19,7 +19,7 @@ obj-y += context.o > obj-y += pmem.o > > ifeq ($(CONFIG_MMU),y) > -obj-$(CONFIG_SMP) += tlbflush.o > +obj-y += tlbflush.o > endif > obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o > obj-$(CONFIG_PTDUMP_CORE) += ptdump.o > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c > index 8d12b26f5ac3..4765603fa08a 100644 > --- a/arch/riscv/mm/tlbflush.c > +++ b/arch/riscv/mm/tlbflush.c > @@ -6,28 +6,36 @@ > #include <linux/hugetlb.h> > #include <asm/sbi.h> > #include <asm/mmu_context.h> > +#include <asm/tlbflush.h> > > -static inline void local_flush_tlb_all_asid(unsigned long asid) > +static unsigned long get_stride_size(struct vm_area_struct *vma) > { > - if (asid != FLUSH_TLB_NO_ASID) > - __asm__ __volatile__ ("sfence.vma x0, %0" > - : > - : "r" (asid) > - : "memory"); > - else > - local_flush_tlb_all(); > -} > + unsigned long stride_size; > > -static inline void local_flush_tlb_page_asid(unsigned long addr, > - unsigned long asid) > -{ > - if (asid != FLUSH_TLB_NO_ASID) > - __asm__ __volatile__ ("sfence.vma %0, %1" > - : > - : "r" (addr), "r" (asid) > - : "memory"); > - else > - local_flush_tlb_page(addr); > + if (!is_vm_hugetlb_page(vma)) > + return PAGE_SIZE; > + > + stride_size = huge_page_size(hstate_vma(vma)); > + > + /* > + * As stated in the privileged specification, every PTE in a > + * NAPOT region must be invalidated, so reset the stride in that > + * case. 
> + */ > + if (has_svnapot()) { > + if (stride_size >= PGDIR_SIZE) > + stride_size = PGDIR_SIZE; > + else if (stride_size >= P4D_SIZE) > + stride_size = P4D_SIZE; > + else if (stride_size >= PUD_SIZE) > + stride_size = PUD_SIZE; > + else if (stride_size >= PMD_SIZE) > + stride_size = PMD_SIZE; > + else > + stride_size = PAGE_SIZE; > + } > + > + return stride_size; > } > > /* > @@ -66,31 +74,12 @@ static inline void local_flush_tlb_range_asid(unsigned long start, > local_flush_tlb_range_threshold_asid(start, size, stride, asid); > } > > -void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) > -{ > - local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID); > -} > - > +#ifdef CONFIG_SMP > static void __ipi_flush_tlb_all(void *info) > { > local_flush_tlb_all(); > } > > -void flush_tlb_all(void) > -{ > - if (riscv_use_ipi_for_rfence()) > - on_each_cpu(__ipi_flush_tlb_all, NULL, 1); > - else > - sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); > -} > - > -struct flush_tlb_range_data { > - unsigned long asid; > - unsigned long start; > - unsigned long size; > - unsigned long stride; > -}; > - > static void __ipi_flush_tlb_range_asid(void *info) > { > struct flush_tlb_range_data *d = info; > @@ -138,10 +127,18 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, > put_cpu(); > } > > -static inline unsigned long get_mm_asid(struct mm_struct *mm) > +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) > { > - return static_branch_unlikely(&use_asid_allocator) ? > - atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; > + __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), > + addr, PAGE_SIZE, PAGE_SIZE); > +} > + > +void flush_tlb_all(void) > +{ > + if (riscv_use_ipi_for_rfence()) > + on_each_cpu(__ipi_flush_tlb_all, NULL, 1); > + else > + sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); > } > > void flush_tlb_mm(struct mm_struct *mm) > @@ -158,41 +155,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, > start, end - start, page_size); > } > > -void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) > -{ > - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), > - addr, PAGE_SIZE, PAGE_SIZE); > -} > - > void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > unsigned long end) > { > unsigned long stride_size; > > - if (!is_vm_hugetlb_page(vma)) { > - stride_size = PAGE_SIZE; > - } else { > - stride_size = huge_page_size(hstate_vma(vma)); > - > - /* > - * As stated in the privileged specification, every PTE in a > - * NAPOT region must be invalidated, so reset the stride in that > - * case. 
> - */ > - if (has_svnapot()) { > - if (stride_size >= PGDIR_SIZE) > - stride_size = PGDIR_SIZE; > - else if (stride_size >= P4D_SIZE) > - stride_size = P4D_SIZE; > - else if (stride_size >= PUD_SIZE) > - stride_size = PUD_SIZE; > - else if (stride_size >= PMD_SIZE) > - stride_size = PMD_SIZE; > - else > - stride_size = PAGE_SIZE; > - } > - } > - > + stride_size = get_stride_size(vma); > __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), > start, end - start, stride_size); > } > @@ -203,6 +171,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) > start, end - start, PAGE_SIZE); > } > > +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) > +{ > + __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, > + FLUSH_TLB_MAX_SIZE, PAGE_SIZE); > +} > + > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, > unsigned long end) > @@ -212,6 +186,77 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, > } > #endif > > +#else > +static void __flush_tlb_range_up(struct mm_struct *mm, unsigned long start, > + unsigned long size, unsigned long stride) > +{ > + unsigned long asid = FLUSH_TLB_NO_ASID; > + > + if (mm) > + asid = get_mm_asid(mm); > + > + local_flush_tlb_range_asid(start, size, stride, asid); > +} > + > +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) > +{ > + local_flush_tlb_page(addr); > +} > + > +void flush_tlb_all(void) > +{ > + local_flush_tlb_all(); > +} > + > +void flush_tlb_mm(struct mm_struct *mm) > +{ > + __flush_tlb_range_up(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); > +} > + > +void flush_tlb_mm_range(struct mm_struct *mm, > + unsigned long start, unsigned long end, > + unsigned int page_size) > +{ > + __flush_tlb_range_up(mm, start, end - start, page_size); > +} > + > +void flush_tlb_range(struct vm_area_struct *vma, > + unsigned long start, unsigned long end) > +{ > + unsigned long stride_size; > + > + stride_size = get_stride_size(vma); > + __flush_tlb_range_up(vma->vm_mm, start, end - start, stride_size); > +} > + > +/* Flush a range of kernel pages */ > +void flush_tlb_kernel_range(unsigned long start, > + unsigned long end) > +{ > + __flush_tlb_range_up(NULL, start, end - start, PAGE_SIZE); > +} > + > +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) > +{ > + __flush_tlb_range_up(NULL, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); > +} > + > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE > +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end) > +{ > + __flush_tlb_range_up(vma->vm_mm, start, end - start, PMD_SIZE); > +} > +#endif > + > +#endif > + > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) > +{ > + local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, > + FLUSH_TLB_NO_ASID); > +} > + > bool arch_tlbbatch_should_defer(struct mm_struct *mm) > { > return true; > @@ -228,9 +273,3 @@ void arch_flush_tlb_batched_pending(struct mm_struct *mm) > { > flush_tlb_mm(mm); > } > - > -void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) > -{ > - __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, > - FLUSH_TLB_MAX_SIZE, PAGE_SIZE); > -} > -- > 2.20.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Hi Prabhakar, On Sat, Jan 27, 2024 at 1:42 AM Lad, Prabhakar <prabhakar.csengg@gmail.com> wrote: > > On Thu, Jan 25, 2024 at 6:23 AM Yunhui Cui <cuiyunhui@bytedance.com> wrote: > > > > Add support for flush_tlb_range() to improve TLB performance for > > UP systems. In order to avoid the mutual inclusion of tlbflush.h > > and hugetlb.h, the UP part is also implemented in tlbflush.c. > > > > Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com> > > --- > > arch/riscv/include/asm/tlbflush.h | 61 ++++++---- > > arch/riscv/mm/Makefile | 2 +- > > arch/riscv/mm/tlbflush.c | 195 ++++++++++++++++++------------ > > 3 files changed, 156 insertions(+), 102 deletions(-) > > > Boot tested with defconfig + rz/five enabled, no issues seen on > RZ/Five SMARC EVK. > > Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> > Thank you for testing ! Thanks, Yunhui
On Thu, Jan 25, 2024 at 02:20:44PM +0800, Yunhui Cui wrote: > Add support for flush_tlb_range() to improve TLB performance for > UP systems. In order to avoid the mutual inclusion of tlbflush.h > and hugetlb.h, the UP part is also implemented in tlbflush.c. Hi Yunhui, IIRC, Samuel sent similar patch series a few weeks ago. https://lore.kernel.org/linux-riscv/20240102220134.3229156-1-samuel.holland@sifive.com/ After that series, do you still need this patch? Thanks > > Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com> > --- > arch/riscv/include/asm/tlbflush.h | 61 ++++++---- > arch/riscv/mm/Makefile | 2 +- > arch/riscv/mm/tlbflush.c | 195 ++++++++++++++++++------------ > 3 files changed, 156 insertions(+), 102 deletions(-) > > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h > index 928f096dca21..426f043fb450 100644 > --- a/arch/riscv/include/asm/tlbflush.h > +++ b/arch/riscv/include/asm/tlbflush.h > @@ -10,12 +10,21 @@ > #include <linux/mm_types.h> > #include <asm/smp.h> > #include <asm/errata_list.h> > +#include <asm/tlbbatch.h> > > #define FLUSH_TLB_MAX_SIZE ((unsigned long)-1) > #define FLUSH_TLB_NO_ASID ((unsigned long)-1) > > #ifdef CONFIG_MMU > extern unsigned long asid_mask; > +DECLARE_STATIC_KEY_FALSE(use_asid_allocator); > + > +struct flush_tlb_range_data { > + unsigned long asid; > + unsigned long start; > + unsigned long size; > + unsigned long stride; > +}; > > static inline void local_flush_tlb_all(void) > { > @@ -27,12 +36,40 @@ static inline void local_flush_tlb_page(unsigned long addr) > { > ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); > } > + > +static inline void local_flush_tlb_all_asid(unsigned long asid) > +{ > + if (asid != FLUSH_TLB_NO_ASID) > + __asm__ __volatile__ ("sfence.vma x0, %0" > + : > + : "r" (asid) > + : "memory"); > + else > + local_flush_tlb_all(); > +} > + > +static inline void local_flush_tlb_page_asid(unsigned long addr, > + unsigned long asid) > +{ > + if (asid != FLUSH_TLB_NO_ASID) > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : > + : "r" (addr), "r" (asid) > + : "memory"); > + else > + local_flush_tlb_page(addr); > +} > + > +static inline unsigned long get_mm_asid(struct mm_struct *mm) > +{ > + return static_branch_unlikely(&use_asid_allocator) ? 
> + atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; > +} > #else /* CONFIG_MMU */ > #define local_flush_tlb_all() do { } while (0) > #define local_flush_tlb_page(addr) do { } while (0) > #endif /* CONFIG_MMU */ > > -#if defined(CONFIG_SMP) && defined(CONFIG_MMU) > void flush_tlb_all(void); > void flush_tlb_mm(struct mm_struct *mm); > void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, > @@ -55,26 +92,4 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, > void arch_flush_tlb_batched_pending(struct mm_struct *mm); > void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); > > -#else /* CONFIG_SMP && CONFIG_MMU */ > - > -#define flush_tlb_all() local_flush_tlb_all() > -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr) > - > -static inline void flush_tlb_range(struct vm_area_struct *vma, > - unsigned long start, unsigned long end) > -{ > - local_flush_tlb_all(); > -} > - > -/* Flush a range of kernel pages */ > -static inline void flush_tlb_kernel_range(unsigned long start, > - unsigned long end) > -{ > - local_flush_tlb_all(); > -} > - > -#define flush_tlb_mm(mm) flush_tlb_all() > -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() > -#endif /* !CONFIG_SMP || !CONFIG_MMU */ > - > #endif /* _ASM_RISCV_TLBFLUSH_H */ > diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile > index 2c869f8026a8..7c6c4c858a6b 100644 > --- a/arch/riscv/mm/Makefile > +++ b/arch/riscv/mm/Makefile > @@ -19,7 +19,7 @@ obj-y += context.o > obj-y += pmem.o > > ifeq ($(CONFIG_MMU),y) > -obj-$(CONFIG_SMP) += tlbflush.o > +obj-y += tlbflush.o > endif > obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o > obj-$(CONFIG_PTDUMP_CORE) += ptdump.o > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c > index 8d12b26f5ac3..4765603fa08a 100644 > --- a/arch/riscv/mm/tlbflush.c > +++ b/arch/riscv/mm/tlbflush.c > @@ -6,28 +6,36 @@ > #include <linux/hugetlb.h> > #include <asm/sbi.h> > #include <asm/mmu_context.h> > +#include <asm/tlbflush.h> > > -static inline void local_flush_tlb_all_asid(unsigned long asid) > +static unsigned long get_stride_size(struct vm_area_struct *vma) > { > - if (asid != FLUSH_TLB_NO_ASID) > - __asm__ __volatile__ ("sfence.vma x0, %0" > - : > - : "r" (asid) > - : "memory"); > - else > - local_flush_tlb_all(); > -} > + unsigned long stride_size; > > -static inline void local_flush_tlb_page_asid(unsigned long addr, > - unsigned long asid) > -{ > - if (asid != FLUSH_TLB_NO_ASID) > - __asm__ __volatile__ ("sfence.vma %0, %1" > - : > - : "r" (addr), "r" (asid) > - : "memory"); > - else > - local_flush_tlb_page(addr); > + if (!is_vm_hugetlb_page(vma)) > + return PAGE_SIZE; > + > + stride_size = huge_page_size(hstate_vma(vma)); > + > + /* > + * As stated in the privileged specification, every PTE in a > + * NAPOT region must be invalidated, so reset the stride in that > + * case. 
> + */ > + if (has_svnapot()) { > + if (stride_size >= PGDIR_SIZE) > + stride_size = PGDIR_SIZE; > + else if (stride_size >= P4D_SIZE) > + stride_size = P4D_SIZE; > + else if (stride_size >= PUD_SIZE) > + stride_size = PUD_SIZE; > + else if (stride_size >= PMD_SIZE) > + stride_size = PMD_SIZE; > + else > + stride_size = PAGE_SIZE; > + } > + > + return stride_size; > } > > /* > @@ -66,31 +74,12 @@ static inline void local_flush_tlb_range_asid(unsigned long start, > local_flush_tlb_range_threshold_asid(start, size, stride, asid); > } > > -void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) > -{ > - local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID); > -} > - > +#ifdef CONFIG_SMP > static void __ipi_flush_tlb_all(void *info) > { > local_flush_tlb_all(); > } > > -void flush_tlb_all(void) > -{ > - if (riscv_use_ipi_for_rfence()) > - on_each_cpu(__ipi_flush_tlb_all, NULL, 1); > - else > - sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); > -} > - > -struct flush_tlb_range_data { > - unsigned long asid; > - unsigned long start; > - unsigned long size; > - unsigned long stride; > -}; > - > static void __ipi_flush_tlb_range_asid(void *info) > { > struct flush_tlb_range_data *d = info; > @@ -138,10 +127,18 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, > put_cpu(); > } > > -static inline unsigned long get_mm_asid(struct mm_struct *mm) > +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) > { > - return static_branch_unlikely(&use_asid_allocator) ? > - atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; > + __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), > + addr, PAGE_SIZE, PAGE_SIZE); > +} > + > +void flush_tlb_all(void) > +{ > + if (riscv_use_ipi_for_rfence()) > + on_each_cpu(__ipi_flush_tlb_all, NULL, 1); > + else > + sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); > } > > void flush_tlb_mm(struct mm_struct *mm) > @@ -158,41 +155,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, > start, end - start, page_size); > } > > -void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) > -{ > - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), > - addr, PAGE_SIZE, PAGE_SIZE); > -} > - > void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, > unsigned long end) > { > unsigned long stride_size; > > - if (!is_vm_hugetlb_page(vma)) { > - stride_size = PAGE_SIZE; > - } else { > - stride_size = huge_page_size(hstate_vma(vma)); > - > - /* > - * As stated in the privileged specification, every PTE in a > - * NAPOT region must be invalidated, so reset the stride in that > - * case. 
> - */ > - if (has_svnapot()) { > - if (stride_size >= PGDIR_SIZE) > - stride_size = PGDIR_SIZE; > - else if (stride_size >= P4D_SIZE) > - stride_size = P4D_SIZE; > - else if (stride_size >= PUD_SIZE) > - stride_size = PUD_SIZE; > - else if (stride_size >= PMD_SIZE) > - stride_size = PMD_SIZE; > - else > - stride_size = PAGE_SIZE; > - } > - } > - > + stride_size = get_stride_size(vma); > __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), > start, end - start, stride_size); > } > @@ -203,6 +171,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) > start, end - start, PAGE_SIZE); > } > > +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) > +{ > + __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, > + FLUSH_TLB_MAX_SIZE, PAGE_SIZE); > +} > + > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, > unsigned long end) > @@ -212,6 +186,77 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, > } > #endif > > +#else > +static void __flush_tlb_range_up(struct mm_struct *mm, unsigned long start, > + unsigned long size, unsigned long stride) > +{ > + unsigned long asid = FLUSH_TLB_NO_ASID; > + > + if (mm) > + asid = get_mm_asid(mm); > + > + local_flush_tlb_range_asid(start, size, stride, asid); > +} > + > +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) > +{ > + local_flush_tlb_page(addr); > +} > + > +void flush_tlb_all(void) > +{ > + local_flush_tlb_all(); > +} > + > +void flush_tlb_mm(struct mm_struct *mm) > +{ > + __flush_tlb_range_up(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); > +} > + > +void flush_tlb_mm_range(struct mm_struct *mm, > + unsigned long start, unsigned long end, > + unsigned int page_size) > +{ > + __flush_tlb_range_up(mm, start, end - start, page_size); > +} > + > +void flush_tlb_range(struct vm_area_struct *vma, > + unsigned long start, unsigned long end) > +{ > + unsigned long stride_size; > + > + stride_size = get_stride_size(vma); > + __flush_tlb_range_up(vma->vm_mm, start, end - start, stride_size); > +} > + > +/* Flush a range of kernel pages */ > +void flush_tlb_kernel_range(unsigned long start, > + unsigned long end) > +{ > + __flush_tlb_range_up(NULL, start, end - start, PAGE_SIZE); > +} > + > +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) > +{ > + __flush_tlb_range_up(NULL, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); > +} > + > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE > +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, > + unsigned long end) > +{ > + __flush_tlb_range_up(vma->vm_mm, start, end - start, PMD_SIZE); > +} > +#endif > + > +#endif > + > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) > +{ > + local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, > + FLUSH_TLB_NO_ASID); > +} > + > bool arch_tlbbatch_should_defer(struct mm_struct *mm) > { > return true; > @@ -228,9 +273,3 @@ void arch_flush_tlb_batched_pending(struct mm_struct *mm) > { > flush_tlb_mm(mm); > } > - > -void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) > -{ > - __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, > - FLUSH_TLB_MAX_SIZE, PAGE_SIZE); > -} > -- > 2.20.1 >
Hi Jisheng, On Mon, Jan 29, 2024 at 4:02 PM Jisheng Zhang <jszhang@kernel.org> wrote: > > On Thu, Jan 25, 2024 at 02:20:44PM +0800, Yunhui Cui wrote: > > Add support for flush_tlb_range() to improve TLB performance for > > UP systems. In order to avoid the mutual inclusion of tlbflush.h > > and hugetlb.h, the UP part is also implemented in tlbflush.c. > > Hi Yunhui, > > IIRC, Samuel sent similar patch series a few weeks ago. > > https://lore.kernel.org/linux-riscv/20240102220134.3229156-1-samuel.holland@sifive.com/ > > After that series, do you still need this patch? Thank you for your reminder. I didn't find it before I mailed my patch. I just looked at the content of this patch. I understand that my patch is needed. For a single core, a more concise TLB flush logic is needed, and it is helpful to improve performance. Thanks, Yunhui
On Mon, Jan 29, 2024 at 04:26:57PM +0800, yunhui cui wrote: > Hi Jisheng, > > On Mon, Jan 29, 2024 at 4:02 PM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > On Thu, Jan 25, 2024 at 02:20:44PM +0800, Yunhui Cui wrote: > > > Add support for flush_tlb_range() to improve TLB performance for > > > UP systems. In order to avoid the mutual inclusion of tlbflush.h > > > and hugetlb.h, the UP part is also implemented in tlbflush.c. > > > > Hi Yunhui, > > > > IIRC, Samuel sent similar patch series a few weeks ago. > > > > https://lore.kernel.org/linux-riscv/20240102220134.3229156-1-samuel.holland@sifive.com/ > > > > After that series, do you still need this patch? > > Thank you for your reminder. I didn't find it before I mailed my > patch. I just looked at the content of this patch. I understand that > my patch is needed. For a single core, a more concise TLB flush logic > is needed, and it is helpful to improve performance. Currently, riscv UP flush_tlb_range still use flush all TLB entries, obviously it's is a big hammer, this is what your patch is trying to optimize. I'm not sure whether I understand your code correctly or not. Let me know if I misunderstand your code. After patch5 of the Samuel's series, __flush_tlb_range is unified for SMP and UP, so that UP can also benefit from recent improvements, such as range flush rather than all. Thanks > > Thanks, > Yunhui
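[Editorial note: the contrast described above is visible in the stubs this patch removes from tlbflush.h. A minimal sketch of the two UP behaviours, using only helpers that appear in the patch itself; the _old_up/_new_up names exist only for this illustration, and the real code additionally picks a hugepage-aware stride and falls back to a full flush for very large ranges.]

        /* Old UP stub: any range flush threw away the whole TLB. */
        static inline void flush_tlb_range_old_up(struct vm_area_struct *vma,
                                                  unsigned long start,
                                                  unsigned long end)
        {
                local_flush_tlb_all();          /* one bare sfence.vma */
        }

        /* Range-based flush: only the touched addresses are invalidated. */
        static inline void flush_tlb_range_new_up(struct vm_area_struct *vma,
                                                  unsigned long start,
                                                  unsigned long end)
        {
                unsigned long asid = get_mm_asid(vma->vm_mm);
                unsigned long addr;

                for (addr = start; addr < end; addr += PAGE_SIZE)
                        local_flush_tlb_page_asid(addr, asid);  /* sfence.vma addr, asid */
        }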
Hi Jisheng, On Mon, Jan 29, 2024 at 5:51 PM Jisheng Zhang <jszhang@kernel.org> wrote: > > On Mon, Jan 29, 2024 at 04:26:57PM +0800, yunhui cui wrote: > > Hi Jisheng, > > > > On Mon, Jan 29, 2024 at 4:02 PM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > > > On Thu, Jan 25, 2024 at 02:20:44PM +0800, Yunhui Cui wrote: > > > > Add support for flush_tlb_range() to improve TLB performance for > > > > UP systems. In order to avoid the mutual inclusion of tlbflush.h > > > > and hugetlb.h, the UP part is also implemented in tlbflush.c. > > > > > > Hi Yunhui, > > > > > > IIRC, Samuel sent similar patch series a few weeks ago. > > > > > > https://lore.kernel.org/linux-riscv/20240102220134.3229156-1-samuel.holland@sifive.com/ > > > > > > After that series, do you still need this patch? > > > > Thank you for your reminder. I didn't find it before I mailed my > > patch. I just looked at the content of this patch. I understand that > > my patch is needed. For a single core, a more concise TLB flush logic > > is needed, and it is helpful to improve performance. > > Currently, riscv UP flush_tlb_range still use flush all TLB entries, > obviously it's is a big hammer, this is what your patch is trying to > optimize. I'm not sure whether I understand your code correctly or not. > Let me know if I misunderstand your code. > > After patch5 of the Samuel's series, __flush_tlb_range is unified for > SMP and UP, so that UP can also benefit from recent improvements, such > as range flush rather than all. In my opinion, UP does not need to combine some SMP if... else, on_each_cpu(...) logic, which is also a manifestation of performance improvement. what do you think? Thanks, Yunhui
On Mon, Jan 29, 2024 at 07:02:10PM +0800, yunhui cui wrote: > Hi Jisheng, > > On Mon, Jan 29, 2024 at 5:51 PM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > On Mon, Jan 29, 2024 at 04:26:57PM +0800, yunhui cui wrote: > > > Hi Jisheng, > > > > > > On Mon, Jan 29, 2024 at 4:02 PM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > > > > > On Thu, Jan 25, 2024 at 02:20:44PM +0800, Yunhui Cui wrote: > > > > > Add support for flush_tlb_range() to improve TLB performance for > > > > > UP systems. In order to avoid the mutual inclusion of tlbflush.h > > > > > and hugetlb.h, the UP part is also implemented in tlbflush.c. > > > > > > > > Hi Yunhui, > > > > > > > > IIRC, Samuel sent similar patch series a few weeks ago. > > > > > > > > https://lore.kernel.org/linux-riscv/20240102220134.3229156-1-samuel.holland@sifive.com/ > > > > > > > > After that series, do you still need this patch? > > > > > > Thank you for your reminder. I didn't find it before I mailed my > > > patch. I just looked at the content of this patch. I understand that > > > my patch is needed. For a single core, a more concise TLB flush logic > > > is needed, and it is helpful to improve performance. > > > > Currently, riscv UP flush_tlb_range still use flush all TLB entries, > > obviously it's is a big hammer, this is what your patch is trying to > > optimize. I'm not sure whether I understand your code correctly or not. > > Let me know if I misunderstand your code. > > > > After patch5 of the Samuel's series, __flush_tlb_range is unified for > > SMP and UP, so that UP can also benefit from recent improvements, such > > as range flush rather than all. > > In my opinion, UP does not need to combine some SMP if... else, > on_each_cpu(...) logic, which is also a manifestation of performance Hi Yunhui, IIRC, the compiler will optimise out the unnecessary logic under UP, I may misread the code. But if no, indeed, there's improvement room. However, even in this case, IMHO, it's better if you can base on Samuel's series. Anyway, the optimization(range tlb entries rather than *all* entries under UP case) you want to do has been implemented. While I'm not sure whether we can rely on the compiler to optimize out all unnecessary logics. Thanks > improvement. what do you think? > Thanks, > Yunhui
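[Editorial note: the dead-code elimination referred to above works roughly as follows: when a predicate collapses to a compile-time constant in a UP build, the compiler drops the untaken branch entirely. The helpers below are hypothetical and exist only for illustration; they are not the actual RISC-V header API, and whether every SMP-only branch in the unified __flush_tlb_range() folds away like this is exactly the open question in this sub-thread.]

        /* Hypothetical UP stub: compile-time false, not real kernel API. */
        static inline bool need_remote_fence(void)
        {
                return false;
        }

        /* Hypothetical placeholder for the remote-fence path. */
        static inline void remote_fence_page(unsigned long addr) { }

        static void flush_one_page(unsigned long addr)
        {
                if (need_remote_fence())
                        remote_fence_page(addr);        /* statically dead, compiled out */
                else
                        local_flush_tlb_page(addr);     /* only this path is emitted */
        }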
Hi Jisheng, On Mon, Jan 29, 2024 at 8:05 PM Jisheng Zhang <jszhang@kernel.org> wrote: > > On Mon, Jan 29, 2024 at 07:02:10PM +0800, yunhui cui wrote: > > Hi Jisheng, > > > > On Mon, Jan 29, 2024 at 5:51 PM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > > > On Mon, Jan 29, 2024 at 04:26:57PM +0800, yunhui cui wrote: > > > > Hi Jisheng, > > > > > > > > On Mon, Jan 29, 2024 at 4:02 PM Jisheng Zhang <jszhang@kernel.org> wrote: > > > > > > > > > > On Thu, Jan 25, 2024 at 02:20:44PM +0800, Yunhui Cui wrote: > > > > > > Add support for flush_tlb_range() to improve TLB performance for > > > > > > UP systems. In order to avoid the mutual inclusion of tlbflush.h > > > > > > and hugetlb.h, the UP part is also implemented in tlbflush.c. > > > > > > > > > > Hi Yunhui, > > > > > > > > > > IIRC, Samuel sent similar patch series a few weeks ago. > > > > > > > > > > https://lore.kernel.org/linux-riscv/20240102220134.3229156-1-samuel.holland@sifive.com/ > > > > > > > > > > After that series, do you still need this patch? > > > > > > > > Thank you for your reminder. I didn't find it before I mailed my > > > > patch. I just looked at the content of this patch. I understand that > > > > my patch is needed. For a single core, a more concise TLB flush logic > > > > is needed, and it is helpful to improve performance. > > > > > > Currently, riscv UP flush_tlb_range still use flush all TLB entries, > > > obviously it's is a big hammer, this is what your patch is trying to > > > optimize. I'm not sure whether I understand your code correctly or not. > > > Let me know if I misunderstand your code. > > > > > > After patch5 of the Samuel's series, __flush_tlb_range is unified for > > > SMP and UP, so that UP can also benefit from recent improvements, such > > > as range flush rather than all. > > > > In my opinion, UP does not need to combine some SMP if... else, > > on_each_cpu(...) logic, which is also a manifestation of performance > > Hi Yunhui, > > IIRC, the compiler will optimise out the unnecessary logic under UP, I > may misread the code. But if no, indeed, there's improvement room. > However, even in this case, IMHO, it's better if you can base on > Samuel's series. > Anyway, the optimization(range tlb entries rather than *all* entries under > UP case) you want to do has been implemented. While I'm not sure whether > we can rely on the compiler to optimize out all unnecessary logics. Okay, let's see if there are any necessary optimizations based on Samuel's series. Thanks, Yunhui
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 928f096dca21..426f043fb450 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -10,12 +10,21 @@ #include <linux/mm_types.h> #include <asm/smp.h> #include <asm/errata_list.h> +#include <asm/tlbbatch.h> #define FLUSH_TLB_MAX_SIZE ((unsigned long)-1) #define FLUSH_TLB_NO_ASID ((unsigned long)-1) #ifdef CONFIG_MMU extern unsigned long asid_mask; +DECLARE_STATIC_KEY_FALSE(use_asid_allocator); + +struct flush_tlb_range_data { + unsigned long asid; + unsigned long start; + unsigned long size; + unsigned long stride; +}; static inline void local_flush_tlb_all(void) { @@ -27,12 +36,40 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); + else + local_flush_tlb_all(); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + if (asid != FLUSH_TLB_NO_ASID) + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); + else + local_flush_tlb_page(addr); +} + +static inline unsigned long get_mm_asid(struct mm_struct *mm) +{ + return static_branch_unlikely(&use_asid_allocator) ? + atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; +} #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #define local_flush_tlb_page(addr) do { } while (0) #endif /* CONFIG_MMU */ -#if defined(CONFIG_SMP) && defined(CONFIG_MMU) void flush_tlb_all(void); void flush_tlb_mm(struct mm_struct *mm); void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, @@ -55,26 +92,4 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch, void arch_flush_tlb_batched_pending(struct mm_struct *mm); void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); -#else /* CONFIG_SMP && CONFIG_MMU */ - -#define flush_tlb_all() local_flush_tlb_all() -#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr) - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - local_flush_tlb_all(); -} - -/* Flush a range of kernel pages */ -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - local_flush_tlb_all(); -} - -#define flush_tlb_mm(mm) flush_tlb_all() -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() -#endif /* !CONFIG_SMP || !CONFIG_MMU */ - #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 2c869f8026a8..7c6c4c858a6b 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -19,7 +19,7 @@ obj-y += context.o obj-y += pmem.o ifeq ($(CONFIG_MMU),y) -obj-$(CONFIG_SMP) += tlbflush.o +obj-y += tlbflush.o endif obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 8d12b26f5ac3..4765603fa08a 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -6,28 +6,36 @@ #include <linux/hugetlb.h> #include <asm/sbi.h> #include <asm/mmu_context.h> +#include <asm/tlbflush.h> -static inline void local_flush_tlb_all_asid(unsigned long asid) +static unsigned long get_stride_size(struct vm_area_struct *vma) { - if (asid != FLUSH_TLB_NO_ASID) - __asm__ 
__volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); - else - local_flush_tlb_all(); -} + unsigned long stride_size; -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - if (asid != FLUSH_TLB_NO_ASID) - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); - else - local_flush_tlb_page(addr); + if (!is_vm_hugetlb_page(vma)) + return PAGE_SIZE; + + stride_size = huge_page_size(hstate_vma(vma)); + + /* + * As stated in the privileged specification, every PTE in a + * NAPOT region must be invalidated, so reset the stride in that + * case. + */ + if (has_svnapot()) { + if (stride_size >= PGDIR_SIZE) + stride_size = PGDIR_SIZE; + else if (stride_size >= P4D_SIZE) + stride_size = P4D_SIZE; + else if (stride_size >= PUD_SIZE) + stride_size = PUD_SIZE; + else if (stride_size >= PMD_SIZE) + stride_size = PMD_SIZE; + else + stride_size = PAGE_SIZE; + } + + return stride_size; } /* @@ -66,31 +74,12 @@ static inline void local_flush_tlb_range_asid(unsigned long start, local_flush_tlb_range_threshold_asid(start, size, stride, asid); } -void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID); -} - +#ifdef CONFIG_SMP static void __ipi_flush_tlb_all(void *info) { local_flush_tlb_all(); } -void flush_tlb_all(void) -{ - if (riscv_use_ipi_for_rfence()) - on_each_cpu(__ipi_flush_tlb_all, NULL, 1); - else - sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); -} - -struct flush_tlb_range_data { - unsigned long asid; - unsigned long start; - unsigned long size; - unsigned long stride; -}; - static void __ipi_flush_tlb_range_asid(void *info) { struct flush_tlb_range_data *d = info; @@ -138,10 +127,18 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, put_cpu(); } -static inline unsigned long get_mm_asid(struct mm_struct *mm) +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) { - return static_branch_unlikely(&use_asid_allocator) ? - atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID; + __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), + addr, PAGE_SIZE, PAGE_SIZE); +} + +void flush_tlb_all(void) +{ + if (riscv_use_ipi_for_rfence()) + on_each_cpu(__ipi_flush_tlb_all, NULL, 1); + else + sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); } void flush_tlb_mm(struct mm_struct *mm) @@ -158,41 +155,12 @@ void flush_tlb_mm_range(struct mm_struct *mm, start, end - start, page_size); } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) -{ - __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), - addr, PAGE_SIZE, PAGE_SIZE); -} - void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { unsigned long stride_size; - if (!is_vm_hugetlb_page(vma)) { - stride_size = PAGE_SIZE; - } else { - stride_size = huge_page_size(hstate_vma(vma)); - - /* - * As stated in the privileged specification, every PTE in a - * NAPOT region must be invalidated, so reset the stride in that - * case. 
- */ - if (has_svnapot()) { - if (stride_size >= PGDIR_SIZE) - stride_size = PGDIR_SIZE; - else if (stride_size >= P4D_SIZE) - stride_size = P4D_SIZE; - else if (stride_size >= PUD_SIZE) - stride_size = PUD_SIZE; - else if (stride_size >= PMD_SIZE) - stride_size = PMD_SIZE; - else - stride_size = PAGE_SIZE; - } - } - + stride_size = get_stride_size(vma); __flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm), start, end - start, stride_size); } @@ -203,6 +171,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) start, end - start, PAGE_SIZE); } +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +{ + __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, + FLUSH_TLB_MAX_SIZE, PAGE_SIZE); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) @@ -212,6 +186,77 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, } #endif +#else +static void __flush_tlb_range_up(struct mm_struct *mm, unsigned long start, + unsigned long size, unsigned long stride) +{ + unsigned long asid = FLUSH_TLB_NO_ASID; + + if (mm) + asid = get_mm_asid(mm); + + local_flush_tlb_range_asid(start, size, stride, asid); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) +{ + local_flush_tlb_page(addr); +} + +void flush_tlb_all(void) +{ + local_flush_tlb_all(); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + __flush_tlb_range_up(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); +} + +void flush_tlb_mm_range(struct mm_struct *mm, + unsigned long start, unsigned long end, + unsigned int page_size) +{ + __flush_tlb_range_up(mm, start, end - start, page_size); +} + +void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + unsigned long stride_size; + + stride_size = get_stride_size(vma); + __flush_tlb_range_up(vma->vm_mm, start, end - start, stride_size); +} + +/* Flush a range of kernel pages */ +void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + __flush_tlb_range_up(NULL, start, end - start, PAGE_SIZE); +} + +void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) +{ + __flush_tlb_range_up(NULL, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + __flush_tlb_range_up(vma->vm_mm, start, end - start, PMD_SIZE); +} +#endif + +#endif + +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, + FLUSH_TLB_NO_ASID); +} + bool arch_tlbbatch_should_defer(struct mm_struct *mm) { return true; @@ -228,9 +273,3 @@ void arch_flush_tlb_batched_pending(struct mm_struct *mm) { flush_tlb_mm(mm); } - -void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) -{ - __flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0, - FLUSH_TLB_MAX_SIZE, PAGE_SIZE); -}
Add support for flush_tlb_range() to improve TLB performance for UP
systems. In order to avoid the mutual inclusion of tlbflush.h and
hugetlb.h, the UP part is also implemented in tlbflush.c.

Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
 arch/riscv/include/asm/tlbflush.h |  61 ++++++----
 arch/riscv/mm/Makefile            |   2 +-
 arch/riscv/mm/tlbflush.c          | 195 ++++++++++++++++++------------
 3 files changed, 156 insertions(+), 102 deletions(-)
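[Editorial note: on UP, the range flush added by this patch ultimately reduces to per-page sfence.vma instructions over the requested range. A simplified sketch of that inner loop, based on the local_flush_tlb_*_asid() helpers shown in the diff; the real local_flush_tlb_range_asid() also special-cases size <= stride and a flush-all threshold.]

        static void sketch_local_flush_range(unsigned long start, unsigned long size,
                                             unsigned long stride, unsigned long asid)
        {
                unsigned long addr;

                if (size == FLUSH_TLB_MAX_SIZE) {
                        local_flush_tlb_all_asid(asid);         /* full flush, by ASID if available */
                        return;
                }

                for (addr = start; addr < start + size; addr += stride)
                        local_flush_tlb_page_asid(addr, asid);  /* sfence.vma addr, asid */
        }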