Message ID | 20210606152050.636038-3-hch@lst.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/2] riscv: pass the mm_struct to __sbi_tlb_flush_range | expand |
Rebase with "THP support for RISCV" & "Add DMA_COHERENT v2" on linux-5.13-rc4. Tested-by: Guo Ren <guoren@kernel.org> On Sun, Jun 6, 2021 at 11:21 PM Christoph Hellwig <hch@lst.de> wrote: > > From: Guo Ren <guoren@linux.alibaba.com> > > Implement optimized version of the tlb flushing routines for systems > using ASIDs. These are behind the use_asid_allocator static branch to > not affect existing systems not using ASIDs. > > Signed-off-by: Guo Ren <guoren@linux.alibaba.com> > [hch: rebased on top of previous cleanups, use the same algorithm as > the non-ASID based code for local vs global flushes, keep functions > as local as possible] > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > arch/riscv/include/asm/mmu_context.h | 2 ++ > arch/riscv/mm/context.c | 2 +- > arch/riscv/mm/tlbflush.c | 47 +++++++++++++++++++++++----- > 3 files changed, 43 insertions(+), 8 deletions(-) > > diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h > index b0659413a080..7030837adc1a 100644 > --- a/arch/riscv/include/asm/mmu_context.h > +++ b/arch/riscv/include/asm/mmu_context.h > @@ -33,6 +33,8 @@ static inline int init_new_context(struct task_struct *tsk, > return 0; > } > > +DECLARE_STATIC_KEY_FALSE(use_asid_allocator); > + > #include <asm-generic/mmu_context.h> > > #endif /* _ASM_RISCV_MMU_CONTEXT_H */ > diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c > index 9bc46ab01c25..6ed696bad558 100644 > --- a/arch/riscv/mm/context.c > +++ b/arch/riscv/mm/context.c > @@ -18,7 +18,7 @@ > > #ifdef CONFIG_MMU > > -static DEFINE_STATIC_KEY_FALSE(use_asid_allocator); > +DEFINE_STATIC_KEY_FALSE(use_asid_allocator); > > static unsigned long asid_bits; > static unsigned long num_asids; > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c > index b458949fa8df..64f8201237c2 100644 > --- a/arch/riscv/mm/tlbflush.c > +++ b/arch/riscv/mm/tlbflush.c > @@ -4,6 +4,24 @@ > #include <linux/smp.h> > #include <linux/sched.h> > #include 
<asm/sbi.h> > +#include <asm/mmu_context.h> > + > +static inline void local_flush_tlb_all_asid(unsigned long asid) > +{ > + __asm__ __volatile__ ("sfence.vma x0, %0" > + : > + : "r" (asid) > + : "memory"); > +} > + > +static inline void local_flush_tlb_page_asid(unsigned long addr, > + unsigned long asid) > +{ > + __asm__ __volatile__ ("sfence.vma %0, %1" > + : > + : "r" (addr), "r" (asid) > + : "memory"); > +} > > void flush_tlb_all(void) > { > @@ -16,21 +34,36 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, > struct cpumask *cmask = mm_cpumask(mm); > struct cpumask hmask; > unsigned int cpuid; > + bool broadcast; > > if (cpumask_empty(cmask)) > return; > > cpuid = get_cpu(); > + /* check if the tlbflush needs to be sent to other CPUs */ > + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; > + if (static_branch_unlikely(&use_asid_allocator)) { > + unsigned long asid = atomic_long_read(&mm->context.id); > > - if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { > - /* local cpu is the only cpu present in cpumask */ > - if (size <= stride) > + if (broadcast) { > + riscv_cpuid_to_hartid_mask(cmask, &hmask); > + sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), > + start, size, asid); > + } else if (size <= stride) { > + local_flush_tlb_page_asid(start, asid); > + } else { > + local_flush_tlb_all_asid(asid); > + } It will reduce efficiency in our system, but I'll add "local_flush_tlb_range_asid(start, start+size, stride, size)" later. > + } else { > + if (broadcast) { > + riscv_cpuid_to_hartid_mask(cmask, &hmask); > + sbi_remote_sfence_vma(cpumask_bits(&hmask), > + start, size); > + } else if (size <= stride) { This logic is from Atish's patch: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/arch/riscv/mm/tlbflush.c?h=next-20210604&id=6efb16b1d5514865d0f7a01910648568ad3225d8 I don't know why the range is not used here; maybe it works around some hardware issue. 
> local_flush_tlb_page(start); > - else > + } else { > local_flush_tlb_all(); > - } else { > - riscv_cpuid_to_hartid_mask(cmask, &hmask); > - sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size); > + } > } > > put_cpu(); > -- > 2.30.2 >
diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index b0659413a080..7030837adc1a 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -33,6 +33,8 @@ static inline int init_new_context(struct task_struct *tsk, return 0; } +DECLARE_STATIC_KEY_FALSE(use_asid_allocator); + #include <asm-generic/mmu_context.h> #endif /* _ASM_RISCV_MMU_CONTEXT_H */ diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 9bc46ab01c25..6ed696bad558 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -18,7 +18,7 @@ #ifdef CONFIG_MMU -static DEFINE_STATIC_KEY_FALSE(use_asid_allocator); +DEFINE_STATIC_KEY_FALSE(use_asid_allocator); static unsigned long asid_bits; static unsigned long num_asids; diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index b458949fa8df..64f8201237c2 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -4,6 +4,24 @@ #include <linux/smp.h> #include <linux/sched.h> #include <asm/sbi.h> +#include <asm/mmu_context.h> + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); +} void flush_tlb_all(void) { @@ -16,21 +34,36 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, struct cpumask *cmask = mm_cpumask(mm); struct cpumask hmask; unsigned int cpuid; + bool broadcast; if (cpumask_empty(cmask)) return; cpuid = get_cpu(); + /* check if the tlbflush needs to be sent to other CPUs */ + broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; + if (static_branch_unlikely(&use_asid_allocator)) { + unsigned long asid = atomic_long_read(&mm->context.id); - if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) { - /* local cpu is the only 
cpu present in cpumask */ - if (size <= stride) + if (broadcast) { + riscv_cpuid_to_hartid_mask(cmask, &hmask); + sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), + start, size, asid); + } else if (size <= stride) { + local_flush_tlb_page_asid(start, asid); + } else { + local_flush_tlb_all_asid(asid); + } + } else { + if (broadcast) { + riscv_cpuid_to_hartid_mask(cmask, &hmask); + sbi_remote_sfence_vma(cpumask_bits(&hmask), + start, size); + } else if (size <= stride) { local_flush_tlb_page(start); - else + } else { local_flush_tlb_all(); - } else { - riscv_cpuid_to_hartid_mask(cmask, &hmask); - sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size); + } } put_cpu();