Message ID | 33588efb3909a4d699a952f93c26ea2f3c8bfdc4.1552069700.git.gary@garyguo.net
---|---
State | New, archived
Series | Improvements related to TLB and I$ flush
Just realised that __setup has been deprecated in favour of early_param.
I'll incorporate it in v3 of the patch.

> +
> +static int __init setup_tlbi_max_ops(char *str)
> +{
> +	int value = 0;
> +
> +	get_option(&str, &value);
> +
> +	/*
> +	 * This value cannot be greater than or equal to PTRS_PER_PTE, as we
> +	 * need to do a full flush for any non-leaf page table change. The
> +	 * value must also be at least 1.
> +	 */
> +	if (value >= PTRS_PER_PTE || value < 1)
> +		return 0;
> +
> +	tlbi_range_threshold = value * PAGE_SIZE;
> +	return 1;
> +}
> +__setup("tlbi_max_ops=", setup_tlbi_max_ops);
> +
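For reference, a minimal sketch of what an early_param conversion of the handler quoted above might look like — this is an assumption about v3, not the actual v3 code. The main differences are the return convention (early_param handlers return 0 on success) and registering the parameter name without the trailing "=". `tlbi_range_threshold` is the variable defined in tlbflush.c by this patch.

```c
#include <linux/init.h>
#include <linux/kernel.h>	/* get_option() */
#include <linux/errno.h>
#include <linux/mm.h>		/* PAGE_SIZE, PTRS_PER_PTE */

/* Hypothetical early_param variant of the __setup handler quoted above. */
static int __init setup_tlbi_max_ops(char *str)
{
	int value = 0;

	get_option(&str, &value);

	/* Same validation as above: at least 1, strictly below PTRS_PER_PTE. */
	if (value >= PTRS_PER_PTE || value < 1)
		return -EINVAL;

	tlbi_range_threshold = value * PAGE_SIZE;
	return 0;
}
early_param("tlbi_max_ops", setup_tlbi_max_ops);
```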
On 3/8/19 10:40 AM, Gary Guo wrote:
> From: Gary Guo <gary@garyguo.net>
>
> This patch rewrites the logic related to TLB flushing, both to clean up
> the code and to improve performance.
>
> We now use the sfence.vma variant with specified ASID and virtual address
> whenever possible. Even though only ASID 0 is used, it still improves
> performance by preventing global mappings from being flushed from the TLB.
>
> This patch also includes an IPI-based remote TLB shootdown, which is
> useful at this stage for testing because BBL/OpenSBI ignore the operands
> of sbi_remote_sfence_vma_asid and always perform a global TLB flush.
> The SBI-based remote TLB shootdown can still be opted into using the boot
> cmdline "tlbi_method=sbi".
>
> Signed-off-by: Gary Guo <gary@garyguo.net>
> ---
>  arch/riscv/include/asm/pgtable.h  |   2 +-
>  arch/riscv/include/asm/tlbflush.h |  73 ++++------
>  arch/riscv/mm/Makefile            |   1 +
>  arch/riscv/mm/context.c           |   8 +-
>  arch/riscv/mm/tlbflush.c          | 216 ++++++++++++++++++++++++++++++
>  5 files changed, 252 insertions(+), 48 deletions(-)
>  create mode 100644 arch/riscv/mm/tlbflush.c
>
> [...]
>
> +void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
> +{
> +	remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, 0);
> +}
> +

Can you please rebase on top of latest master? The fixmap patchset got
merged as part of 5.1-rc1; it introduced another flush_tlb_page usage with
the old arguments, causing a compilation failure.

Here is the PR:
https://patchwork.kernel.org/patch/10823195/

or, if you prefer the series:
https://patchwork.kernel.org/project/linux-riscv/list/?series=79489

Regards,
Atish
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 16301966d65b..47a8616b9de0 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -279,7 +279,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	 * Relying on flush_tlb_fix_spurious_fault would suffice, but
 	 * the extra traps reduce performance. So, eagerly SFENCE.VMA.
 	 */
-	local_flush_tlb_page(address);
+	local_flush_tlb_page(vma, address);
 }
 
 #define __HAVE_ARCH_PTE_SAME
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 54fee0cadb1e..d6c247ce17f3 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -1,22 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Copyright (C) 2009 Chen Liqin <liqin.chen@sunplusct.com>
  * Copyright (C) 2012 Regents of the University of California
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * Copyright (C) 2019 Gary Guo, University of Cambridge
  */
 
 #ifndef _ASM_RISCV_TLBFLUSH_H
 #define _ASM_RISCV_TLBFLUSH_H
 
 #include <linux/mm_types.h>
-#include <asm/smp.h>
 
 /*
  * Flush entire local TLB.  'sfence.vma' implicitly fences with the instruction
@@ -27,53 +19,42 @@ static inline void local_flush_tlb_all(void)
 	__asm__ __volatile__ ("sfence.vma" : : : "memory");
 }
 
-/* Flush one page from local TLB */
-static inline void local_flush_tlb_page(unsigned long addr)
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
 {
-	__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
+	/* Flush ASID 0 so that global mappings are not affected */
+	__asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (0) : "memory");
 }
 
-#ifndef CONFIG_SMP
-
-#define flush_tlb_all() local_flush_tlb_all()
-#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end)
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+		unsigned long addr)
 {
-	local_flush_tlb_all();
+	__asm__ __volatile__ ("sfence.vma %0, %1"
+			: : "r" (addr), "r" (0)
+			: "memory");
 }
 
-#define flush_tlb_mm(mm) flush_tlb_all()
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end);
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
-#else /* CONFIG_SMP */
-
-#include <asm/sbi.h>
+#ifdef CONFIG_SMP
 
-static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
-				     unsigned long size)
-{
-	struct cpumask hmask;
+void flush_tlb_all(void);
+void flush_tlb_mm(struct mm_struct *mm);
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end);
+void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
-	cpumask_clear(&hmask);
-	riscv_cpuid_to_hartid_mask(cmask, &hmask);
-	sbi_remote_sfence_vma(hmask.bits, start, size);
-}
+#else /* CONFIG_SMP */
 
-#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
-#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
-#define flush_tlb_range(vma, start, end) \
-	remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
-#define flush_tlb_mm(mm) \
-	remote_sfence_vma(mm_cpumask(mm), 0, -1)
+#define flush_tlb_all() local_flush_tlb_all()
+#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
+#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
+#define flush_tlb_range(vma, start, end) local_flush_tlb_range(vma, start, end)
+#define flush_tlb_kernel_range(start, end) \
+	local_flush_tlb_kernel_range(start, end)
 
 #endif /* CONFIG_SMP */
 
-/* Flush a range of kernel pages */
-static inline void flush_tlb_kernel_range(unsigned long start,
-	unsigned long end)
-{
-	flush_tlb_all();
-}
-
 #endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index d75b035786d6..53b68fd3cb45 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -4,3 +4,4 @@ obj-y += extable.o
 obj-y += ioremap.o
 obj-y += cacheflush.o
 obj-y += context.o
+obj-y += tlbflush.o
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index fbb1cfe80267..0f787bcd3a7a 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -64,7 +64,13 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 * privileged ISA 1.10 yet.
 	 */
 	csr_write(sptbr, virt_to_pfn(next->pgd) | SATP_MODE);
-	local_flush_tlb_all();
+
+	/*
+	 * sfence.vma after SATP write. We flush the MM context instead of
+	 * calling local_flush_tlb_all to prevent global mappings from being
+	 * affected.
+	 */
+	local_flush_tlb_mm(next);
 
 	flush_icache_deferred(next);
 }
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
new file mode 100644
index 000000000000..b4b35e825495
--- /dev/null
+++ b/arch/riscv/mm/tlbflush.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Gary Guo, University of Cambridge
+ */
+
+#include <linux/mm.h>
+#include <asm/sbi.h>
+
+#define SFENCE_VMA_FLUSH_ALL ((unsigned long) -1)
+
+/*
+ * This controls the maximum number of page-level sfence.vma instructions that
+ * the kernel can issue when it needs to flush a range from the TLB. If the
+ * size of the range goes beyond this threshold, a full sfence.vma is issued.
+ *
+ * Increasing this number can negatively impact performance on implementations
+ * where sfence.vma's address operand is ignored and a global TLB flush is
+ * always performed. On the other hand, implementations with page-level TLB
+ * flush support can benefit from a larger number.
+ */
+static unsigned long tlbi_range_threshold = PAGE_SIZE;
+
+static int __init setup_tlbi_max_ops(char *str)
+{
+	int value = 0;
+
+	get_option(&str, &value);
+
+	/*
+	 * This value cannot be greater than or equal to PTRS_PER_PTE, as we
+	 * need to do a full flush for any non-leaf page table change. The
+	 * value must also be at least 1.
+	 */
+	if (value >= PTRS_PER_PTE || value < 1)
+		return 0;
+
+	tlbi_range_threshold = value * PAGE_SIZE;
+	return 1;
+}
+__setup("tlbi_max_ops=", setup_tlbi_max_ops);
+
+void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end)
+{
+	if (end - start > tlbi_range_threshold) {
+		local_flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	while (start < end) {
+		__asm__ __volatile__ ("sfence.vma %0, %1"
+				: : "r" (start), "r" (0)
+				: "memory");
+		start += PAGE_SIZE;
+	}
+}
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if (end - start > tlbi_range_threshold) {
+		local_flush_tlb_all();
+		return;
+	}
+
+	while (start < end) {
+		__asm__ __volatile__ ("sfence.vma %0"
+				: : "r" (start)
+				: "memory");
+		start += PAGE_SIZE;
+	}
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * BBL/OpenSBI currently ignore the ASID and address range provided by the
+ * SBI call arguments, and do a full TLB flush instead. This may negatively
+ * impact performance on implementations with page-level sfence.vma support.
+ *
+ * We provide an IPI-based remote shootdown implementation to improve
+ * performance on implementations with page-level sfence.vma, and also to
+ * allow testing of these implementations.
+ *
+ * This parameter allows the approach (IPI/SBI) to be specified using the
+ * boot cmdline.
+ */
+static bool tlbi_ipi = true;
+
+static int __init setup_tlbi_method(char *str)
+{
+	if (strcmp(str, "ipi") == 0)
+		tlbi_ipi = true;
+	else if (strcmp(str, "sbi") == 0)
+		tlbi_ipi = false;
+	else
+		return 0;
+
+	return 1;
+}
+__setup("tlbi_method=", setup_tlbi_method);
+
+
+struct tlbi {
+	unsigned long start;
+	unsigned long size;
+	unsigned long asid;
+};
+
+static void ipi_remote_sfence_vma(void *info)
+{
+	struct tlbi *data = info;
+	unsigned long start = data->start;
+	unsigned long size = data->size;
+	unsigned long i;
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		__asm__ __volatile__ ("sfence.vma %0"
+				: : "r" (start + i)
+				: "memory");
+	}
+}
+
+static void ipi_remote_sfence_vma_asid(void *info)
+{
+	struct tlbi *data = info;
+	unsigned long asid = data->asid;
+	unsigned long start = data->start;
+	unsigned long size = data->size;
+	unsigned long i;
+
+	/* Flush entire MM context */
+	if (size == SFENCE_VMA_FLUSH_ALL) {
+		__asm__ __volatile__ ("sfence.vma x0, %0"
+				: : "r" (asid)
+				: "memory");
+		return;
+	}
+
+	for (i = 0; i < size; i += PAGE_SIZE) {
+		__asm__ __volatile__ ("sfence.vma %0, %1"
+				: : "r" (start + i), "r" (asid)
+				: "memory");
+	}
+}
+
+static void remote_sfence_vma(unsigned long start, unsigned long size)
+{
+	if (tlbi_ipi) {
+		struct tlbi info = {
+			.start = start,
+			.size = size,
+		};
+		on_each_cpu(ipi_remote_sfence_vma, &info, 1);
+	} else
+		sbi_remote_sfence_vma(NULL, start, size);
+}
+
+static void remote_sfence_vma_asid(cpumask_t *mask, unsigned long start,
+		unsigned long size, unsigned long asid)
+{
+	if (tlbi_ipi) {
+		struct tlbi info = {
+			.start = start,
+			.size = size,
+			.asid = asid,
+		};
+		on_each_cpu_mask(mask, ipi_remote_sfence_vma_asid, &info, 1);
+	} else {
+		cpumask_t hmask;
+
+		cpumask_clear(&hmask);
+		riscv_cpuid_to_hartid_mask(mask, &hmask);
+		sbi_remote_sfence_vma_asid(hmask.bits, start, size, asid);
+	}
+}
+
+
+void flush_tlb_all(void)
+{
+	sbi_remote_sfence_vma(NULL, 0, SFENCE_VMA_FLUSH_ALL);
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	remote_sfence_vma_asid(mm_cpumask(mm), 0, SFENCE_VMA_FLUSH_ALL, 0);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+	remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE, 0);
+}
+
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		unsigned long end)
+{
+	if (end - start > tlbi_range_threshold) {
+		flush_tlb_mm(vma->vm_mm);
+		return;
+	}
+
+	remote_sfence_vma_asid(mm_cpumask(vma->vm_mm), start, end - start, 0);
+}
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	if (end - start > tlbi_range_threshold) {
+		flush_tlb_all();
+		return;
+	}
+
+	remote_sfence_vma(start, end - start);
+}
+
+#endif /* CONFIG_SMP */
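As an illustration of how the tlbi_max_ops boot parameter interacts with the flush paths above, the following stand-alone sketch mirrors the threshold decision in local_flush_tlb_range()/flush_tlb_range(). It is a userspace mock-up, not kernel code; the 4 KiB page size, tlbi_max_ops=8, and the example ranges are assumed values chosen for the demonstration, not recommendations.

```c
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* "tlbi_max_ops=8" on the kernel cmdline would set the threshold to 8 pages. */
static unsigned long tlbi_range_threshold = 8 * PAGE_SIZE;

/* Mirrors the patch's decision: past the threshold, fall back to a full
 * (ASID-wide or global) flush instead of one sfence.vma per page. */
static const char *flush_strategy(unsigned long start, unsigned long end)
{
	if (end - start > tlbi_range_threshold)
		return "full flush (sfence.vma x0, asid)";
	return "per-page sfence.vma loop";
}

int main(void)
{
	printf("16 KiB range: %s\n", flush_strategy(0, 4 * PAGE_SIZE));
	printf("64 KiB range: %s\n", flush_strategy(0, 16 * PAGE_SIZE));
	return 0;
}
```

The remote-shootdown transport is selected independently of this threshold, via tlbi_method=ipi (the default) or tlbi_method=sbi on the boot cmdline.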