
[v8] riscv: mm: Add support for Svinval extension

Message ID 20240702102637.9074-1-mchitale@ventanamicro.com (mailing list archive)
State New
Series: [v8] riscv: mm: Add support for Svinval extension

Checks

Context Check Description
conchuod/vmtest-for-next-PR success PR summary
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-1-test-6 warning .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh

Commit Message

Mayuresh Chitale July 2, 2024, 10:26 a.m. UTC
The Svinval extension splits the SFENCE.VMA instruction into finer-grained
invalidation and ordering operations and is mandatory for the RVA23S64 profile.
When Svinval is enabled, the local_flush_tlb_range_threshold_asid function
should use the following sequence instead of a single sfence.vma to optimize
TLB flushes:

sfence.w.inval
sinval.vma
  .
  .
sinval.vma
sfence.inval.ir

The maximum number of consecutive sinval.vma instructions executed in
the local_flush_tlb_range_threshold_asid function is limited to 64 to
avoid soft lockups; the approach is similar to the one used on arm64.
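
For context, here is roughly how the sequence above slots into the flush
path (full function shown, not just the hunks). This is an illustrative
sketch: the threshold name tlb_flush_all_threshold and the helper
local_flush_tlb_all_asid are assumed from the surrounding
arch/riscv/mm/tlbflush.c code and do not appear in this diff.

/*
 * Sketch of local_flush_tlb_range_threshold_asid() with the Svinval
 * path added; identifiers outside the diff hunks are assumptions.
 */
static void local_flush_tlb_range_threshold_asid(unsigned long start,
						 unsigned long size,
						 unsigned long stride,
						 unsigned long asid)
{
	unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride);
	int i;

	/* Past the threshold, one global flush beats N per-page flushes. */
	if (nr_ptes_in_range > tlb_flush_all_threshold) {
		local_flush_tlb_all_asid(asid);
		return;
	}

	if (has_svinval()) {
		local_sfence_w_inval();		/* order prior page-table updates */
		for (i = 0; i < nr_ptes_in_range; ++i) {
			local_sinval_vma(start, asid);	/* invalidate only, no ordering */
			start += stride;
		}
		local_sfence_inval_ir();	/* order later implicit references */
		return;
	}

	/* Fallback for hardware without Svinval: one sfence.vma per page. */
	for (i = 0; i < nr_ptes_in_range; ++i) {
		local_flush_tlb_page_asid(start, asid);
		start += stride;
	}
}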

Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
---
Changes in v8:
- Fix line wrap
- Add RB tag

Changes in v7:
- Use existing svinval macros in the insn-def.h
- Rename local_sinval_vma_asid to local_sinval_vma

Changes in v6:
- Rebase on latest torvalds/master

Changes in v5:
- Reduce tlb flush threshold to 64
- Improve implementation of local_flush_tlb* functions

Changes in v4:
- Rebase and refactor as per latest changes on torvalds/master
- Drop patch 1 in the series

Changes in v3:
- Fix incorrect vma used for sinval instructions
- Use unified static key mechanism for svinval
- Rebased on torvalds/master

Changes in v2:
- Rebased on 5.18-rc3
- Update riscv_fill_hwcap to probe the Svinval extension

 arch/riscv/mm/tlbflush.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

Comments

Alexandre Ghiti July 2, 2024, 12:32 p.m. UTC | #1
Hi Mayuresh,

On Tue, Jul 2, 2024 at 12:26 PM Mayuresh Chitale
<mchitale@ventanamicro.com> wrote:
>
> The Svinval extension splits SFENCE.VMA instruction into finer-grained
> invalidation and ordering operations and is mandatory for RVA23S64 profile.
> When Svinval is enabled the local_flush_tlb_range_threshold_asid function
> should use the following sequence to optimize the tlb flushes instead of
> a simple sfence.vma:
>
> sfence.w.inval
> svinval.vma
>   .
>   .
> svinval.vma
> sfence.inval.ir
>
> The maximum number of consecutive svinval.vma instructions that
> can be executed in local_flush_tlb_range_threshold_asid function
> is limited to 64. This is required to avoid soft lockups and the
> approach is similar to that used in arm64.
>
> Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> Changes in v8:
> - Fix line wrap
> - Add RB tag
>
> Changes in v7:
> - Use existing svinval macros in the insn-def.h
> - Rename local_sinval_vma_asid to local_sinval_vma
>
> Changes in v6:
> - Rebase on latest torvalds/master
>
> Changes in v5:
> - Reduce tlb flush threshold to 64
> - Improve implementation of local_flush_tlb* functions
>
> Changes in v4:
> - Rebase and refactor as per latest changes on torvalds/master
> - Drop patch 1 in the series
>
> Changes in v3:
> - Fix incorrect vma used for sinval instructions
> - Use unified static key mechanism for svinval
> - Rebased on torvalds/master
>
> Changes in v2:
> - Rebased on 5.18-rc3
> - update riscv_fill_hwcap to probe Svinval extension
>
>  arch/riscv/mm/tlbflush.c | 32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)
>
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index 9b6e86ce3867..782147a63f3b 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -6,6 +6,27 @@
>  #include <linux/hugetlb.h>
>  #include <asm/sbi.h>
>  #include <asm/mmu_context.h>
> +#include <asm/cpufeature.h>
> +
> +#define has_svinval()  riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
> +
> +static inline void local_sfence_inval_ir(void)
> +{
> +       asm volatile(SFENCE_INVAL_IR() ::: "memory");
> +}
> +
> +static inline void local_sfence_w_inval(void)
> +{
> +       asm volatile(SFENCE_W_INVAL() ::: "memory");
> +}
> +
> +static inline void local_sinval_vma(unsigned long vma, unsigned long asid)
> +{
> +       if (asid != FLUSH_TLB_NO_ASID)
> +               asm volatile(SINVAL_VMA(%0, %1) : : "r" (vma), "r" (asid) : "memory");
> +       else
> +               asm volatile(SINVAL_VMA(%0, zero) : : "r" (vma) : "memory");
> +}
>
>  /*
>   * Flush entire TLB if number of entries to be flushed is greater
> @@ -26,6 +47,16 @@ static void local_flush_tlb_range_threshold_asid(unsigned long start,
>                 return;
>         }
>
> +       if (has_svinval()) {
> +               local_sfence_w_inval();
> +               for (i = 0; i < nr_ptes_in_range; ++i) {
> +                       local_sinval_vma(start, asid);
> +                       start += stride;
> +               }
> +               local_sfence_inval_ir();
> +               return;
> +       }
> +
>         for (i = 0; i < nr_ptes_in_range; ++i) {
>                 local_flush_tlb_page_asid(start, asid);
>                 start += stride;
> --
> 2.34.1
>

Great, thanks again for reworking this patchset!

Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>

Thanks,

Alex
Palmer Dabbelt July 24, 2024, 2:50 p.m. UTC | #2
On Tue, 02 Jul 2024 03:26:37 PDT (-0700), mchitale@ventanamicro.com wrote:
> The Svinval extension splits SFENCE.VMA instruction into finer-grained
> invalidation and ordering operations and is mandatory for RVA23S64 profile.
> When Svinval is enabled the local_flush_tlb_range_threshold_asid function
> should use the following sequence to optimize the tlb flushes instead of

Do you have any performance numbers for the optimization?  As per here 
<https://lore.kernel.org/all/mhng-f799bd2b-7f22-4c03-bdb2-903fa3b5d508@palmer-ri-x1c9a/>.
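
(No numbers turn up later in the thread. For reference, one common way to
gather them would be a userspace loop of the shape below, which stresses the
munmap-driven range-flush path; this is a hypothetical sketch, not something
posted in the thread, and the sizes and iteration counts are arbitrary.)

/* Hypothetical microbenchmark: repeatedly map, touch, and unmap a
 * small range so munmap() exercises the kernel's range-flush path. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>

#define PAGES 32
#define ITERS 100000

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	struct timespec t0, t1;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	for (int i = 0; i < ITERS; i++) {
		char *p = mmap(NULL, PAGES * page, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (p == MAP_FAILED)
			return 1;
		memset(p, 1, PAGES * page);	/* fault pages in, warm the TLB */
		munmap(p, PAGES * page);	/* triggers the range flush */
	}
	clock_gettime(CLOCK_MONOTONIC, &t1);

	printf("%.1f ns per map/touch/unmap cycle\n",
	       ((t1.tv_sec - t0.tv_sec) * 1e9 +
		(t1.tv_nsec - t0.tv_nsec)) / ITERS);
	return 0;
}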

> a simple sfence.vma:
>
> sfence.w.inval
> svinval.vma
>   .
>   .
> svinval.vma
> sfence.inval.ir
>
> The maximum number of consecutive svinval.vma instructions that
> can be executed in local_flush_tlb_range_threshold_asid function
> is limited to 64. This is required to avoid soft lockups and the
> approach is similar to that used in arm64.
>
> Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> ---
> Changes in v8:
> - Fix line wrap
> - Add RB tag
>
> Changes in v7:
> - Use existing svinval macros in the insn-def.h
> - Rename local_sinval_vma_asid to local_sinval_vma
>
> Changes in v6:
> - Rebase on latest torvalds/master
>
> Changes in v5:
> - Reduce tlb flush threshold to 64
> - Improve implementation of local_flush_tlb* functions
>
> Changes in v4:
> - Rebase and refactor as per latest changes on torvalds/master
> - Drop patch 1 in the series
>
> Changes in v3:
> - Fix incorrect vma used for sinval instructions
> - Use unified static key mechanism for svinval
> - Rebased on torvalds/master
>
> Changes in v2:
> - Rebased on 5.18-rc3
> - update riscv_fill_hwcap to probe Svinval extension
>
>  arch/riscv/mm/tlbflush.c | 32 ++++++++++++++++++++++++++++++++
>  1 file changed, 32 insertions(+)
>
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index 9b6e86ce3867..782147a63f3b 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -6,6 +6,27 @@
>  #include <linux/hugetlb.h>
>  #include <asm/sbi.h>
>  #include <asm/mmu_context.h>
> +#include <asm/cpufeature.h>
> +
> +#define has_svinval()	riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
> +
> +static inline void local_sfence_inval_ir(void)
> +{
> +	asm volatile(SFENCE_INVAL_IR() ::: "memory");
> +}
> +
> +static inline void local_sfence_w_inval(void)
> +{
> +	asm volatile(SFENCE_W_INVAL() ::: "memory");
> +}
> +
> +static inline void local_sinval_vma(unsigned long vma, unsigned long asid)
> +{
> +	if (asid != FLUSH_TLB_NO_ASID)
> +		asm volatile(SINVAL_VMA(%0, %1) : : "r" (vma), "r" (asid) : "memory");
> +	else
> +		asm volatile(SINVAL_VMA(%0, zero) : : "r" (vma) : "memory");
> +}
>
>  /*
>   * Flush entire TLB if number of entries to be flushed is greater
> @@ -26,6 +47,16 @@ static void local_flush_tlb_range_threshold_asid(unsigned long start,
>  		return;
>  	}
>
> +	if (has_svinval()) {
> +		local_sfence_w_inval();
> +		for (i = 0; i < nr_ptes_in_range; ++i) {
> +			local_sinval_vma(start, asid);
> +			start += stride;
> +		}
> +		local_sfence_inval_ir();
> +		return;
> +	}
> +
>  	for (i = 0; i < nr_ptes_in_range; ++i) {
>  		local_flush_tlb_page_asid(start, asid);
>  		start += stride;
Mayuresh Chitale July 30, 2024, 8:43 a.m. UTC | #3
On Wed, Jul 24, 2024 at 8:20 PM Palmer Dabbelt <palmer@dabbelt.com> wrote:
>
> On Tue, 02 Jul 2024 03:26:37 PDT (-0700), mchitale@ventanamicro.com wrote:
> > The Svinval extension splits SFENCE.VMA instruction into finer-grained
> > invalidation and ordering operations and is mandatory for RVA23S64 profile.
> > When Svinval is enabled the local_flush_tlb_range_threshold_asid function
> > should use the following sequence to optimize the tlb flushes instead of
>
> Do you have any performance numbers for the optimization?  As per here
> <https://lore.kernel.org/all/mhng-f799bd2b-7f22-4c03-bdb2-903fa3b5d508@palmer-ri-x1c9a/>.

No, there are currently no numbers available for comparison, but the
rationale for the optimization is described in the spec. The extension
is mandatory for the RVA23S64 profile, and platforms that don't
support the extension are not impacted, since the new code executes
only if Svinval is detected at boot.
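
(To illustrate the gating described above: riscv_has_extension_unlikely()
is designed to be patched at boot via the kernel's alternatives mechanism,
so on hardware without Svinval the check below resolves to the fallback and
the legacy path runs unchanged. The sketch is illustrative and reuses the
helpers added by the patch; flush_one_page() itself is a made-up wrapper,
not a function in the patch.)

/* Illustrative only: a boot-time-patched extension check gating the
 * Svinval path. flush_one_page() is hypothetical; the local_* helpers
 * are the ones added by this patch. */
static void flush_one_page(unsigned long addr, unsigned long asid)
{
	if (has_svinval()) {
		/* Reached only when Svinval was detected during boot. */
		local_sfence_w_inval();
		local_sinval_vma(addr, asid);
		local_sfence_inval_ir();
	} else {
		/* Same cost as a kernel built without this patch. */
		local_flush_tlb_page_asid(addr, asid);
	}
}

Note that for a single page this is three instructions where one sfence.vma
would do; the sequence only pays off when many sinval.vma invalidations are
batched between a single fence pair, which is exactly what
local_flush_tlb_range_threshold_asid() does for ranges under the 64-entry
threshold.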
>
> > a simple sfence.vma:
> >
> > sfence.w.inval
> > svinval.vma
> >   .
> >   .
> > svinval.vma
> > sfence.inval.ir
> >
> > The maximum number of consecutive svinval.vma instructions that
> > can be executed in local_flush_tlb_range_threshold_asid function
> > is limited to 64. This is required to avoid soft lockups and the
> > approach is similar to that used in arm64.
> >
> > Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
> > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > ---
> > Changes in v8:
> > - Fix line wrap
> > - Add RB tag
> >
> > Changes in v7:
> > - Use existing svinval macros in the insn-def.h
> > - Rename local_sinval_vma_asid to local_sinval_vma
> >
> > Changes in v6:
> > - Rebase on latest torvalds/master
> >
> > Changes in v5:
> > - Reduce tlb flush threshold to 64
> > - Improve implementation of local_flush_tlb* functions
> >
> > Changes in v4:
> > - Rebase and refactor as per latest changes on torvalds/master
> > - Drop patch 1 in the series
> >
> > Changes in v3:
> > - Fix incorrect vma used for sinval instructions
> > - Use unified static key mechanism for svinval
> > - Rebased on torvalds/master
> >
> > Changes in v2:
> > - Rebased on 5.18-rc3
> > - update riscv_fill_hwcap to probe Svinval extension
> >
> >  arch/riscv/mm/tlbflush.c | 32 ++++++++++++++++++++++++++++++++
> >  1 file changed, 32 insertions(+)
> >
> > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> > index 9b6e86ce3867..782147a63f3b 100644
> > --- a/arch/riscv/mm/tlbflush.c
> > +++ b/arch/riscv/mm/tlbflush.c
> > @@ -6,6 +6,27 @@
> >  #include <linux/hugetlb.h>
> >  #include <asm/sbi.h>
> >  #include <asm/mmu_context.h>
> > +#include <asm/cpufeature.h>
> > +
> > +#define has_svinval()        riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
> > +
> > +static inline void local_sfence_inval_ir(void)
> > +{
> > +     asm volatile(SFENCE_INVAL_IR() ::: "memory");
> > +}
> > +
> > +static inline void local_sfence_w_inval(void)
> > +{
> > +     asm volatile(SFENCE_W_INVAL() ::: "memory");
> > +}
> > +
> > +static inline void local_sinval_vma(unsigned long vma, unsigned long asid)
> > +{
> > +     if (asid != FLUSH_TLB_NO_ASID)
> > +             asm volatile(SINVAL_VMA(%0, %1) : : "r" (vma), "r" (asid) : "memory");
> > +     else
> > +             asm volatile(SINVAL_VMA(%0, zero) : : "r" (vma) : "memory");
> > +}
> >
> >  /*
> >   * Flush entire TLB if number of entries to be flushed is greater
> > @@ -26,6 +47,16 @@ static void local_flush_tlb_range_threshold_asid(unsigned long start,
> >               return;
> >       }
> >
> > +     if (has_svinval()) {
> > +             local_sfence_w_inval();
> > +             for (i = 0; i < nr_ptes_in_range; ++i) {
> > +                     local_sinval_vma(start, asid);
> > +                     start += stride;
> > +             }
> > +             local_sfence_inval_ir();
> > +             return;
> > +     }
> > +
> >       for (i = 0; i < nr_ptes_in_range; ++i) {
> >               local_flush_tlb_page_asid(start, asid);
> >               start += stride;

Patch

diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 9b6e86ce3867..782147a63f3b 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -6,6 +6,27 @@ 
 #include <linux/hugetlb.h>
 #include <asm/sbi.h>
 #include <asm/mmu_context.h>
+#include <asm/cpufeature.h>
+
+#define has_svinval()	riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
+
+static inline void local_sfence_inval_ir(void)
+{
+	asm volatile(SFENCE_INVAL_IR() ::: "memory");
+}
+
+static inline void local_sfence_w_inval(void)
+{
+	asm volatile(SFENCE_W_INVAL() ::: "memory");
+}
+
+static inline void local_sinval_vma(unsigned long vma, unsigned long asid)
+{
+	if (asid != FLUSH_TLB_NO_ASID)
+		asm volatile(SINVAL_VMA(%0, %1) : : "r" (vma), "r" (asid) : "memory");
+	else
+		asm volatile(SINVAL_VMA(%0, zero) : : "r" (vma) : "memory");
+}
 
 /*
  * Flush entire TLB if number of entries to be flushed is greater
@@ -26,6 +47,16 @@  static void local_flush_tlb_range_threshold_asid(unsigned long start,
 		return;
 	}
 
+	if (has_svinval()) {
+		local_sfence_w_inval();
+		for (i = 0; i < nr_ptes_in_range; ++i) {
+			local_sinval_vma(start, asid);
+			start += stride;
+		}
+		local_sfence_inval_ir();
+		return;
+	}
+
 	for (i = 0; i < nr_ptes_in_range; ++i) {
 		local_flush_tlb_page_asid(start, asid);
 		start += stride;