[RFC/RFT,2/4] riscv: Add a runtime detection of invalid TLB entries caching

Message ID 20231207150348.82096-3-alexghiti@rivosinc.com (mailing list archive)
State Changes Requested
Headers show
Series Remove preventive sfence.vma | expand

Checks

Context                   Check  Description
conchuod/vmtest-fixes-PR  fail   merge-conflict

Commit Message

Alexandre Ghiti Dec. 7, 2023, 3:03 p.m. UTC
This mechanism allows the sfence.vma introduced by the previous commit to be
completely bypassed on uarchs that do not cache invalid TLB entries.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 arch/riscv/mm/init.c | 124 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 124 insertions(+)

Comments

Christophe Leroy Dec. 7, 2023, 3:55 p.m. UTC | #1
On 07/12/2023 at 16:03, Alexandre Ghiti wrote:
> This mechanism allows the sfence.vma introduced by the previous commit to be
> completely bypassed on uarchs that do not cache invalid TLB entries.
> 
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
>   arch/riscv/mm/init.c | 124 +++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 124 insertions(+)
> 
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 379403de6c6f..2e854613740c 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -56,6 +56,8 @@ bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KER
>   EXPORT_SYMBOL(pgtable_l4_enabled);
>   EXPORT_SYMBOL(pgtable_l5_enabled);
>   
> +bool tlb_caching_invalid_entries;
> +
>   phys_addr_t phys_ram_base __ro_after_init;
>   EXPORT_SYMBOL(phys_ram_base);
>   
> @@ -750,6 +752,18 @@ static void __init disable_pgtable_l4(void)
>   	satp_mode = SATP_MODE_39;
>   }
>   
> +static void __init enable_pgtable_l5(void)
> +{
> +	pgtable_l5_enabled = true;
> +	satp_mode = SATP_MODE_57;
> +}
> +
> +static void __init enable_pgtable_l4(void)
> +{
> +	pgtable_l4_enabled = true;
> +	satp_mode = SATP_MODE_48;
> +}
> +
>   static int __init print_no4lvl(char *p)
>   {
>   	pr_info("Disabled 4-level and 5-level paging");
> @@ -826,6 +840,112 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>   	memset(early_pud, 0, PAGE_SIZE);
>   	memset(early_pmd, 0, PAGE_SIZE);
>   }
> +
> +/* Determine at runtime if the uarch caches invalid TLB entries */
> +static __init void set_tlb_caching_invalid_entries(void)
> +{
> +#define NR_RETRIES_CACHING_INVALID_ENTRIES	50

Looks odd to have macros nested in the middle of a function.
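
Something like the following (untested sketch, reusing the names from the patch)
would keep them at file scope, and would also avoid the duplicated ASM_STR
define further down:

	#define NR_RETRIES_CACHING_INVALID_ENTRIES	50
	#define ASM_STR(x)				__ASM_STR(x)

	/* Determine at runtime if the uarch caches invalid TLB entries */
	static __init void set_tlb_caching_invalid_entries(void)
	{
		...
	}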

> +	uintptr_t set_tlb_caching_invalid_entries_pmd = ((unsigned long)set_tlb_caching_invalid_entries) & PMD_MASK;
> +	// TODO the test_addr as defined below could go into another pud...
> +	uintptr_t test_addr = set_tlb_caching_invalid_entries_pmd + 2 * PMD_SIZE;
> +	pmd_t valid_pmd;
> +	u64 satp;
> +	int i = 0;
> +
> +	/* To ease the page table creation */
> +	disable_pgtable_l5();
> +	disable_pgtable_l4();
> +
> +	/* Establish a mapping for set_tlb_caching_invalid_entries() in sv39 */
> +	create_pgd_mapping(early_pg_dir,
> +			   set_tlb_caching_invalid_entries_pmd,
> +			   (uintptr_t)early_pmd,
> +			   PGDIR_SIZE, PAGE_TABLE);
> +
> +	/* Handle the case where set_tlb_caching_invalid_entries straddles 2 PMDs */
> +	create_pmd_mapping(early_pmd,
> +			   set_tlb_caching_invalid_entries_pmd,
> +			   set_tlb_caching_invalid_entries_pmd,
> +			   PMD_SIZE, PAGE_KERNEL_EXEC);
> +	create_pmd_mapping(early_pmd,
> +			   set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
> +			   set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
> +			   PMD_SIZE, PAGE_KERNEL_EXEC);
> +
> +	/* Establish an invalid mapping */
> +	create_pmd_mapping(early_pmd, test_addr, 0, PMD_SIZE, __pgprot(0));
> +
> +	/* Precompute the valid pmd here because the mapping for pfn_pmd() won't exist */
> +	valid_pmd = pfn_pmd(PFN_DOWN(set_tlb_caching_invalid_entries_pmd), PAGE_KERNEL);
> +
> +	local_flush_tlb_all();
> +	satp = PFN_DOWN((uintptr_t)&early_pg_dir) | SATP_MODE_39;
> +	csr_write(CSR_SATP, satp);
> +
> +	/*
> +	 * Set stvec to after the trapping access, access this invalid mapping
> +	 * and legitimately trap
> +	 */
> +	// TODO: Should I save the previous stvec?
> +#define ASM_STR(x)	__ASM_STR(x)

Looks odd to have macros nested in the middle of a function.


> +	asm volatile(
> +		"la a0, 1f				\n"
> +		"csrw " ASM_STR(CSR_TVEC) ", a0		\n"
> +		"ld a0, 0(%0)				\n"
> +		".align 2				\n"
> +		"1:					\n"
> +		:
> +		: "r" (test_addr)
> +		: "a0"
> +	);
> +
> +	/* Now establish a valid mapping to check if the invalid one is cached */
> +	early_pmd[pmd_index(test_addr)] = valid_pmd;
> +
> +	/*
> +	 * Access the valid mapping multiple times: indeed, we can't use
> +	 * sfence.vma as a barrier to make sure the cpu did not reorder accesses
> +	 * so we may trap even if the uarch does not cache invalid entries. By
> +	 * trying a few times, we make sure that those uarchs will see the right
> +	 * mapping at some point.
> +	 */
> +
> +	i = NR_RETRIES_CACHING_INVALID_ENTRIES;
> +
> +#define ASM_STR(x)	__ASM_STR(x)

Duplicate define?

> +	asm_volatile_goto(
> +		"la a0, 1f					\n"
> +		"csrw " ASM_STR(CSR_TVEC) ", a0			\n"
> +		".align 2					\n"
> +		"1:						\n"
> +		"addi %0, %0, -1				\n"
> +		"blt %0, zero, %l[caching_invalid_entries]	\n"
> +		"ld a0, 0(%1)					\n"
> +		:
> +		: "r" (i), "r" (test_addr)
> +		: "a0"
> +		: caching_invalid_entries
> +	);
> +
> +	csr_write(CSR_SATP, 0ULL);
> +	local_flush_tlb_all();
> +
> +	/* If we don't trap, the uarch does not cache invalid entries! */
> +	tlb_caching_invalid_entries = false;
> +	goto clean;
> +
> +caching_invalid_entries:
> +	csr_write(CSR_SATP, 0ULL);
> +	local_flush_tlb_all();
> +
> +	tlb_caching_invalid_entries = true;
> +clean:
> +	memset(early_pg_dir, 0, PAGE_SIZE);
> +	memset(early_pmd, 0, PAGE_SIZE);

Use clear_page() instead?
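
i.e. something like this (assuming early_pg_dir and early_pmd each stay exactly
one page in size, as they are today):

	clear_page(early_pg_dir);
	clear_page(early_pmd);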

> +
> +	enable_pgtable_l4();
> +	enable_pgtable_l5();
> +}
>   #endif
>   
>   /*
> @@ -1072,6 +1192,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>   #endif
>   
>   #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
> +	set_tlb_caching_invalid_entries();
>   	set_satp_mode(dtb_pa);
>   #endif
>   
> @@ -1322,6 +1443,9 @@ static void __init setup_vm_final(void)
>   	local_flush_tlb_all();
>   
>   	pt_ops_set_late();
> +
> +	pr_info("uarch caches invalid entries: %s",
> +		tlb_caching_invalid_entries ? "yes" : "no");
>   }
>   #else
>   asmlinkage void __init setup_vm(uintptr_t dtb_pa)
Alexandre Ghiti Dec. 8, 2023, 2:30 p.m. UTC | #2
On Thu, Dec 7, 2023 at 4:55 PM Christophe Leroy
<christophe.leroy@csgroup.eu> wrote:
>
> On 07/12/2023 at 16:03, Alexandre Ghiti wrote:
> > This mechanism allows the sfence.vma introduced by the previous commit to be
> > completely bypassed on uarchs that do not cache invalid TLB entries.
>
> [...]

I left this patch so that people can easily test this without knowing
what their uarch is actually doing, but it will very likely be dropped
as a new extension has just been proposed for that.

Thanks anyway, I should have been clearer in the patch title,

Alex
Patch

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 379403de6c6f..2e854613740c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -56,6 +56,8 @@  bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KER
 EXPORT_SYMBOL(pgtable_l4_enabled);
 EXPORT_SYMBOL(pgtable_l5_enabled);
 
+bool tlb_caching_invalid_entries;
+
 phys_addr_t phys_ram_base __ro_after_init;
 EXPORT_SYMBOL(phys_ram_base);
 
@@ -750,6 +752,18 @@  static void __init disable_pgtable_l4(void)
 	satp_mode = SATP_MODE_39;
 }
 
+static void __init enable_pgtable_l5(void)
+{
+	pgtable_l5_enabled = true;
+	satp_mode = SATP_MODE_57;
+}
+
+static void __init enable_pgtable_l4(void)
+{
+	pgtable_l4_enabled = true;
+	satp_mode = SATP_MODE_48;
+}
+
 static int __init print_no4lvl(char *p)
 {
 	pr_info("Disabled 4-level and 5-level paging");
@@ -826,6 +840,112 @@  static __init void set_satp_mode(uintptr_t dtb_pa)
 	memset(early_pud, 0, PAGE_SIZE);
 	memset(early_pmd, 0, PAGE_SIZE);
 }
+
+/* Determine at runtime if the uarch caches invalid TLB entries */
+static __init void set_tlb_caching_invalid_entries(void)
+{
+#define NR_RETRIES_CACHING_INVALID_ENTRIES	50
+	uintptr_t set_tlb_caching_invalid_entries_pmd = ((unsigned long)set_tlb_caching_invalid_entries) & PMD_MASK;
+	// TODO the test_addr as defined below could go into another pud...
+	uintptr_t test_addr = set_tlb_caching_invalid_entries_pmd + 2 * PMD_SIZE;
+	pmd_t valid_pmd;
+	u64 satp;
+	int i = 0;
+
+	/* To ease the page table creation */
+	disable_pgtable_l5();
+	disable_pgtable_l4();
+
+	/* Establish a mapping for set_tlb_caching_invalid_entries() in sv39 */
+	create_pgd_mapping(early_pg_dir,
+			   set_tlb_caching_invalid_entries_pmd,
+			   (uintptr_t)early_pmd,
+			   PGDIR_SIZE, PAGE_TABLE);
+
+	/* Handle the case where set_tlb_caching_invalid_entries straddles 2 PMDs */
+	create_pmd_mapping(early_pmd,
+			   set_tlb_caching_invalid_entries_pmd,
+			   set_tlb_caching_invalid_entries_pmd,
+			   PMD_SIZE, PAGE_KERNEL_EXEC);
+	create_pmd_mapping(early_pmd,
+			   set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
+			   set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
+			   PMD_SIZE, PAGE_KERNEL_EXEC);
+
+	/* Establish an invalid mapping */
+	create_pmd_mapping(early_pmd, test_addr, 0, PMD_SIZE, __pgprot(0));
+
+	/* Precompute the valid pmd here because the mapping for pfn_pmd() won't exist */
+	valid_pmd = pfn_pmd(PFN_DOWN(set_tlb_caching_invalid_entries_pmd), PAGE_KERNEL);
+
+	local_flush_tlb_all();
+	satp = PFN_DOWN((uintptr_t)&early_pg_dir) | SATP_MODE_39;
+	csr_write(CSR_SATP, satp);
+
+	/*
+	 * Set stvec to after the trapping access, access this invalid mapping
+	 * and legitimately trap
+	 */
+	// TODO: Should I save the previous stvec?
+#define ASM_STR(x)	__ASM_STR(x)
+	asm volatile(
+		"la a0, 1f				\n"
+		"csrw " ASM_STR(CSR_TVEC) ", a0		\n"
+		"ld a0, 0(%0)				\n"
+		".align 2				\n"
+		"1:					\n"
+		:
+		: "r" (test_addr)
+		: "a0"
+	);
+
+	/* Now establish a valid mapping to check if the invalid one is cached */
+	early_pmd[pmd_index(test_addr)] = valid_pmd;
+
+	/*
+	 * Access the valid mapping multiple times: indeed, we can't use
+	 * sfence.vma as a barrier to make sure the cpu did not reorder accesses
+	 * so we may trap even if the uarch does not cache invalid entries. By
+	 * trying a few times, we make sure that those uarchs will see the right
+	 * mapping at some point.
+	 */
+
+	i = NR_RETRIES_CACHING_INVALID_ENTRIES;
+
+#define ASM_STR(x)	__ASM_STR(x)
+	asm_volatile_goto(
+		"la a0, 1f					\n"
+		"csrw " ASM_STR(CSR_TVEC) ", a0			\n"
+		".align 2					\n"
+		"1:						\n"
+		"addi %0, %0, -1				\n"
+		"blt %0, zero, %l[caching_invalid_entries]	\n"
+		"ld a0, 0(%1)					\n"
+		:
+		: "r" (i), "r" (test_addr)
+		: "a0"
+		: caching_invalid_entries
+	);
+
+	csr_write(CSR_SATP, 0ULL);
+	local_flush_tlb_all();
+
+	/* If we don't trap, the uarch does not cache invalid entries! */
+	tlb_caching_invalid_entries = false;
+	goto clean;
+
+caching_invalid_entries:
+	csr_write(CSR_SATP, 0ULL);
+	local_flush_tlb_all();
+
+	tlb_caching_invalid_entries = true;
+clean:
+	memset(early_pg_dir, 0, PAGE_SIZE);
+	memset(early_pmd, 0, PAGE_SIZE);
+
+	enable_pgtable_l4();
+	enable_pgtable_l5();
+}
 #endif
 
 /*
@@ -1072,6 +1192,7 @@  asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 #endif
 
 #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+	set_tlb_caching_invalid_entries();
 	set_satp_mode(dtb_pa);
 #endif
 
@@ -1322,6 +1443,9 @@  static void __init setup_vm_final(void)
 	local_flush_tlb_all();
 
 	pt_ops_set_late();
+
+	pr_info("uarch caches invalid entries: %s",
+		tlb_caching_invalid_entries ? "yes" : "no");
 }
 #else
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)