diff mbox series

[V1] riscv: mm: Support > 1GB kernel image size when creating early page table

Message ID 20240430092211.26269-1-gaoshanliukou@163.com (mailing list archive)
State New
Headers show
Series [V1] riscv: mm: Support > 1GB kernel image size when creating early page table | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR fail PR summary
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-1-test-6 warning .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-1-test-12 fail .github/scripts/patches/tests/verify_signedoff.sh

Commit Message

yang.zhang April 30, 2024, 9:22 a.m. UTC
From: "yang.zhang" <yang.zhang@hexintek.com>

By default, when creating early page table, only one PMD page table, but
if kernel image size exceeds 1GB, it need two PMD page table, otherwise,
it would BUG_ON in create_kernel_page_table.

In addition, if trap earlier, trap vector doesn't yet set properly, current
value maybe set by previous firmwire, typically it's the _start of kernel,
it's confused and difficult to debuge, so set it earlier.
---
I'm not sure whether hugesize kernel is reasonable, if not, ignore this patch.
This issue can be reproduced simpily by changing '_end' in vmlinux.lds.S
such as _end = . + 0x40000000;

Signed-off-by: yang.zhang <yang.zhang@hexintek.com>
---
 arch/riscv/mm/init.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

Comments

Alexandre Ghiti April 30, 2024, 12:08 p.m. UTC | #1
Hi Yang,

On Tue, Apr 30, 2024 at 11:24 AM yang.zhang <gaoshanliukou@163.com> wrote:
>
> From: "yang.zhang" <yang.zhang@hexintek.com>
>
> By default, when creating early page table, only one PMD page table, but
> if kernel image size exceeds 1GB, it need two PMD page table, otherwise,
> it would BUG_ON in create_kernel_page_table.
>
> In addition, if trap earlier, trap vector doesn't yet set properly, current
> value maybe set by previous firmwire, typically it's the _start of kernel,
> it's confused and difficult to debuge, so set it earlier.

Totally agree with that, I'm used to debugging this part of the code
and I know stvec points to __start and not handle_exception at this
point...But that deserves a fix indeed, it's confusing. I don't see
this fix in the patch below though?

> ---
> I'm not sure whether hugesize kernel is reasonable, if not, ignore this patch.

Unless you have a good use case for this, I don't think that's
necessary, we never encountered such situations so I'd rather not
complicate this code even more.

Thanks,

Alex

> This issue can be reproduced simpily by changing '_end' in vmlinux.lds.S
> such as _end = . + 0x40000000;
>
> Signed-off-by: yang.zhang <yang.zhang@hexintek.com>
> ---
>  arch/riscv/mm/init.c | 41 +++++++++++++++++++++++++++++++++++------
>  1 file changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index fe8e159394d8..094b39f920d3 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -386,11 +386,13 @@ static void __init create_pte_mapping(pte_t *ptep,
>  static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
>  static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
>  static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
> +static pmd_t early_pmd2[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
>
>  #ifdef CONFIG_XIP_KERNEL
>  #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
>  #define fixmap_pmd     ((pmd_t *)XIP_FIXUP(fixmap_pmd))
>  #define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
> +#define early_pmd2      ((pmd_t *)XIP_FIXUP(early_pmd2))
>  #endif /* CONFIG_XIP_KERNEL */
>
>  static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
> @@ -432,9 +434,14 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
>
>  static phys_addr_t __init alloc_pmd_early(uintptr_t va)
>  {
> -       BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
> +       uintptr_t end_pud_idx = pud_index(kernel_map.virt_addr + kernel_map.size - 1);
> +       uintptr_t current_pud_idx = pud_index(va);
>
> -       return (uintptr_t)early_pmd;
> +       BUG_ON(current_pud_idx > end_pud_idx);
> +       if (current_pud_idx == end_pud_idx)
> +               return (uintptr_t)early_pmd2;
> +       else
> +               return (uintptr_t)early_pmd;
>  }
>
>  static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
> @@ -769,6 +776,18 @@ static void __init set_mmap_rnd_bits_max(void)
>         mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3;
>  }
>
> +static pmd_t *__init select_pmd_early(uintptr_t pa)
> +{
> +       uintptr_t end_pud_idx = pud_index(kernel_map.phys_addr + kernel_map.size - 1);
> +       uintptr_t current_pud_idx = pud_index(pa);
> +
> +       BUG_ON(current_pud_idx > end_pud_idx);
> +       if (current_pud_idx == end_pud_idx)
> +               return early_pmd2;
> +       else
> +               return early_pmd;
> +}
> +
>  /*
>   * There is a simple way to determine if 4-level is supported by the
>   * underlying hardware: establish 1:1 mapping in 4-level page table mode
> @@ -780,6 +799,7 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>         u64 identity_satp, hw_satp;
>         uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
>         u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa);
> +       pmd_t *target_pmd, *target_pmd2;
>
>         if (satp_mode_cmdline == SATP_MODE_57) {
>                 disable_pgtable_l5();
> @@ -789,17 +809,24 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>                 return;
>         }
>
> +       target_pmd = select_pmd_early(set_satp_mode_pmd);
> +       target_pmd2 = select_pmd_early(set_satp_mode_pmd + PMD_SIZE);
>         create_p4d_mapping(early_p4d,
>                         set_satp_mode_pmd, (uintptr_t)early_pud,
>                         P4D_SIZE, PAGE_TABLE);
>         create_pud_mapping(early_pud,
> -                          set_satp_mode_pmd, (uintptr_t)early_pmd,
> +                          set_satp_mode_pmd, (uintptr_t)target_pmd,
> +                          PUD_SIZE, PAGE_TABLE);
> +       /* Handle the case where set_satp_mode straddles 2 PUDs */
> +       if (target_pmd2 != target_pmd)
> +               create_pud_mapping(early_pud,
> +                          set_satp_mode_pmd + PMD_SIZE, (uintptr_t)target_pmd2,
>                            PUD_SIZE, PAGE_TABLE);
>         /* Handle the case where set_satp_mode straddles 2 PMDs */
> -       create_pmd_mapping(early_pmd,
> +       create_pmd_mapping(target_pmd,
>                            set_satp_mode_pmd, set_satp_mode_pmd,
>                            PMD_SIZE, PAGE_KERNEL_EXEC);
> -       create_pmd_mapping(early_pmd,
> +       create_pmd_mapping(target_pmd2,
>                            set_satp_mode_pmd + PMD_SIZE,
>                            set_satp_mode_pmd + PMD_SIZE,
>                            PMD_SIZE, PAGE_KERNEL_EXEC);
> @@ -829,7 +856,9 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>         memset(early_pg_dir, 0, PAGE_SIZE);
>         memset(early_p4d, 0, PAGE_SIZE);
>         memset(early_pud, 0, PAGE_SIZE);
> -       memset(early_pmd, 0, PAGE_SIZE);
> +       memset(target_pmd, 0, PAGE_SIZE);
> +       if (target_pmd2 != target_pmd)
> +               memset(target_pmd2, 0, PAGE_SIZE);
>  }
>  #endif
>
> --
> 2.25.1
>
yang.zhang May 6, 2024, 2:30 a.m. UTC | #2
At 2024-04-30 20:08:18, "Alexandre Ghiti" <alexghiti@rivosinc.com> wrote:
>Hi Yang,
>
>On Tue, Apr 30, 2024 at 11:24 AM yang.zhang <gaoshanliukou@163.com> wrote:
>>
>> From: "yang.zhang" <yang.zhang@hexintek.com>
>>
>> By default, when creating early page table, only one PMD page table, but
>> if kernel image size exceeds 1GB, it need two PMD page table, otherwise,
>> it would BUG_ON in create_kernel_page_table.
>>
>> In addition, if trap earlier, trap vector doesn't yet set properly, current
>> value maybe set by previous firmwire, typically it's the _start of kernel,
>> it's confused and difficult to debug, so set it earlier.
>
>Totally agree with that, I'm used to debugging this part of the code
>and I know stvec points to __start and not handle_exception at this
>point...But that deserves a fix indeed, it's confusing. I don't see
>this fix in the patch below though?
Sorry, i missed it.
I send patch v2 for this.
>> ---
>> I'm not sure whether hugesize kernel is reasonable, if not, ignore this patch.
>
>Unless you have a good use case for this, I don't think that's
>necessary, we never encountered such situations so I'd rather not
>complicate this code even more.
I just see the kernel space is reserved 2GB(which is 0xffffffff80000000 -0xffffffffffffffff ).
Normally, kernel image size is just some MBs, so this patch maybe unnecessary.
Just ignore.
I add the patch for set trap vector earlier for helping debug.
Thanks.

>Thanks,
>
>Alex
>
>> This issue can be reproduced simpily by changing '_end' in vmlinux.lds.S
>> such as _end = . + 0x40000000;
>>
>> Signed-off-by: yang.zhang <yang.zhang@hexintek.com>
>> ---
>>  arch/riscv/mm/init.c | 41 +++++++++++++++++++++++++++++++++++------
>>  1 file changed, 35 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>> index fe8e159394d8..094b39f920d3 100644
>> --- a/arch/riscv/mm/init.c
>> +++ b/arch/riscv/mm/init.c
>> @@ -386,11 +386,13 @@ static void __init create_pte_mapping(pte_t *ptep,
>>  static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
>>  static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
>>  static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
>> +static pmd_t early_pmd2[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
>>
>>  #ifdef CONFIG_XIP_KERNEL
>>  #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
>>  #define fixmap_pmd     ((pmd_t *)XIP_FIXUP(fixmap_pmd))
>>  #define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
>> +#define early_pmd2      ((pmd_t *)XIP_FIXUP(early_pmd2))
>>  #endif /* CONFIG_XIP_KERNEL */
>>
>>  static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
>> @@ -432,9 +434,14 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
>>
>>  static phys_addr_t __init alloc_pmd_early(uintptr_t va)
>>  {
>> -       BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
>> +       uintptr_t end_pud_idx = pud_index(kernel_map.virt_addr + kernel_map.size - 1);
>> +       uintptr_t current_pud_idx = pud_index(va);
>>
>> -       return (uintptr_t)early_pmd;
>> +       BUG_ON(current_pud_idx > end_pud_idx);
>> +       if (current_pud_idx == end_pud_idx)
>> +               return (uintptr_t)early_pmd2;
>> +       else
>> +               return (uintptr_t)early_pmd;
>>  }
>>
>>  static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
>> @@ -769,6 +776,18 @@ static void __init set_mmap_rnd_bits_max(void)
>>         mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3;
>>  }
>>
>> +static pmd_t *__init select_pmd_early(uintptr_t pa)
>> +{
>> +       uintptr_t end_pud_idx = pud_index(kernel_map.phys_addr + kernel_map.size - 1);
>> +       uintptr_t current_pud_idx = pud_index(pa);
>> +
>> +       BUG_ON(current_pud_idx > end_pud_idx);
>> +       if (current_pud_idx == end_pud_idx)
>> +               return early_pmd2;
>> +       else
>> +               return early_pmd;
>> +}
>> +
>>  /*
>>   * There is a simple way to determine if 4-level is supported by the
>>   * underlying hardware: establish 1:1 mapping in 4-level page table mode
>> @@ -780,6 +799,7 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>>         u64 identity_satp, hw_satp;
>>         uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
>>         u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa);
>> +       pmd_t *target_pmd, *target_pmd2;
>>
>>         if (satp_mode_cmdline == SATP_MODE_57) {
>>                 disable_pgtable_l5();
>> @@ -789,17 +809,24 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>>                 return;
>>         }
>>
>> +       target_pmd = select_pmd_early(set_satp_mode_pmd);
>> +       target_pmd2 = select_pmd_early(set_satp_mode_pmd + PMD_SIZE);
>>         create_p4d_mapping(early_p4d,
>>                         set_satp_mode_pmd, (uintptr_t)early_pud,
>>                         P4D_SIZE, PAGE_TABLE);
>>         create_pud_mapping(early_pud,
>> -                          set_satp_mode_pmd, (uintptr_t)early_pmd,
>> +                          set_satp_mode_pmd, (uintptr_t)target_pmd,
>> +                          PUD_SIZE, PAGE_TABLE);
>> +       /* Handle the case where set_satp_mode straddles 2 PUDs */
>> +       if (target_pmd2 != target_pmd)
>> +               create_pud_mapping(early_pud,
>> +                          set_satp_mode_pmd + PMD_SIZE, (uintptr_t)target_pmd2,
>>                            PUD_SIZE, PAGE_TABLE);
>>         /* Handle the case where set_satp_mode straddles 2 PMDs */
>> -       create_pmd_mapping(early_pmd,
>> +       create_pmd_mapping(target_pmd,
>>                            set_satp_mode_pmd, set_satp_mode_pmd,
>>                            PMD_SIZE, PAGE_KERNEL_EXEC);
>> -       create_pmd_mapping(early_pmd,
>> +       create_pmd_mapping(target_pmd2,
>>                            set_satp_mode_pmd + PMD_SIZE,
>>                            set_satp_mode_pmd + PMD_SIZE,
>>                            PMD_SIZE, PAGE_KERNEL_EXEC);
>> @@ -829,7 +856,9 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>>         memset(early_pg_dir, 0, PAGE_SIZE);
>>         memset(early_p4d, 0, PAGE_SIZE);
>>         memset(early_pud, 0, PAGE_SIZE);
>> -       memset(early_pmd, 0, PAGE_SIZE);
>> +       memset(target_pmd, 0, PAGE_SIZE);
>> +       if (target_pmd2 != target_pmd)
>> +               memset(target_pmd2, 0, PAGE_SIZE);
>>  }
>>  #endif
>>
>> --
>> 2.25.1
>>
diff mbox series

Patch

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index fe8e159394d8..094b39f920d3 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -386,11 +386,13 @@  static void __init create_pte_mapping(pte_t *ptep,
 static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
 static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
 static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
+static pmd_t early_pmd2[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 
 #ifdef CONFIG_XIP_KERNEL
 #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
 #define fixmap_pmd     ((pmd_t *)XIP_FIXUP(fixmap_pmd))
 #define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
+#define early_pmd2      ((pmd_t *)XIP_FIXUP(early_pmd2))
 #endif /* CONFIG_XIP_KERNEL */
 
 static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
@@ -432,9 +434,14 @@  static pmd_t *__init get_pmd_virt_late(phys_addr_t pa)
 
 static phys_addr_t __init alloc_pmd_early(uintptr_t va)
 {
-	BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT);
+	uintptr_t end_pud_idx = pud_index(kernel_map.virt_addr + kernel_map.size - 1);
+	uintptr_t current_pud_idx = pud_index(va);
 
-	return (uintptr_t)early_pmd;
+	BUG_ON(current_pud_idx > end_pud_idx);
+	if (current_pud_idx == end_pud_idx)
+		return (uintptr_t)early_pmd2;
+	else
+		return (uintptr_t)early_pmd;
 }
 
 static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
@@ -769,6 +776,18 @@  static void __init set_mmap_rnd_bits_max(void)
 	mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3;
 }
 
+static pmd_t *__init select_pmd_early(uintptr_t pa)
+{
+	uintptr_t end_pud_idx = pud_index(kernel_map.phys_addr + kernel_map.size - 1);
+	uintptr_t current_pud_idx = pud_index(pa);
+
+	BUG_ON(current_pud_idx > end_pud_idx);
+	if (current_pud_idx == end_pud_idx)
+		return early_pmd2;
+	else
+		return early_pmd;
+}
+
 /*
  * There is a simple way to determine if 4-level is supported by the
  * underlying hardware: establish 1:1 mapping in 4-level page table mode
@@ -780,6 +799,7 @@  static __init void set_satp_mode(uintptr_t dtb_pa)
 	u64 identity_satp, hw_satp;
 	uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
 	u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa);
+	pmd_t *target_pmd, *target_pmd2;
 
 	if (satp_mode_cmdline == SATP_MODE_57) {
 		disable_pgtable_l5();
@@ -789,17 +809,24 @@  static __init void set_satp_mode(uintptr_t dtb_pa)
 		return;
 	}
 
+	target_pmd = select_pmd_early(set_satp_mode_pmd);
+	target_pmd2 = select_pmd_early(set_satp_mode_pmd + PMD_SIZE);
 	create_p4d_mapping(early_p4d,
 			set_satp_mode_pmd, (uintptr_t)early_pud,
 			P4D_SIZE, PAGE_TABLE);
 	create_pud_mapping(early_pud,
-			   set_satp_mode_pmd, (uintptr_t)early_pmd,
+			   set_satp_mode_pmd, (uintptr_t)target_pmd,
+			   PUD_SIZE, PAGE_TABLE);
+	/* Handle the case where set_satp_mode straddles 2 PUDs */
+	if (target_pmd2 != target_pmd)
+		create_pud_mapping(early_pud,
+			   set_satp_mode_pmd + PMD_SIZE, (uintptr_t)target_pmd2,
 			   PUD_SIZE, PAGE_TABLE);
 	/* Handle the case where set_satp_mode straddles 2 PMDs */
-	create_pmd_mapping(early_pmd,
+	create_pmd_mapping(target_pmd,
 			   set_satp_mode_pmd, set_satp_mode_pmd,
 			   PMD_SIZE, PAGE_KERNEL_EXEC);
-	create_pmd_mapping(early_pmd,
+	create_pmd_mapping(target_pmd2,
 			   set_satp_mode_pmd + PMD_SIZE,
 			   set_satp_mode_pmd + PMD_SIZE,
 			   PMD_SIZE, PAGE_KERNEL_EXEC);
@@ -829,7 +856,9 @@  static __init void set_satp_mode(uintptr_t dtb_pa)
 	memset(early_pg_dir, 0, PAGE_SIZE);
 	memset(early_p4d, 0, PAGE_SIZE);
 	memset(early_pud, 0, PAGE_SIZE);
-	memset(early_pmd, 0, PAGE_SIZE);
+	memset(target_pmd, 0, PAGE_SIZE);
+	if (target_pmd2 != target_pmd)
+		memset(target_pmd2, 0, PAGE_SIZE);
 }
 #endif