Message ID | 20240430092211.26269-1-gaoshanliukou@163.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [V1] riscv: mm: Support > 1GB kernel image size when creating early page table | expand |
Hi Yang, On Tue, Apr 30, 2024 at 11:24 AM yang.zhang <gaoshanliukou@163.com> wrote: > > From: "yang.zhang" <yang.zhang@hexintek.com> > > By default, when creating early page table, only one PMD page table, but > if kernel image size exceeds 1GB, it need two PMD page table, otherwise, > it would BUG_ON in create_kernel_page_table. > > In addition, if trap earlier, trap vector doesn't yet set properly, current > value maybe set by previous firmwire, typically it's the _start of kernel, > it's confused and difficult to debuge, so set it earlier. Totally agree with that, I'm used to debugging this part of the code and I know stvec points to __start and not handle_exception at this point...But that deserves a fix indeed, it's confusing. I don't see this fix in the patch below though? > --- > I'm not sure whether hugesize kernel is reasonable, if not, ignore this patch. Unless you have a good use case for this, I don't think that's necessary, we never encountered such situations so I'd rather not complicate this code even more. Thanks, Alex > This issue can be reproduced simpily by changing '_end' in vmlinux.lds.S > such as _end = . + 0x40000000; > > Signed-off-by: yang.zhang <yang.zhang@hexintek.com> > --- > arch/riscv/mm/init.c | 41 +++++++++++++++++++++++++++++++++++------ > 1 file changed, 35 insertions(+), 6 deletions(-) > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index fe8e159394d8..094b39f920d3 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -386,11 +386,13 @@ static void __init create_pte_mapping(pte_t *ptep, > static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; > static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; > static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); > +static pmd_t early_pmd2[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); > > #ifdef CONFIG_XIP_KERNEL > #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd)) > #define fixmap_pmd ((pmd_t *)XIP_FIXUP(fixmap_pmd)) > #define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd)) > +#define early_pmd2 ((pmd_t *)XIP_FIXUP(early_pmd2)) > #endif /* CONFIG_XIP_KERNEL */ > > static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss; > @@ -432,9 +434,14 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa) > > static phys_addr_t __init alloc_pmd_early(uintptr_t va) > { > - BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT); > + uintptr_t end_pud_idx = pud_index(kernel_map.virt_addr + kernel_map.size - 1); > + uintptr_t current_pud_idx = pud_index(va); > > - return (uintptr_t)early_pmd; > + BUG_ON(current_pud_idx > end_pud_idx); > + if (current_pud_idx == end_pud_idx) > + return (uintptr_t)early_pmd2; > + else > + return (uintptr_t)early_pmd; > } > > static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) > @@ -769,6 +776,18 @@ static void __init set_mmap_rnd_bits_max(void) > mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3; > } > > +static pmd_t *__init select_pmd_early(uintptr_t pa) > +{ > + uintptr_t end_pud_idx = pud_index(kernel_map.phys_addr + kernel_map.size - 1); > + uintptr_t current_pud_idx = pud_index(pa); > + > + BUG_ON(current_pud_idx > end_pud_idx); > + if (current_pud_idx == end_pud_idx) > + return early_pmd2; > + else > + return early_pmd; > +} > + > /* > * There is a simple way to determine if 4-level is supported by the > * underlying hardware: establish 1:1 mapping in 4-level page table mode > @@ -780,6 +799,7 @@ static __init void set_satp_mode(uintptr_t dtb_pa) > u64 identity_satp, hw_satp; > uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; > u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa); > + pmd_t *target_pmd, *target_pmd2; > > if (satp_mode_cmdline == SATP_MODE_57) { > disable_pgtable_l5(); > @@ -789,17 +809,24 @@ static __init void set_satp_mode(uintptr_t dtb_pa) > return; > } > > + target_pmd = select_pmd_early(set_satp_mode_pmd); > + target_pmd2 = select_pmd_early(set_satp_mode_pmd + PMD_SIZE); > create_p4d_mapping(early_p4d, > set_satp_mode_pmd, (uintptr_t)early_pud, > P4D_SIZE, PAGE_TABLE); > create_pud_mapping(early_pud, > - set_satp_mode_pmd, (uintptr_t)early_pmd, > + set_satp_mode_pmd, (uintptr_t)target_pmd, > + PUD_SIZE, PAGE_TABLE); > + /* Handle the case where set_satp_mode straddles 2 PUDs */ > + if (target_pmd2 != target_pmd) > + create_pud_mapping(early_pud, > + set_satp_mode_pmd + PMD_SIZE, (uintptr_t)target_pmd2, > PUD_SIZE, PAGE_TABLE); > /* Handle the case where set_satp_mode straddles 2 PMDs */ > - create_pmd_mapping(early_pmd, > + create_pmd_mapping(target_pmd, > set_satp_mode_pmd, set_satp_mode_pmd, > PMD_SIZE, PAGE_KERNEL_EXEC); > - create_pmd_mapping(early_pmd, > + create_pmd_mapping(target_pmd2, > set_satp_mode_pmd + PMD_SIZE, > set_satp_mode_pmd + PMD_SIZE, > PMD_SIZE, PAGE_KERNEL_EXEC); > @@ -829,7 +856,9 @@ static __init void set_satp_mode(uintptr_t dtb_pa) > memset(early_pg_dir, 0, PAGE_SIZE); > memset(early_p4d, 0, PAGE_SIZE); > memset(early_pud, 0, PAGE_SIZE); > - memset(early_pmd, 0, PAGE_SIZE); > + memset(target_pmd, 0, PAGE_SIZE); > + if (target_pmd2 != target_pmd) > + memset(target_pmd2, 0, PAGE_SIZE); > } > #endif > > -- > 2.25.1 >
At 2024-04-30 20:08:18, "Alexandre Ghiti" <alexghiti@rivosinc.com> wrote: >Hi Yang, > >On Tue, Apr 30, 2024 at 11:24 AM yang.zhang <gaoshanliukou@163.com> wrote: >> >> From: "yang.zhang" <yang.zhang@hexintek.com> >> >> By default, when creating early page table, only one PMD page table, but >> if kernel image size exceeds 1GB, it need two PMD page table, otherwise, >> it would BUG_ON in create_kernel_page_table. >> >> In addition, if trap earlier, trap vector doesn't yet set properly, current >> value maybe set by previous firmwire, typically it's the _start of kernel, >> it's confused and difficult to debug, so set it earlier. > >Totally agree with that, I'm used to debugging this part of the code >and I know stvec points to __start and not handle_exception at this >point...But that deserves a fix indeed, it's confusing. I don't see >this fix in the patch below though? Sorry, i missed it. I send patch v2 for this. >> --- >> I'm not sure whether hugesize kernel is reasonable, if not, ignore this patch. > >Unless you have a good use case for this, I don't think that's >necessary, we never encountered such situations so I'd rather not >complicate this code even more. I just see the kernel space is reserved 2GB(which is 0xffffffff80000000 -0xffffffffffffffff ). Normally, kernel image size is just some MBs, so this patch maybe unnecessary. Just ignore. I add the patch for set trap vector earlier for helping debug. Thanks. >Thanks, > >Alex > >> This issue can be reproduced simpily by changing '_end' in vmlinux.lds.S >> such as _end = . + 0x40000000; >> >> Signed-off-by: yang.zhang <yang.zhang@hexintek.com> >> --- >> arch/riscv/mm/init.c | 41 +++++++++++++++++++++++++++++++++++------ >> 1 file changed, 35 insertions(+), 6 deletions(-) >> >> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c >> index fe8e159394d8..094b39f920d3 100644 >> --- a/arch/riscv/mm/init.c >> +++ b/arch/riscv/mm/init.c >> @@ -386,11 +386,13 @@ static void __init create_pte_mapping(pte_t *ptep, >> static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; >> static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; >> static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); >> +static pmd_t early_pmd2[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); >> >> #ifdef CONFIG_XIP_KERNEL >> #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd)) >> #define fixmap_pmd ((pmd_t *)XIP_FIXUP(fixmap_pmd)) >> #define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd)) >> +#define early_pmd2 ((pmd_t *)XIP_FIXUP(early_pmd2)) >> #endif /* CONFIG_XIP_KERNEL */ >> >> static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss; >> @@ -432,9 +434,14 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa) >> >> static phys_addr_t __init alloc_pmd_early(uintptr_t va) >> { >> - BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT); >> + uintptr_t end_pud_idx = pud_index(kernel_map.virt_addr + kernel_map.size - 1); >> + uintptr_t current_pud_idx = pud_index(va); >> >> - return (uintptr_t)early_pmd; >> + BUG_ON(current_pud_idx > end_pud_idx); >> + if (current_pud_idx == end_pud_idx) >> + return (uintptr_t)early_pmd2; >> + else >> + return (uintptr_t)early_pmd; >> } >> >> static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) >> @@ -769,6 +776,18 @@ static void __init set_mmap_rnd_bits_max(void) >> mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3; >> } >> >> +static pmd_t *__init select_pmd_early(uintptr_t pa) >> +{ >> + uintptr_t end_pud_idx = pud_index(kernel_map.phys_addr + kernel_map.size - 1); >> + uintptr_t current_pud_idx = pud_index(pa); >> + >> + BUG_ON(current_pud_idx > end_pud_idx); >> + if (current_pud_idx == end_pud_idx) >> + return early_pmd2; >> + else >> + return early_pmd; >> +} >> + >> /* >> * There is a simple way to determine if 4-level is supported by the >> * underlying hardware: establish 1:1 mapping in 4-level page table mode >> @@ -780,6 +799,7 @@ static __init void set_satp_mode(uintptr_t dtb_pa) >> u64 identity_satp, hw_satp; >> uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; >> u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa); >> + pmd_t *target_pmd, *target_pmd2; >> >> if (satp_mode_cmdline == SATP_MODE_57) { >> disable_pgtable_l5(); >> @@ -789,17 +809,24 @@ static __init void set_satp_mode(uintptr_t dtb_pa) >> return; >> } >> >> + target_pmd = select_pmd_early(set_satp_mode_pmd); >> + target_pmd2 = select_pmd_early(set_satp_mode_pmd + PMD_SIZE); >> create_p4d_mapping(early_p4d, >> set_satp_mode_pmd, (uintptr_t)early_pud, >> P4D_SIZE, PAGE_TABLE); >> create_pud_mapping(early_pud, >> - set_satp_mode_pmd, (uintptr_t)early_pmd, >> + set_satp_mode_pmd, (uintptr_t)target_pmd, >> + PUD_SIZE, PAGE_TABLE); >> + /* Handle the case where set_satp_mode straddles 2 PUDs */ >> + if (target_pmd2 != target_pmd) >> + create_pud_mapping(early_pud, >> + set_satp_mode_pmd + PMD_SIZE, (uintptr_t)target_pmd2, >> PUD_SIZE, PAGE_TABLE); >> /* Handle the case where set_satp_mode straddles 2 PMDs */ >> - create_pmd_mapping(early_pmd, >> + create_pmd_mapping(target_pmd, >> set_satp_mode_pmd, set_satp_mode_pmd, >> PMD_SIZE, PAGE_KERNEL_EXEC); >> - create_pmd_mapping(early_pmd, >> + create_pmd_mapping(target_pmd2, >> set_satp_mode_pmd + PMD_SIZE, >> set_satp_mode_pmd + PMD_SIZE, >> PMD_SIZE, PAGE_KERNEL_EXEC); >> @@ -829,7 +856,9 @@ static __init void set_satp_mode(uintptr_t dtb_pa) >> memset(early_pg_dir, 0, PAGE_SIZE); >> memset(early_p4d, 0, PAGE_SIZE); >> memset(early_pud, 0, PAGE_SIZE); >> - memset(early_pmd, 0, PAGE_SIZE); >> + memset(target_pmd, 0, PAGE_SIZE); >> + if (target_pmd2 != target_pmd) >> + memset(target_pmd2, 0, PAGE_SIZE); >> } >> #endif >> >> -- >> 2.25.1 >>
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fe8e159394d8..094b39f920d3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -386,11 +386,13 @@ static void __init create_pte_mapping(pte_t *ptep, static pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; static pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); +static pmd_t early_pmd2[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL #define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd)) #define fixmap_pmd ((pmd_t *)XIP_FIXUP(fixmap_pmd)) #define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd)) +#define early_pmd2 ((pmd_t *)XIP_FIXUP(early_pmd2)) #endif /* CONFIG_XIP_KERNEL */ static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss; @@ -432,9 +434,14 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_pmd_early(uintptr_t va) { - BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT); + uintptr_t end_pud_idx = pud_index(kernel_map.virt_addr + kernel_map.size - 1); + uintptr_t current_pud_idx = pud_index(va); - return (uintptr_t)early_pmd; + BUG_ON(current_pud_idx > end_pud_idx); + if (current_pud_idx == end_pud_idx) + return (uintptr_t)early_pmd2; + else + return (uintptr_t)early_pmd; } static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) @@ -769,6 +776,18 @@ static void __init set_mmap_rnd_bits_max(void) mmap_rnd_bits_max = MMAP_VA_BITS - PAGE_SHIFT - 3; } +static pmd_t *__init select_pmd_early(uintptr_t pa) +{ + uintptr_t end_pud_idx = pud_index(kernel_map.phys_addr + kernel_map.size - 1); + uintptr_t current_pud_idx = pud_index(pa); + + BUG_ON(current_pud_idx > end_pud_idx); + if (current_pud_idx == end_pud_idx) + return early_pmd2; + else + return early_pmd; +} + /* * There is a simple way to determine if 4-level is supported by the * underlying hardware: establish 1:1 mapping in 4-level page table mode @@ -780,6 +799,7 @@ static __init void set_satp_mode(uintptr_t dtb_pa) u64 identity_satp, hw_satp; uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa); + pmd_t *target_pmd, *target_pmd2; if (satp_mode_cmdline == SATP_MODE_57) { disable_pgtable_l5(); @@ -789,17 +809,24 @@ static __init void set_satp_mode(uintptr_t dtb_pa) return; } + target_pmd = select_pmd_early(set_satp_mode_pmd); + target_pmd2 = select_pmd_early(set_satp_mode_pmd + PMD_SIZE); create_p4d_mapping(early_p4d, set_satp_mode_pmd, (uintptr_t)early_pud, P4D_SIZE, PAGE_TABLE); create_pud_mapping(early_pud, - set_satp_mode_pmd, (uintptr_t)early_pmd, + set_satp_mode_pmd, (uintptr_t)target_pmd, + PUD_SIZE, PAGE_TABLE); + /* Handle the case where set_satp_mode straddles 2 PUDs */ + if (target_pmd2 != target_pmd) + create_pud_mapping(early_pud, + set_satp_mode_pmd + PMD_SIZE, (uintptr_t)target_pmd2, PUD_SIZE, PAGE_TABLE); /* Handle the case where set_satp_mode straddles 2 PMDs */ - create_pmd_mapping(early_pmd, + create_pmd_mapping(target_pmd, set_satp_mode_pmd, set_satp_mode_pmd, PMD_SIZE, PAGE_KERNEL_EXEC); - create_pmd_mapping(early_pmd, + create_pmd_mapping(target_pmd2, set_satp_mode_pmd + PMD_SIZE, set_satp_mode_pmd + PMD_SIZE, PMD_SIZE, PAGE_KERNEL_EXEC); @@ -829,7 +856,9 @@ static __init void set_satp_mode(uintptr_t dtb_pa) memset(early_pg_dir, 0, PAGE_SIZE); memset(early_p4d, 0, PAGE_SIZE); memset(early_pud, 0, PAGE_SIZE); - memset(early_pmd, 0, PAGE_SIZE); + memset(target_pmd, 0, PAGE_SIZE); + if (target_pmd2 != target_pmd) + memset(target_pmd2, 0, PAGE_SIZE); } #endif