Message ID | 20240117140333.2479667-1-vincent.chen@sifive.com (mailing list archive) |
---|---|
State | Accepted |
Commit | d9807d60c145836043ffa602328ea1d66dc458b1 |
Headers | show |
Series | [v6] riscv: mm: execute local TLB flush after populating vmemmap | expand |
Hi Vincent, Thank you for the patch. On Wed, Jan 17, 2024 at 2:07 PM Vincent Chen <vincent.chen@sifive.com> wrote: > > The spare_init() calls memmap_populate() many times to create VA to PA > mapping for the VMEMMAP area, where all "struct page" are located once > CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later > initialized in the zone_sizes_init() function. However, during this > process, no sfence.vma instruction is executed for this VMEMMAP area. > This omission may cause the hart to fail to perform page table walk > because some data related to the address translation is invisible to the > hart. To solve this issue, the local_flush_tlb_kernel_range() is called > right after the sparse_init() to execute a sfence.vma instruction for this > VMEMMAP area, ensuring that all data related to the address translation > is visible to the hart. > > Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") > Signed-off-by: Vincent Chen <vincent.chen@sifive.com> > Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com> > --- > arch/riscv/include/asm/tlbflush.h | 2 ++ > arch/riscv/mm/init.c | 5 +++++ > arch/riscv/mm/tlbflush.c | 6 ++++++ > 3 files changed, 13 insertions(+) > I get below build issue with this patch applied: arch/riscv/mm/tlbflush.c:207:6: error: redefinition of 'local_flush_tlb_kernel_range' 207 | void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/riscv/mm/tlbflush.c:69:6: note: previous definition of 'local_flush_tlb_kernel_range' with type 'void(long unsigned int, long unsigned int)' 69 | void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ make[4]: *** [scripts/Makefile.build:243: arch/riscv/mm/tlbflush.o] Error 1 make[4]: *** Waiting for unfinished jobs.... CC kernel/resource.o make[3]: *** [scripts/Makefile.build:481: arch/riscv/mm] Error 2 make[3]: *** Waiting for unfinished jobs.... I have attached the defconfig file. Cheers, Prabhakar > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h > index 8f3418c5f172..525267379ccb 100644 > --- a/arch/riscv/include/asm/tlbflush.h > +++ b/arch/riscv/include/asm/tlbflush.h > @@ -46,6 +46,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end); > void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, > unsigned long end); > #endif > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); > #else /* CONFIG_SMP && CONFIG_MMU */ > > #define flush_tlb_all() local_flush_tlb_all() > @@ -66,6 +67,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, > > #define flush_tlb_mm(mm) flush_tlb_all() > #define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() > +#define local_flush_tlb_kernel_range(start, end) flush_tlb_all() > #endif /* !CONFIG_SMP || !CONFIG_MMU */ > > #endif /* _ASM_RISCV_TLBFLUSH_H */ > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index 2e011cbddf3a..cc56a0945120 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -1377,6 +1377,10 @@ void __init misc_mem_init(void) > early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); > arch_numa_init(); > sparse_init(); > +#ifdef CONFIG_SPARSEMEM_VMEMMAP > + /* The entire VMEMMAP region has been populated. Flush TLB for this region */ > + local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END); > +#endif > zone_sizes_init(); > arch_reserve_crashkernel(); > memblock_dump_all(); > @@ -1386,6 +1390,7 @@ void __init misc_mem_init(void) > int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, > struct vmem_altmap *altmap) > { > + /* Defer the required TLB flush until the entire VMEMMAP region has been populated */ > return vmemmap_populate_basepages(start, end, node, NULL); > } > #endif > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c > index e6659d7368b3..d11a4ae87ec1 100644 > --- a/arch/riscv/mm/tlbflush.c > +++ b/arch/riscv/mm/tlbflush.c > @@ -193,6 +193,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) > __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); > } > > +/* Flush a range of kernel pages without broadcasting */ > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) > +{ > + local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID); > +} > + > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, > unsigned long end) > -- > 2.34.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Hi Prabhakar, On Tue, Jan 30, 2024 at 10:59 AM Lad, Prabhakar <prabhakar.csengg@gmail.com> wrote: > > Hi Vincent, > > Thank you for the patch. > > On Wed, Jan 17, 2024 at 2:07 PM Vincent Chen <vincent.chen@sifive.com> wrote: > > > > The spare_init() calls memmap_populate() many times to create VA to PA > > mapping for the VMEMMAP area, where all "struct page" are located once > > CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later > > initialized in the zone_sizes_init() function. However, during this > > process, no sfence.vma instruction is executed for this VMEMMAP area. > > This omission may cause the hart to fail to perform page table walk > > because some data related to the address translation is invisible to the > > hart. To solve this issue, the local_flush_tlb_kernel_range() is called > > right after the sparse_init() to execute a sfence.vma instruction for this > > VMEMMAP area, ensuring that all data related to the address translation > > is visible to the hart. > > > > Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") > > Signed-off-by: Vincent Chen <vincent.chen@sifive.com> > > Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > --- > > arch/riscv/include/asm/tlbflush.h | 2 ++ > > arch/riscv/mm/init.c | 5 +++++ > > arch/riscv/mm/tlbflush.c | 6 ++++++ > > 3 files changed, 13 insertions(+) > > > I get below build issue with this patch applied: > > arch/riscv/mm/tlbflush.c:207:6: error: redefinition of > 'local_flush_tlb_kernel_range' > 207 | void local_flush_tlb_kernel_range(unsigned long start, > unsigned long end) > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ > arch/riscv/mm/tlbflush.c:69:6: note: previous definition of > 'local_flush_tlb_kernel_range' with type 'void(long unsigned int, > long unsigned int)' > 69 | void local_flush_tlb_kernel_range(unsigned long start, > unsigned long end) > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ > make[4]: *** [scripts/Makefile.build:243: arch/riscv/mm/tlbflush.o] Error 1 > make[4]: *** Waiting for unfinished jobs.... > CC kernel/resource.o > make[3]: *** [scripts/Makefile.build:481: arch/riscv/mm] Error 2 > make[3]: *** Waiting for unfinished jobs.... > > I have attached the defconfig file. > Indeed, we recently added the same function, you can find a rebased version of this patch on top of 6.8 here: https://github.com/AlexGhiti/linux-riscv/commit/756d1871f58edac455773db0bb996f8d064a6b06 > Cheers, > Prabhakar > > > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h > > index 8f3418c5f172..525267379ccb 100644 > > --- a/arch/riscv/include/asm/tlbflush.h > > +++ b/arch/riscv/include/asm/tlbflush.h > > @@ -46,6 +46,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end); > > void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, > > unsigned long end); > > #endif > > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); > > #else /* CONFIG_SMP && CONFIG_MMU */ > > > > #define flush_tlb_all() local_flush_tlb_all() > > @@ -66,6 +67,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, > > > > #define flush_tlb_mm(mm) flush_tlb_all() > > #define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() > > +#define local_flush_tlb_kernel_range(start, end) flush_tlb_all() > > #endif /* !CONFIG_SMP || !CONFIG_MMU */ > > > > #endif /* _ASM_RISCV_TLBFLUSH_H */ > > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > > index 2e011cbddf3a..cc56a0945120 100644 > > --- a/arch/riscv/mm/init.c > > +++ b/arch/riscv/mm/init.c > > @@ -1377,6 +1377,10 @@ void __init misc_mem_init(void) > > early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); > > arch_numa_init(); > > sparse_init(); > > +#ifdef CONFIG_SPARSEMEM_VMEMMAP > > + /* The entire VMEMMAP region has been populated. Flush TLB for this region */ > > + local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END); > > +#endif > > zone_sizes_init(); > > arch_reserve_crashkernel(); > > memblock_dump_all(); > > @@ -1386,6 +1390,7 @@ void __init misc_mem_init(void) > > int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, > > struct vmem_altmap *altmap) > > { > > + /* Defer the required TLB flush until the entire VMEMMAP region has been populated */ > > return vmemmap_populate_basepages(start, end, node, NULL); > > } > > #endif > > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c > > index e6659d7368b3..d11a4ae87ec1 100644 > > --- a/arch/riscv/mm/tlbflush.c > > +++ b/arch/riscv/mm/tlbflush.c > > @@ -193,6 +193,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) > > __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); > > } > > > > +/* Flush a range of kernel pages without broadcasting */ > > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) > > +{ > > + local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID); > > +} > > + > > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > > void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, > > unsigned long end) > > -- > > 2.34.1 > > > > > > _______________________________________________ > > linux-riscv mailing list > > linux-riscv@lists.infradead.org > > http://lists.infradead.org/mailman/listinfo/linux-riscv
Hi Alexandre, On Tue, Jan 30, 2024 at 10:59 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > Hi Prabhakar, > > On Tue, Jan 30, 2024 at 10:59 AM Lad, Prabhakar > <prabhakar.csengg@gmail.com> wrote: > > > > Hi Vincent, > > > > Thank you for the patch. > > > > On Wed, Jan 17, 2024 at 2:07 PM Vincent Chen <vincent.chen@sifive.com> wrote: > > > > > > The spare_init() calls memmap_populate() many times to create VA to PA > > > mapping for the VMEMMAP area, where all "struct page" are located once > > > CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later > > > initialized in the zone_sizes_init() function. However, during this > > > process, no sfence.vma instruction is executed for this VMEMMAP area. > > > This omission may cause the hart to fail to perform page table walk > > > because some data related to the address translation is invisible to the > > > hart. To solve this issue, the local_flush_tlb_kernel_range() is called > > > right after the sparse_init() to execute a sfence.vma instruction for this > > > VMEMMAP area, ensuring that all data related to the address translation > > > is visible to the hart. > > > > > > Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") > > > Signed-off-by: Vincent Chen <vincent.chen@sifive.com> > > > Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > > --- > > > arch/riscv/include/asm/tlbflush.h | 2 ++ > > > arch/riscv/mm/init.c | 5 +++++ > > > arch/riscv/mm/tlbflush.c | 6 ++++++ > > > 3 files changed, 13 insertions(+) > > > > > I get below build issue with this patch applied: > > > > arch/riscv/mm/tlbflush.c:207:6: error: redefinition of > > 'local_flush_tlb_kernel_range' > > 207 | void local_flush_tlb_kernel_range(unsigned long start, > > unsigned long end) > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > arch/riscv/mm/tlbflush.c:69:6: note: previous definition of > > 'local_flush_tlb_kernel_range' with type 'void(long unsigned int, > > long unsigned int)' > > 69 | void local_flush_tlb_kernel_range(unsigned long start, > > unsigned long end) > > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ > > make[4]: *** [scripts/Makefile.build:243: arch/riscv/mm/tlbflush.o] Error 1 > > make[4]: *** Waiting for unfinished jobs.... > > CC kernel/resource.o > > make[3]: *** [scripts/Makefile.build:481: arch/riscv/mm] Error 2 > > make[3]: *** Waiting for unfinished jobs.... > > > > I have attached the defconfig file. > > > > Indeed, we recently added the same function, you can find a rebased > version of this patch on top of 6.8 here: > https://github.com/AlexGhiti/linux-riscv/commit/756d1871f58edac455773db0bb996f8d064a6b06 > Thanks for the pointer. Cheers, Prabhakar
Hello: This patch was applied to riscv/linux.git (fixes) by Palmer Dabbelt <palmer@rivosinc.com>: On Wed, 17 Jan 2024 22:03:33 +0800 you wrote: > The spare_init() calls memmap_populate() many times to create VA to PA > mapping for the VMEMMAP area, where all "struct page" are located once > CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later > initialized in the zone_sizes_init() function. However, during this > process, no sfence.vma instruction is executed for this VMEMMAP area. > This omission may cause the hart to fail to perform page table walk > because some data related to the address translation is invisible to the > hart. To solve this issue, the local_flush_tlb_kernel_range() is called > right after the sparse_init() to execute a sfence.vma instruction for this > VMEMMAP area, ensuring that all data related to the address translation > is visible to the hart. > > [...] Here is the summary with links: - [v6] riscv: mm: execute local TLB flush after populating vmemmap https://git.kernel.org/riscv/c/d9807d60c145 You are awesome, thank you!
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 8f3418c5f172..525267379ccb 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -46,6 +46,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end); void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); #endif +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); #else /* CONFIG_SMP && CONFIG_MMU */ #define flush_tlb_all() local_flush_tlb_all() @@ -66,6 +67,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, #define flush_tlb_mm(mm) flush_tlb_all() #define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() +#define local_flush_tlb_kernel_range(start, end) flush_tlb_all() #endif /* !CONFIG_SMP || !CONFIG_MMU */ #endif /* _ASM_RISCV_TLBFLUSH_H */ diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 2e011cbddf3a..cc56a0945120 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1377,6 +1377,10 @@ void __init misc_mem_init(void) early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); arch_numa_init(); sparse_init(); +#ifdef CONFIG_SPARSEMEM_VMEMMAP + /* The entire VMEMMAP region has been populated. Flush TLB for this region */ + local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END); +#endif zone_sizes_init(); arch_reserve_crashkernel(); memblock_dump_all(); @@ -1386,6 +1390,7 @@ void __init misc_mem_init(void) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { + /* Defer the required TLB flush until the entire VMEMMAP region has been populated */ return vmemmap_populate_basepages(start, end, node, NULL); } #endif diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index e6659d7368b3..d11a4ae87ec1 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -193,6 +193,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); } +/* Flush a range of kernel pages without broadcasting */ +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID); +} + #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)