diff mbox series

[v6] riscv: mm: execute local TLB flush after populating vmemmap

Message ID 20240117140333.2479667-1-vincent.chen@sifive.com (mailing list archive)
State Accepted
Commit d9807d60c145836043ffa602328ea1d66dc458b1
Headers show
Series [v6] riscv: mm: execute local TLB flush after populating vmemmap | expand

Checks

Context Check Description
conchuod/patch-1-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-1-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-1-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-1-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-1-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-1-test-6 success .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-1-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-1-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-1-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-1-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-1-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-1-test-12 success .github/scripts/patches/tests/verify_signedoff.sh
conchuod/vmtest-fixes-PR success PR summary

Commit Message

Vincent Chen Jan. 17, 2024, 2:03 p.m. UTC
The spare_init() calls memmap_populate() many times to create VA to PA
mapping for the VMEMMAP area, where all "struct page" are located once
CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later
initialized in the zone_sizes_init() function. However, during this
process, no sfence.vma instruction is executed for this VMEMMAP area.
This omission may cause the hart to fail to perform page table walk
because some data related to the address translation is invisible to the
hart. To solve this issue, the local_flush_tlb_kernel_range() is called
right after the sparse_init() to execute a sfence.vma instruction for this
VMEMMAP area, ensuring that all data related to the address translation
is visible to the hart.

Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem")
Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 arch/riscv/include/asm/tlbflush.h | 2 ++
 arch/riscv/mm/init.c              | 5 +++++
 arch/riscv/mm/tlbflush.c          | 6 ++++++
 3 files changed, 13 insertions(+)

Comments

Lad, Prabhakar Jan. 30, 2024, 9:59 a.m. UTC | #1
Hi Vincent,

Thank you for the patch.

On Wed, Jan 17, 2024 at 2:07 PM Vincent Chen <vincent.chen@sifive.com> wrote:
>
> The spare_init() calls memmap_populate() many times to create VA to PA
> mapping for the VMEMMAP area, where all "struct page" are located once
> CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later
> initialized in the zone_sizes_init() function. However, during this
> process, no sfence.vma instruction is executed for this VMEMMAP area.
> This omission may cause the hart to fail to perform page table walk
> because some data related to the address translation is invisible to the
> hart. To solve this issue, the local_flush_tlb_kernel_range() is called
> right after the sparse_init() to execute a sfence.vma instruction for this
> VMEMMAP area, ensuring that all data related to the address translation
> is visible to the hart.
>
> Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem")
> Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
> Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
>  arch/riscv/include/asm/tlbflush.h | 2 ++
>  arch/riscv/mm/init.c              | 5 +++++
>  arch/riscv/mm/tlbflush.c          | 6 ++++++
>  3 files changed, 13 insertions(+)
>
I get below build issue with this patch applied:

arch/riscv/mm/tlbflush.c:207:6: error: redefinition of
'local_flush_tlb_kernel_range'
  207 | void local_flush_tlb_kernel_range(unsigned long start,
unsigned long end)
      |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
arch/riscv/mm/tlbflush.c:69:6: note: previous definition of
'local_flush_tlb_kernel_range' with type 'void(long unsigned int,
long unsigned int)'
   69 | void local_flush_tlb_kernel_range(unsigned long start,
unsigned long end)
      |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
make[4]: *** [scripts/Makefile.build:243: arch/riscv/mm/tlbflush.o] Error 1
make[4]: *** Waiting for unfinished jobs....
  CC      kernel/resource.o
make[3]: *** [scripts/Makefile.build:481: arch/riscv/mm] Error 2
make[3]: *** Waiting for unfinished jobs....

I have attached the defconfig file.

Cheers,
Prabhakar

> diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
> index 8f3418c5f172..525267379ccb 100644
> --- a/arch/riscv/include/asm/tlbflush.h
> +++ b/arch/riscv/include/asm/tlbflush.h
> @@ -46,6 +46,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end);
>  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
>                         unsigned long end);
>  #endif
> +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
>  #else /* CONFIG_SMP && CONFIG_MMU */
>
>  #define flush_tlb_all() local_flush_tlb_all()
> @@ -66,6 +67,7 @@ static inline void flush_tlb_kernel_range(unsigned long start,
>
>  #define flush_tlb_mm(mm) flush_tlb_all()
>  #define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
> +#define local_flush_tlb_kernel_range(start, end) flush_tlb_all()
>  #endif /* !CONFIG_SMP || !CONFIG_MMU */
>
>  #endif /* _ASM_RISCV_TLBFLUSH_H */
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 2e011cbddf3a..cc56a0945120 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -1377,6 +1377,10 @@ void __init misc_mem_init(void)
>         early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
>         arch_numa_init();
>         sparse_init();
> +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> +       /* The entire VMEMMAP region has been populated. Flush TLB for this region */
> +       local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END);
> +#endif
>         zone_sizes_init();
>         arch_reserve_crashkernel();
>         memblock_dump_all();
> @@ -1386,6 +1390,7 @@ void __init misc_mem_init(void)
>  int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
>                                struct vmem_altmap *altmap)
>  {
> +       /* Defer the required TLB flush until the entire VMEMMAP region has been populated */
>         return vmemmap_populate_basepages(start, end, node, NULL);
>  }
>  #endif
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index e6659d7368b3..d11a4ae87ec1 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -193,6 +193,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>         __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
>  }
>
> +/* Flush a range of kernel pages without broadcasting */
> +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
> +{
> +       local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID);
> +}
> +
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
>                         unsigned long end)
> --
> 2.34.1
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Alexandre Ghiti Jan. 30, 2024, 10:58 a.m. UTC | #2
Hi Prabhakar,

On Tue, Jan 30, 2024 at 10:59 AM Lad, Prabhakar
<prabhakar.csengg@gmail.com> wrote:
>
> Hi Vincent,
>
> Thank you for the patch.
>
> On Wed, Jan 17, 2024 at 2:07 PM Vincent Chen <vincent.chen@sifive.com> wrote:
> >
> > The spare_init() calls memmap_populate() many times to create VA to PA
> > mapping for the VMEMMAP area, where all "struct page" are located once
> > CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later
> > initialized in the zone_sizes_init() function. However, during this
> > process, no sfence.vma instruction is executed for this VMEMMAP area.
> > This omission may cause the hart to fail to perform page table walk
> > because some data related to the address translation is invisible to the
> > hart. To solve this issue, the local_flush_tlb_kernel_range() is called
> > right after the sparse_init() to execute a sfence.vma instruction for this
> > VMEMMAP area, ensuring that all data related to the address translation
> > is visible to the hart.
> >
> > Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem")
> > Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
> > Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> >  arch/riscv/include/asm/tlbflush.h | 2 ++
> >  arch/riscv/mm/init.c              | 5 +++++
> >  arch/riscv/mm/tlbflush.c          | 6 ++++++
> >  3 files changed, 13 insertions(+)
> >
> I get below build issue with this patch applied:
>
> arch/riscv/mm/tlbflush.c:207:6: error: redefinition of
> 'local_flush_tlb_kernel_range'
>   207 | void local_flush_tlb_kernel_range(unsigned long start,
> unsigned long end)
>       |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
> arch/riscv/mm/tlbflush.c:69:6: note: previous definition of
> 'local_flush_tlb_kernel_range' with type 'void(long unsigned int,
> long unsigned int)'
>    69 | void local_flush_tlb_kernel_range(unsigned long start,
> unsigned long end)
>       |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
> make[4]: *** [scripts/Makefile.build:243: arch/riscv/mm/tlbflush.o] Error 1
> make[4]: *** Waiting for unfinished jobs....
>   CC      kernel/resource.o
> make[3]: *** [scripts/Makefile.build:481: arch/riscv/mm] Error 2
> make[3]: *** Waiting for unfinished jobs....
>
> I have attached the defconfig file.
>

Indeed, we recently added the same function, you can find a rebased
version of this patch on top of 6.8 here:
https://github.com/AlexGhiti/linux-riscv/commit/756d1871f58edac455773db0bb996f8d064a6b06

> Cheers,
> Prabhakar
>
> > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
> > index 8f3418c5f172..525267379ccb 100644
> > --- a/arch/riscv/include/asm/tlbflush.h
> > +++ b/arch/riscv/include/asm/tlbflush.h
> > @@ -46,6 +46,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end);
> >  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
> >                         unsigned long end);
> >  #endif
> > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
> >  #else /* CONFIG_SMP && CONFIG_MMU */
> >
> >  #define flush_tlb_all() local_flush_tlb_all()
> > @@ -66,6 +67,7 @@ static inline void flush_tlb_kernel_range(unsigned long start,
> >
> >  #define flush_tlb_mm(mm) flush_tlb_all()
> >  #define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
> > +#define local_flush_tlb_kernel_range(start, end) flush_tlb_all()
> >  #endif /* !CONFIG_SMP || !CONFIG_MMU */
> >
> >  #endif /* _ASM_RISCV_TLBFLUSH_H */
> > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> > index 2e011cbddf3a..cc56a0945120 100644
> > --- a/arch/riscv/mm/init.c
> > +++ b/arch/riscv/mm/init.c
> > @@ -1377,6 +1377,10 @@ void __init misc_mem_init(void)
> >         early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
> >         arch_numa_init();
> >         sparse_init();
> > +#ifdef CONFIG_SPARSEMEM_VMEMMAP
> > +       /* The entire VMEMMAP region has been populated. Flush TLB for this region */
> > +       local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END);
> > +#endif
> >         zone_sizes_init();
> >         arch_reserve_crashkernel();
> >         memblock_dump_all();
> > @@ -1386,6 +1390,7 @@ void __init misc_mem_init(void)
> >  int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
> >                                struct vmem_altmap *altmap)
> >  {
> > +       /* Defer the required TLB flush until the entire VMEMMAP region has been populated */
> >         return vmemmap_populate_basepages(start, end, node, NULL);
> >  }
> >  #endif
> > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> > index e6659d7368b3..d11a4ae87ec1 100644
> > --- a/arch/riscv/mm/tlbflush.c
> > +++ b/arch/riscv/mm/tlbflush.c
> > @@ -193,6 +193,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> >         __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
> >  }
> >
> > +/* Flush a range of kernel pages without broadcasting */
> > +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
> > +{
> > +       local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID);
> > +}
> > +
> >  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> >  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
> >                         unsigned long end)
> > --
> > 2.34.1
> >
> >
> > _______________________________________________
> > linux-riscv mailing list
> > linux-riscv@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-riscv
Lad, Prabhakar Jan. 30, 2024, 9:38 p.m. UTC | #3
Hi Alexandre,

On Tue, Jan 30, 2024 at 10:59 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Prabhakar,
>
> On Tue, Jan 30, 2024 at 10:59 AM Lad, Prabhakar
> <prabhakar.csengg@gmail.com> wrote:
> >
> > Hi Vincent,
> >
> > Thank you for the patch.
> >
> > On Wed, Jan 17, 2024 at 2:07 PM Vincent Chen <vincent.chen@sifive.com> wrote:
> > >
> > > The spare_init() calls memmap_populate() many times to create VA to PA
> > > mapping for the VMEMMAP area, where all "struct page" are located once
> > > CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later
> > > initialized in the zone_sizes_init() function. However, during this
> > > process, no sfence.vma instruction is executed for this VMEMMAP area.
> > > This omission may cause the hart to fail to perform page table walk
> > > because some data related to the address translation is invisible to the
> > > hart. To solve this issue, the local_flush_tlb_kernel_range() is called
> > > right after the sparse_init() to execute a sfence.vma instruction for this
> > > VMEMMAP area, ensuring that all data related to the address translation
> > > is visible to the hart.
> > >
> > > Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem")
> > > Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
> > > Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > ---
> > >  arch/riscv/include/asm/tlbflush.h | 2 ++
> > >  arch/riscv/mm/init.c              | 5 +++++
> > >  arch/riscv/mm/tlbflush.c          | 6 ++++++
> > >  3 files changed, 13 insertions(+)
> > >
> > I get below build issue with this patch applied:
> >
> > arch/riscv/mm/tlbflush.c:207:6: error: redefinition of
> > 'local_flush_tlb_kernel_range'
> >   207 | void local_flush_tlb_kernel_range(unsigned long start,
> > unsigned long end)
> >       |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
> > arch/riscv/mm/tlbflush.c:69:6: note: previous definition of
> > 'local_flush_tlb_kernel_range' with type 'void(long unsigned int,
> > long unsigned int)'
> >    69 | void local_flush_tlb_kernel_range(unsigned long start,
> > unsigned long end)
> >       |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
> > make[4]: *** [scripts/Makefile.build:243: arch/riscv/mm/tlbflush.o] Error 1
> > make[4]: *** Waiting for unfinished jobs....
> >   CC      kernel/resource.o
> > make[3]: *** [scripts/Makefile.build:481: arch/riscv/mm] Error 2
> > make[3]: *** Waiting for unfinished jobs....
> >
> > I have attached the defconfig file.
> >
>
> Indeed, we recently added the same function, you can find a rebased
> version of this patch on top of 6.8 here:
> https://github.com/AlexGhiti/linux-riscv/commit/756d1871f58edac455773db0bb996f8d064a6b06
>
Thanks for the pointer.

Cheers,
Prabhakar
patchwork-bot+linux-riscv@kernel.org Feb. 8, 2024, 1:30 a.m. UTC | #4
Hello:

This patch was applied to riscv/linux.git (fixes)
by Palmer Dabbelt <palmer@rivosinc.com>:

On Wed, 17 Jan 2024 22:03:33 +0800 you wrote:
> The spare_init() calls memmap_populate() many times to create VA to PA
> mapping for the VMEMMAP area, where all "struct page" are located once
> CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later
> initialized in the zone_sizes_init() function. However, during this
> process, no sfence.vma instruction is executed for this VMEMMAP area.
> This omission may cause the hart to fail to perform page table walk
> because some data related to the address translation is invisible to the
> hart. To solve this issue, the local_flush_tlb_kernel_range() is called
> right after the sparse_init() to execute a sfence.vma instruction for this
> VMEMMAP area, ensuring that all data related to the address translation
> is visible to the hart.
> 
> [...]

Here is the summary with links:
  - [v6] riscv: mm: execute local TLB flush after populating vmemmap
    https://git.kernel.org/riscv/c/d9807d60c145

You are awesome, thank you!
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 8f3418c5f172..525267379ccb 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -46,6 +46,7 @@  void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			unsigned long end);
 #endif
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end);
 #else /* CONFIG_SMP && CONFIG_MMU */
 
 #define flush_tlb_all() local_flush_tlb_all()
@@ -66,6 +67,7 @@  static inline void flush_tlb_kernel_range(unsigned long start,
 
 #define flush_tlb_mm(mm) flush_tlb_all()
 #define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
+#define local_flush_tlb_kernel_range(start, end) flush_tlb_all()
 #endif /* !CONFIG_SMP || !CONFIG_MMU */
 
 #endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 2e011cbddf3a..cc56a0945120 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -1377,6 +1377,10 @@  void __init misc_mem_init(void)
 	early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
 	arch_numa_init();
 	sparse_init();
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	/* The entire VMEMMAP region has been populated. Flush TLB for this region */
+	local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END);
+#endif
 	zone_sizes_init();
 	arch_reserve_crashkernel();
 	memblock_dump_all();
@@ -1386,6 +1390,7 @@  void __init misc_mem_init(void)
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 			       struct vmem_altmap *altmap)
 {
+	/* Defer the required TLB flush until the entire VMEMMAP region has been populated */
 	return vmemmap_populate_basepages(start, end, node, NULL);
 }
 #endif
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index e6659d7368b3..d11a4ae87ec1 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -193,6 +193,12 @@  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 	__flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
 }
 
+/* Flush a range of kernel pages without broadcasting */
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			unsigned long end)