diff mbox series

[2/2] arm64: remove page granularity limitation from KFENCE

Message ID 20210524172606.08dac28d@xhacker.debian (mailing list archive)
State New, archived
Headers show
Series arm64: remove page granularity limitation from KFENCE | expand

Commit Message

Jisheng Zhang May 24, 2021, 9:26 a.m. UTC
KFENCE requires linear map to be mapped at page granularity, so that
it is possible to protect/unprotect single pages in the KFENCE pool.
Currently, if KFENCE is enabled, arm64 maps all pages at page
granularity, which seems like overkill. In fact, we only need to map the
pages in the KFENCE pool itself at page granularity. We achieve this goal
by allocating KFENCE pool before paging_init() so we know the KFENCE
pool address, then we take care to map the pool at page granularity
during map_mem().

Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
---
 arch/arm64/kernel/setup.c |  3 +++
 arch/arm64/mm/mmu.c       | 27 +++++++++++++++++++--------
 2 files changed, 22 insertions(+), 8 deletions(-)

Comments

Ard Biesheuvel May 24, 2021, 6:04 p.m. UTC | #1
On Mon, 24 May 2021 at 19:31, Marco Elver <elver@google.com> wrote:
>
> +Cc Mark
>
> On Mon, 24 May 2021 at 11:26, Jisheng Zhang <Jisheng.Zhang@synaptics.com> wrote:
> >
> > KFENCE requires linear map to be mapped at page granularity, so that
> > it is possible to protect/unprotect single pages in the KFENCE pool.
> > Currently if KFENCE is enabled, arm64 maps all pages at page
> > granularity, it seems overkilled. In fact, we only need to map the
> > pages in KFENCE pool itself at page granularity. We acchieve this goal
> > by allocating KFENCE pool before paging_init() so we know the KFENCE
> > pool address, then we take care to map the pool at page granularity
> > during map_mem().
> >
> > Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>

Could you please share some performance numbers that result from this
optimization?

(There are other reasons why we may need to map the linear region down
to pages unconditionally in the future, so it would be good to have
some solid numbers about the potential impact of doing so)


> > ---
> >  arch/arm64/kernel/setup.c |  3 +++
> >  arch/arm64/mm/mmu.c       | 27 +++++++++++++++++++--------
> >  2 files changed, 22 insertions(+), 8 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> > index 61845c0821d9..51c0d6e8b67b 100644
> > --- a/arch/arm64/kernel/setup.c
> > +++ b/arch/arm64/kernel/setup.c
> > @@ -18,6 +18,7 @@
> >  #include <linux/screen_info.h>
> >  #include <linux/init.h>
> >  #include <linux/kexec.h>
> > +#include <linux/kfence.h>
> >  #include <linux/root_dev.h>
> >  #include <linux/cpu.h>
> >  #include <linux/interrupt.h>
> > @@ -345,6 +346,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
> >
> >         arm64_memblock_init();
> >
> > +       kfence_alloc_pool();
> > +
> >         paging_init();
> >
> >         acpi_table_upgrade();
> > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > index 89b66ef43a0f..12712d31a054 100644
> > --- a/arch/arm64/mm/mmu.c
> > +++ b/arch/arm64/mm/mmu.c
> > @@ -13,6 +13,7 @@
> >  #include <linux/init.h>
> >  #include <linux/ioport.h>
> >  #include <linux/kexec.h>
> > +#include <linux/kfence.h>
> >  #include <linux/libfdt.h>
> >  #include <linux/mman.h>
> >  #include <linux/nodemask.h>
> > @@ -515,10 +516,16 @@ static void __init map_mem(pgd_t *pgdp)
> >          */
> >         BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
> >
> > -       if (rodata_full || crash_mem_map || debug_pagealloc_enabled() ||
> > -           IS_ENABLED(CONFIG_KFENCE))
> > +       if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
> >                 flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
> >
> > +       /*
> > +        * KFENCE requires linear map to be mapped at page granularity, so
> > +        * temporarily skip mapping for __kfence_pool in the following
> > +        * for-loop
> > +        */
> > +       memblock_mark_nomap(__pa(__kfence_pool), KFENCE_POOL_SIZE);
> > +
>
> Did you build this with CONFIG_KFENCE unset? I don't think it builds.
>
> >         /*
> >          * Take care not to create a writable alias for the
> >          * read-only text and rodata sections of the kernel image.
> > @@ -553,6 +560,15 @@ static void __init map_mem(pgd_t *pgdp)
> >         __map_memblock(pgdp, kernel_start, kernel_end,
> >                        PAGE_KERNEL, NO_CONT_MAPPINGS);
> >         memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
> > +
> > +       /*
> > +        * Map the __kfence_pool at page granularity now.
> > +        */
> > +       __map_memblock(pgdp, __pa(__kfence_pool),
> > +                      __pa(__kfence_pool + KFENCE_POOL_SIZE),
> > +                      pgprot_tagged(PAGE_KERNEL),
> > +                      NO_EXEC_MAPPINGS | NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
> > +       memblock_clear_nomap(__pa(__kfence_pool), KFENCE_POOL_SIZE);
> >  }
> >
> >  void mark_rodata_ro(void)
> > @@ -1480,12 +1496,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
> >
> >         VM_BUG_ON(!mhp_range_allowed(start, size, true));
> >
> > -       /*
> > -        * KFENCE requires linear map to be mapped at page granularity, so that
> > -        * it is possible to protect/unprotect single pages in the KFENCE pool.
> > -        */
> > -       if (rodata_full || debug_pagealloc_enabled() ||
> > -           IS_ENABLED(CONFIG_KFENCE))
> > +       if (rodata_full || debug_pagealloc_enabled())
> >                 flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
> >
> >         __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
> > --
> > 2.31.0
> >
> > --
> > You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> > To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com.
> > To view this discussion on the web visit https://groups.google.com/d/msgid/kasan-dev/20210524172606.08dac28d%40xhacker.debian.
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Jisheng Zhang May 25, 2021, 2:15 a.m. UTC | #2
On Mon, 24 May 2021 20:04:53 +0200
Ard Biesheuvel <ardb@kernel.org> wrote:


> 
> 
> On Mon, 24 May 2021 at 19:31, Marco Elver <elver@google.com> wrote:
> >
> > +Cc Mark
> >
> > On Mon, 24 May 2021 at 11:26, Jisheng Zhang <Jisheng.Zhang@synaptics.com> wrote:  
> > >
> > > KFENCE requires linear map to be mapped at page granularity, so that
> > > it is possible to protect/unprotect single pages in the KFENCE pool.
> > > Currently if KFENCE is enabled, arm64 maps all pages at page
> > > granularity, it seems overkilled. In fact, we only need to map the
> > > pages in KFENCE pool itself at page granularity. We acchieve this goal
> > > by allocating KFENCE pool before paging_init() so we know the KFENCE
> > > pool address, then we take care to map the pool at page granularity
> > > during map_mem().
> > >
> > > Signed-off-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>  
> 
> Could you please share some performance numbers that result from this
> optimization?

I don't have performance numbers so far; in fact, I haven't even found a
suitable benchmark tool to show the gain. IMHO the performance gain comes from
two aspects: more efficient use of TLB entries and a shallower page table walk
on a TLB miss. IOW, the benchmark tool used to demonstrate the optimization of
arm64 block and cont support can be used here too. Could you please give some
clues?

> 
> (There are other reasons why we may need to map the linear region down
> to pages unconditionally in the future, so it would be good to have
> some solid numbers about the potential impact of doing so)

I suppose this feature is similar to RODATA_FULL, which can be disabled if
not used. Take RODATA_FULL for example: it can be disabled if all
modules/drivers are built in, and there is no security side effect either.

This series tries to keep block mappings and contiguous hints as much as
possible. In fact, for KFENCE, this is achievable.

PS: Searching the KFENCE patch history, arm64 experts said there's no
safe way to break a block mapping into page mappings on arm64. I suppose this
is true while the system is running, but I'm not sure whether the "no safe way"
conclusion still applies during kernel initialization. Maybe for the arm64
KFENCE case, it's safe to break block mappings as the x86 platform does?


Thanks in advance


> 
> 
> > > ---
> > >  arch/arm64/kernel/setup.c |  3 +++
> > >  arch/arm64/mm/mmu.c       | 27 +++++++++++++++++++--------
> > >  2 files changed, 22 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> > > index 61845c0821d9..51c0d6e8b67b 100644
> > > --- a/arch/arm64/kernel/setup.c
> > > +++ b/arch/arm64/kernel/setup.c
> > > @@ -18,6 +18,7 @@
> > >  #include <linux/screen_info.h>
> > >  #include <linux/init.h>
> > >  #include <linux/kexec.h>
> > > +#include <linux/kfence.h>
> > >  #include <linux/root_dev.h>
> > >  #include <linux/cpu.h>
> > >  #include <linux/interrupt.h>
> > > @@ -345,6 +346,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
> > >
> > >         arm64_memblock_init();
> > >
> > > +       kfence_alloc_pool();
> > > +
> > >         paging_init();
> > >
> > >         acpi_table_upgrade();
> > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > > index 89b66ef43a0f..12712d31a054 100644
> > > --- a/arch/arm64/mm/mmu.c
> > > +++ b/arch/arm64/mm/mmu.c
> > > @@ -13,6 +13,7 @@
> > >  #include <linux/init.h>
> > >  #include <linux/ioport.h>
> > >  #include <linux/kexec.h>
> > > +#include <linux/kfence.h>
> > >  #include <linux/libfdt.h>
> > >  #include <linux/mman.h>
> > >  #include <linux/nodemask.h>
> > > @@ -515,10 +516,16 @@ static void __init map_mem(pgd_t *pgdp)
> > >          */
> > >         BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
> > >
> > > -       if (rodata_full || crash_mem_map || debug_pagealloc_enabled() ||
> > > -           IS_ENABLED(CONFIG_KFENCE))
> > > +       if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
> > >                 flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
> > >
> > > +       /*
> > > +        * KFENCE requires linear map to be mapped at page granularity, so
> > > +        * temporarily skip mapping for __kfence_pool in the following
> > > +        * for-loop
> > > +        */
> > > +       memblock_mark_nomap(__pa(__kfence_pool), KFENCE_POOL_SIZE);
> > > +  
> >
> > Did you build this with CONFIG_KFENCE unset? I don't think it builds.
> >  
> > >         /*
> > >          * Take care not to create a writable alias for the
> > >          * read-only text and rodata sections of the kernel image.
> > > @@ -553,6 +560,15 @@ static void __init map_mem(pgd_t *pgdp)
> > >         __map_memblock(pgdp, kernel_start, kernel_end,
> > >                        PAGE_KERNEL, NO_CONT_MAPPINGS);
> > >         memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
> > > +
> > > +       /*
> > > +        * Map the __kfence_pool at page granularity now.
> > > +        */
> > > +       __map_memblock(pgdp, __pa(__kfence_pool),
> > > +                      __pa(__kfence_pool + KFENCE_POOL_SIZE),
> > > +                      pgprot_tagged(PAGE_KERNEL),
> > > +                      NO_EXEC_MAPPINGS | NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
> > > +       memblock_clear_nomap(__pa(__kfence_pool), KFENCE_POOL_SIZE);
> > >  }
> > >
> > >  void mark_rodata_ro(void)
> > > @@ -1480,12 +1496,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
> > >
> > >         VM_BUG_ON(!mhp_range_allowed(start, size, true));
> > >
> > > -       /*
> > > -        * KFENCE requires linear map to be mapped at page granularity, so that
> > > -        * it is possible to protect/unprotect single pages in the KFENCE pool.
> > > -        */
> > > -       if (rodata_full || debug_pagealloc_enabled() ||
> > > -           IS_ENABLED(CONFIG_KFENCE))
> > > +       if (rodata_full || debug_pagealloc_enabled())
> > >                 flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
> > >
> > >         __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
> > > --
> > > 2.31.0
> > >
> > > --
> > > You received this message because you are subscribed to the Google Groups "kasan-dev" group.
> > > To unsubscribe from this group and stop receiving emails from it, send an email to kasan-dev+unsubscribe@googlegroups.com.
> > > To view this discussion on the web visit https://urldefense.proofpoint.com/v2/url?u=https-3A__groups.google.com_d_msgid_kasan-2Ddev_20210524172606.08dac28d-2540xhacker.debian&d=DwIBaQ&c=7dfBJ8cXbWjhc0BhImu8wQ&r=wlaKTGoVCDxOzHc2QUzpzGEf9oY3eidXlAe3OF1omvo&m=tRid6vgpMdeQY77uEe7j0LTyjaW0r0d36StAfCnvb0A&s=tcnSvCZSGJgJk-0AOpFpY1Aaiq27DeGLpguxNv2M9yE&e= .  
> >
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel@lists.infradead.org
> > https://urldefense.proofpoint.com/v2/url?u=http-3A__lists.infradead.org_mailman_listinfo_linux-2Darm-2Dkernel&d=DwIBaQ&c=7dfBJ8cXbWjhc0BhImu8wQ&r=wlaKTGoVCDxOzHc2QUzpzGEf9oY3eidXlAe3OF1omvo&m=tRid6vgpMdeQY77uEe7j0LTyjaW0r0d36StAfCnvb0A&s=yI-AmsxRY2eoRcsCUfVwogWd3PeVgXO2-3bc6juyiXw&e=
diff mbox series

Patch

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 61845c0821d9..51c0d6e8b67b 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -18,6 +18,7 @@ 
 #include <linux/screen_info.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
+#include <linux/kfence.h>
 #include <linux/root_dev.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
@@ -345,6 +346,8 @@  void __init __no_sanitize_address setup_arch(char **cmdline_p)
 
 	arm64_memblock_init();
 
+	kfence_alloc_pool();
+
 	paging_init();
 
 	acpi_table_upgrade();
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 89b66ef43a0f..12712d31a054 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -13,6 +13,7 @@ 
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/kexec.h>
+#include <linux/kfence.h>
 #include <linux/libfdt.h>
 #include <linux/mman.h>
 #include <linux/nodemask.h>
@@ -515,10 +516,16 @@  static void __init map_mem(pgd_t *pgdp)
 	 */
 	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
 
-	if (rodata_full || crash_mem_map || debug_pagealloc_enabled() ||
-	    IS_ENABLED(CONFIG_KFENCE))
+	if (rodata_full || crash_mem_map || debug_pagealloc_enabled())
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
+	/*
+	 * KFENCE requires linear map to be mapped at page granularity, so
+	 * temporarily skip mapping for __kfence_pool in the following
+	 * for-loop
+	 */
+	memblock_mark_nomap(__pa(__kfence_pool), KFENCE_POOL_SIZE);
+
 	/*
 	 * Take care not to create a writable alias for the
 	 * read-only text and rodata sections of the kernel image.
@@ -553,6 +560,15 @@  static void __init map_mem(pgd_t *pgdp)
 	__map_memblock(pgdp, kernel_start, kernel_end,
 		       PAGE_KERNEL, NO_CONT_MAPPINGS);
 	memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
+
+	/*
+	 * Map the __kfence_pool at page granularity now.
+	 */
+	__map_memblock(pgdp, __pa(__kfence_pool),
+		       __pa(__kfence_pool + KFENCE_POOL_SIZE),
+		       pgprot_tagged(PAGE_KERNEL),
+		       NO_EXEC_MAPPINGS | NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+	memblock_clear_nomap(__pa(__kfence_pool), KFENCE_POOL_SIZE);
 }
 
 void mark_rodata_ro(void)
@@ -1480,12 +1496,7 @@  int arch_add_memory(int nid, u64 start, u64 size,
 
 	VM_BUG_ON(!mhp_range_allowed(start, size, true));
 
-	/*
-	 * KFENCE requires linear map to be mapped at page granularity, so that
-	 * it is possible to protect/unprotect single pages in the KFENCE pool.
-	 */
-	if (rodata_full || debug_pagealloc_enabled() ||
-	    IS_ENABLED(CONFIG_KFENCE))
+	if (rodata_full || debug_pagealloc_enabled())
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
 	__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),