diff mbox series

[v10] mm,kfence: decouple kfence from page granularity mapping judgement

Message ID 1678969110-11941-1-git-send-email-quic_zhenhuah@quicinc.com (mailing list archive)
State New, archived
Headers show
Series [v10] mm,kfence: decouple kfence from page granularity mapping judgement | expand

Commit Message

Zhenhua Huang March 16, 2023, 12:18 p.m. UTC
Kfence only needs its pool to be mapped as page granularity, if it is
inited early. Previous judgement was a bit over protected. From [1], Mark
suggested to "just map the KFENCE region a page granularity". So I
decouple it from judgement and do page granularity mapping for kfence
pool only. Need to be noticed that late init of kfence pool still requires
page granularity mapping.

Page granularity mapping in theory cost more(2M per 1GB) memory on arm64
platform. Like what I've tested on QEMU(emulated 1GB RAM) with
gki_defconfig, also turning off rodata protection:
Before:
[root@liebao ]# cat /proc/meminfo
MemTotal:         999484 kB
After:
[root@liebao ]# cat /proc/meminfo
MemTotal:        1001480 kB

To implement this, also relocate the kfence pool allocation before the
linear mapping setting up, arm64_kfence_alloc_pool is to allocate phys
addr, __kfence_pool is to be set after linear mapping set up.

LINK: [1] https://lore.kernel.org/linux-arm-kernel/Y+IsdrvDNILA59UN@FVFF77S0Q05N/
Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
---
 arch/arm64/include/asm/kfence.h | 10 +++++++
 arch/arm64/mm/mmu.c             | 61 +++++++++++++++++++++++++++++++++++++++++
 arch/arm64/mm/pageattr.c        |  7 +++--
 mm/kfence/core.c                |  4 +++
 4 files changed, 80 insertions(+), 2 deletions(-)

Comments

Kefeng Wang March 16, 2023, 2:15 p.m. UTC | #1
On 2023/3/16 20:18, Zhenhua Huang wrote:
> Kfence only needs its pool to be mapped as page granularity, if it is
> inited early. Previous judgement was a bit over protected. From [1], Mark
> suggested to "just map the KFENCE region a page granularity". So I
> decouple it from judgement and do page granularity mapping for kfence
> pool only. Need to be noticed that late init of kfence pool still requires
> page granularity mapping.
> 
> Page granularity mapping in theory cost more(2M per 1GB) memory on arm64
> platform. Like what I've tested on QEMU(emulated 1GB RAM) with
> gki_defconfig, also turning off rodata protection:
> Before:
> [root@liebao ]# cat /proc/meminfo
> MemTotal:         999484 kB
> After:
> [root@liebao ]# cat /proc/meminfo
> MemTotal:        1001480 kB
> 
> To implement this, also relocate the kfence pool allocation before the
> linear mapping setting up, arm64_kfence_alloc_pool is to allocate phys
> addr, __kfence_pool is to be set after linear mapping set up.
> 

A few little comments,


> LINK: [1] https://lore.kernel.org/linux-arm-kernel/Y+IsdrvDNILA59UN@FVFF77S0Q05N/
> Suggested-by: Mark Rutland <mark.rutland@arm.com>
> Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
> ---
>   arch/arm64/include/asm/kfence.h | 10 +++++++
>   arch/arm64/mm/mmu.c             | 61 +++++++++++++++++++++++++++++++++++++++++
>   arch/arm64/mm/pageattr.c        |  7 +++--
>   mm/kfence/core.c                |  4 +++
>   4 files changed, 80 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
> index aa855c6..a81937f 100644
> --- a/arch/arm64/include/asm/kfence.h
> +++ b/arch/arm64/include/asm/kfence.h
> @@ -19,4 +19,14 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect)
>   	return true;
>   }
>   
> +#ifdef CONFIG_KFENCE
> +extern bool kfence_early_init;
> +static inline bool arm64_kfence_can_set_direct_map(void)
> +{
> +	return !kfence_early_init;
> +}
> +#else /* CONFIG_KFENCE */
> +static inline bool arm64_kfence_can_set_direct_map(void) { return false; }
> +#endif /* CONFIG_KFENCE */
> +
>   #endif /* __ASM_KFENCE_H */
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index ae25524d..aaf1801 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -24,6 +24,7 @@
>   #include <linux/mm.h>
>   #include <linux/vmalloc.h>
>   #include <linux/set_memory.h>
> +#include <linux/kfence.h>
>   
>   #include <asm/barrier.h>
>   #include <asm/cputype.h>
> @@ -38,6 +39,7 @@
>   #include <asm/ptdump.h>
>   #include <asm/tlbflush.h>
>   #include <asm/pgalloc.h>
> +#include <asm/kfence.h>
>   
>   #define NO_BLOCK_MAPPINGS	BIT(0)
>   #define NO_CONT_MAPPINGS	BIT(1)
> @@ -521,12 +523,67 @@ static int __init enable_crash_mem_map(char *arg)
>   }
>   early_param("crashkernel", enable_crash_mem_map);
>   
> +#ifdef CONFIG_KFENCE
> +
> +bool kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;

maybe add __ro_after_init

> +
> +/* early_param() will be parsed before map_mem() below. */
> +static int __init parse_kfence_early_init(char *arg)
> +{
> +	int val;
> +
> +	if (get_option(&arg, &val))
> +		kfence_early_init = !!val;
> +	return 0;
> +}
> +early_param("kfence.sample_interval", parse_kfence_early_init);
> +
> +static phys_addr_t arm64_kfence_alloc_pool(void)

and __init

> +{
> +	phys_addr_t kfence_pool;
> +
> +	if (!kfence_early_init)
> +		return 0;
> +
> +	kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
> +	if (!kfence_pool) {
> +		pr_err("failed to allocate kfence pool\n");
> +		kfence_early_init = false;
> +		return 0;
> +	}
> +
> +	/* Temporarily mark as NOMAP. */
> +	memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +
> +	return kfence_pool;
> +}
> +
> +static void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp)

Ditto.

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>

> +{
> +	if (!kfence_pool)
> +		return;
> +
> +	/* KFENCE pool needs page-level mapping. */
> +	__map_memblock(pgdp, kfence_pool, kfence_pool + KFENCE_POOL_SIZE,
> +			pgprot_tagged(PAGE_KERNEL),
> +			NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
> +	memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
> +	__kfence_pool = phys_to_virt(kfence_pool);
> +}
> +#else /* CONFIG_KFENCE */
> +
> +static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; }
> +static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { }
> +
> +#endif /* CONFIG_KFENCE */
> +
>   static void __init map_mem(pgd_t *pgdp)
>   {
>   	static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
>   	phys_addr_t kernel_start = __pa_symbol(_stext);
>   	phys_addr_t kernel_end = __pa_symbol(__init_begin);
>   	phys_addr_t start, end;
> +	phys_addr_t early_kfence_pool;
>   	int flags = NO_EXEC_MAPPINGS;
>   	u64 i;
>   
> @@ -539,6 +596,8 @@ static void __init map_mem(pgd_t *pgdp)
>   	 */
>   	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
>   
> +	early_kfence_pool = arm64_kfence_alloc_pool();
> +
>   	if (can_set_direct_map())
>   		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
>   
> @@ -604,6 +663,8 @@ static void __init map_mem(pgd_t *pgdp)
>   		}
>   	}
>   #endif
> +
> +	arm64_kfence_map_pool(early_kfence_pool, pgdp);
>   }
>   
>   void mark_rodata_ro(void)
> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
> index debdecf..dd1291a 100644
> --- a/arch/arm64/mm/pageattr.c
> +++ b/arch/arm64/mm/pageattr.c
> @@ -11,6 +11,7 @@
>   #include <asm/cacheflush.h>
>   #include <asm/set_memory.h>
>   #include <asm/tlbflush.h>
> +#include <asm/kfence.h>
>   
>   struct page_change_data {
>   	pgprot_t set_mask;
> @@ -22,12 +23,14 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED
>   bool can_set_direct_map(void)
>   {
>   	/*
> -	 * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be
> +	 * rodata_full and DEBUG_PAGEALLOC require linear map to be
>   	 * mapped at page granularity, so that it is possible to
>   	 * protect/unprotect single pages.
> +	 *
> +	 * KFENCE pool requires page-granular mapping if initialized late.
>   	 */
>   	return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
> -		IS_ENABLED(CONFIG_KFENCE);
> +		arm64_kfence_can_set_direct_map();
>   }
>   
>   static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
> diff --git a/mm/kfence/core.c b/mm/kfence/core.c
> index 1417888..bf2f194c 100644
> --- a/mm/kfence/core.c
> +++ b/mm/kfence/core.c
> @@ -824,6 +824,10 @@ void __init kfence_alloc_pool(void)
>   	if (!kfence_sample_interval)
>   		return;
>   
> +	/* if the pool has already been initialized by arch, skip the below. */
> +	if (__kfence_pool)
> +		return;
> +
>   	__kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
>   
>   	if (!__kfence_pool)
Zhenhua Huang March 16, 2023, 3:10 p.m. UTC | #2
On 2023/3/16 22:15, Kefeng Wang wrote:
> 
> 
> On 2023/3/16 20:18, Zhenhua Huang wrote:
>> Kfence only needs its pool to be mapped as page granularity, if it is
>> inited early. Previous judgement was a bit over protected. From [1], Mark
>> suggested to "just map the KFENCE region a page granularity". So I
>> decouple it from judgement and do page granularity mapping for kfence
>> pool only. Need to be noticed that late init of kfence pool still 
>> requires
>> page granularity mapping.
>>
>> Page granularity mapping in theory cost more(2M per 1GB) memory on arm64
>> platform. Like what I've tested on QEMU(emulated 1GB RAM) with
>> gki_defconfig, also turning off rodata protection:
>> Before:
>> [root@liebao ]# cat /proc/meminfo
>> MemTotal:         999484 kB
>> After:
>> [root@liebao ]# cat /proc/meminfo
>> MemTotal:        1001480 kB
>>
>> To implement this, also relocate the kfence pool allocation before the
>> linear mapping setting up, arm64_kfence_alloc_pool is to allocate phys
>> addr, __kfence_pool is to be set after linear mapping set up.
>>
> 
> A few little comments,

Thanks Kefeng. Addressed your comments in latest patch.

Thanks,
Zhenhua

> 
> 
>> LINK: [1] 
>> https://lore.kernel.org/linux-arm-kernel/Y+IsdrvDNILA59UN@FVFF77S0Q05N/
>> Suggested-by: Mark Rutland <mark.rutland@arm.com>
>> Signed-off-by: Zhenhua Huang <quic_zhenhuah@quicinc.com>
>> ---
>>   arch/arm64/include/asm/kfence.h | 10 +++++++
>>   arch/arm64/mm/mmu.c             | 61 
>> +++++++++++++++++++++++++++++++++++++++++
>>   arch/arm64/mm/pageattr.c        |  7 +++--
>>   mm/kfence/core.c                |  4 +++
>>   4 files changed, 80 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kfence.h 
>> b/arch/arm64/include/asm/kfence.h
>> index aa855c6..a81937f 100644
>> --- a/arch/arm64/include/asm/kfence.h
>> +++ b/arch/arm64/include/asm/kfence.h
>> @@ -19,4 +19,14 @@ static inline bool kfence_protect_page(unsigned 
>> long addr, bool protect)
>>       return true;
>>   }
>> +#ifdef CONFIG_KFENCE
>> +extern bool kfence_early_init;
>> +static inline bool arm64_kfence_can_set_direct_map(void)
>> +{
>> +    return !kfence_early_init;
>> +}
>> +#else /* CONFIG_KFENCE */
>> +static inline bool arm64_kfence_can_set_direct_map(void) { return 
>> false; }
>> +#endif /* CONFIG_KFENCE */
>> +
>>   #endif /* __ASM_KFENCE_H */
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index ae25524d..aaf1801 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -24,6 +24,7 @@
>>   #include <linux/mm.h>
>>   #include <linux/vmalloc.h>
>>   #include <linux/set_memory.h>
>> +#include <linux/kfence.h>
>>   #include <asm/barrier.h>
>>   #include <asm/cputype.h>
>> @@ -38,6 +39,7 @@
>>   #include <asm/ptdump.h>
>>   #include <asm/tlbflush.h>
>>   #include <asm/pgalloc.h>
>> +#include <asm/kfence.h>
>>   #define NO_BLOCK_MAPPINGS    BIT(0)
>>   #define NO_CONT_MAPPINGS    BIT(1)
>> @@ -521,12 +523,67 @@ static int __init enable_crash_mem_map(char *arg)
>>   }
>>   early_param("crashkernel", enable_crash_mem_map);
>> +#ifdef CONFIG_KFENCE
>> +
>> +bool kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
> 
> maybe add __ro_after_init
> 
>> +
>> +/* early_param() will be parsed before map_mem() below. */
>> +static int __init parse_kfence_early_init(char *arg)
>> +{
>> +    int val;
>> +
>> +    if (get_option(&arg, &val))
>> +        kfence_early_init = !!val;
>> +    return 0;
>> +}
>> +early_param("kfence.sample_interval", parse_kfence_early_init);
>> +
>> +static phys_addr_t arm64_kfence_alloc_pool(void)
> 
> and __init
> 
>> +{
>> +    phys_addr_t kfence_pool;
>> +
>> +    if (!kfence_early_init)
>> +        return 0;
>> +
>> +    kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
>> +    if (!kfence_pool) {
>> +        pr_err("failed to allocate kfence pool\n");
>> +        kfence_early_init = false;
>> +        return 0;
>> +    }
>> +
>> +    /* Temporarily mark as NOMAP. */
>> +    memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
>> +
>> +    return kfence_pool;
>> +}
>> +
>> +static void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp)
> 
> Ditto.
> 
> Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> 
>> +{
>> +    if (!kfence_pool)
>> +        return;
>> +
>> +    /* KFENCE pool needs page-level mapping. */
>> +    __map_memblock(pgdp, kfence_pool, kfence_pool + KFENCE_POOL_SIZE,
>> +            pgprot_tagged(PAGE_KERNEL),
>> +            NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
>> +    memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
>> +    __kfence_pool = phys_to_virt(kfence_pool);
>> +}
>> +#else /* CONFIG_KFENCE */
>> +
>> +static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; }
>> +static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, 
>> pgd_t *pgdp) { }
>> +
>> +#endif /* CONFIG_KFENCE */
>> +
>>   static void __init map_mem(pgd_t *pgdp)
>>   {
>>       static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
>>       phys_addr_t kernel_start = __pa_symbol(_stext);
>>       phys_addr_t kernel_end = __pa_symbol(__init_begin);
>>       phys_addr_t start, end;
>> +    phys_addr_t early_kfence_pool;
>>       int flags = NO_EXEC_MAPPINGS;
>>       u64 i;
>> @@ -539,6 +596,8 @@ static void __init map_mem(pgd_t *pgdp)
>>        */
>>       BUILD_BUG_ON(pgd_index(direct_map_end - 1) == 
>> pgd_index(direct_map_end));
>> +    early_kfence_pool = arm64_kfence_alloc_pool();
>> +
>>       if (can_set_direct_map())
>>           flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
>> @@ -604,6 +663,8 @@ static void __init map_mem(pgd_t *pgdp)
>>           }
>>       }
>>   #endif
>> +
>> +    arm64_kfence_map_pool(early_kfence_pool, pgdp);
>>   }
>>   void mark_rodata_ro(void)
>> diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
>> index debdecf..dd1291a 100644
>> --- a/arch/arm64/mm/pageattr.c
>> +++ b/arch/arm64/mm/pageattr.c
>> @@ -11,6 +11,7 @@
>>   #include <asm/cacheflush.h>
>>   #include <asm/set_memory.h>
>>   #include <asm/tlbflush.h>
>> +#include <asm/kfence.h>
>>   struct page_change_data {
>>       pgprot_t set_mask;
>> @@ -22,12 +23,14 @@ bool rodata_full __ro_after_init = 
>> IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED
>>   bool can_set_direct_map(void)
>>   {
>>       /*
>> -     * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be
>> +     * rodata_full and DEBUG_PAGEALLOC require linear map to be
>>        * mapped at page granularity, so that it is possible to
>>        * protect/unprotect single pages.
>> +     *
>> +     * KFENCE pool requires page-granular mapping if initialized late.
>>        */
>>       return (rodata_enabled && rodata_full) || 
>> debug_pagealloc_enabled() ||
>> -        IS_ENABLED(CONFIG_KFENCE);
>> +        arm64_kfence_can_set_direct_map();
>>   }
>>   static int change_page_range(pte_t *ptep, unsigned long addr, void 
>> *data)
>> diff --git a/mm/kfence/core.c b/mm/kfence/core.c
>> index 1417888..bf2f194c 100644
>> --- a/mm/kfence/core.c
>> +++ b/mm/kfence/core.c
>> @@ -824,6 +824,10 @@ void __init kfence_alloc_pool(void)
>>       if (!kfence_sample_interval)
>>           return;
>> +    /* if the pool has already been initialized by arch, skip the 
>> below. */
>> +    if (__kfence_pool)
>> +        return;
>> +
>>       __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
>>       if (!__kfence_pool)
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
index aa855c6..a81937f 100644
--- a/arch/arm64/include/asm/kfence.h
+++ b/arch/arm64/include/asm/kfence.h
@@ -19,4 +19,14 @@  static inline bool kfence_protect_page(unsigned long addr, bool protect)
 	return true;
 }
 
+#ifdef CONFIG_KFENCE
+extern bool kfence_early_init;
+static inline bool arm64_kfence_can_set_direct_map(void)
+{
+	return !kfence_early_init;
+}
+#else /* CONFIG_KFENCE */
+static inline bool arm64_kfence_can_set_direct_map(void) { return false; }
+#endif /* CONFIG_KFENCE */
+
 #endif /* __ASM_KFENCE_H */
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ae25524d..aaf1801 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -24,6 +24,7 @@ 
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/set_memory.h>
+#include <linux/kfence.h>
 
 #include <asm/barrier.h>
 #include <asm/cputype.h>
@@ -38,6 +39,7 @@ 
 #include <asm/ptdump.h>
 #include <asm/tlbflush.h>
 #include <asm/pgalloc.h>
+#include <asm/kfence.h>
 
 #define NO_BLOCK_MAPPINGS	BIT(0)
 #define NO_CONT_MAPPINGS	BIT(1)
@@ -521,12 +523,67 @@  static int __init enable_crash_mem_map(char *arg)
 }
 early_param("crashkernel", enable_crash_mem_map);
 
+#ifdef CONFIG_KFENCE
+
+bool kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
+
+/* early_param() will be parsed before map_mem() below. */
+static int __init parse_kfence_early_init(char *arg)
+{
+	int val;
+
+	if (get_option(&arg, &val))
+		kfence_early_init = !!val;
+	return 0;
+}
+early_param("kfence.sample_interval", parse_kfence_early_init);
+
+static phys_addr_t arm64_kfence_alloc_pool(void)
+{
+	phys_addr_t kfence_pool;
+
+	if (!kfence_early_init)
+		return 0;
+
+	kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+	if (!kfence_pool) {
+		pr_err("failed to allocate kfence pool\n");
+		kfence_early_init = false;
+		return 0;
+	}
+
+	/* Temporarily mark as NOMAP. */
+	memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
+
+	return kfence_pool;
+}
+
+static void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp)
+{
+	if (!kfence_pool)
+		return;
+
+	/* KFENCE pool needs page-level mapping. */
+	__map_memblock(pgdp, kfence_pool, kfence_pool + KFENCE_POOL_SIZE,
+			pgprot_tagged(PAGE_KERNEL),
+			NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
+	memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+	__kfence_pool = phys_to_virt(kfence_pool);
+}
+#else /* CONFIG_KFENCE */
+
+static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; }
+static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { }
+
+#endif /* CONFIG_KFENCE */
+
 static void __init map_mem(pgd_t *pgdp)
 {
 	static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
 	phys_addr_t kernel_start = __pa_symbol(_stext);
 	phys_addr_t kernel_end = __pa_symbol(__init_begin);
 	phys_addr_t start, end;
+	phys_addr_t early_kfence_pool;
 	int flags = NO_EXEC_MAPPINGS;
 	u64 i;
 
@@ -539,6 +596,8 @@  static void __init map_mem(pgd_t *pgdp)
 	 */
 	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
 
+	early_kfence_pool = arm64_kfence_alloc_pool();
+
 	if (can_set_direct_map())
 		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
@@ -604,6 +663,8 @@  static void __init map_mem(pgd_t *pgdp)
 		}
 	}
 #endif
+
+	arm64_kfence_map_pool(early_kfence_pool, pgdp);
 }
 
 void mark_rodata_ro(void)
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index debdecf..dd1291a 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -11,6 +11,7 @@ 
 #include <asm/cacheflush.h>
 #include <asm/set_memory.h>
 #include <asm/tlbflush.h>
+#include <asm/kfence.h>
 
 struct page_change_data {
 	pgprot_t set_mask;
@@ -22,12 +23,14 @@  bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED
 bool can_set_direct_map(void)
 {
 	/*
-	 * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be
+	 * rodata_full and DEBUG_PAGEALLOC require linear map to be
 	 * mapped at page granularity, so that it is possible to
 	 * protect/unprotect single pages.
+	 *
+	 * KFENCE pool requires page-granular mapping if initialized late.
 	 */
 	return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() ||
-		IS_ENABLED(CONFIG_KFENCE);
+		arm64_kfence_can_set_direct_map();
 }
 
 static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 1417888..bf2f194c 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -824,6 +824,10 @@  void __init kfence_alloc_pool(void)
 	if (!kfence_sample_interval)
 		return;
 
+	/* if the pool has already been initialized by arch, skip the below. */
+	if (__kfence_pool)
+		return;
+
 	__kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
 
 	if (!__kfence_pool)