@@ -22,6 +22,7 @@
#define CPU_STUCK_REASON_52_BIT_VA (UL(1) << CPU_STUCK_REASON_SHIFT)
#define CPU_STUCK_REASON_NO_GRAN (UL(2) << CPU_STUCK_REASON_SHIFT)
+#define CPU_STUCK_REASON_NO_PGTABLE_MEM (UL(3) << CPU_STUCK_REASON_SHIFT)
#ifndef __ASSEMBLY__
@@ -89,7 +89,8 @@ SYM_CODE_START(primary_entry)
mov sp, x1
mov x29, xzr
adrp x0, init_idmap_pg_dir
- mov x1, xzr
+ adrp x1, init_idmap_pg_end
+ mov x2, xzr
bl __pi_create_init_idmap
/*
@@ -52,6 +52,7 @@ PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus);
#endif
PROVIDE(__pi__ctype = _ctype);
PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed);
+PROVIDE(__pi___early_cpu_boot_status = __early_cpu_boot_status);
PROVIDE(__pi_init_idmap_pg_dir = init_idmap_pg_dir);
PROVIDE(__pi_init_idmap_pg_end = init_idmap_pg_end);
@@ -68,6 +69,8 @@ PROVIDE(__pi___inittext_end = __inittext_end);
PROVIDE(__pi___initdata_begin = __initdata_begin);
PROVIDE(__pi___initdata_end = __initdata_end);
PROVIDE(__pi__data = _data);
+PROVIDE(__pi___mmuoff_data_start = __mmuoff_data_start);
+PROVIDE(__pi___mmuoff_data_end = __mmuoff_data_end);
PROVIDE(__pi___bss_start = __bss_start);
PROVIDE(__pi__end = _end);
@@ -20,11 +20,11 @@ extern const u8 __eh_frame_start[], __eh_frame_end[];
extern void idmap_cpu_replace_ttbr1(void *pgdir);
-static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
- void *start, void *end, pgprot_t prot,
- bool may_use_cont, int root_level)
+static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 limit,
+ u64 va_offset, void *start, void *end,
+ pgprot_t prot, bool may_use_cont, int root_level)
{
- map_range(pgd, ((u64)start + va_offset) & ~PAGE_OFFSET,
+ map_range(pgd, limit, ((u64)start + va_offset) & ~PAGE_OFFSET,
((u64)end + va_offset) & ~PAGE_OFFSET, (u64)start,
prot, root_level, (pte_t *)pg_dir, may_use_cont, 0);
}
@@ -32,7 +32,7 @@ static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
static void __init unmap_segment(pgd_t *pg_dir, u64 va_offset, void *start,
void *end, int root_level)
{
- map_segment(pg_dir, NULL, va_offset, start, end, __pgprot(0),
+ map_segment(pg_dir, NULL, 0, va_offset, start, end, __pgprot(0),
false, root_level);
}
@@ -41,6 +41,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
u64 pgdp = (u64)init_pg_dir + PAGE_SIZE;
+ u64 limit = (u64)init_pg_end;
pgprot_t text_prot = PAGE_KERNEL_ROX;
pgprot_t data_prot = PAGE_KERNEL;
pgprot_t prot;
@@ -78,16 +79,16 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
twopass |= enable_scs;
prot = twopass ? data_prot : text_prot;
- map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
+ map_segment(init_pg_dir, &pgdp, limit, va_offset, _stext, _etext, prot,
!twopass, root_level);
- map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
+ map_segment(init_pg_dir, &pgdp, limit, va_offset, __start_rodata,
__inittext_begin, data_prot, false, root_level);
- map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
+ map_segment(init_pg_dir, &pgdp, limit, va_offset, __inittext_begin,
__inittext_end, prot, false, root_level);
- map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
+ map_segment(init_pg_dir, &pgdp, limit, va_offset, __initdata_begin,
__initdata_end, data_prot, false, root_level);
- map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
- true, root_level);
+ map_segment(init_pg_dir, &pgdp, limit, va_offset, _data, _end,
+ data_prot, true, root_level);
dsb(ishst);
idmap_cpu_replace_ttbr1(init_pg_dir);
@@ -120,9 +121,9 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
* Remap these segments with different permissions
* No new page table allocations should be needed
*/
- map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
+ map_segment(init_pg_dir, NULL, 0, va_offset, _stext, _etext,
text_prot, true, root_level);
- map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
+ map_segment(init_pg_dir, NULL, 0, va_offset, __inittext_begin,
__inittext_end, text_prot, false, root_level);
}
@@ -164,7 +165,7 @@ static void __init remap_idmap_for_lpa2(void)
* LPA2 compatible fashion, and update the initial ID map while running
* from that.
*/
- create_init_idmap(init_pg_dir, mask);
+ create_init_idmap(init_pg_dir, init_pg_end, mask);
dsb(ishst);
set_ttbr0_for_lpa2((u64)init_pg_dir);
@@ -175,7 +176,7 @@ static void __init remap_idmap_for_lpa2(void)
memset(init_idmap_pg_dir, 0,
(u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
- create_init_idmap(init_idmap_pg_dir, mask);
+ create_init_idmap(init_idmap_pg_dir, init_idmap_pg_end, mask);
dsb(ishst);
/* switch back to the updated initial ID map */
@@ -188,6 +189,7 @@ static void __init remap_idmap_for_lpa2(void)
static void __init map_fdt(u64 fdt)
{
static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
+ u64 limit = (u64)&ptes[INIT_IDMAP_FDT_SIZE];
u64 efdt = fdt + MAX_FDT_SIZE;
u64 ptep = (u64)ptes;
@@ -195,7 +197,8 @@ static void __init map_fdt(u64 fdt)
* Map up to MAX_FDT_SIZE bytes, but avoid overlap with
* the kernel image.
*/
- map_range(&ptep, fdt, (u64)_text > fdt ? min((u64)_text, efdt) : efdt,
+ map_range(&ptep, limit, fdt,
+ (u64)_text > fdt ? min((u64)_text, efdt) : efdt,
fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
(pte_t *)init_idmap_pg_dir, false, 0);
dsb(ishst);
@@ -11,11 +11,36 @@
#include "pi.h"
+static void __init mmuoff_data_clean(void) /* Cache maintenance over __mmuoff_data_start..__mmuoff_data_end, chosen by the SCTLR_ELx_C bit of sctlr_el1 */
+{
+ bool cache_ena = !!(read_sysreg(sctlr_el1) & SCTLR_ELx_C); /* true when SCTLR_ELx_C is set in sctlr_el1 */
+
+ if (cache_ena) /* bit set: call dcache_clean_poc() on the range */
+ dcache_clean_poc((unsigned long)__mmuoff_data_start,
+ (unsigned long)__mmuoff_data_end);
+ else /* bit clear: call dcache_inval_poc() on the same range instead */
+ dcache_inval_poc((unsigned long)__mmuoff_data_start,
+ (unsigned long)__mmuoff_data_end);
+}
+
+static void __init report_cpu_stuck(long val) /* Publish val (a CPU_STUCK_REASON_* code) in __early_cpu_boot_status, then park this CPU */
+{
+ val |= CPU_STUCK_IN_KERNEL; /* combine the caller's reason code with the stuck-in-kernel flag */
+ WRITE_ONCE(__early_cpu_boot_status, val);
+
+ /* Ensure the visibility of the status update */
+ dsb(ishst);
+ mmuoff_data_clean(); /* NOTE(review): presumably __early_cpu_boot_status lies inside the mmuoff data range - confirm */
+
+ cpu_park_loop(); /* NOTE(review): assumed never to return; map_range() carries on allocating if it does - confirm */
+}
+
/**
* map_range - Map a contiguous range of physical pages into virtual memory
*
* @pte: Address of physical pointer to array of pages to
* allocate page tables from
+ * @limit: Physical address of end of page allocation array
* @start: Virtual address of the start of the range
* @end: Virtual address of the end of the range (exclusive)
* @pa: Physical address of the start of the range
@@ -26,8 +51,9 @@
* @va_offset: Offset between a physical page and its current mapping
* in the VA space
*/
-void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
- int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
+void __init map_range(u64 *pte, u64 limit, u64 start, u64 end, u64 pa,
+ pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
+ u64 va_offset)
{
u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
u64 protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
@@ -56,11 +82,18 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
* table mapping if necessary and recurse.
*/
if (pte_none(*tbl)) {
+ u64 size = PTRS_PER_PTE * sizeof(pte_t);
+
+ if (*pte + size > limit) {
+ report_cpu_stuck(
+ CPU_STUCK_REASON_NO_PGTABLE_MEM);
+ }
+
*tbl = __pte(__phys_to_pte_val(*pte) |
PMD_TYPE_TABLE | PMD_TABLE_UXN);
- *pte += PTRS_PER_PTE * sizeof(pte_t);
+ *pte += size;
}
- map_range(pte, start, next, pa, prot, level + 1,
+ map_range(pte, limit, start, next, pa, prot, level + 1,
(pte_t *)(__pte_to_phys(*tbl) + va_offset),
may_use_cont, va_offset);
} else {
@@ -87,7 +120,8 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
}
}
-asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
+asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pgd_t *pg_end,
+ pteval_t clrmask)
{
u64 ptep = (u64)pg_dir + PAGE_SIZE;
pgprot_t text_prot = PAGE_KERNEL_ROX;
@@ -96,10 +130,12 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
pgprot_val(text_prot) &= ~clrmask;
pgprot_val(data_prot) &= ~clrmask;
- map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
- text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
- map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
- data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+ map_range(&ptep, (u64)pg_end, (u64)_stext, (u64)__initdata_begin,
+ (u64)_stext, text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir,
+ false, 0);
+ map_range(&ptep, (u64)pg_end, (u64)__initdata_begin, (u64)_end,
+ (u64)__initdata_begin, data_prot, IDMAP_ROOT_LEVEL,
+ (pte_t *)pg_dir, false, 0);
return ptep;
}
@@ -28,9 +28,9 @@ u64 kaslr_early_init(void *fdt, int chosen);
void relocate_kernel(u64 offset);
int scs_patch(const u8 eh_frame[], int size);
-void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
+void map_range(u64 *pgd, u64 limit, u64 start, u64 end, u64 pa, pgprot_t prot,
int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
-asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
+asmlinkage u64 create_init_idmap(pgd_t *pgd, pgd_t *pg_end, pteval_t clrmask);
@@ -773,8 +773,9 @@ static void __init declare_kernel_vmas(void)
declare_vma(&vmlinux_seg[4], _data, _end, 0);
}
-void __pi_map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
- int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
+void __pi_map_range(u64 *pgd, u64 limit, u64 start, u64 end, u64 pa,
+ pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
+ u64 va_offset);
static u8 idmap_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init,
kpti_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
@@ -784,8 +785,9 @@ static void __init create_idmap(void)
u64 start = __pa_symbol(__idmap_text_start);
u64 end = __pa_symbol(__idmap_text_end);
u64 ptep = __pa_symbol(idmap_ptes);
+ u64 limit = __pa_symbol(&idmap_ptes[IDMAP_LEVELS - 1][0]);
- __pi_map_range(&ptep, start, end, start, PAGE_KERNEL_ROX,
+ __pi_map_range(&ptep, limit, start, end, start, PAGE_KERNEL_ROX,
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
@@ -798,8 +800,10 @@ static void __init create_idmap(void)
* of its synchronization flag in the ID map.
*/
ptep = __pa_symbol(kpti_ptes);
- __pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
- IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
+ limit = __pa_symbol(&kpti_ptes[IDMAP_LEVELS - 1][0]);
+ __pi_map_range(&ptep, limit, pa, pa + sizeof(u32), pa,
+ PAGE_KERNEL, IDMAP_ROOT_LEVEL,
+ (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
}
}
Early pgtables (e.g. init_idmap_pg_dir, init_pg_dir, etc.) are allocated
from statically defined memory blocks within the kernel image that are
sized for the calculated worst-case requirements. Let's make the allocator
aware of the block's limit so that it can detect any overflow. This boils
down to passing the limit of the memory block to map_range(), so let's add
it as a parameter. If an overflow is detected, report the error to
__early_cpu_boot_status and park the CPU.

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---

***NOTE***
Any confused maintainers may want to read the cover note here for context:
https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@arm.com/

 arch/arm64/include/asm/smp.h      |  1 +
 arch/arm64/kernel/head.S          |  3 +-
 arch/arm64/kernel/image-vars.h    |  3 ++
 arch/arm64/kernel/pi/map_kernel.c | 35 +++++++++++---------
 arch/arm64/kernel/pi/map_range.c  | 54 +++++++++++++++++++++++++------
 arch/arm64/kernel/pi/pi.h         |  4 +--
 arch/arm64/mm/mmu.c               | 14 +++++---
 7 files changed, 81 insertions(+), 33 deletions(-)