@@ -581,11 +581,17 @@ alternative_endif
* but we have to add an offset so that the TTBR1 address corresponds with the
* pgdir entry that covers the lowest 48-bit addressable VA.
*
+ * Note that this trick is only used for LVA/64k pages - LPA2/4k pages uses an
+ * additional paging level, and on LPA2/16k pages, we would end up with a root
+ * level table with only 2 entries, which is suboptimal in terms of TLB
+ * utilization, so there we fall back to 47 bits of translation if LPA2 is not
+ * supported.
+ *
* orr is used as it can cover the immediate value (and is idempotent).
* ttbr: Value of ttbr to set, modified.
*/
.macro offset_ttbr1, ttbr, tmp
-#ifdef CONFIG_ARM64_VA_BITS_52
+#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2)
mrs \tmp, tcr_el1
and \tmp, \tmp, #TCR_T1SZ_MASK
cmp \tmp, #TCR_T1SZ(VA_BITS_MIN)
@@ -1008,6 +1008,24 @@ static inline bool cpu_has_lva(void)
ID_AA64MMFR2_EL1_VARange_SHIFT);
}
+static inline bool cpu_has_lpa2(void)
+{
+#ifdef CONFIG_ARM64_LPA2
+ u64 mmfr0;
+ int feat;
+
+ mmfr0 = read_sysreg(id_aa64mmfr0_el1);
+ mmfr0 &= ~id_aa64mmfr0_override.mask;
+ mmfr0 |= id_aa64mmfr0_override.val;
+ feat = cpuid_feature_extract_signed_field(mmfr0,
+ ID_AA64MMFR0_EL1_TGRAN_SHIFT);
+
+ return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
+#else
+ return false;
+#endif
+}
+
#endif /* __ASSEMBLY__ */
#endif
@@ -54,7 +54,11 @@
#define FIXADDR_TOP (-UL(SZ_8M))
#if VA_BITS > 48
+#ifdef CONFIG_ARM64_16K_PAGES
+#define VA_BITS_MIN (47)
+#else
#define VA_BITS_MIN (48)
+#endif
#else
#define VA_BITS_MIN (VA_BITS)
#endif
@@ -89,6 +89,7 @@ SYM_CODE_START(primary_entry)
mov sp, x1
mov x29, xzr
adrp x0, init_idmap_pg_dir
+ mov x1, xzr
bl __pi_create_init_idmap
/*
@@ -473,9 +474,16 @@ SYM_FUNC_END(__enable_mmu)
#ifdef CONFIG_ARM64_VA_BITS_52
SYM_FUNC_START(__cpu_secondary_check52bitva)
+#ifndef CONFIG_ARM64_LPA2
mrs_s x0, SYS_ID_AA64MMFR2_EL1
and x0, x0, ID_AA64MMFR2_EL1_VARange_MASK
cbnz x0, 2f
+#else
+ mrs x0, id_aa64mmfr0_el1
+ sbfx x0, x0, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
+ cmp x0, #ID_AA64MMFR0_EL1_TGRAN_LPA2
+ b.ge 2f
+#endif
update_early_cpu_boot_status \
CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
@@ -54,6 +54,7 @@ PROVIDE(__pi__ctype = _ctype);
PROVIDE(__pi_memstart_offset_seed = memstart_offset_seed);
PROVIDE(__pi_init_idmap_pg_dir = init_idmap_pg_dir);
+PROVIDE(__pi_init_idmap_pg_end = init_idmap_pg_end);
PROVIDE(__pi_init_pg_dir = init_pg_dir);
PROVIDE(__pi_init_pg_end = init_pg_end);
PROVIDE(__pi_swapper_pg_dir = swapper_pg_dir);
@@ -127,11 +127,64 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
}
/* Copy the root page table to its final location */
- memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PGD_SIZE);
+ memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
dsb(ishst);
idmap_cpu_replace_ttbr1(swapper_pg_dir);
}
+static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
+{
+ u64 sctlr = read_sysreg(sctlr_el1);
+ u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
+
+ asm(" msr sctlr_el1, %0 ;"
+ " isb ;"
+ " msr ttbr0_el1, %1 ;"
+ " msr tcr_el1, %2 ;"
+ " isb ;"
+ " tlbi vmalle1 ;"
+ " dsb nsh ;"
+ " isb ;"
+ " msr sctlr_el1, %3 ;"
+ " isb ;"
+ :: "r"(sctlr & ~SCTLR_ELx_M), "r"(ttbr), "r"(tcr), "r"(sctlr));
+}
+
+static void __init remap_idmap_for_lpa2(void)
+{
+ /* clear the bits that change meaning once LPA2 is turned on */
+ pteval_t mask = PTE_SHARED;
+
+ /*
+ * We have to clear bits [9:8] in all block or page descriptors in the
+ * initial ID map, as otherwise they will be (mis)interpreted as
+ * physical address bits once we flick the LPA2 switch (TCR.DS). Since
+ * we cannot manipulate live descriptors in that way without creating
+ * potential TLB conflicts, let's create another temporary ID map in a
+ * LPA2 compatible fashion, and update the initial ID map while running
+ * from that.
+ */
+ create_init_idmap(init_pg_dir, mask);
+ dsb(ishst);
+ set_ttbr0_for_lpa2((u64)init_pg_dir);
+
+ /*
+ * Recreate the initial ID map with the same granularity as before.
+ * Don't bother with the FDT, we no longer need it after this.
+ */
+ memset(init_idmap_pg_dir, 0,
+ (u64)init_idmap_pg_dir - (u64)init_idmap_pg_end);
+
+ create_init_idmap(init_idmap_pg_dir, mask);
+ dsb(ishst);
+
+ /* switch back to the updated initial ID map */
+ set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
+
+ /* wipe the temporary ID map from memory */
+ memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
+}
+
static void __init map_fdt(u64 fdt)
{
static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
@@ -154,6 +207,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
u64 va_base, pa_base = (u64)&_text;
u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
int root_level = 4 - CONFIG_PGTABLE_LEVELS;
+ int va_bits = VA_BITS;
int chosen;
map_fdt((u64)fdt);
@@ -165,8 +219,15 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
chosen = fdt_path_offset(fdt, chosen_str);
init_feature_override(boot_status, fdt, chosen);
- if (VA_BITS > VA_BITS_MIN && cpu_has_lva())
- sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(VA_BITS));
+ if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
+ va_bits = VA_BITS_MIN;
+ } else if (IS_ENABLED(CONFIG_ARM64_LPA2) && !cpu_has_lpa2()) {
+ va_bits = VA_BITS_MIN;
+ root_level++;
+ }
+
+ if (va_bits > VA_BITS_MIN)
+ sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(va_bits));
/*
* The virtual KASLR displacement modulo 2MiB is decided by the
@@ -184,6 +245,9 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
kaslr_offset |= kaslr_seed & ~(MIN_KIMG_ALIGN - 1);
}
+ if (IS_ENABLED(CONFIG_ARM64_LPA2) && va_bits > VA_BITS_MIN)
+ remap_idmap_for_lpa2();
+
va_base = KIMAGE_VADDR + kaslr_offset;
map_kernel(kaslr_offset, va_base - pa_base, root_level);
}
@@ -87,14 +87,19 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
}
}
-asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir)
+asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
{
u64 ptep = (u64)pg_dir + PAGE_SIZE;
+ pgprot_t text_prot = PAGE_KERNEL_ROX;
+ pgprot_t data_prot = PAGE_KERNEL;
+
+ pgprot_val(text_prot) &= ~clrmask;
+ pgprot_val(data_prot) &= ~clrmask;
map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
- PAGE_KERNEL_ROX, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+ text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
- PAGE_KERNEL, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+ data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
return ptep;
}
@@ -21,7 +21,7 @@ static inline void *prel64_to_pointer(const prel64_t *offset)
extern bool dynamic_scs_is_enabled;
-extern pgd_t init_idmap_pg_dir[];
+extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[];
void init_feature_override(u64 boot_status, const void *fdt, int chosen);
u64 kaslr_early_init(void *fdt, int chosen);
@@ -33,4 +33,4 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
-asmlinkage u64 create_init_idmap(pgd_t *pgd);
+asmlinkage u64 create_init_idmap(pgd_t *pgd, pteval_t clrmask);
@@ -238,7 +238,7 @@ void __init arm64_memblock_init(void)
* physical address of PAGE_OFFSET, we have to *subtract* from it.
*/
if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52))
- memstart_addr -= _PAGE_OFFSET(48) - _PAGE_OFFSET(52);
+ memstart_addr -= _PAGE_OFFSET(vabits_actual) - _PAGE_OFFSET(52);
/*
* Apply the memory limit if it was set. Since the kernel may be loaded
@@ -582,8 +582,12 @@ static void __init map_mem(pgd_t *pgdp)
* entries at any level are being shared between the linear region and
* the vmalloc region. Check whether this is true for the PGD level, in
* which case it is guaranteed to be true for all other levels as well.
+ * (Unless we are running with support for LPA2, in which case the
+ * entire reduced VA space is covered by a single pgd_t which will have
+ * been populated without the PXNTable attribute by the time we get here.)
*/
- BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
+ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end) &&
+ pgd_index(_PAGE_OFFSET(VA_BITS_MIN)) != PTRS_PER_PGD - 1);
early_kfence_pool = arm64_kfence_alloc_pool();
@@ -488,6 +488,9 @@ SYM_FUNC_START(__cpu_setup)
mov x9, #64 - VA_BITS
alternative_if ARM64_HAS_VA52
tcr_set_t1sz tcr, x9
+#ifdef CONFIG_ARM64_LPA2
+ orr tcr, tcr, #TCR_DS
+#endif
alternative_else_nop_endif
#endif