@@ -68,6 +68,7 @@ config RISCV
select ARCH_HAS_GCOV_PROFILE_ALL
select HAVE_COPY_THREAD_TLS
select HAVE_ARCH_KASAN if MMU && 64BIT
+ select RELOCATABLE if 64BIT
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
@@ -106,7 +107,7 @@ config PAGE_OFFSET
default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
default 0x80000000 if 64BIT && !MMU
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
- default 0xffffffe000000000 if 64BIT && !MAXPHYSMEM_2GB
+ default 0xffffc00000000000 if 64BIT && !MAXPHYSMEM_2GB
config ARCH_FLATMEM_ENABLE
def_bool y
@@ -155,8 +156,11 @@ config GENERIC_HWEIGHT
config FIX_EARLYCON_MEM
def_bool MMU
+# On a 64BIT relocatable kernel, the 4-level page table is at runtime folded
+# on a 3-level page table when sv48 is not supported.
config PGTABLE_LEVELS
int
+ default 4 if 64BIT && RELOCATABLE
default 3 if 64BIT
default 2
@@ -40,11 +40,10 @@
#ifndef CONFIG_64BIT
#define SATP_PPN _AC(0x003FFFFF, UL)
#define SATP_MODE_32 _AC(0x80000000, UL)
-#define SATP_MODE SATP_MODE_32
#else
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
-#define SATP_MODE SATP_MODE_39
+#define SATP_MODE_48 _AC(0x9000000000000000, UL)
#endif
/* Exception cause high bit - is an interrupt if set */
@@ -27,6 +27,7 @@ enum fixed_addresses {
FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
FIX_PTE,
FIX_PMD,
+ FIX_PUD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
@@ -31,7 +31,19 @@
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
+#ifdef CONFIG_RELOCATABLE
+#define PAGE_OFFSET __page_offset
+
+#ifdef CONFIG_64BIT
+/*
+ * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
+ * define the PAGE_OFFSET value for SV39.
+ */
+#define PAGE_OFFSET_L3 0xffffffe000000000
+#endif /* CONFIG_64BIT */
+#else
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
+#endif /* CONFIG_RELOCATABLE */
#define KERN_VIRT_SIZE (-PAGE_OFFSET)
@@ -102,6 +114,9 @@ extern unsigned long pfn_base;
extern unsigned long max_low_pfn;
extern unsigned long min_low_pfn;
extern unsigned long kernel_virt_addr;
+#ifdef CONFIG_RELOCATABLE
+extern unsigned long __page_offset;
+#endif
#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
@@ -36,6 +36,42 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
+ pud_t *pud)
+{
+ if (pgtable_l4_enabled) {
+ unsigned long pfn = virt_to_pfn(pud);
+
+ set_p4d_safe(p4d,
+ __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
+ }
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ if (pgtable_l4_enabled)
+ return (pud_t *)__get_free_page(
+ GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
+ return NULL;
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ if (pgtable_l4_enabled)
+ free_page((unsigned long)pud);
+}
+
+#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
#endif /* __PAGETABLE_PMD_FOLDED */
#define pmd_pgtable(pmd) pmd_page(pmd)
@@ -8,16 +8,32 @@
#include <linux/const.h>
-#define PGDIR_SHIFT 30
+extern bool pgtable_l4_enabled;
+
+#define PGDIR_SHIFT (pgtable_l4_enabled ? 39 : 30)
/* Size of region mapped by a page global directory */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+/* pud is folded into pgd in case of 3-level page table */
+#define PUD_SHIFT 30
+#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE - 1))
+
#define PMD_SHIFT 21
/* Size of region mapped by a page middle directory */
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE - 1))
+/* Page Upper Directory entry */
+typedef struct {
+ unsigned long pud;
+} pud_t;
+
+#define pud_val(x) ((x).pud)
+#define __pud(x) ((pud_t) { (x) })
+#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t))
+
/* Page Middle Directory entry */
typedef struct {
unsigned long pmd;
@@ -60,6 +76,16 @@ static inline void pud_clear(pud_t *pudp)
set_pud(pudp, __pud(0));
}
+static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
+{
+ return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
+}
+
+static inline unsigned long _pud_pfn(pud_t pud)
+{
+ return pud_val(pud) >> _PAGE_PFN_SHIFT;
+}
+
static inline unsigned long pud_page_vaddr(pud_t pud)
{
return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
@@ -70,6 +96,15 @@ static inline struct page *pud_page(pud_t pud)
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
}
+#define mm_pud_folded mm_pud_folded
+static inline bool mm_pud_folded(struct mm_struct *mm)
+{
+ if (pgtable_l4_enabled)
+ return false;
+
+ return true;
+}
+
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
@@ -90,4 +125,64 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
+
+static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ *p4dp = p4d;
+ else
+ set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
+}
+
+static inline int p4d_none(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) == 0);
+
+ return 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (p4d_val(p4d) & _PAGE_PRESENT);
+
+ return 1;
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return !p4d_present(p4d);
+
+ return 0;
+}
+
+static inline void p4d_clear(p4d_t *p4d)
+{
+ if (pgtable_l4_enabled)
+ set_p4d(p4d, __p4d(0));
+}
+
+static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+{
+ if (pgtable_l4_enabled)
+ return (unsigned long)pfn_to_virt(
+ p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+
+ return pud_page_vaddr((pud_t) { p4d_val(p4d) });
+}
+
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ if (pgtable_l4_enabled)
+ return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
+
+ return (pud_t *)p4d;
+}
+
#endif /* _ASM_RISCV_PGTABLE_64_H */
@@ -20,12 +20,14 @@
* the kernel.
*/
#define KERNEL_VIRT_ADDR (VMALLOC_END - SZ_2G + 1)
-#define KERNEL_LINK_ADDR KERNEL_VIRT_ADDR
+#define KERNEL_LINK_ADDR (VMALLOC_LINK_END - SZ_2G + 1)
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
+#define VMALLOC_LINK_END (_AC(CONFIG_PAGE_OFFSET, UL) - 1)
+
#define BPF_JIT_REGION_SIZE (SZ_128M)
#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end)
#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
@@ -41,7 +43,7 @@
* position vmemmap directly below the VMALLOC region.
*/
#ifdef CONFIG_64BIT
-#define VA_BITS 39
+#define VA_BITS (pgtable_l4_enabled ? 48 : 39)
#else
#define VA_BITS 32
#endif
@@ -73,8 +75,7 @@
#ifndef __ASSEMBLY__
-/* Page Upper Directory not used in RISC-V */
-#include <asm-generic/pgtable-nopud.h>
+#include <asm-generic/pgtable-nop4d.h>
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
@@ -505,6 +506,7 @@ static inline void __kernel_map_pages(struct page *page, int numpages, int enabl
extern char _start[];
extern void *dtb_early_va;
+extern u64 satp_mode;
void setup_bootmem(void);
void paging_init(void);
@@ -62,7 +62,8 @@ relocate:
/* Compute satp for kernel page tables, but don't load it yet */
srl a2, a0, PAGE_SHIFT
- li a1, SATP_MODE
+ la a1, satp_mode
+ REG_L a1, 0(a1)
or a2, a2, a1
/*
@@ -59,7 +59,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
cpumask_set_cpu(cpu, mm_cpumask(next));
#ifdef CONFIG_MMU
- csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
+ csr_write(CSR_SATP, virt_to_pfn(next->pgd) | satp_mode);
local_flush_tlb_all();
#endif
@@ -25,8 +25,23 @@
#include "../kernel/head.h"
-unsigned long kernel_virt_addr = KERNEL_VIRT_ADDR;
+#ifdef CONFIG_64BIT
+u64 satp_mode = IS_ENABLED(CONFIG_MAXPHYSMEM_2GB) ?
+ SATP_MODE_39 : SATP_MODE_48;
+bool pgtable_l4_enabled = IS_ENABLED(CONFIG_MAXPHYSMEM_2GB) ? false : true;
+#else
+u64 satp_mode = SATP_MODE_32;
+bool pgtable_l4_enabled;
+#endif
+EXPORT_SYMBOL(pgtable_l4_enabled);
+EXPORT_SYMBOL(satp_mode);
+
+unsigned long kernel_virt_addr;
EXPORT_SYMBOL(kernel_virt_addr);
+#ifdef CONFIG_RELOCATABLE
+unsigned long __page_offset = _AC(CONFIG_PAGE_OFFSET, UL);
+EXPORT_SYMBOL(__page_offset);
+#endif
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
@@ -254,9 +269,12 @@ static void __init create_pte_mapping(pte_t *ptep,
#ifndef __PAGETABLE_PMD_FOLDED
+pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
+pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
+pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
static pmd_t *__init get_pmd_virt(phys_addr_t pa)
{
@@ -273,7 +291,8 @@ static phys_addr_t __init alloc_pmd(uintptr_t va)
if (mmu_enabled)
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
- BUG_ON((va - kernel_virt_addr) >> PGDIR_SHIFT);
+ /* Only one PMD is available for early mapping */
+ BUG_ON((va - kernel_virt_addr) >> PUD_SHIFT);
return (uintptr_t)early_pmd;
}
@@ -305,19 +324,70 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
create_pte_mapping(ptep, va, pa, sz, prot);
}
-#define pgd_next_t pmd_t
-#define alloc_pgd_next(__va) alloc_pmd(__va)
-#define get_pgd_next_virt(__pa) get_pmd_virt(__pa)
+static pud_t *__init get_pud_virt(phys_addr_t pa)
+{
+ if (mmu_enabled) {
+ clear_fixmap(FIX_PUD);
+ return (pud_t *)set_fixmap_offset(FIX_PUD, pa);
+ } else {
+ return (pud_t *)((uintptr_t)pa);
+ }
+}
+
+static phys_addr_t __init alloc_pud(uintptr_t va)
+{
+ if (mmu_enabled)
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+
+ /* Only one PUD is available for early mapping */
+ BUG_ON((va - kernel_virt_addr) >> PGDIR_SHIFT);
+
+ return (uintptr_t)early_pud;
+}
+
+static void __init create_pud_mapping(pud_t *pudp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pmd_t *nextp;
+ phys_addr_t next_phys;
+ uintptr_t pud_index = pud_index(va);
+
+ if (sz == PUD_SIZE) {
+ if (pud_val(pudp[pud_index]) == 0)
+ pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (pud_val(pudp[pud_index]) == 0) {
+ next_phys = alloc_pmd(va);
+ pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE);
+ nextp = get_pmd_virt(next_phys);
+ memset(nextp, 0, PAGE_SIZE);
+ } else {
+ next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index]));
+ nextp = get_pmd_virt(next_phys);
+ }
+
+ create_pmd_mapping(nextp, va, pa, sz, prot);
+}
+
+#define pgd_next_t pud_t
+#define alloc_pgd_next(__va) alloc_pud(__va)
+#define get_pgd_next_virt(__pa) get_pud_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
- create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next fixmap_pmd
+ create_pud_mapping(__nextp, __va, __pa, __sz, __prot)
+#define fixmap_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
+#define trampoline_pgd_next (pgtable_l4_enabled ? \
+ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
#else
#define pgd_next_t pte_t
#define alloc_pgd_next(__va) alloc_pte(__va)
#define get_pgd_next_virt(__pa) get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
-#define fixmap_pgd_next fixmap_pte
+#define fixmap_pgd_next ((uintptr_t)fixmap_pte)
#endif
static void __init create_pgd_mapping(pgd_t *pgdp,
@@ -328,6 +398,13 @@ static void __init create_pgd_mapping(pgd_t *pgdp,
phys_addr_t next_phys;
uintptr_t pgd_index = pgd_index(va);
+#ifndef __PAGETABLE_PMD_FOLDED
+ if (!pgtable_l4_enabled) {
+ create_pud_mapping((pud_t *)pgdp, va, pa, sz, prot);
+ return;
+ }
+#endif
+
if (sz == PGDIR_SIZE) {
if (pgd_val(pgdp[pgd_index]) == 0)
pgdp[pgd_index] = pfn_pgd(PFN_DOWN(pa), prot);
@@ -419,6 +496,46 @@ void __init relocate_kernel(uintptr_t load_pa)
}
}
+#if defined(CONFIG_64BIT) && !defined(CONFIG_MAXPHYSMEM_2GB)
+void disable_pgtable_l4(void)
+{
+ pgtable_l4_enabled = false;
+ __page_offset = PAGE_OFFSET_L3;
+ satp_mode = SATP_MODE_39;
+}
+
+/* There is a simple way to determine if 4-level is supported by the
+ * underlying hardware: establish 1:1 mapping in 4-level page table mode
+ * then read SATP to see if the configuration was taken into account
+ * meaning sv48 is supported.
+ */
+asmlinkage __init void set_satp_mode(uintptr_t load_pa)
+{
+ u64 identity_satp, hw_satp;
+
+ create_pgd_mapping(early_pg_dir, load_pa, (uintptr_t)early_pud,
+ PGDIR_SIZE, PAGE_TABLE);
+ create_pud_mapping(early_pud, load_pa, (uintptr_t)early_pmd,
+ PUD_SIZE, PAGE_TABLE);
+ create_pmd_mapping(early_pmd, load_pa, load_pa,
+ PMD_SIZE, PAGE_KERNEL_EXEC);
+
+ identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
+ local_flush_tlb_all();
+ csr_write(CSR_SATP, identity_satp);
+
+ hw_satp = csr_read(CSR_SATP);
+ csr_write(CSR_SATP, 0ULL);
+ local_flush_tlb_all();
+
+ if (hw_satp != identity_satp)
+ disable_pgtable_l4();
+
+ memset(early_pg_dir, 0, PAGE_SIZE);
+ memset(early_pud, 0, PAGE_SIZE);
+ memset(early_pmd, 0, PAGE_SIZE);
+}
+#endif
#endif
static uintptr_t load_pa, load_sz;
@@ -442,9 +559,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
load_pa = (uintptr_t)(&_start);
load_sz = (uintptr_t)(&_end) - load_pa;
+#if defined(CONFIG_64BIT) && !defined(CONFIG_MAXPHYSMEM_2GB)
+ set_satp_mode(load_pa);
+#endif
+
+ kernel_virt_addr = KERNEL_VIRT_ADDR;
+
va_pa_offset = PAGE_OFFSET - load_pa;
va_kernel_pa_offset = kernel_virt_addr - load_pa;
-
pfn_base = PFN_DOWN(load_pa);
#ifdef CONFIG_RELOCATABLE
@@ -473,15 +595,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
/* Setup early PGD for fixmap */
create_pgd_mapping(early_pg_dir, FIXADDR_START,
- (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
#ifndef __PAGETABLE_PMD_FOLDED
- /* Setup fixmap PMD */
+ /* Setup fixmap PUD and PMD */
+ if (pgtable_l4_enabled)
+ create_pud_mapping(fixmap_pud, FIXADDR_START,
+ (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE);
create_pmd_mapping(fixmap_pmd, FIXADDR_START,
(uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
+
/* Setup trampoline PGD and PMD */
create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
- (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
+ trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+ if (pgtable_l4_enabled)
+ create_pud_mapping(trampoline_pud, kernel_virt_addr,
+ (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
create_pmd_mapping(trampoline_pmd, kernel_virt_addr,
load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
#else
@@ -558,12 +687,13 @@ static void __init setup_vm_final(void)
vm_area_add_early(&vm_kernel);
- /* Clear fixmap PTE and PMD mappings */
+ /* Clear fixmap page table mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
+ clear_fixmap(FIX_PUD);
/* Move to swapper page table */
- csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
+ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode);
local_flush_tlb_all();
}