Message ID: 000701cf5adc$1aa35350$4fe9f9f0$@samsung.com (mailing list archive)
State: New, archived
On Fri, Apr 18, 2014 at 04:59:20PM +0900, Jungseok Lee wrote: > This patch implements 4 levels of translation tables since 3 levels > of page tables with 4KB pages cannot support the 40-bit physical address > space described in [1], due to the following issue. > > The restriction is that the kernel logical memory map with 4KB + 3 levels > (0xffffffc000000000-0xffffffffffffffff) cannot cover the RAM region from > 544GB to 1024GB in [1]. Specifically, the ARM64 kernel fails to create a > mapping for this region in the map_mem function since __phys_to_virt for > this region overflows. > > If an SoC design follows the document [1], any RAM over 32GB would be placed > from 544GB. Even a 64GB system is supposed to use the region from 544GB > to 576GB for only 32GB of RAM. The natural solution is to enable 4 levels > of page tables rather than hack __virt_to_phys and __phys_to_virt. > > However, it is recommended that 4 levels of page tables only be enabled > if the memory map is too sparse or there is about 512GB of RAM. Hello Jungseok, A few comments can be found inline... > > References > ---------- > [1]: Principles of ARM Memory Maps, White Paper, Issue C > > Signed-off-by: Jungseok Lee <jays.lee@samsung.com> > Reviewed-by: Sungjinn Chung <sungjinn.chung@samsung.com> > --- > arch/arm64/Kconfig | 7 +++++ > arch/arm64/include/asm/memblock.h | 6 +++++ > arch/arm64/include/asm/page.h | 4 ++- > arch/arm64/include/asm/pgalloc.h | 20 +++++++++++++++ > arch/arm64/include/asm/pgtable-hwdef.h | 6 +++-- > arch/arm64/include/asm/pgtable.h | 44 ++++++++++++++++++++++++++++++-- > arch/arm64/include/asm/tlb.h | 8 ++++++ > arch/arm64/kernel/head.S | 40 ++++++++++++++++++++--------- > arch/arm64/kernel/traps.c | 5 ++++ > arch/arm64/mm/fault.c | 1 + > arch/arm64/mm/mmu.c | 16 +++++++++--- > 11 files changed, 136 insertions(+), 21 deletions(-) > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 431acbc..7f5270b 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -184,12 +184,19 @@ config ARM64_3_LEVELS > help > This feature enables 3 levels of translation tables. > > +config ARM64_4_LEVELS > + bool "4 level" > + depends on ARM64_4K_PAGES > + help > + This feature enables 4 levels of translation tables. > + > endchoice > > config ARM64_VA_BITS > int "Virtual address space size" > range 39 39 if ARM64_4K_PAGES && ARM64_3_LEVELS > range 42 42 if ARM64_64K_PAGES && ARM64_2_LEVELS > + range 48 48 if ARM64_4K_PAGES && ARM64_4_LEVELS > help > This feature is determined by a combination of page size and > level of translation tables.
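To make the overflow above concrete before continuing with the diff: with 4KB pages and 3 levels the kernel linear map spans only 2^38 bytes (256GB), so RAM placed 256GB or more above PHYS_OFFSET cannot be covered. A minimal userspace sketch of the arithmetic (VA_BITS=39 and a 544GB RAM base per [1] are assumed; phys_to_virt below is a stand-in for the kernel's __phys_to_virt, not kernel code):

=========================================================================

#include <stdio.h>
#include <stdint.h>

#define VA_BITS		39
#define PAGE_OFFSET	(0xffffffffffffffffULL << (VA_BITS - 1))
#define PHYS_OFFSET	0x8800000000ULL			/* 544GB, per [1] */

/* stand-in for the kernel's __phys_to_virt() */
#define phys_to_virt(pa)	((pa) - PHYS_OFFSET + PAGE_OFFSET)

int main(void)
{
	uint64_t map_size = ~PAGE_OFFSET + 1;	/* 2^38 bytes above PAGE_OFFSET */
	uint64_t pa = 0xC800000000ULL;		/* 800GB, inside the 544GB-1024GB window */

	printf("linear map covers %llu GB\n",
	       (unsigned long long)(map_size >> 30));
	/* 800GB - 544GB = 256GB, exactly the size of the linear map,
	 * so the sum below wraps past 2^64 and the result is 0. */
	printf("phys_to_virt(800GB) = 0x%llx\n",
	       (unsigned long long)phys_to_virt(pa));
	return 0;
}

=========================================================================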
> diff --git a/arch/arm64/include/asm/memblock.h b/arch/arm64/include/asm/memblock.h > index 6afeed2..e4ac8bf 100644 > --- a/arch/arm64/include/asm/memblock.h > +++ b/arch/arm64/include/asm/memblock.h > @@ -16,6 +16,12 @@ > #ifndef __ASM_MEMBLOCK_H > #define __ASM_MEMBLOCK_H > > +#ifndef CONFIG_ARM64_4_LEVELS > +#define MEMBLOCK_INITIAL_LIMIT PGDIR_SIZE > +#else > +#define MEMBLOCK_INITIAL_LIMIT PUD_SIZE > +#endif > + > extern void arm64_memblock_init(void); > > #endif > diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h > index 268e53d..83b5289 100644 > --- a/arch/arm64/include/asm/page.h > +++ b/arch/arm64/include/asm/page.h > @@ -35,8 +35,10 @@ > > #ifdef CONFIG_ARM64_2_LEVELS > #include <asm/pgtable-2level-types.h> > -#else > +#elif defined(CONFIG_ARM64_3_LEVELS) > #include <asm/pgtable-3level-types.h> > +#else > +#include <asm/pgtable-4level-types.h> > #endif > > extern void __cpu_clear_user_page(void *p, unsigned long user); > diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h > index 4829837..8d745fa 100644 > --- a/arch/arm64/include/asm/pgalloc.h > +++ b/arch/arm64/include/asm/pgalloc.h > @@ -26,6 +26,26 @@ > > #define check_pgt_cache() do { } while (0) > > +#ifdef CONFIG_ARM64_4_LEVELS > + > +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) > +{ > + return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); > +} > + > +static inline void pud_free(struct mm_struct *mm, pud_t *pud) > +{ > + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); > + free_page((unsigned long)pud); > +} > + > +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) > +{ > + set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); > +} > + > +#endif /* CONFIG_ARM64_4_LEVELS */ > + > #ifndef CONFIG_ARM64_2_LEVELS > > static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) > diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h > index 9cd86c6..ba30053 100644 > --- a/arch/arm64/include/asm/pgtable-hwdef.h > +++ b/arch/arm64/include/asm/pgtable-hwdef.h > @@ -18,8 +18,10 @@ > > #ifdef CONFIG_ARM64_2_LEVELS > #include <asm/pgtable-2level-hwdef.h> > -#else > +#elif defined(CONFIG_ARM64_3_LEVELS) > #include <asm/pgtable-3level-hwdef.h> > +#else > +#include <asm/pgtable-4level-hwdef.h> > #endif > > /* > @@ -27,7 +29,7 @@ > * > * Level 1 descriptor (PUD). > */ > - > +#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) > #define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) > > /* > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index a64ce5e..efc40d1 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -35,7 +35,11 @@ > * VMALLOC and SPARSEMEM_VMEMMAP ranges. 
> */ > #define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) > +#ifndef CONFIG_ARM64_4_LEVELS > #define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) > +#else > +#define VMALLOC_END (PAGE_OFFSET - UL(0x40000000000) - SZ_64K) > +#endif > > #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) > > @@ -44,12 +48,16 @@ > #ifndef __ASSEMBLY__ > extern void __pte_error(const char *file, int line, unsigned long val); > extern void __pmd_error(const char *file, int line, unsigned long val); > +extern void __pud_error(const char *file, int line, unsigned long val); > extern void __pgd_error(const char *file, int line, unsigned long val); > > #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) > #ifndef CONFIG_ARM64_2_LEVELS > #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) > #endif > +#ifdef CONFIG_ARM64_4_LEVELS > +#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) > +#endif > #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) > > /* > @@ -344,6 +352,30 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) > > #endif /* CONFIG_ARM64_2_LEVELS */ > > +#ifdef CONFIG_ARM64_4_LEVELS > + > +#define pgd_none(pgd) (!pgd_val(pgd)) > +#define pgd_bad(pgd) (!(pgd_val(pgd) & 2)) > +#define pgd_present(pgd) (pgd_val(pgd)) > + > +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) > +{ > + *pgdp = pgd; > + dsb(); > +} > + > +static inline void pgd_clear(pgd_t *pgdp) > +{ > + set_pgd(pgdp, __pgd(0)); > +} > + > +static inline pud_t *pgd_page_vaddr(pgd_t pgd) > +{ > + return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); > +} > + > +#endif /* CONFIG_ARM64_4_LEVELS */ > + > /* to find an entry in a page-table-directory */ > #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) > > @@ -352,6 +384,14 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) > /* to find an entry in a kernel page-table-directory */ > #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) > > +#ifdef CONFIG_ARM64_4_LEVELS > +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) > +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) > +{ > + return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); > +} > +#endif > + > /* Find an entry in the second-level page table.. 
*/ > #ifndef CONFIG_ARM64_2_LEVELS > #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) > @@ -380,8 +420,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) > extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; > extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; > > -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) > -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) > +#define SWAPPER_DIR_SIZE (4 * PAGE_SIZE) > +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) > > /* > * Encode and decode a swap entry: > diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h > index df378b2..dedfb04 100644 > --- a/arch/arm64/include/asm/tlb.h > +++ b/arch/arm64/include/asm/tlb.h > @@ -99,5 +99,13 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, > } > #endif > > +#ifdef CONFIG_ARM64_4_LEVELS > +static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, > + unsigned long addr) > +{ > + tlb_add_flush(tlb, addr); > + tlb_remove_page(tlb, virt_to_page(pudp)); > +} > +#endif > > #endif > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S > index 0fd5650..f313a7a 100644 > --- a/arch/arm64/kernel/head.S > +++ b/arch/arm64/kernel/head.S > @@ -37,8 +37,8 @@ > > /* > * swapper_pg_dir is the virtual address of the initial page table. We place > - * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has > - * 2 pages and is placed below swapper_pg_dir. > + * the page tables 4 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has > + * 3 pages and is placed below swapper_pg_dir. > */ > #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) > > @@ -46,8 +46,8 @@ > #error KERNEL_RAM_VADDR must start at 0xXXX80000 > #endif > > -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) > -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) > +#define SWAPPER_DIR_SIZE (4 * PAGE_SIZE) > +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) > > .globl swapper_pg_dir > .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE > @@ -371,16 +371,29 @@ ENDPROC(__calc_phys_offset) > > /* > * Macro to populate the PGD for the corresponding block entry in the next > - * level (tbl) for the given virtual address. > + * levels (tbl1 and tbl2) for the given virtual address. > * > - * Preserves: pgd, tbl, virt > + * Preserves: pgd, tbl1, tbl2, virt tbl1 and tbl2 are *not* preserved for 4 level. tbl1 is bumped up one page to make space for the pud, then fed into create_block_mapping later. > * Corrupts: tmp1, tmp2 > */ > - .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 > + .macro create_pgd_entry, pgd, tbl1, tbl2, virt, tmp1, tmp2 > lsr \tmp1, \virt, #PGDIR_SHIFT > and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index > - orr \tmp2, \tbl, #3 // PGD entry table type > + orr \tmp2, \tbl1, #3 // PGD entry table type > str \tmp2, [\pgd, \tmp1, lsl #3] > +#ifdef CONFIG_ARM64_4_LEVELS > + ldr \tbl2, =FIXADDR_TOP > + cmp \tbl2, \virt Do we need this extra logic? See my other comment below where the fixed mapping is placed down. > + add \tbl2, \tbl1, #PAGE_SIZE > + b.ne 1f > + add \tbl2, \tbl2, #PAGE_SIZE > +1: > + lsr \tmp1, \virt, #PUD_SHIFT > + and \tmp1, \tmp1, #PTRS_PER_PUD - 1 // PUD index > + orr \tmp2, \tbl2, #3 // PUD entry table type > + str \tmp2, [\tbl1, \tmp1, lsl #3] > + mov \tbl1, \tbl2 > +#endif It may be easier to read to have a create_pud_entry macro too?
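The macro being discussed is hard to scan in this quoted form; as an illustrative C sketch only (the helper name and types are hypothetical — the real logic exists only as the asm macro above), the 4-level branch does the following. The MMU is still off at this point in boot, so physical table addresses are dereferenced directly:

=========================================================================

/* Sketch of create_pgd_entry with CONFIG_ARM64_4_LEVELS: pud is the
 * physical address of the page right after the pgd (tbl1 in the asm). */
static u64 create_pgd_entry_4lvl(u64 *pgd, u64 *pud, u64 virt)
{
	/* the pgd slot points at the pud page */
	pgd[(virt >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)] = (u64)pud | 3;

	/* the next-level table normally sits one page above the pud ... */
	u64 tbl2 = (u64)pud + PAGE_SIZE;
	/* ... except for FIXADDR_TOP, whose ptes go one page further,
	 * past the page already holding the swapper block map */
	if (virt == FIXADDR_TOP)
		tbl2 += PAGE_SIZE;

	/* the pud slot points at the table where block mappings land */
	pud[(virt >> PUD_SHIFT) & (PTRS_PER_PUD - 1)] = tbl2 | 3;
	return tbl2;		/* handed on to create_block_map */
}

=========================================================================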
> .endm > > /* > @@ -444,7 +457,7 @@ __create_page_tables: > add x0, x25, #PAGE_SIZE // section table address > ldr x3, =KERNEL_START > add x3, x3, x28 // __pa(KERNEL_START) > - create_pgd_entry x25, x0, x3, x5, x6 > + create_pgd_entry x25, x0, x1, x3, x5, x6 > ldr x6, =KERNEL_END > mov x5, x3 // __pa(KERNEL_START) > add x6, x6, x28 // __pa(KERNEL_END) > @@ -455,7 +468,7 @@ __create_page_tables: > */ > add x0, x26, #PAGE_SIZE // section table address > mov x5, #PAGE_OFFSET > - create_pgd_entry x26, x0, x5, x3, x6 > + create_pgd_entry x26, x0, x1, x5, x3, x6 > ldr x6, =KERNEL_END > mov x3, x24 // phys offset > create_block_map x0, x7, x3, x5, x6 > @@ -480,8 +493,11 @@ __create_page_tables: > * Create the pgd entry for the fixed mappings. > */ > ldr x5, =FIXADDR_TOP // Fixed mapping virtual address > - add x0, x26, #2 * PAGE_SIZE // section table address > - create_pgd_entry x26, x0, x5, x6, x7 > + add x0, x26, #PAGE_SIZE > +#ifndef CONFIG_ARM64_4_LEVELS > + add x0, x0, #PAGE_SIZE > +#endif This is overly complicated. For <4 levels we set x0 to be: ttbr1 + 2*PAGE_SIZE. For 4-levels, we set x0 to be ttbr1 + PAGE_SIZE, then inside the create_pgd_entry macro, we check the VA for FIXADDR_TOP then add another PAGE_SIZE. This is presumably done so the same PUD is used for the swapper block map and the FIXADDR map. If you assume that the PUD always follows the PGD for 4-levels, then you can remove this #ifdef and the conditional VA logic in set_pgd_entry. To make the logic simpler for <4 levels, you could call create_pud_entry in the middle of create_pgd_entry, then put down the actual pgd after. > + create_pgd_entry x26, x0, x1, x5, x6, x7 > So before this patch we have the following created by __create_page_tables: +========================+ <--- TEXT_OFFSET + PHYS_OFFSET | FIXADDR (pmd or pte) | +------------------------+ | block map (pmd or pte) | +------------------------+ | PGDs for swapper | +========================+ <--- TTBR1 swapper_pg_dir | block map for idmap | +------------------------+ | PGDs for idmap | +------------------------+ <--- TTBR0 idmap_pg_dir After the patch, for 4 levels activated we have: +========================+ <--- TEXT_OFFSET + PHYS_OFFSET | FIXADDR (ptes) | +------------------------+ | block map (ptes) | +------------------------+ | PUDs for swapper | +------------------------+ | PGDs for swapper | +========================+ <--- TTBR1 swapper_pg_dir | block map for idmap | +------------------------+ | PUDs for idmap | +------------------------+ | PGDs for idmap | +------------------------+ <--- TTBR0 idmap_pg_dir and without 4 levels activated we have: +========================+ <--- TEXT_OFFSET + PHYS_OFFSET | ZERO BYTES | +------------------------+ | FIXADDR (pmd or pte) | +------------------------+ | block map (pmd or pte) | +------------------------+ | PGDs for swapper | +========================+ <--- TTBR1 swapper_pg_dir | ZERO BYTES | +------------------------+ | block map for idmap | +------------------------+ | PGDs for idmap | +------------------------+ <--- TTBR0 idmap_pg_dir This is a pity as we are potentially throwing away 128KB. I would recommend only extending the sizes of IDMAP_DIR_SIZE and SWAPPER_DIR_SIZE if necessary. 
> /* > * Since the page tables have been populated with non-cacheable > diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c > index 0484e81..16d5ee5 100644 > --- a/arch/arm64/kernel/traps.c > +++ b/arch/arm64/kernel/traps.c > @@ -336,6 +336,11 @@ void __pmd_error(const char *file, int line, unsigned long val) > pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); > } > > +void __pud_error(const char *file, int line, unsigned long val) > +{ > + pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); > +} > + > void __pgd_error(const char *file, int line, unsigned long val) > { > pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c > index c23751b..ed4a343 100644 > --- a/arch/arm64/mm/fault.c > +++ b/arch/arm64/mm/fault.c > @@ -61,6 +61,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr) > break; > > pud = pud_offset(pgd, addr); > + printk(", *pud=%016llx", pud_val(*pud)); > if (pud_none(*pud) || pud_bad(*pud)) > break; > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index 6b7e895..4d29332 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -32,6 +32,7 @@ > #include <asm/setup.h> > #include <asm/sizes.h> > #include <asm/tlb.h> > +#include <asm/memblock.h> > #include <asm/mmu_context.h> > > #include "mm.h" > @@ -222,9 +223,15 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, > static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, > unsigned long end, unsigned long phys) > { > - pud_t *pud = pud_offset(pgd, addr); > + pud_t *pud; > unsigned long next; > > + if (pgd_none(*pgd) || pgd_bad(*pgd)) { > + pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); > + pgd_populate(&init_mm, pgd, pud); > + } > + > + pud = pud_offset(pgd, addr); > do { > next = pud_addr_end(addr, end); > alloc_init_pmd(pud, addr, next, phys); > @@ -271,10 +278,11 @@ static void __init map_mem(void) > * memory addressable from the initial direct kernel mapping. > * > * The initial direct kernel mapping, located at swapper_pg_dir, > - * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (which must be > - * aligned to 2MB as per Documentation/arm64/booting.txt). > + * gives us PGDIR_SIZE (2 and 3 levels) or PUD_SIZE (4 levels) memory > + * starting from PHYS_OFFSET (which must be aligned to 2MB as per > + * Documentation/arm64/booting.txt). > */ > - limit = PHYS_OFFSET + PGDIR_SIZE; > + limit = PHYS_OFFSET + MEMBLOCK_INITIAL_LIMIT; > memblock_set_current_limit(limit); > > /* map all the memory banks */ > -- > 1.7.10.4
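For orientation on the sizes involved (background arithmetic, not part of the patch): with a 4KB granule each table level resolves 9 bits, so the table spanned by the single boot-time pmd page is 1GB in both configurations — PGDIR_SIZE with 3 levels, PUD_SIZE with 4 — which is why MEMBLOCK_INITIAL_LIMIT resolves to the same 1GB window either way. A quick standalone check:

=========================================================================

#include <stdio.h>

/* 4KB granule: PAGE_SHIFT = 12, and each table level resolves 9 bits
 * (512 entries per table). */
#define PAGE_SHIFT	12
#define LEVEL_BITS	9
#define SHIFT(levels_above_pte)	(PAGE_SHIFT + LEVEL_BITS * (levels_above_pte))

int main(void)
{
	/* 3 levels (VA_BITS = 39): pgd -> pmd -> pte */
	printf("3 levels: PMD_SIZE = %lluMB, PGDIR_SIZE = %lluGB\n",
	       (1ULL << SHIFT(1)) >> 20, (1ULL << SHIFT(2)) >> 30);
	/* 4 levels (VA_BITS = 48): pgd -> pud -> pmd -> pte */
	printf("4 levels: PUD_SIZE = %lluGB, PGDIR_SIZE = %lluGB\n",
	       (1ULL << SHIFT(2)) >> 30, (1ULL << SHIFT(3)) >> 30);
	return 0;
}

=========================================================================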
On Thursday, April 24, 2014 1:02 AM, Steve Capper wrote: > On Fri, Apr 18, 2014 at 04:59:20PM +0900, Jungseok Lee wrote: > > This patch implements 4 levels of translation tables since 3 levels of > > page tables with 4KB pages cannot support the 40-bit physical address > > space described in [1], due to the following issue. > > > > The restriction is that the kernel logical memory map with 4KB + 3 levels > > (0xffffffc000000000-0xffffffffffffffff) cannot cover the RAM region from > > 544GB to 1024GB in [1]. Specifically, the ARM64 kernel fails to create a > > mapping for this region in the map_mem function since __phys_to_virt for > > this region overflows. > > > > If an SoC design follows the document [1], any RAM over 32GB would be placed > > from 544GB. Even a 64GB system is supposed to use the region from 544GB > > to 576GB for only 32GB of RAM. The natural solution is to enable 4 > > levels of page tables rather than hack __virt_to_phys and __phys_to_virt. > > > > However, it is recommended that 4 levels of page tables only be > > enabled if the memory map is too sparse or there is about 512GB of RAM. > > Hello Jungseok, > A few comments can be found inline... Hi Steve, The comments are very helpful. Thanks. [ ... ] > > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index > > 0fd5650..f313a7a 100644 > > --- a/arch/arm64/kernel/head.S > > +++ b/arch/arm64/kernel/head.S > > @@ -37,8 +37,8 @@ > > > > /* > > * swapper_pg_dir is the virtual address of the initial page table. > > We place > > - * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The > > idmap_pg_dir has > > - * 2 pages and is placed below swapper_pg_dir. > > + * the page tables 4 * PAGE_SIZE below KERNEL_RAM_VADDR. The > > + idmap_pg_dir has > > + * 3 pages and is placed below swapper_pg_dir. > > */ > > #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) > > > > @@ -46,8 +46,8 @@ > > #error KERNEL_RAM_VADDR must start at 0xXXX80000 #endif > > > > -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) > > -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) > > +#define SWAPPER_DIR_SIZE (4 * PAGE_SIZE) > > +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) > > > > .globl swapper_pg_dir > > .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE > > @@ -371,16 +371,29 @@ ENDPROC(__calc_phys_offset) > > > > /* > > * Macro to populate the PGD for the corresponding block entry in the > > next > > - * level (tbl) for the given virtual address. > > + * levels (tbl1 and tbl2) for the given virtual address. > > * > > - * Preserves: pgd, tbl, virt > > + * Preserves: pgd, tbl1, tbl2, virt > > tbl1 and tbl2 are *not* preserved for 4 level. tbl1 is bumped up one page to make space for the pud, > then fed into create_block_mapping later. Your logic can be extended to 3 levels. In the original code, tbl is fed into create_block_mapping. That is why I've written them down as "preserves". I will fix it in the next version. > > * Corrupts: tmp1, tmp2 > > */ > > - .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 > > + .macro create_pgd_entry, pgd, tbl1, tbl2, virt, tmp1, tmp2 > > lsr \tmp1, \virt, #PGDIR_SHIFT > > and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index > > - orr \tmp2, \tbl, #3 // PGD entry table type > > + orr \tmp2, \tbl1, #3 // PGD entry table type > > str \tmp2, [\pgd, \tmp1, lsl #3] > > +#ifdef CONFIG_ARM64_4_LEVELS > > + ldr \tbl2, =FIXADDR_TOP > > + cmp \tbl2, \virt > > Do we need this extra logic? See my other comment below where the fixed mapping is placed down.
> > > + add \tbl2, \tbl1, #PAGE_SIZE > > + b.ne 1f > > + add \tbl2, \tbl2, #PAGE_SIZE > > +1: > > + lsr \tmp1, \virt, #PUD_SHIFT > > + and \tmp1, \tmp1, #PTRS_PER_PUD - 1 // PUD index > > + orr \tmp2, \tbl2, #3 // PUD entry table type > > + str \tmp2, [\tbl1, \tmp1, lsl #3] > > + mov \tbl1, \tbl2 > > +#endif > > It may be easier to read to have a create_pud_entry macro too? Okay. I will write a create_pud_entry macro. > > .endm > > > > /* > > @@ -444,7 +457,7 @@ __create_page_tables: > > add x0, x25, #PAGE_SIZE // section table address > > ldr x3, =KERNEL_START > > add x3, x3, x28 // __pa(KERNEL_START) > > - create_pgd_entry x25, x0, x3, x5, x6 > > + create_pgd_entry x25, x0, x1, x3, x5, x6 > > ldr x6, =KERNEL_END > > mov x5, x3 // __pa(KERNEL_START) > > add x6, x6, x28 // __pa(KERNEL_END) > > @@ -455,7 +468,7 @@ __create_page_tables: > > */ > > add x0, x26, #PAGE_SIZE // section table address > > mov x5, #PAGE_OFFSET > > - create_pgd_entry x26, x0, x5, x3, x6 > > + create_pgd_entry x26, x0, x1, x5, x3, x6 > > ldr x6, =KERNEL_END > > mov x3, x24 // phys offset > > create_block_map x0, x7, x3, x5, x6 > > @@ -480,8 +493,11 @@ __create_page_tables: > > * Create the pgd entry for the fixed mappings. > > */ > > ldr x5, =FIXADDR_TOP // Fixed mapping virtual address > > - add x0, x26, #2 * PAGE_SIZE // section table address > > - create_pgd_entry x26, x0, x5, x6, x7 > > + add x0, x26, #PAGE_SIZE > > +#ifndef CONFIG_ARM64_4_LEVELS > > + add x0, x0, #PAGE_SIZE > > +#endif > > This is overly complicated. For <4 levels we set x0 to be: > ttbr1 + 2*PAGE_SIZE. For 4-levels, we set x0 to be ttbr1 + PAGE_SIZE, then inside the create_pgd_entry > macro, we check the VA for FIXADDR_TOP then add another PAGE_SIZE. This is presumably done so the same > PUD is used for the swapper block map and the FIXADDR map. > > If you assume that the PUD always follows the PGD for 4-levels, then you can remove this #ifdef and > the conditional VA logic in set_pgd_entry. To make the logic simpler for <4 levels, you could call > create_pud_entry in the middle of create_pgd_entry, then put down the actual pgd after. Okay, I will revise it in an easy and neat way. 
> > + create_pgd_entry x26, x0, x1, x5, x6, x7 > > > > So before this patch we have the following created by > __create_page_tables: > > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > | FIXADDR (pmd or pte) | > +------------------------+ > | block map (pmd or pte) | > +------------------------+ > | PGDs for swapper | > +========================+ <--- TTBR1 swapper_pg_dir > | block map for idmap | > +------------------------+ > | PGDs for idmap | > +------------------------+ <--- TTBR0 idmap_pg_dir > > > After the patch, for 4 levels activated we have: > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > | FIXADDR (ptes) | > +------------------------+ > | block map (ptes) | > +------------------------+ > | PUDs for swapper | > +------------------------+ > | PGDs for swapper | > +========================+ <--- TTBR1 swapper_pg_dir > | block map for idmap | > +------------------------+ > | PUDs for idmap | > +------------------------+ > | PGDs for idmap | > +------------------------+ <--- TTBR0 idmap_pg_dir > > and without 4 levels activated we have: > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > | ZERO BYTES | > +------------------------+ > | FIXADDR (pmd or pte) | > +------------------------+ > | block map (pmd or pte) | > +------------------------+ > | PGDs for swapper | > +========================+ <--- TTBR1 swapper_pg_dir > | ZERO BYTES | > +------------------------+ > | block map for idmap | > +------------------------+ > | PGDs for idmap | > +------------------------+ <--- TTBR0 idmap_pg_dir > > This is a pity as we are potentially throwing away 128KB. > I would recommend only extending the sizes of IDMAP_DIR_SIZE and SWAPPER_DIR_SIZE if necessary. Yes, you're right. I will introduce #ifdef statements for their size adjustment. Best Regards Jungseok Lee
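A minimal sketch of the size adjustment Jungseok agrees to above (an assumption about what the next revision could look like, not code posted in this thread):

=========================================================================

/* Only grow the boot tables when a pud level actually exists, so that
 * <4-level kernels do not reserve (and zero) pages they never use. */
#ifdef CONFIG_ARM64_4_LEVELS
#define SWAPPER_DIR_SIZE	(4 * PAGE_SIZE)	/* pgd + pud + block map + fixaddr */
#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)	/* pgd + pud + block map */
#else
#define SWAPPER_DIR_SIZE	(3 * PAGE_SIZE)	/* pgd + block map + fixaddr */
#define IDMAP_DIR_SIZE		(2 * PAGE_SIZE)	/* pgd + block map */
#endif

=========================================================================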
On Thursday, April 24, 2014 1:02 AM, Steve Capper wrote: > On Fri, Apr 18, 2014 at 04:59:20PM +0900, Jungseok Lee wrote: [ ... ] > > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index > > 0fd5650..f313a7a 100644 > > --- a/arch/arm64/kernel/head.S > > +++ b/arch/arm64/kernel/head.S > > @@ -37,8 +37,8 @@ > > > > /* > > * swapper_pg_dir is the virtual address of the initial page table. > > We place > > - * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The > > idmap_pg_dir has > > - * 2 pages and is placed below swapper_pg_dir. > > + * the page tables 4 * PAGE_SIZE below KERNEL_RAM_VADDR. The > > + idmap_pg_dir has > > + * 3 pages and is placed below swapper_pg_dir. > > */ > > #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) > > > > @@ -46,8 +46,8 @@ > > #error KERNEL_RAM_VADDR must start at 0xXXX80000 #endif > > > > -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) > > -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) > > +#define SWAPPER_DIR_SIZE (4 * PAGE_SIZE) > > +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) > > > > .globl swapper_pg_dir > > .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE > > @@ -371,16 +371,29 @@ ENDPROC(__calc_phys_offset) > > > > /* > > * Macro to populate the PGD for the corresponding block entry in the > > next > > - * level (tbl) for the given virtual address. > > + * levels (tbl1 and tbl2) for the given virtual address. > > * > > - * Preserves: pgd, tbl, virt > > + * Preserves: pgd, tbl1, tbl2, virt > > tbl1 and tbl2 are *not* preserved for 4 level. tbl1 is bumped up one page to make space for the pud, > then fed into create_block_mapping later. > > > * Corrupts: tmp1, tmp2 > > */ > > - .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 > > + .macro create_pgd_entry, pgd, tbl1, tbl2, virt, tmp1, tmp2 > > lsr \tmp1, \virt, #PGDIR_SHIFT > > and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index > > - orr \tmp2, \tbl, #3 // PGD entry table type > > + orr \tmp2, \tbl1, #3 // PGD entry table type > > str \tmp2, [\pgd, \tmp1, lsl #3] > > +#ifdef CONFIG_ARM64_4_LEVELS > > + ldr \tbl2, =FIXADDR_TOP > > + cmp \tbl2, \virt > > Do we need this extra logic? See my other comment below where the fixed mapping is placed down. > > > + add \tbl2, \tbl1, #PAGE_SIZE > > + b.ne 1f > > + add \tbl2, \tbl2, #PAGE_SIZE > > +1: > > + lsr \tmp1, \virt, #PUD_SHIFT > > + and \tmp1, \tmp1, #PTRS_PER_PUD - 1 // PUD index > > + orr \tmp2, \tbl2, #3 // PUD entry table type > > + str \tmp2, [\tbl1, \tmp1, lsl #3] > > + mov \tbl1, \tbl2 > > +#endif > > It may be easier to read to have a create_pud_entry macro too? > > > .endm > > > > /* > > @@ -444,7 +457,7 @@ __create_page_tables: > > add x0, x25, #PAGE_SIZE // section table address > > ldr x3, =KERNEL_START > > add x3, x3, x28 // __pa(KERNEL_START) > > - create_pgd_entry x25, x0, x3, x5, x6 > > + create_pgd_entry x25, x0, x1, x3, x5, x6 > > ldr x6, =KERNEL_END > > mov x5, x3 // __pa(KERNEL_START) > > add x6, x6, x28 // __pa(KERNEL_END) > > @@ -455,7 +468,7 @@ __create_page_tables: > > */ > > add x0, x26, #PAGE_SIZE // section table address > > mov x5, #PAGE_OFFSET > > - create_pgd_entry x26, x0, x5, x3, x6 > > + create_pgd_entry x26, x0, x1, x5, x3, x6 > > ldr x6, =KERNEL_END > > mov x3, x24 // phys offset > > create_block_map x0, x7, x3, x5, x6 > > @@ -480,8 +493,11 @@ __create_page_tables: > > * Create the pgd entry for the fixed mappings. 
> > */ > > ldr x5, =FIXADDR_TOP // Fixed mapping virtual address > > - add x0, x26, #2 * PAGE_SIZE // section table address > > - create_pgd_entry x26, x0, x5, x6, x7 > > + add x0, x26, #PAGE_SIZE > > +#ifndef CONFIG_ARM64_4_LEVELS > > + add x0, x0, #PAGE_SIZE > > +#endif > > This is overly complicated. For <4 levels we set x0 to be: > ttbr1 + 2*PAGE_SIZE. For 4-levels, we set x0 to be ttbr1 + PAGE_SIZE, then inside the create_pgd_entry > macro, we check the VA for FIXADDR_TOP then add another PAGE_SIZE. This is presumably done so the same > PUD is used for the swapper block map and the FIXADDR map. Is it too complicated to understand the logic? 1) For <4 levels: PAGE_SIZE is added for FIXADDR map and x0 is passed to create pgd entry. 2) For =4 levels: PAGE_SIZE is added in the create_pgd_entry macro since FIXADDR map info is needed to create pud entry. However, I agree that the code should be revised if other people feel like it is a labyrinthine logic. > If you assume that the PUD always follows the PGD for 4-levels, then you can remove this #ifdef and > the conditional VA logic in set_pgd_entry. To make the logic simpler for <4 levels, you could call > create_pud_entry in the middle of create_pgd_entry, then put down the actual pgd after. create_pud_entry should distinguish block map from FIXADDR map although PUD always follows the PGD in case of 4 levels. If we would like to avoid unnecessary #ifdef, the conditional logic should be introduced in the create_pgd_entry macro. I cannot find the "best" way even though I've tried to figure it out. I would like to find out the most clear and self-descriptive expression. Could you give an idea on how to remove both conditional VA logic and #ifdef? > > + create_pgd_entry x26, x0, x1, x5, x6, x7 > > > > So before this patch we have the following created by > __create_page_tables: > > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > | FIXADDR (pmd or pte) | > +------------------------+ > | block map (pmd or pte) | > +------------------------+ > | PGDs for swapper | > +========================+ <--- TTBR1 swapper_pg_dir > | block map for idmap | > +------------------------+ > | PGDs for idmap | > +------------------------+ <--- TTBR0 idmap_pg_dir > > > After the patch, for 4 levels activated we have: > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > | FIXADDR (ptes) | > +------------------------+ > | block map (ptes) | > +------------------------+ > | PUDs for swapper | > +------------------------+ > | PGDs for swapper | > +========================+ <--- TTBR1 swapper_pg_dir > | block map for idmap | > +------------------------+ > | PUDs for idmap | > +------------------------+ > | PGDs for idmap | > +------------------------+ <--- TTBR0 idmap_pg_dir > > and without 4 levels activated we have: > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > | ZERO BYTES | > +------------------------+ > | FIXADDR (pmd or pte) | > +------------------------+ > | block map (pmd or pte) | > +------------------------+ > | PGDs for swapper | > +========================+ <--- TTBR1 swapper_pg_dir > | ZERO BYTES | > +------------------------+ > | block map for idmap | > +------------------------+ > | PGDs for idmap | > +------------------------+ <--- TTBR0 idmap_pg_dir This is definitely helpful to understand head.S. It would be good to add these figures to Documentation or comments.
Best Regards Jungseok Lee
On Sun, Apr 27, 2014 at 12:37:35PM +0900, Jungseok Lee wrote: > On Thursday, April 24, 2014 1:02 AM, Steve Capper wrote: > > On Fri, Apr 18, 2014 at 04:59:20PM +0900, Jungseok Lee wrote: [ ... ] > > > > This is overly complicated. For <4 levels we set x0 to be: > > ttbr1 + 2*PAGE_SIZE. For 4-levels, we set x0 to be ttbr1 + PAGE_SIZE, then inside the create_pgd_entry > > macro, we check the VA for FIXADDR_TOP then add another PAGE_SIZE. This is presumably done so the same > > PUD is used for the swapper block map and the FIXADDR map. > > Is it too complicated to understand the logic? > > 1) For <4 levels: > PAGE_SIZE is added for FIXADDR map and x0 is passed to create pgd entry. > 2) For =4 levels: > PAGE_SIZE is added in the create_pgd_entry macro since FIXADDR map info > is needed to create pud entry. > > However, I agree that the code should be revised if other people feel > like it is a labyrinthine logic. > > > If you assume that the PUD always follows the PGD for 4-levels, then you can remove this #ifdef and > > the conditional VA logic in set_pgd_entry. To make the logic simpler for <4 levels, you could call > > create_pud_entry in the middle of create_pgd_entry, then put down the actual pgd after. > > create_pud_entry should distinguish block map from FIXADDR map although > PUD always follows the PGD in case of 4 levels. If we would like to avoid > unnecessary #ifdef, the conditional logic should be introduced in create_ > pgd_entry macro. > > I cannot find the "best" way even though I've tried to figure it out. > I would like to find out the most clear and self-descriptive expression. > > Could you give an idea on how to remove both conditional VA logic and #ifdef? Hello Jungseok, I had the following logic in my head: It compiles and runs on the model, but I've not tried it in anger.

=========================================================================

	.macro	create_pud_entry, pgd, tbl, virt, pud, tmp1, tmp2
#ifdef CONFIG_ARM64_4_LEVELS
	add	\tbl, \tbl, #PAGE_SIZE		// bump tbl 1 page up.
						// to make room for pud
	add	\pud, \pgd, #PAGE_SIZE		// pgd points to pud which
						// follows pgd
	lsr	\tmp1, \virt, #PUD_SHIFT
	and	\tmp1, \tmp1, #PTRS_PER_PUD - 1	// PUD index
	orr	\tmp2, \tbl, #3			// PUD entry table type
	str	\tmp2, [\pud, \tmp1, lsl #3]
#else
	mov	\pud, \tbl			// pgd points to section table
						// directly for < 4 levels
#endif
	.endm

/*
 * Macro to populate the PGD for the corresponding block entry in the next
 * level (tbl) for the given virtual address.
 *
 * Preserves:	pgd, virt
 * Corrupts:	tmp1, tmp2, tmp3
 * Returns:	tbl -> page where block mappings can be placed
 *		(changed to make room for pud with 4levels, preserved otherwise)
 */
	.macro	create_pgd_entry, pgd, tbl, virt, tmp1, tmp2, tmp3
	create_pud_entry \pgd, \tbl, \virt, \tmp3, \tmp1, \tmp2
	lsr	\tmp1, \virt, #PGDIR_SHIFT
	and	\tmp1, \tmp1, #PTRS_PER_PGD - 1	// PGD index
	orr	\tmp2, \tmp3, #3		// PGD entry table type
	str	\tmp2, [\pgd, \tmp1, lsl #3]
	.endm

=========================================================================

[Note I've changed the extra argument to create_pgd_entry to be at the end to make it easier to diff callers] So essentially, we bump up tbl if we are running with 4 levels of page table. We put the pgd down after the pud, and this allows us to sneak a pud in after the pgd if we need to, otherwise it points to the target for the section mapping. Does this work for you? (I go cross-eyed with too much assembler, so could have easily missed something).
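Rendered as C, Steve's proposal reads roughly as follows (an illustrative sketch with hypothetical names, to check the logic rather than replace the asm):

=========================================================================

/* tbl tracks the next free table page; for 4 levels the pud page is
 * assumed to sit immediately after the pgd page. */
static u64 create_pud_entry(u64 pgd, u64 *tbl, u64 virt)
{
#ifdef CONFIG_ARM64_4_LEVELS
	u64 *pud = (u64 *)(pgd + PAGE_SIZE);	/* pud follows the pgd */

	*tbl += PAGE_SIZE;			/* make room for the pud page */
	pud[(virt >> PUD_SHIFT) & (PTRS_PER_PUD - 1)] = *tbl | 3;
	return (u64)pud;	/* the pgd entry will point at the pud */
#else
	return *tbl;		/* pgd points at the section table directly */
#endif
}

static void create_pgd_entry(u64 pgd, u64 *tbl, u64 virt)
{
	u64 next = create_pud_entry(pgd, tbl, virt);

	((u64 *)pgd)[(virt >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)] = next | 3;
}

=========================================================================

Either way the caller's tbl ends up pointing at the page where the block mappings land, which is why the call sites need no #ifdef.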
> > > > + create_pgd_entry x26, x0, x1, x5, x6, x7 > > > > > > > So before this patch we have the following created by > > __create_page_tables: > > > > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > > | FIXADDR (pmd or pte) | > > +------------------------+ > > | block map (pmd or pte) | > > +------------------------+ > > | PGDs for swapper | > > +========================+ <--- TTBR1 swapper_pg_dir > > | block map for idmap | > > +------------------------+ > > | PGDs for idmap | > > +------------------------+ <--- TTBR0 idmap_pg_dir > > > > > > After the patch, for 4 levels activated we have: > > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > > | FIXADDR (ptes) | > > +------------------------+ > > | block map (ptes) | > > +------------------------+ > > | PUDs for swapper | > > +------------------------+ > > | PGDs for swapper | > > +========================+ <--- TTBR1 swapper_pg_dir > > | block map for idmap | > > +------------------------+ > > | PUDs for idmap | > > +------------------------+ > > | PGDs for idmap | > > +------------------------+ <--- TTBR0 idmap_pg_dir > > > > and without 4 levels activated we have: > > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > > | ZERO BYTES | > > +------------------------+ > > | FIXADDR (pmd or pte) | > > +------------------------+ > > | block map (pmd or pte) | > > +------------------------+ > > | PGDs for swapper | > > +========================+ <--- TTBR1 swapper_pg_dir > > | ZERO BYTES | > > +------------------------+ > > | block map for idmap | > > +------------------------+ > > | PGDs for idmap | > > +------------------------+ <--- TTBR0 idmap_pg_dir > > This is definitely helpful to understand head.S. > It would be good to add these figures to Documentation or comments. Please feel free to grab it if you want... :-). Otherwise, I can put a patch in for this, as I am working on some logic to remap the physical memory as PUD blocks for 4K granule. Cheers,
On Monday, April 28, 2014 10:24 PM, Steve Capper wrote: > On Sun, Apr 27, 2014 at 12:37:35PM +0900, Jungseok Lee wrote: > > On Thursday, April 24, 2014 1:02 AM, Steve Capper wrote: > > > On Fri, Apr 18, 2014 at 04:59:20PM +0900, Jungseok Lee wrote: > > [ ... ] > > > > > > > This is overly complicated. For <4 levels we set x0 to be: > > > ttbr1 + 2*PAGE_SIZE. For 4-levels, we set x0 to be ttbr1 + > > > PAGE_SIZE, then inside the create_pgd_entry macro, we check the VA > > > for FIXADDR_TOP then add another PAGE_SIZE. This is presumably done so the same PUD is used for > the swapper block map and the FIXADDR map. > > > > Is it too complicated to understand the logic? > > > > 1) For <4 levels: > > PAGE_SIZE is added for FIXADDR map and x0 is passed to create pgd entry. > > 2) For =4 levels: > > PAGE_SIZE is added in the create_pgd_entry macro since FIXADDR map > > info is needed to create pud entry. > > > > However, I agree that the code should be revised if other people feel > > like it is a labyrinthine logic. > > > > > If you assume that the PUD always follows the PGD for 4-levels, then > > > you can remove this #ifdef and the conditional VA logic in > > > set_pgd_entry. To make the logic simpler for <4 levels, you could call create_pud_entry in the > middle of create_pgd_entry, then put down the actual pgd after. > > > > create_pud_entry should distinguish block map from FIXADDR map > > although PUD always follows the PGD in case of 4 levels. If we would > > like to avoid unnecessary #ifdef, the conditional logic should be > > introduced in create_ pgd_entry macro. > > > > I cannot find the "best" way even though I've tried to figure it out. > > I would like to find out the most clear and self-descriptive expression. > > > > Could you give an idea on how to remove both conditional VA logic and #ifdef? > > > Hello Jungseok, > I had the following logic in my head: > It compiles and runs on the model, but I've not tried it in anger. Hello Steve, It works well as both host and guest on the model. > ========================================================================= > > .macro create_pud_entry, pgd, tbl, virt, pud, tmp1, tmp2 #ifdef CONFIG_ARM64_4_LEVELS > add \tbl, \tbl, #PAGE_SIZE // bump tbl 1 page up. > // to make room for pud > add \pud, \pgd, #PAGE_SIZE // pgd points to pud which > // follows pgd > lsr \tmp1, \virt, #PUD_SHIFT > and \tmp1, \tmp1, #PTRS_PER_PUD - 1 // PUD index > orr \tmp2, \tbl, #3 // PUD entry table type > str \tmp2, [\pud, \tmp1, lsl #3] > #else > mov \pud, \tbl // pgd points to section table > // directly for < 4 levels > #endif > .endm > > /* > * Macro to populate the PGD for the corresponding block entry in the next > * level (tbl) for the given virtual address. > * > * Preserves: pgd, virt > * Corrupts: tmp1, tmp2, tmp3 > * Returns: tbl -> page where block mappings can be placed > * (changed to make room for pud with 4levels, preserved otherwise) > */ > .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2, tmp3 > create_pud_entry \pgd, \tbl, \virt, \tmp3, \tmp1, \tmp2 > lsr \tmp1, \virt, #PGDIR_SHIFT > and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index > orr \tmp2, \tmp3, #3 // PGD entry table type > str \tmp2, [\pgd, \tmp1, lsl #3] > .endm > > ========================================================================= > > [Note I've changed the extra argument to create_pgd_entry to be at the end to make it easier to diff > callers] > > So essentially, we bump up tbl if we are running with 4 levels of page table. 
We put the pgd down > after the pud, and this allows us to sneak a pud in after the pgd if we need to, otherwise it points > to the target for the section mapping. > > Does this work for you? (I go cross-eyed with too much assembler, so could have easily missed > something). It is a better description than my logic. I fully understand your intention now. It would be good to adopt your code instead of mine. How about participating as an author of this part, if you are okay with that? I will post the v4 patches as soon as possible and then figure out a way to credit you as an author of this head.S change. > > > > > > + create_pgd_entry x26, x0, x1, x5, x6, x7 > > > > > > > > > > So before this patch we have the following created by > > > __create_page_tables: > > > > > > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > > > | FIXADDR (pmd or pte) | > > > +------------------------+ > > > | block map (pmd or pte) | > > > +------------------------+ > > > | PGDs for swapper | > > > +========================+ <--- TTBR1 swapper_pg_dir > > > | block map for idmap | > > > +------------------------+ > > > | PGDs for idmap | > > > +------------------------+ <--- TTBR0 idmap_pg_dir > > > > > > > > > After the patch, for 4 levels activated we have: > > > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > > > | FIXADDR (ptes) | > > > +------------------------+ > > > | block map (ptes) | > > > +------------------------+ > > > | PUDs for swapper | > > > +------------------------+ > > > | PGDs for swapper | > > > +========================+ <--- TTBR1 swapper_pg_dir > > > | block map for idmap | > > > +------------------------+ > > > | PUDs for idmap | > > > +------------------------+ > > > | PGDs for idmap | > > > +------------------------+ <--- TTBR0 idmap_pg_dir > > > > > > and without 4 levels activated we have: > > > +========================+ <--- TEXT_OFFSET + PHYS_OFFSET > > > | ZERO BYTES | > > > +------------------------+ > > > | FIXADDR (pmd or pte) | > > > +------------------------+ > > > | block map (pmd or pte) | > > > +------------------------+ > > > | PGDs for swapper | > > > +========================+ <--- TTBR1 swapper_pg_dir > > > | ZERO BYTES | > > > +------------------------+ > > > | block map for idmap | > > > +------------------------+ > > > | PGDs for idmap | > > > +------------------------+ <--- TTBR0 idmap_pg_dir > > > > This is definitely helpful to understand head.S. > > It would be good to add these figures to Documentation or comments. > > Please feel free to grab it if you want... :-). > Otherwise, I can put a patch in for this, as I am working on some logic to remap the physical memory as > PUD blocks for 4K granule. It is better for the patch to be written by you, not me. Then I will review it. Best Regards Jungseok Lee