diff mbox series

[v5,05/21] arm64: head: simplify page table mapping macros (slightly)

Message ID 20220624150651.1358849-6-ardb@kernel.org (mailing list archive)
State New, archived
Headers show
Series arm64: refactor boot flow | expand

Commit Message

Ard Biesheuvel June 24, 2022, 3:06 p.m. UTC
Simplify the macros in head.S that are used to set up the early page
tables, by switching to immediates for the number of bits that are
interpreted as the table index at each level. This makes it much
easier to infer from the instruction stream what is going on, and
reduces the number of instructions emitted substantially.

Note that the extended ID map for cases where no additional level needs
to be configured now uses a compile time size as well, which means that
we interpret up to 10 bits as the table index at the root level (for
52-bit physical addressing), without taking into account whether or not
this is supported on the current system.  However, those bits can only
be set if we are executing the image from an address that exceeds the
48-bit PA range, and are guaranteed to be cleared otherwise, and given
that we are dealing with a mapping in the lower TTBR0 range of the
address space, the result is therefore the same as if we'd mask off only
6 bits.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/kernel/head.S | 55 ++++++++------------
 1 file changed, 22 insertions(+), 33 deletions(-)

Comments

Mark Rutland June 26, 2022, 10:07 a.m. UTC | #1
On Fri, Jun 24, 2022 at 05:06:35PM +0200, Ard Biesheuvel wrote:
> Simplify the macros in head.S that are used to set up the early page
> tables, by switching to immediates for the number of bits that are
> interpreted as the table index at each level. This makes it much
> easier to infer from the instruction stream what is going on, and
> reduces the number of instructions emitted substantially.

Nice!

> Note that the extended ID map for cases where no additional level needs
> to be configured now uses a compile time size as well, which means that
> we interpret up to 10 bits as the table index at the root level (for
> 52-bit physical addressing), without taking into account whether or not
> this is supported on the current system.  However, those bits can only
> be set if we are executing the image from an address that exceeds the
> 48-bit PA range, and are guaranteed to be cleared otherwise, and given
> that we are dealing with a mapping in the lower TTBR0 range of the
> address space, the result is therefore the same as if we'd mask off only
> 6 bits.
> 
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>

Aside from one trivial comment below, this looks good to me, so either way:

Acked-by: Mark Rutland <mark.rutland@arm.com>

> ---
>  arch/arm64/kernel/head.S | 55 ++++++++------------
>  1 file changed, 22 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 53126a35d73c..9fdde2f9cc0f 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -179,31 +179,20 @@ SYM_CODE_END(preserve_boot_args)
>   *	vstart:	virtual address of start of range
>   *	vend:	virtual address of end of range - we map [vstart, vend]
>   *	shift:	shift used to transform virtual address into index
> - *	ptrs:	number of entries in page table
> + *	order:  #imm 2log(number of entries in page table)
>   *	istart:	index in table corresponding to vstart
>   *	iend:	index in table corresponding to vend
>   *	count:	On entry: how many extra entries were required in previous level, scales
>   *			  our end index.
>   *		On exit: returns how many extra entries required for next page table level
>   *
> - * Preserves:	vstart, vend, shift, ptrs
> + * Preserves:	vstart, vend
>   * Returns:	istart, iend, count
>   */
> -	.macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
> -	lsr	\iend, \vend, \shift
> -	mov	\istart, \ptrs
> -	sub	\istart, \istart, #1
> -	and	\iend, \iend, \istart	// iend = (vend >> shift) & (ptrs - 1)
> -	mov	\istart, \ptrs
> -	mul	\istart, \istart, \count
> -	add	\iend, \iend, \istart	// iend += count * ptrs
> -					// our entries span multiple tables
> -
> -	lsr	\istart, \vstart, \shift
> -	mov	\count, \ptrs
> -	sub	\count, \count, #1
> -	and	\istart, \istart, \count
> -
> +	.macro compute_indices, vstart, vend, shift, order, istart, iend, count
> +	ubfx	\istart, \vstart, \shift, \order
> +	ubfx	\iend, \vend, \shift, \order
> +	add	\iend, \iend, \count, lsl \order
>  	sub	\count, \iend, \istart
>  	.endm
>  
> @@ -218,38 +207,39 @@ SYM_CODE_END(preserve_boot_args)
>   *	vend:	virtual address of end of range - we map [vstart, vend - 1]
>   *	flags:	flags to use to map last level entries
>   *	phys:	physical address corresponding to vstart - physical memory is contiguous
> - *	pgds:	the number of pgd entries
> + *	order:  #imm 2log(number of entries in PGD table)

For clarity, perhaps: s/2log/ilog2/ ? The latter is used much more commonly
throughot the kernel.

>   *
>   * Temporaries:	istart, iend, tmp, count, sv - these need to be different registers
>   * Preserves:	vstart, flags
>   * Corrupts:	tbl, rtbl, vend, istart, iend, tmp, count, sv
>   */
> -	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
> +	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv
>  	sub \vend, \vend, #1
>  	add \rtbl, \tbl, #PAGE_SIZE
> -	mov \sv, \rtbl
>  	mov \count, #0
> -	compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
> +
> +	compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
> +	mov \sv, \rtbl
>  	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
>  	mov \tbl, \sv
> -	mov \sv, \rtbl

FWIW, moving the temporary save of (r)tbl immediately around populate_entries
is *much* clearer!

Mark.

>  
>  #if SWAPPER_PGTABLE_LEVELS > 3
> -	compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
> +	compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
> +	mov \sv, \rtbl
>  	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
>  	mov \tbl, \sv
> -	mov \sv, \rtbl
>  #endif
>  
>  #if SWAPPER_PGTABLE_LEVELS > 2
> -	compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
> +	compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
> +	mov \sv, \rtbl
>  	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
>  	mov \tbl, \sv
>  #endif
>  
> -	compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
> -	bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
> -	populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
> +	compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
> +	bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
> +	populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
>  	.endm
>  
>  /*
> @@ -300,12 +290,12 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
>  	 * range in that case, and configure an additional translation level
>  	 * if needed.
>  	 */
> -	mov	x4, #PTRS_PER_PGD
>  	idmap_get_t0sz x5
>  	cmp	x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
>  	b.ge	1f			// .. then skip VA range extension
>  
>  #if (VA_BITS < 48)
> +#define IDMAP_PGD_ORDER	(VA_BITS - PGDIR_SHIFT)
>  #define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
>  #define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
>  
> @@ -323,16 +313,16 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
>  	mov	x2, EXTRA_PTRS
>  	create_table_entry x0, x3, EXTRA_SHIFT, x2, x5, x6
>  #else
> +#define IDMAP_PGD_ORDER	(PHYS_MASK_SHIFT - PGDIR_SHIFT)
>  	/*
>  	 * If VA_BITS == 48, we don't have to configure an additional
>  	 * translation level, but the top-level table has more entries.
>  	 */
> -	mov	x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
>  #endif
>  1:
>  	adr_l	x6, __idmap_text_end		// __pa(__idmap_text_end)
>  
> -	map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
> +	map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14
>  
>  	/*
>  	 * Map the kernel image (starting with PHYS_OFFSET).
> @@ -340,13 +330,12 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
>  	adrp	x0, init_pg_dir
>  	mov_q	x5, KIMAGE_VADDR		// compile time __va(_text)
>  	add	x5, x5, x23			// add KASLR displacement
> -	mov	x4, PTRS_PER_PGD
>  	adrp	x6, _end			// runtime __pa(_end)
>  	adrp	x3, _text			// runtime __pa(_text)
>  	sub	x6, x6, x3			// _end - _text
>  	add	x6, x6, x5			// runtime __va(_end)
>  
> -	map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
> +	map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
>  
>  	/*
>  	 * Since the page tables have been populated with non-cacheable
> -- 
> 2.35.1
>
diff mbox series

Patch

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 53126a35d73c..9fdde2f9cc0f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -179,31 +179,20 @@  SYM_CODE_END(preserve_boot_args)
  *	vstart:	virtual address of start of range
  *	vend:	virtual address of end of range - we map [vstart, vend]
  *	shift:	shift used to transform virtual address into index
- *	ptrs:	number of entries in page table
+ *	order:  #imm 2log(number of entries in page table)
  *	istart:	index in table corresponding to vstart
  *	iend:	index in table corresponding to vend
  *	count:	On entry: how many extra entries were required in previous level, scales
  *			  our end index.
  *		On exit: returns how many extra entries required for next page table level
  *
- * Preserves:	vstart, vend, shift, ptrs
+ * Preserves:	vstart, vend
  * Returns:	istart, iend, count
  */
-	.macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
-	lsr	\iend, \vend, \shift
-	mov	\istart, \ptrs
-	sub	\istart, \istart, #1
-	and	\iend, \iend, \istart	// iend = (vend >> shift) & (ptrs - 1)
-	mov	\istart, \ptrs
-	mul	\istart, \istart, \count
-	add	\iend, \iend, \istart	// iend += count * ptrs
-					// our entries span multiple tables
-
-	lsr	\istart, \vstart, \shift
-	mov	\count, \ptrs
-	sub	\count, \count, #1
-	and	\istart, \istart, \count
-
+	.macro compute_indices, vstart, vend, shift, order, istart, iend, count
+	ubfx	\istart, \vstart, \shift, \order
+	ubfx	\iend, \vend, \shift, \order
+	add	\iend, \iend, \count, lsl \order
 	sub	\count, \iend, \istart
 	.endm
 
@@ -218,38 +207,39 @@  SYM_CODE_END(preserve_boot_args)
  *	vend:	virtual address of end of range - we map [vstart, vend - 1]
  *	flags:	flags to use to map last level entries
  *	phys:	physical address corresponding to vstart - physical memory is contiguous
- *	pgds:	the number of pgd entries
+ *	order:  #imm 2log(number of entries in PGD table)
  *
  * Temporaries:	istart, iend, tmp, count, sv - these need to be different registers
  * Preserves:	vstart, flags
  * Corrupts:	tbl, rtbl, vend, istart, iend, tmp, count, sv
  */
-	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
+	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv
 	sub \vend, \vend, #1
 	add \rtbl, \tbl, #PAGE_SIZE
-	mov \sv, \rtbl
 	mov \count, #0
-	compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
+
+	compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
+	mov \sv, \rtbl
 	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
 	mov \tbl, \sv
-	mov \sv, \rtbl
 
 #if SWAPPER_PGTABLE_LEVELS > 3
-	compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
+	compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+	mov \sv, \rtbl
 	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
 	mov \tbl, \sv
-	mov \sv, \rtbl
 #endif
 
 #if SWAPPER_PGTABLE_LEVELS > 2
-	compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
+	compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+	mov \sv, \rtbl
 	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
 	mov \tbl, \sv
 #endif
 
-	compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
-	bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
-	populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
+	compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
+	bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
+	populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
 	.endm
 
 /*
@@ -300,12 +290,12 @@  SYM_FUNC_START_LOCAL(__create_page_tables)
 	 * range in that case, and configure an additional translation level
 	 * if needed.
 	 */
-	mov	x4, #PTRS_PER_PGD
 	idmap_get_t0sz x5
 	cmp	x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
 	b.ge	1f			// .. then skip VA range extension
 
 #if (VA_BITS < 48)
+#define IDMAP_PGD_ORDER	(VA_BITS - PGDIR_SHIFT)
 #define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
 #define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
 
@@ -323,16 +313,16 @@  SYM_FUNC_START_LOCAL(__create_page_tables)
 	mov	x2, EXTRA_PTRS
 	create_table_entry x0, x3, EXTRA_SHIFT, x2, x5, x6
 #else
+#define IDMAP_PGD_ORDER	(PHYS_MASK_SHIFT - PGDIR_SHIFT)
 	/*
 	 * If VA_BITS == 48, we don't have to configure an additional
 	 * translation level, but the top-level table has more entries.
 	 */
-	mov	x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
 #endif
 1:
 	adr_l	x6, __idmap_text_end		// __pa(__idmap_text_end)
 
-	map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
+	map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14
 
 	/*
 	 * Map the kernel image (starting with PHYS_OFFSET).
@@ -340,13 +330,12 @@  SYM_FUNC_START_LOCAL(__create_page_tables)
 	adrp	x0, init_pg_dir
 	mov_q	x5, KIMAGE_VADDR		// compile time __va(_text)
 	add	x5, x5, x23			// add KASLR displacement
-	mov	x4, PTRS_PER_PGD
 	adrp	x6, _end			// runtime __pa(_end)
 	adrp	x3, _text			// runtime __pa(_text)
 	sub	x6, x6, x3			// _end - _text
 	add	x6, x6, x5			// runtime __va(_end)
 
-	map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
+	map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
 
 	/*
 	 * Since the page tables have been populated with non-cacheable