diff mbox series

[RFC,v1,55/57] arm64: TRAMP_VALIAS is no longer compile-time constant

Message ID 20241014105912.3207374-55-ryan.roberts@arm.com (mailing list archive)
State New
Headers show
Series Boot-time page size selection for arm64 | expand

Commit Message

Ryan Roberts Oct. 14, 2024, 10:59 a.m. UTC
When boot-time page size is in operation, TRAMP_VALIAS is no longer a
compile-time constant, because the VA of a fixmap slot depends upon
PAGE_SIZE.

Let's handle this by instead exporting the slot index,
FIX_ENTRY_TRAMP_BEGIN, to assembly, then do the TRAMP_VALIAS calculation
per page size and use alternatives to decide which variant to activate.

Note that for the tramp_map_kernel case, we are one instruction short of
space in the vector to have NOPs for all 3 page size variants. So we do
if/else for 16K/64K and branch around it for the 4K case. This saves 2
instructions.

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---

***NOTE***
Any confused maintainers may want to read the cover note here for context:
https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@arm.com/

 arch/arm64/kernel/asm-offsets.c |  2 +-
 arch/arm64/kernel/entry.S       | 50 ++++++++++++++++++++++++++-------
 2 files changed, 41 insertions(+), 11 deletions(-)

Comments

Ard Biesheuvel Oct. 14, 2024, 11:21 a.m. UTC | #1
Hi Ryan,

On Mon, 14 Oct 2024 at 13:02, Ryan Roberts <ryan.roberts@arm.com> wrote:
>
> When boot-time page size is in operation, TRAMP_VALIAS is no longer a
> compile-time constant, because the VA of a fixmap slot depends upon
> PAGE_SIZE.
>
> Let's handle this by instead exporting the slot index,
> FIX_ENTRY_TRAMP_BEGIN, to assembly, then do the TRAMP_VALIAS calculation
> per page size and use alternatives to decide which variant to activate.
>
> Note that for the tramp_map_kernel case, we are one instruction short of
> space in the vector to have NOPs for all 3 page size variants. So we do
> if/else for 16K/64K and branch around it for the 4K case. This saves 2
> instructions.
>
> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
> ---
>
> ***NOTE***
> Any confused maintainers may want to read the cover note here for context:
> https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@arm.com/
>
>  arch/arm64/kernel/asm-offsets.c |  2 +-
>  arch/arm64/kernel/entry.S       | 50 ++++++++++++++++++++++++++-------
>  2 files changed, 41 insertions(+), 11 deletions(-)
>
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index f32b8d7f00b2a..c45fa3e281884 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -172,7 +172,7 @@ int main(void)
>    DEFINE(ARM64_FTR_SYSVAL,     offsetof(struct arm64_ftr_reg, sys_val));
>    BLANK();
>  #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> -  DEFINE(TRAMP_VALIAS,         TRAMP_VALIAS);
> +  DEFINE(FIX_ENTRY_TRAMP_BEGIN,        FIX_ENTRY_TRAMP_BEGIN);
>  #endif
>  #ifdef CONFIG_ARM_SDE_INTERFACE
>    DEFINE(SDEI_EVENT_INTREGS,   offsetof(struct sdei_registered_event, interrupted_regs));
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 7ef0e127b149f..ba47dc8672c04 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -101,11 +101,27 @@
>  .org .Lventry_start\@ + 128    // Did we overflow the ventry slot?
>         .endm
>
> +#define TRAMP_VALIAS(page_shift)       (FIXADDR_TOP - (FIX_ENTRY_TRAMP_BEGIN << (page_shift)))
> +
>         .macro  tramp_alias, dst, sym
> -       .set    .Lalias\@, TRAMP_VALIAS + \sym - .entry.tramp.text
> -       movz    \dst, :abs_g2_s:.Lalias\@
> -       movk    \dst, :abs_g1_nc:.Lalias\@
> -       movk    \dst, :abs_g0_nc:.Lalias\@
> +alternative_if ARM64_USE_PAGE_SIZE_4K
> +       .set    .Lalias4k\@, TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) + \sym - .entry.tramp.text
> +       movz    \dst, :abs_g2_s:.Lalias4k\@
> +       movk    \dst, :abs_g1_nc:.Lalias4k\@
> +       movk    \dst, :abs_g0_nc:.Lalias4k\@
> +alternative_else_nop_endif
> +alternative_if ARM64_USE_PAGE_SIZE_16K
> +       .set    .Lalias16k\@, TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) + \sym - .entry.tramp.text
> +       movz    \dst, :abs_g2_s:.Lalias16k\@
> +       movk    \dst, :abs_g1_nc:.Lalias16k\@
> +       movk    \dst, :abs_g0_nc:.Lalias16k\@
> +alternative_else_nop_endif
> +alternative_if ARM64_USE_PAGE_SIZE_64K
> +       .set    .Lalias64k\@, TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) + \sym - .entry.tramp.text
> +       movz    \dst, :abs_g2_s:.Lalias64k\@
> +       movk    \dst, :abs_g1_nc:.Lalias64k\@
> +       movk    \dst, :abs_g0_nc:.Lalias64k\@
> +alternative_else_nop_endif

Since you're changing these, might as well drop the middle movk as the
fixmap is now always in the top 2 GiB of the VA space.

However, wouldn't it be better to reuse the existing callback
alternative stuff that Marc added for KVM?

Same applies below, I reckon.

>         .endm
>
>         /*
> @@ -627,16 +643,30 @@ SYM_CODE_END(ret_to_user)
>         bic     \tmp, \tmp, #USER_ASID_FLAG
>         msr     ttbr1_el1, \tmp
>  #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
> -alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
> +alternative_if_not ARM64_WORKAROUND_QCOM_FALKOR_E1003
> +       b       .Lskip_falkor_e1003\@
> +alternative_else_nop_endif
>         /* ASID already in \tmp[63:48] */
> -       movk    \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12)
> -       movk    \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12)
> -       /* 2MB boundary containing the vectors, so we nobble the walk cache */
> -       movk    \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12)
> +alternative_if ARM64_USE_PAGE_SIZE_4K
> +       movk    \tmp, #:abs_g2_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) >> 12)
> +       movk    \tmp, #:abs_g1_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) >> 12)
> +       movk    \tmp, #:abs_g0_nc:((TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) & ~(SZ_2M - 1)) >> 12)
> +       b       .Lfinish_falkor_e1003\@
> +alternative_else_nop_endif
> +alternative_if ARM64_USE_PAGE_SIZE_16K
> +       movk    \tmp, #:abs_g2_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) >> 12)
> +       movk    \tmp, #:abs_g1_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) >> 12)
> +       movk    \tmp, #:abs_g0_nc:((TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) & ~(SZ_2M - 1)) >> 12)
> +alternative_else /* ARM64_USE_PAGE_SIZE_64K */
> +       movk    \tmp, #:abs_g2_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) >> 12)
> +       movk    \tmp, #:abs_g1_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) >> 12)
> +       movk    \tmp, #:abs_g0_nc:((TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) & ~(SZ_2M - 1)) >> 12)
> +alternative_endif
> +.Lfinish_falkor_e1003\@:
>         isb
>         tlbi    vae1, \tmp
>         dsb     nsh
> -alternative_else_nop_endif
> +.Lskip_falkor_e1003\@:
>  #endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */
>         .endm
>
> --
> 2.43.0
>
Ryan Roberts Oct. 14, 2024, 11:28 a.m. UTC | #2
On 14/10/2024 12:21, Ard Biesheuvel wrote:
> Hi Ryan,
> 
> On Mon, 14 Oct 2024 at 13:02, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>
>> When boot-time page size is in operation, TRAMP_VALIAS is no longer a
>> compile-time constant, because the VA of a fixmap slot depends upon
>> PAGE_SIZE.
>>
>> Let's handle this by instead exporting the slot index,
>> FIX_ENTRY_TRAMP_BEGIN, to assembly, then do the TRAMP_VALIAS calculation
>> per page size and use alternatives to decide which variant to activate.
>>
>> Note that for the tramp_map_kernel case, we are one instruction short of
>> space in the vector to have NOPs for all 3 page size variants. So we do
>> if/else for 16K/64K and branch around it for the 4K case. This saves 2
>> instructions.
>>
>> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
>> ---
>>
>> ***NOTE***
>> Any confused maintainers may want to read the cover note here for context:
>> https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@arm.com/
>>
>>  arch/arm64/kernel/asm-offsets.c |  2 +-
>>  arch/arm64/kernel/entry.S       | 50 ++++++++++++++++++++++++++-------
>>  2 files changed, 41 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
>> index f32b8d7f00b2a..c45fa3e281884 100644
>> --- a/arch/arm64/kernel/asm-offsets.c
>> +++ b/arch/arm64/kernel/asm-offsets.c
>> @@ -172,7 +172,7 @@ int main(void)
>>    DEFINE(ARM64_FTR_SYSVAL,     offsetof(struct arm64_ftr_reg, sys_val));
>>    BLANK();
>>  #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
>> -  DEFINE(TRAMP_VALIAS,         TRAMP_VALIAS);
>> +  DEFINE(FIX_ENTRY_TRAMP_BEGIN,        FIX_ENTRY_TRAMP_BEGIN);
>>  #endif
>>  #ifdef CONFIG_ARM_SDE_INTERFACE
>>    DEFINE(SDEI_EVENT_INTREGS,   offsetof(struct sdei_registered_event, interrupted_regs));
>> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
>> index 7ef0e127b149f..ba47dc8672c04 100644
>> --- a/arch/arm64/kernel/entry.S
>> +++ b/arch/arm64/kernel/entry.S
>> @@ -101,11 +101,27 @@
>>  .org .Lventry_start\@ + 128    // Did we overflow the ventry slot?
>>         .endm
>>
>> +#define TRAMP_VALIAS(page_shift)       (FIXADDR_TOP - (FIX_ENTRY_TRAMP_BEGIN << (page_shift)))
>> +
>>         .macro  tramp_alias, dst, sym
>> -       .set    .Lalias\@, TRAMP_VALIAS + \sym - .entry.tramp.text
>> -       movz    \dst, :abs_g2_s:.Lalias\@
>> -       movk    \dst, :abs_g1_nc:.Lalias\@
>> -       movk    \dst, :abs_g0_nc:.Lalias\@
>> +alternative_if ARM64_USE_PAGE_SIZE_4K
>> +       .set    .Lalias4k\@, TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) + \sym - .entry.tramp.text
>> +       movz    \dst, :abs_g2_s:.Lalias4k\@
>> +       movk    \dst, :abs_g1_nc:.Lalias4k\@
>> +       movk    \dst, :abs_g0_nc:.Lalias4k\@
>> +alternative_else_nop_endif
>> +alternative_if ARM64_USE_PAGE_SIZE_16K
>> +       .set    .Lalias16k\@, TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) + \sym - .entry.tramp.text
>> +       movz    \dst, :abs_g2_s:.Lalias16k\@
>> +       movk    \dst, :abs_g1_nc:.Lalias16k\@
>> +       movk    \dst, :abs_g0_nc:.Lalias16k\@
>> +alternative_else_nop_endif
>> +alternative_if ARM64_USE_PAGE_SIZE_64K
>> +       .set    .Lalias64k\@, TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) + \sym - .entry.tramp.text
>> +       movz    \dst, :abs_g2_s:.Lalias64k\@
>> +       movk    \dst, :abs_g1_nc:.Lalias64k\@
>> +       movk    \dst, :abs_g0_nc:.Lalias64k\@
>> +alternative_else_nop_endif
> 
> Since you're changing these, might as well drop the middle movk as the
> fixmap is now always in the top 2 GiB of the VA space.
> 
> However, wouldn't it be better to reuse the existing callback
> alternative stuff that Marc added for KVM?

Yes, I agree. Mark suggested the same thing when we were talking the other day
too. I'll definitely use the callbacks for the next version, but I didn't want
to hold up the RFC any further - I'd already spent way too much time polishing.

> 
> Same applies below, I reckon.
> 
>>         .endm
>>
>>         /*
>> @@ -627,16 +643,30 @@ SYM_CODE_END(ret_to_user)
>>         bic     \tmp, \tmp, #USER_ASID_FLAG
>>         msr     ttbr1_el1, \tmp
>>  #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
>> -alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
>> +alternative_if_not ARM64_WORKAROUND_QCOM_FALKOR_E1003
>> +       b       .Lskip_falkor_e1003\@
>> +alternative_else_nop_endif
>>         /* ASID already in \tmp[63:48] */
>> -       movk    \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12)
>> -       movk    \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12)
>> -       /* 2MB boundary containing the vectors, so we nobble the walk cache */
>> -       movk    \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12)
>> +alternative_if ARM64_USE_PAGE_SIZE_4K
>> +       movk    \tmp, #:abs_g2_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) >> 12)
>> +       movk    \tmp, #:abs_g1_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) >> 12)
>> +       movk    \tmp, #:abs_g0_nc:((TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) & ~(SZ_2M - 1)) >> 12)
>> +       b       .Lfinish_falkor_e1003\@
>> +alternative_else_nop_endif
>> +alternative_if ARM64_USE_PAGE_SIZE_16K
>> +       movk    \tmp, #:abs_g2_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) >> 12)
>> +       movk    \tmp, #:abs_g1_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) >> 12)
>> +       movk    \tmp, #:abs_g0_nc:((TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) & ~(SZ_2M - 1)) >> 12)
>> +alternative_else /* ARM64_USE_PAGE_SIZE_64K */
>> +       movk    \tmp, #:abs_g2_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) >> 12)
>> +       movk    \tmp, #:abs_g1_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) >> 12)
>> +       movk    \tmp, #:abs_g0_nc:((TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) & ~(SZ_2M - 1)) >> 12)
>> +alternative_endif
>> +.Lfinish_falkor_e1003\@:
>>         isb
>>         tlbi    vae1, \tmp
>>         dsb     nsh
>> -alternative_else_nop_endif
>> +.Lskip_falkor_e1003\@:
>>  #endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */
>>         .endm
>>
>> --
>> 2.43.0
>>
diff mbox series

Patch

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index f32b8d7f00b2a..c45fa3e281884 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -172,7 +172,7 @@  int main(void)
   DEFINE(ARM64_FTR_SYSVAL,	offsetof(struct arm64_ftr_reg, sys_val));
   BLANK();
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-  DEFINE(TRAMP_VALIAS,		TRAMP_VALIAS);
+  DEFINE(FIX_ENTRY_TRAMP_BEGIN,	FIX_ENTRY_TRAMP_BEGIN);
 #endif
 #ifdef CONFIG_ARM_SDE_INTERFACE
   DEFINE(SDEI_EVENT_INTREGS,	offsetof(struct sdei_registered_event, interrupted_regs));
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 7ef0e127b149f..ba47dc8672c04 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -101,11 +101,27 @@ 
 .org .Lventry_start\@ + 128	// Did we overflow the ventry slot?
 	.endm
 
+#define TRAMP_VALIAS(page_shift)	(FIXADDR_TOP - (FIX_ENTRY_TRAMP_BEGIN << (page_shift)))
+
 	.macro	tramp_alias, dst, sym
-	.set	.Lalias\@, TRAMP_VALIAS + \sym - .entry.tramp.text
-	movz	\dst, :abs_g2_s:.Lalias\@
-	movk	\dst, :abs_g1_nc:.Lalias\@
-	movk	\dst, :abs_g0_nc:.Lalias\@
+alternative_if ARM64_USE_PAGE_SIZE_4K
+	.set	.Lalias4k\@, TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) + \sym - .entry.tramp.text
+	movz	\dst, :abs_g2_s:.Lalias4k\@
+	movk	\dst, :abs_g1_nc:.Lalias4k\@
+	movk	\dst, :abs_g0_nc:.Lalias4k\@
+alternative_else_nop_endif
+alternative_if ARM64_USE_PAGE_SIZE_16K
+	.set	.Lalias16k\@, TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) + \sym - .entry.tramp.text
+	movz	\dst, :abs_g2_s:.Lalias16k\@
+	movk	\dst, :abs_g1_nc:.Lalias16k\@
+	movk	\dst, :abs_g0_nc:.Lalias16k\@
+alternative_else_nop_endif
+alternative_if ARM64_USE_PAGE_SIZE_64K
+	.set	.Lalias64k\@, TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) + \sym - .entry.tramp.text
+	movz	\dst, :abs_g2_s:.Lalias64k\@
+	movk	\dst, :abs_g1_nc:.Lalias64k\@
+	movk	\dst, :abs_g0_nc:.Lalias64k\@
+alternative_else_nop_endif
 	.endm
 
 	/*
@@ -627,16 +643,30 @@  SYM_CODE_END(ret_to_user)
 	bic	\tmp, \tmp, #USER_ASID_FLAG
 	msr	ttbr1_el1, \tmp
 #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
-alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
+alternative_if_not ARM64_WORKAROUND_QCOM_FALKOR_E1003
+	b	.Lskip_falkor_e1003\@
+alternative_else_nop_endif
 	/* ASID already in \tmp[63:48] */
-	movk	\tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12)
-	movk	\tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12)
-	/* 2MB boundary containing the vectors, so we nobble the walk cache */
-	movk	\tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12)
+alternative_if ARM64_USE_PAGE_SIZE_4K
+	movk	\tmp, #:abs_g2_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) >> 12)
+	movk	\tmp, #:abs_g1_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) >> 12)
+	movk	\tmp, #:abs_g0_nc:((TRAMP_VALIAS(ARM64_PAGE_SHIFT_4K) & ~(SZ_2M - 1)) >> 12)
+	b	.Lfinish_falkor_e1003\@
+alternative_else_nop_endif
+alternative_if ARM64_USE_PAGE_SIZE_16K
+	movk	\tmp, #:abs_g2_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) >> 12)
+	movk	\tmp, #:abs_g1_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) >> 12)
+	movk	\tmp, #:abs_g0_nc:((TRAMP_VALIAS(ARM64_PAGE_SHIFT_16K) & ~(SZ_2M - 1)) >> 12)
+alternative_else /* ARM64_USE_PAGE_SIZE_64K */
+	movk	\tmp, #:abs_g2_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) >> 12)
+	movk	\tmp, #:abs_g1_nc:(TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) >> 12)
+	movk	\tmp, #:abs_g0_nc:((TRAMP_VALIAS(ARM64_PAGE_SHIFT_64K) & ~(SZ_2M - 1)) >> 12)
+alternative_endif
+.Lfinish_falkor_e1003\@:
 	isb
 	tlbi	vae1, \tmp
 	dsb	nsh
-alternative_else_nop_endif
+.Lskip_falkor_e1003\@:
 #endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */
 	.endm