
[2/3] RISC-V: Inline the assembly register save/restore macros

Message ID 20200227213450.87194-3-palmer@dabbelt.com (mailing list archive)
State New, archived
Series [1/3] RISC-V: Stop relying on GCC's register allocator's heuristics

Commit Message

Palmer Dabbelt Feb. 27, 2020, 9:34 p.m. UTC
From: Palmer Dabbelt <palmerdabbelt@google.com>

These are only used once, and when reading the code I've always found them to
be more of a headache than a benefit.  While they were never worth removing
before, LLVM's integrated assembler doesn't support LOCAL, so rather than trying
to figure out how to refactor the macros it seems saner to just inline them.

Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/kernel/entry.S | 143 ++++++++++++++++----------------------
 1 file changed, 61 insertions(+), 82 deletions(-)
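
For context, the removed macros depend on GNU as's .altmacro mode: its
LOCAL directive rewrites the listed label names into unique symbols on
each expansion, so a macro containing labels can be expanded more than
once without symbol clashes.  LLVM's integrated assembler doesn't
implement LOCAL.  A minimal sketch of the pattern (illustrative only,
not the actual entry.S code):

	.altmacro
	.macro CLEAR_WORD ptr
	LOCAL skip			/* 'skip' becomes a unique symbol per expansion */
	beqz	\ptr, skip		/* don't store through a NULL pointer */
	sw	zero, 0(\ptr)
skip:
	.endm

	CLEAR_WORD a0			/* first expansion gets one generated label */
	CLEAR_WORD a1			/* second expansion gets a different one */

Since SAVE_ALL and RESTORE_ALL are each expanded exactly once, the
unique-label machinery buys nothing here, and inlining their bodies
keeps entry.S assembling with both toolchains without having to rework
the macros.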

Comments

Nick Desaulniers Feb. 27, 2020, 11:45 p.m. UTC | #1
On Thu, Feb 27, 2020 at 1:35 PM Palmer Dabbelt <palmer@dabbelt.com> wrote:
>
> From: Palmer Dabbelt <palmerdabbelt@google.com>
>
> These are only used once, and when reading the code I've always found them to

Looks like there are SAVE_ALL/RESTORE_ALL macros in
arch/riscv/kernel/mcount-dyn.S as well that could probably be
refactored to save/restore just the GPRs (the two similarly named
macros share quite a bit of code between the two source files, but
aren't 100% the same); a rough sketch follows below.  This patch is
fine as-is, too.
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
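
A rough sketch of what that sharing could look like, assuming
hypothetical SAVE_GPRS/RESTORE_GPRS macros in a shared header (neither
the macro names nor the header exist in the kernel today).  The pt_regs
offsets are the ones entry.S already uses; each caller would keep its
own stack-frame setup and CSR handling, and the stack pointer itself is
deliberately left to the caller since the two files handle it
differently:

	/* Hypothetical shared header, e.g. arch/riscv/include/asm/gpr-macros.h */
	.macro SAVE_GPRS
	REG_S x1,  PT_RA(sp)
	REG_S x3,  PT_GP(sp)
	REG_S x4,  PT_TP(sp)
	REG_S x5,  PT_T0(sp)
	/* ... x6 through x30 stored the same way, one REG_S per register ... */
	REG_S x31, PT_T6(sp)
	.endm

	.macro RESTORE_GPRS
	REG_L x1,  PT_RA(sp)
	REG_L x3,  PT_GP(sp)
	REG_L x4,  PT_TP(sp)
	REG_L x5,  PT_T0(sp)
	/* ... x6 through x30 restored the same way, one REG_L per register ... */
	REG_L x31, PT_T6(sp)
	.endm

	/* A caller wraps the shared body with its own frame handling: */
	addi sp, sp, -(PT_SIZE_ON_STACK)
	SAVE_GPRS
	/* ... do work ... */
	RESTORE_GPRS
	addi sp, sp, PT_SIZE_ON_STACK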

> be more of a headache than a benefit.  While they were never worth removing
> before, LLVM's integrated assembler doesn't support LOCAL, so rather than trying
> to figure out how to refactor the macros it seems saner to just inline them.
>
> Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
> ---
>  arch/riscv/kernel/entry.S | 143 ++++++++++++++++----------------------
>  1 file changed, 61 insertions(+), 82 deletions(-)
>
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index bad4d85b5e91..f2e8e7c8089d 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -13,17 +13,11 @@
>  #include <asm/thread_info.h>
>  #include <asm/asm-offsets.h>
>
> -       .text
> -       .altmacro
> -
> -/*
> - * Prepares to enter a system call or exception by saving all registers to the
> - * stack.
> - */
> -       .macro SAVE_ALL
> -       LOCAL _restore_kernel_tpsp
> -       LOCAL _save_context
> +#if !IS_ENABLED(CONFIG_PREEMPTION)
> +.set resume_kernel, restore_all
> +#endif
>
> +ENTRY(handle_exception)
>         /*
>          * If coming from userspace, preserve the user thread pointer and load
>          * the kernel thread pointer.  If we came from the kernel, the scratch
> @@ -90,77 +84,6 @@ _save_context:
>         REG_S s3, PT_BADADDR(sp)
>         REG_S s4, PT_CAUSE(sp)
>         REG_S s5, PT_TP(sp)
> -       .endm
> -
> -/*
> - * Prepares to return from a system call or exception by restoring all
> - * registers from the stack.
> - */
> -       .macro RESTORE_ALL
> -       REG_L a0, PT_STATUS(sp)
> -       /*
> -        * The current load reservation is effectively part of the processor's
> -        * state, in the sense that load reservations cannot be shared between
> -        * different hart contexts.  We can't actually save and restore a load
> -        * reservation, so instead here we clear any existing reservation --
> -        * it's always legal for implementations to clear load reservations at
> -        * any point (as long as the forward progress guarantee is kept, but
> -        * we'll ignore that here).
> -        *
> -        * Dangling load reservations can be the result of taking a trap in the
> -        * middle of an LR/SC sequence, but can also be the result of a taken
> -        * forward branch around an SC -- which is how we implement CAS.  As a
> -        * result we need to clear reservations between the last CAS and the
> -        * jump back to the new context.  While it is unlikely the store
> -        * completes, implementations are allowed to expand reservations to be
> -        * arbitrarily large.
> -        */
> -       REG_L  a2, PT_EPC(sp)
> -       REG_SC x0, a2, PT_EPC(sp)
> -
> -       csrw CSR_STATUS, a0
> -       csrw CSR_EPC, a2
> -
> -       REG_L x1,  PT_RA(sp)
> -       REG_L x3,  PT_GP(sp)
> -       REG_L x4,  PT_TP(sp)
> -       REG_L x5,  PT_T0(sp)
> -       REG_L x6,  PT_T1(sp)
> -       REG_L x7,  PT_T2(sp)
> -       REG_L x8,  PT_S0(sp)
> -       REG_L x9,  PT_S1(sp)
> -       REG_L x10, PT_A0(sp)
> -       REG_L x11, PT_A1(sp)
> -       REG_L x12, PT_A2(sp)
> -       REG_L x13, PT_A3(sp)
> -       REG_L x14, PT_A4(sp)
> -       REG_L x15, PT_A5(sp)
> -       REG_L x16, PT_A6(sp)
> -       REG_L x17, PT_A7(sp)
> -       REG_L x18, PT_S2(sp)
> -       REG_L x19, PT_S3(sp)
> -       REG_L x20, PT_S4(sp)
> -       REG_L x21, PT_S5(sp)
> -       REG_L x22, PT_S6(sp)
> -       REG_L x23, PT_S7(sp)
> -       REG_L x24, PT_S8(sp)
> -       REG_L x25, PT_S9(sp)
> -       REG_L x26, PT_S10(sp)
> -       REG_L x27, PT_S11(sp)
> -       REG_L x28, PT_T3(sp)
> -       REG_L x29, PT_T4(sp)
> -       REG_L x30, PT_T5(sp)
> -       REG_L x31, PT_T6(sp)
> -
> -       REG_L x2,  PT_SP(sp)
> -       .endm
> -
> -#if !IS_ENABLED(CONFIG_PREEMPTION)
> -.set resume_kernel, restore_all
> -#endif
> -
> -ENTRY(handle_exception)
> -       SAVE_ALL
>
>         /*
>          * Set the scratch register to 0, so that if a recursive exception
> @@ -298,7 +221,63 @@ resume_userspace:
>         csrw CSR_SCRATCH, tp
>
>  restore_all:
> -       RESTORE_ALL
> +       REG_L a0, PT_STATUS(sp)
> +       /*
> +        * The current load reservation is effectively part of the processor's
> +        * state, in the sense that load reservations cannot be shared between
> +        * different hart contexts.  We can't actually save and restore a load
> +        * reservation, so instead here we clear any existing reservation --
> +        * it's always legal for implementations to clear load reservations at
> +        * any point (as long as the forward progress guarantee is kept, but
> +        * we'll ignore that here).
> +        *
> +        * Dangling load reservations can be the result of taking a trap in the
> +        * middle of an LR/SC sequence, but can also be the result of a taken
> +        * forward branch around an SC -- which is how we implement CAS.  As a
> +        * result we need to clear reservations between the last CAS and the
> +        * jump back to the new context.  While it is unlikely the store
> +        * completes, implementations are allowed to expand reservations to be
> +        * arbitrarily large.
> +        */
> +       REG_L  a2, PT_EPC(sp)
> +       REG_SC x0, a2, PT_EPC(sp)
> +
> +       csrw CSR_STATUS, a0
> +       csrw CSR_EPC, a2
> +
> +       REG_L x1,  PT_RA(sp)
> +       REG_L x3,  PT_GP(sp)
> +       REG_L x4,  PT_TP(sp)
> +       REG_L x5,  PT_T0(sp)
> +       REG_L x6,  PT_T1(sp)
> +       REG_L x7,  PT_T2(sp)
> +       REG_L x8,  PT_S0(sp)
> +       REG_L x9,  PT_S1(sp)
> +       REG_L x10, PT_A0(sp)
> +       REG_L x11, PT_A1(sp)
> +       REG_L x12, PT_A2(sp)
> +       REG_L x13, PT_A3(sp)
> +       REG_L x14, PT_A4(sp)
> +       REG_L x15, PT_A5(sp)
> +       REG_L x16, PT_A6(sp)
> +       REG_L x17, PT_A7(sp)
> +       REG_L x18, PT_S2(sp)
> +       REG_L x19, PT_S3(sp)
> +       REG_L x20, PT_S4(sp)
> +       REG_L x21, PT_S5(sp)
> +       REG_L x22, PT_S6(sp)
> +       REG_L x23, PT_S7(sp)
> +       REG_L x24, PT_S8(sp)
> +       REG_L x25, PT_S9(sp)
> +       REG_L x26, PT_S10(sp)
> +       REG_L x27, PT_S11(sp)
> +       REG_L x28, PT_T3(sp)
> +       REG_L x29, PT_T4(sp)
> +       REG_L x30, PT_T5(sp)
> +       REG_L x31, PT_T6(sp)
> +
> +       REG_L x2,  PT_SP(sp)
> +
>  #ifdef CONFIG_RISCV_M_MODE
>         mret
>  #else
> --
> 2.25.1.481.gfbce0eb801-goog
>
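
As background for the load-reservation comment the patch carries over,
this is roughly the LR/SC compare-and-swap shape it refers to; a
minimal sketch for illustration, not the kernel's actual cmpxchg
implementation:

	/* a0 = address, a1 = expected value, a2 = new value */
1:	lr.w	t0, (a0)		/* load-reserved: the hart now holds a reservation */
	bne	t0, a1, 2f		/* mismatch: branch *around* the sc.w */
	sc.w	t1, a2, (a0)		/* store-conditional: only succeeds while the
					   reservation is held, and clears it */
	bnez	t1, 1b			/* retry if the reservation was lost */
2:
	/*
	 * On the taken-branch path the sc.w never executes, so the hart can
	 * still hold a reservation when it heads back to a new context --
	 * that dangling reservation is what the REG_SC in restore_all clears.
	 */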

Patch

diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index bad4d85b5e91..f2e8e7c8089d 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -13,17 +13,11 @@ 
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 
-	.text
-	.altmacro
-
-/*
- * Prepares to enter a system call or exception by saving all registers to the
- * stack.
- */
-	.macro SAVE_ALL
-	LOCAL _restore_kernel_tpsp
-	LOCAL _save_context
+#if !IS_ENABLED(CONFIG_PREEMPTION)
+.set resume_kernel, restore_all
+#endif
 
+ENTRY(handle_exception)
 	/*
 	 * If coming from userspace, preserve the user thread pointer and load
 	 * the kernel thread pointer.  If we came from the kernel, the scratch
@@ -90,77 +84,6 @@  _save_context:
 	REG_S s3, PT_BADADDR(sp)
 	REG_S s4, PT_CAUSE(sp)
 	REG_S s5, PT_TP(sp)
-	.endm
-
-/*
- * Prepares to return from a system call or exception by restoring all
- * registers from the stack.
- */
-	.macro RESTORE_ALL
-	REG_L a0, PT_STATUS(sp)
-	/*
-	 * The current load reservation is effectively part of the processor's
-	 * state, in the sense that load reservations cannot be shared between
-	 * different hart contexts.  We can't actually save and restore a load
-	 * reservation, so instead here we clear any existing reservation --
-	 * it's always legal for implementations to clear load reservations at
-	 * any point (as long as the forward progress guarantee is kept, but
-	 * we'll ignore that here).
-	 *
-	 * Dangling load reservations can be the result of taking a trap in the
-	 * middle of an LR/SC sequence, but can also be the result of a taken
-	 * forward branch around an SC -- which is how we implement CAS.  As a
-	 * result we need to clear reservations between the last CAS and the
-	 * jump back to the new context.  While it is unlikely the store
-	 * completes, implementations are allowed to expand reservations to be
-	 * arbitrarily large.
-	 */
-	REG_L  a2, PT_EPC(sp)
-	REG_SC x0, a2, PT_EPC(sp)
-
-	csrw CSR_STATUS, a0
-	csrw CSR_EPC, a2
-
-	REG_L x1,  PT_RA(sp)
-	REG_L x3,  PT_GP(sp)
-	REG_L x4,  PT_TP(sp)
-	REG_L x5,  PT_T0(sp)
-	REG_L x6,  PT_T1(sp)
-	REG_L x7,  PT_T2(sp)
-	REG_L x8,  PT_S0(sp)
-	REG_L x9,  PT_S1(sp)
-	REG_L x10, PT_A0(sp)
-	REG_L x11, PT_A1(sp)
-	REG_L x12, PT_A2(sp)
-	REG_L x13, PT_A3(sp)
-	REG_L x14, PT_A4(sp)
-	REG_L x15, PT_A5(sp)
-	REG_L x16, PT_A6(sp)
-	REG_L x17, PT_A7(sp)
-	REG_L x18, PT_S2(sp)
-	REG_L x19, PT_S3(sp)
-	REG_L x20, PT_S4(sp)
-	REG_L x21, PT_S5(sp)
-	REG_L x22, PT_S6(sp)
-	REG_L x23, PT_S7(sp)
-	REG_L x24, PT_S8(sp)
-	REG_L x25, PT_S9(sp)
-	REG_L x26, PT_S10(sp)
-	REG_L x27, PT_S11(sp)
-	REG_L x28, PT_T3(sp)
-	REG_L x29, PT_T4(sp)
-	REG_L x30, PT_T5(sp)
-	REG_L x31, PT_T6(sp)
-
-	REG_L x2,  PT_SP(sp)
-	.endm
-
-#if !IS_ENABLED(CONFIG_PREEMPTION)
-.set resume_kernel, restore_all
-#endif
-
-ENTRY(handle_exception)
-	SAVE_ALL
 
 	/*
 	 * Set the scratch register to 0, so that if a recursive exception
@@ -298,7 +221,63 @@  resume_userspace:
 	csrw CSR_SCRATCH, tp
 
 restore_all:
-	RESTORE_ALL
+	REG_L a0, PT_STATUS(sp)
+	/*
+	 * The current load reservation is effectively part of the processor's
+	 * state, in the sense that load reservations cannot be shared between
+	 * different hart contexts.  We can't actually save and restore a load
+	 * reservation, so instead here we clear any existing reservation --
+	 * it's always legal for implementations to clear load reservations at
+	 * any point (as long as the forward progress guarantee is kept, but
+	 * we'll ignore that here).
+	 *
+	 * Dangling load reservations can be the result of taking a trap in the
+	 * middle of an LR/SC sequence, but can also be the result of a taken
+	 * forward branch around an SC -- which is how we implement CAS.  As a
+	 * result we need to clear reservations between the last CAS and the
+	 * jump back to the new context.  While it is unlikely the store
+	 * completes, implementations are allowed to expand reservations to be
+	 * arbitrarily large.
+	 */
+	REG_L  a2, PT_EPC(sp)
+	REG_SC x0, a2, PT_EPC(sp)
+
+	csrw CSR_STATUS, a0
+	csrw CSR_EPC, a2
+
+	REG_L x1,  PT_RA(sp)
+	REG_L x3,  PT_GP(sp)
+	REG_L x4,  PT_TP(sp)
+	REG_L x5,  PT_T0(sp)
+	REG_L x6,  PT_T1(sp)
+	REG_L x7,  PT_T2(sp)
+	REG_L x8,  PT_S0(sp)
+	REG_L x9,  PT_S1(sp)
+	REG_L x10, PT_A0(sp)
+	REG_L x11, PT_A1(sp)
+	REG_L x12, PT_A2(sp)
+	REG_L x13, PT_A3(sp)
+	REG_L x14, PT_A4(sp)
+	REG_L x15, PT_A5(sp)
+	REG_L x16, PT_A6(sp)
+	REG_L x17, PT_A7(sp)
+	REG_L x18, PT_S2(sp)
+	REG_L x19, PT_S3(sp)
+	REG_L x20, PT_S4(sp)
+	REG_L x21, PT_S5(sp)
+	REG_L x22, PT_S6(sp)
+	REG_L x23, PT_S7(sp)
+	REG_L x24, PT_S8(sp)
+	REG_L x25, PT_S9(sp)
+	REG_L x26, PT_S10(sp)
+	REG_L x27, PT_S11(sp)
+	REG_L x28, PT_T3(sp)
+	REG_L x29, PT_T4(sp)
+	REG_L x30, PT_T5(sp)
+	REG_L x31, PT_T6(sp)
+
+	REG_L x2,  PT_SP(sp)
+
 #ifdef CONFIG_RISCV_M_MODE
 	mret
 #else