diff mbox series

[5/7] RISC-V: fix auipc-jalr addresses in patched alternatives

Message ID 20221110164924.529386-6-heiko@sntech.de (mailing list archive)
State Superseded
Headers show
Series Zbb string optimizations and call support in alternatives | expand

Checks

Context Check Description
conchuod/tree_selection fail Guessing tree name failed

Commit Message

Heiko Stübner Nov. 10, 2022, 4:49 p.m. UTC
From: Heiko Stuebner <heiko.stuebner@vrull.eu>

Alternatives live in a different section, so addresses used by call
functions will point to wrong locations after the patch got applied.

Similar to arm64, adjust the location to consider that offset.

Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
---
 arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++-
 1 file changed, 77 insertions(+), 2 deletions(-)

Comments

Conor Dooley Nov. 13, 2022, 8:31 p.m. UTC | #1
On Thu, Nov 10, 2022 at 05:49:22PM +0100, Heiko Stuebner wrote:
> From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> 
> Alternatives live in a different section, so addresses used by call
> functions will point to wrong locations after the patch got applied.
> 
> Similar to arm64, adjust the location to consider that offset.
> 
> Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>

What a lovely function you've got here, seems to make sense though..
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>

> ---
>  arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++-
>  1 file changed, 77 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 694267d1fe81..026512ca9c4c 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
>  	return cpu_req_feature;
>  }
>  
> +#include <asm/parse_asm.h>
> +
> +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR)
> +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC)
> +
> +static inline bool is_auipc_jalr_pair(long insn1, long insn2)
> +{
> +	return is_auipc_insn(insn1) && is_jalr_insn(insn2);
> +}
> +
> +#define JALR_SIGN_MASK		BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF)
> +#define JALR_OFFSET_MASK	I_IMM_11_0_MASK
> +#define AUIPC_OFFSET_MASK	U_IMM_31_12_MASK
> +#define AUIPC_PAD		(0x00001000)
> +#define JALR_SHIFT		I_IMM_11_0_OPOFF
> +
> +#define to_jalr_imm(offset)						\
> +	((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF)
> +
> +#define to_auipc_imm(offset)						\
> +	((offset & JALR_SIGN_MASK) ?					\
> +	((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) :	\
> +	(offset & AUIPC_OFFSET_MASK))
> +
> +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
> +					     unsigned int len, int patch_offset)
> +{
> +	int num_instr = len / sizeof(u32);
> +	unsigned int call[2];
> +	int i;
> +	int imm1;
> +	u32 rd1;
> +
> +	for (i = 0; i < num_instr; i++) {
> +		/* is there a further instruction? */
> +		if (i + 1 >= num_instr)
> +			continue;
> +
> +		if (!is_auipc_jalr_pair(*(alt_ptr + i), *(alt_ptr + i + 1)))
> +			continue;
> +
> +		/* call will use ra register */
> +		rd1 = EXTRACT_RD_REG(*(alt_ptr + i));
> +		if (rd1 != 1)
> +			continue;
> +
> +		/* get and adjust new target address */
> +		imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i));
> +		imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1));
> +		imm1 -= patch_offset;
> +
> +		/* pick the original auipc + jalr */
> +		call[0] = *(alt_ptr + i);
> +		call[1] = *(alt_ptr + i + 1);
> +
> +		/* drop the old IMMs */
> +		call[0] &= ~(U_IMM_31_12_MASK);
> +		call[1] &= ~(I_IMM_11_0_MASK << I_IMM_11_0_OPOFF);
> +
> +		/* add the adapted IMMs */
> +		call[0] |= to_auipc_imm(imm1);
> +		call[1] |= to_jalr_imm(imm1);
> +
> +		/* patch the call place again */
> +		patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8);
> +	}
> +}
> +
>  void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
>  						  struct alt_entry *end,
>  						  unsigned int stage)
> @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
>  		}
>  
>  		tmp = (1U << alt->errata_id);
> -		if (cpu_req_feature & tmp)
> -			patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
> +		if (cpu_req_feature & tmp) {
> +			/* do the basic patching */
> +			patch_text_nosync(alt->old_ptr, alt->alt_ptr,
> +					  alt->alt_len);
> +
> +			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> +							 alt->alt_len,
> +							 alt->old_ptr - alt->alt_ptr);
> +		}
>  	}
>  }
>  #endif
> -- 
> 2.35.1
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Emil Renner Berthing Nov. 14, 2022, 10:57 a.m. UTC | #2
On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote:
>
> From: Heiko Stuebner <heiko.stuebner@vrull.eu>
>
> Alternatives live in a different section, so addresses used by call
> functions will point to wrong locations after the patch got applied.
>
> Similar to arm64, adjust the location to consider that offset.
>
> Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> ---
>  arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++-
>  1 file changed, 77 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 694267d1fe81..026512ca9c4c 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
>         return cpu_req_feature;
>  }
>
> +#include <asm/parse_asm.h>
> +
> +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR)
> +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC)
> +
> +static inline bool is_auipc_jalr_pair(long insn1, long insn2)
> +{
> +       return is_auipc_insn(insn1) && is_jalr_insn(insn2);
> +}
> +
> +#define JALR_SIGN_MASK         BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF)
> +#define JALR_OFFSET_MASK       I_IMM_11_0_MASK
> +#define AUIPC_OFFSET_MASK      U_IMM_31_12_MASK
> +#define AUIPC_PAD              (0x00001000)
> +#define JALR_SHIFT             I_IMM_11_0_OPOFF
> +
> +#define to_jalr_imm(offset)                                            \
> +       ((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF)
> +
> +#define to_auipc_imm(offset)                                           \
> +       ((offset & JALR_SIGN_MASK) ?                                    \
> +       ((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) :    \
> +       (offset & AUIPC_OFFSET_MASK))
> +
> +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
> +                                            unsigned int len, int patch_offset)
> +{
> +       int num_instr = len / sizeof(u32);
> +       unsigned int call[2];
> +       int i;
> +       int imm1;
> +       u32 rd1;
> +
> +       for (i = 0; i < num_instr; i++) {
> +               /* is there a further instruction? */
> +               if (i + 1 >= num_instr)
> +                       continue;

Isn't this the same as for (i = 0; i < num_instr - 1; i++) ?

> +
> +               if (!is_auipc_jalr_pair(*(alt_ptr + i), *(alt_ptr + i + 1)))
> +                       continue;
> +
> +               /* call will use ra register */
> +               rd1 = EXTRACT_RD_REG(*(alt_ptr + i));
> +               if (rd1 != 1)
> +                       continue;
> +
> +               /* get and adjust new target address */
> +               imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i));
> +               imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1));
> +               imm1 -= patch_offset;
> +
> +               /* pick the original auipc + jalr */
> +               call[0] = *(alt_ptr + i);
> +               call[1] = *(alt_ptr + i + 1);
> +
> +               /* drop the old IMMs */
> +               call[0] &= ~(U_IMM_31_12_MASK);
> +               call[1] &= ~(I_IMM_11_0_MASK << I_IMM_11_0_OPOFF);
> +
> +               /* add the adapted IMMs */
> +               call[0] |= to_auipc_imm(imm1);
> +               call[1] |= to_jalr_imm(imm1);
> +
> +               /* patch the call place again */
> +               patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8);
> +       }
> +}
> +
>  void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
>                                                   struct alt_entry *end,
>                                                   unsigned int stage)
> @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
>                 }
>
>                 tmp = (1U << alt->errata_id);
> -               if (cpu_req_feature & tmp)
> -                       patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
> +               if (cpu_req_feature & tmp) {
> +                       /* do the basic patching */
> +                       patch_text_nosync(alt->old_ptr, alt->alt_ptr,
> +                                         alt->alt_len);
> +
> +                       riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> +                                                        alt->alt_len,
> +                                                        alt->old_ptr - alt->alt_ptr);

Here you're casting a void pointer to an instruction to an unsigned
int pointer, but since we enable compressed instructions this may
result in an unaligned pointer. Using this pointer will work, but may
be slow. Eg. fault to m-mode to be patched up. We already do that in
other places in the arch/riscv, but I'd prefer not to add new
instances of this.

> +               }
>         }
>  }
>  #endif
> --
> 2.35.1
Andrew Jones Nov. 14, 2022, 11:35 a.m. UTC | #3
On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote:
> On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote:
...
> > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> >                 }
> >
> >                 tmp = (1U << alt->errata_id);
> > -               if (cpu_req_feature & tmp)
> > -                       patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
> > +               if (cpu_req_feature & tmp) {
> > +                       /* do the basic patching */
> > +                       patch_text_nosync(alt->old_ptr, alt->alt_ptr,
> > +                                         alt->alt_len);
> > +
> > +                       riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> > +                                                        alt->alt_len,
> > +                                                        alt->old_ptr - alt->alt_ptr);
> 
> Here you're casting a void pointer to an instruction to an unsigned
> int pointer, but since we enable compressed instructions this may
> result in an unaligned pointer. Using this pointer will work, but may
> be slow. Eg. fault to m-mode to be patched up. We already do that in
> other places in the arch/riscv, but I'd prefer not to add new
> instances of this.

Alternative instruction sequences (old and new) have compression disabled.

Thanks,
drew
Emil Renner Berthing Nov. 14, 2022, 11:38 a.m. UTC | #4
On Mon, 14 Nov 2022 at 12:35, Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote:
> > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote:
> ...
> > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> > >                 }
> > >
> > >                 tmp = (1U << alt->errata_id);
> > > -               if (cpu_req_feature & tmp)
> > > -                       patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
> > > +               if (cpu_req_feature & tmp) {
> > > +                       /* do the basic patching */
> > > +                       patch_text_nosync(alt->old_ptr, alt->alt_ptr,
> > > +                                         alt->alt_len);
> > > +
> > > +                       riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> > > +                                                        alt->alt_len,
> > > +                                                        alt->old_ptr - alt->alt_ptr);
> >
> > Here you're casting a void pointer to an instruction to an unsigned
> > int pointer, but since we enable compressed instructions this may
> > result in an unaligned pointer. Using this pointer will work, but may
> > be slow. Eg. fault to m-mode to be patched up. We already do that in
> > other places in the arch/riscv, but I'd prefer not to add new
> > instances of this.
>
> Alternative instruction sequences (old and new) have compression disabled.

I see, but if the instructions before the alternative sequence ends on
an unaligned address will there be inserted 16bit NOPs to make sure
the alternative sequence will be aligned?

> Thanks,
> drew
Heiko Stübner Nov. 14, 2022, 11:38 a.m. UTC | #5
Am Montag, 14. November 2022, 12:35:53 CET schrieb Andrew Jones:
> On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote:
> > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote:
> ...
> > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> > >                 }
> > >
> > >                 tmp = (1U << alt->errata_id);
> > > -               if (cpu_req_feature & tmp)
> > > -                       patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
> > > +               if (cpu_req_feature & tmp) {
> > > +                       /* do the basic patching */
> > > +                       patch_text_nosync(alt->old_ptr, alt->alt_ptr,
> > > +                                         alt->alt_len);
> > > +
> > > +                       riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> > > +                                                        alt->alt_len,
> > > +                                                        alt->old_ptr - alt->alt_ptr);
> > 
> > Here you're casting a void pointer to an instruction to an unsigned
> > int pointer, but since we enable compressed instructions this may
> > result in an unaligned pointer. Using this pointer will work, but may
> > be slow. Eg. fault to m-mode to be patched up. We already do that in
> > other places in the arch/riscv, but I'd prefer not to add new
> > instances of this.
> 
> Alternative instruction sequences (old and new) have compression disabled.

That was my first thought as well, but I think Emil was talking more about the
placement of the alternative block inside the running kernel.

i.e. I guess the starting point of an alternative sequence could also be unaligned.

Though I don't _yet_ see how an improvement could look like.
Andrew Jones Nov. 14, 2022, 12:15 p.m. UTC | #6
On Mon, Nov 14, 2022 at 12:38:39PM +0100, Heiko Stübner wrote:
> Am Montag, 14. November 2022, 12:35:53 CET schrieb Andrew Jones:
> > On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote:
> > > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote:
> > ...
> > > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> > > >                 }
> > > >
> > > >                 tmp = (1U << alt->errata_id);
> > > > -               if (cpu_req_feature & tmp)
> > > > -                       patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
> > > > +               if (cpu_req_feature & tmp) {
> > > > +                       /* do the basic patching */
> > > > +                       patch_text_nosync(alt->old_ptr, alt->alt_ptr,
> > > > +                                         alt->alt_len);
> > > > +
> > > > +                       riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> > > > +                                                        alt->alt_len,
> > > > +                                                        alt->old_ptr - alt->alt_ptr);
> > > 
> > > Here you're casting a void pointer to an instruction to an unsigned
> > > int pointer, but since we enable compressed instructions this may
> > > result in an unaligned pointer. Using this pointer will work, but may
> > > be slow. Eg. fault to m-mode to be patched up. We already do that in
> > > other places in the arch/riscv, but I'd prefer not to add new
> > > instances of this.
> > 
> > Alternative instruction sequences (old and new) have compression disabled.
> 
> That was my first thought as well, but I think Emil was talking more about the
> placement of the alternative block inside the running kernel.
> 
> i.e. I guess the starting point of an alternative sequence could also be unaligned.

Oh, I see.

> 
> Though I don't _yet_ see how an improvement could look like.

I think we can patch the alternative macros to add alignment. Something
like

diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index ec2f3f1b836f..3c330a9066f7 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -20,6 +20,7 @@
        ALT_ENTRY 886b, 888f, \vendor_id, \errata_id, 889f - 888f
        .popsection
        .subsection 1
+       .balign 4
 888 :
        .option push
        .option norvc
@@ -34,6 +35,7 @@
 .endm

 .macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable
+       .balign 4
 886 :
        .option push
        .option norvc
@@ -49,6 +51,7 @@

 .macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
                                  new_c_2, vendor_id_2, errata_id_2, enable_2
+       .balign 4
 886 :
        .option push
        .option norvc
@@ -87,6 +90,7 @@
        ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \
        ".popsection\n"                                                 \
        ".subsection 1\n"                                               \
+       ".balign 4\n"                                                   \
        "888 :\n"                                                       \
        ".option push\n"                                                \
        ".option norvc\n"                                               \
@@ -100,6 +104,7 @@
        ".endif\n"

 #define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable)  \
+       ".balign 4\n"                                                   \
        "886 :\n"                                                       \
        ".option push\n"                                                \
        ".option norvc\n"                                               \
@@ -116,6 +121,7 @@
                                        enable_1,                       \
                                   new_c_2, vendor_id_2, errata_id_2,   \
                                        enable_2)                       \
+       ".balign 4\n"                                                   \
        "886 :\n"                                                       \
        ".option push\n"                                                \
        ".option norvc\n"                                               \
Emil Renner Berthing Nov. 14, 2022, 12:29 p.m. UTC | #7
On Mon, 14 Nov 2022 at 13:15, Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Mon, Nov 14, 2022 at 12:38:39PM +0100, Heiko Stübner wrote:
> > Am Montag, 14. November 2022, 12:35:53 CET schrieb Andrew Jones:
> > > On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote:
> > > > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote:
> > > ...
> > > > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> > > > >                 }
> > > > >
> > > > >                 tmp = (1U << alt->errata_id);
> > > > > -               if (cpu_req_feature & tmp)
> > > > > -                       patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
> > > > > +               if (cpu_req_feature & tmp) {
> > > > > +                       /* do the basic patching */
> > > > > +                       patch_text_nosync(alt->old_ptr, alt->alt_ptr,
> > > > > +                                         alt->alt_len);
> > > > > +
> > > > > +                       riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> > > > > +                                                        alt->alt_len,
> > > > > +                                                        alt->old_ptr - alt->alt_ptr);
> > > >
> > > > Here you're casting a void pointer to an instruction to an unsigned
> > > > int pointer, but since we enable compressed instructions this may
> > > > result in an unaligned pointer. Using this pointer will work, but may
> > > > be slow. Eg. fault to m-mode to be patched up. We already do that in
> > > > other places in the arch/riscv, but I'd prefer not to add new
> > > > instances of this.
> > >
> > > Alternative instruction sequences (old and new) have compression disabled.
> >
> > That was my first thought as well, but I think Emil was talking more about the
> > placement of the alternative block inside the running kernel.
> >
> > i.e. I guess the starting point of an alternative sequence could also be unaligned.
>
> Oh, I see.
>
> >
> > Though I don't _yet_ see how an improvement could look like.
>
> I think we can patch the alternative macros to add alignment. Something
> like
>
> diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
> index ec2f3f1b836f..3c330a9066f7 100644
> --- a/arch/riscv/include/asm/alternative-macros.h
> +++ b/arch/riscv/include/asm/alternative-macros.h
> @@ -20,6 +20,7 @@
>         ALT_ENTRY 886b, 888f, \vendor_id, \errata_id, 889f - 888f
>         .popsection
>         .subsection 1
> +       .balign 4
>  888 :
>         .option push
>         .option norvc
> @@ -34,6 +35,7 @@
>  .endm
>
>  .macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable
> +       .balign 4
>  886 :
>         .option push
>         .option norvc
> @@ -49,6 +51,7 @@
>
>  .macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
>                                   new_c_2, vendor_id_2, errata_id_2, enable_2
> +       .balign 4
>  886 :
>         .option push
>         .option norvc
> @@ -87,6 +90,7 @@
>         ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \
>         ".popsection\n"                                                 \
>         ".subsection 1\n"                                               \
> +       ".balign 4\n"                                                   \
>         "888 :\n"                                                       \
>         ".option push\n"                                                \
>         ".option norvc\n"                                               \
> @@ -100,6 +104,7 @@
>         ".endif\n"
>
>  #define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable)  \
> +       ".balign 4\n"                                                   \
>         "886 :\n"                                                       \
>         ".option push\n"                                                \
>         ".option norvc\n"                                               \
> @@ -116,6 +121,7 @@
>                                         enable_1,                       \
>                                    new_c_2, vendor_id_2, errata_id_2,   \
>                                         enable_2)                       \
> +       ".balign 4\n"                                                   \
>         "886 :\n"                                                       \
>         ".option push\n"                                                \
>         ".option norvc\n"                                               \

Why not just use accessors? Eg.

unsigned int riscv_instruction_at(void *p)
{
  u16 *parcel = p;
  return (unsigned int)parcel[0] | (unsigned int)parcel[1] << 16;
}
Philipp Tomsich Nov. 14, 2022, 12:47 p.m. UTC | #8
On Mon, 14 Nov 2022 at 12:38, Heiko Stübner <heiko@sntech.de> wrote:
>
> Am Montag, 14. November 2022, 12:35:53 CET schrieb Andrew Jones:
> > On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote:
> > > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote:
> > ...
> > > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> > > >                 }
> > > >
> > > >                 tmp = (1U << alt->errata_id);
> > > > -               if (cpu_req_feature & tmp)
> > > > -                       patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
> > > > +               if (cpu_req_feature & tmp) {
> > > > +                       /* do the basic patching */
> > > > +                       patch_text_nosync(alt->old_ptr, alt->alt_ptr,
> > > > +                                         alt->alt_len);
> > > > +
> > > > +                       riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> > > > +                                                        alt->alt_len,
> > > > +                                                        alt->old_ptr - alt->alt_ptr);
> > >
> > > Here you're casting a void pointer to an instruction to an unsigned
> > > int pointer, but since we enable compressed instructions this may
> > > result in an unaligned pointer. Using this pointer will work, but may
> > > be slow. Eg. fault to m-mode to be patched up. We already do that in
> > > other places in the arch/riscv, but I'd prefer not to add new
> > > instances of this.
> >
> > Alternative instruction sequences (old and new) have compression disabled.
>
> That was my first thought as well, but I think Emil was talking more about the
> placement of the alternative block inside the running kernel.
>
> i.e. I guess the starting point of an alternative sequence could also be unaligned.

Indeed. Instruction alignment is only guaranteed to be 16bits, even
for larger instructions.

> Though I don't _yet_ see how an improvement could look like.

The general strategy would be multiple smaller accesses that are then
stitched together.
This will require (for a 32bit opcode) at least 2 loads, 1 shift and 1
or — for a critical path of 3 instructions.

Given that we are running on RV64, you can handle up to 48bit by
aligning down, performing a 64bit load and (if needed) shifting.

Finally, profiles looks like it will give us support for misaligned
loads (see https://github.com/riscv/riscv-profiles/blob/main/profiles.adoc#rva22-profiles
and where the Zicclsm extension is mandated).

Philipp.
Lad, Prabhakar Nov. 15, 2022, 2:28 p.m. UTC | #9
Hi Heiko,

Thank you for the patch.

On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote:
>
> From: Heiko Stuebner <heiko.stuebner@vrull.eu>
>
> Alternatives live in a different section, so addresses used by call
> functions will point to wrong locations after the patch got applied.
>
> Similar to arm64, adjust the location to consider that offset.
>
> Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> ---
>  arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++-
>  1 file changed, 77 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 694267d1fe81..026512ca9c4c 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
>         return cpu_req_feature;
>  }
>
> +#include <asm/parse_asm.h>
> +
> +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR)
> +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC)
> +
> +static inline bool is_auipc_jalr_pair(long insn1, long insn2)
> +{
> +       return is_auipc_insn(insn1) && is_jalr_insn(insn2);
> +}
> +
> +#define JALR_SIGN_MASK         BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF)
> +#define JALR_OFFSET_MASK       I_IMM_11_0_MASK
> +#define AUIPC_OFFSET_MASK      U_IMM_31_12_MASK
> +#define AUIPC_PAD              (0x00001000)
> +#define JALR_SHIFT             I_IMM_11_0_OPOFF
> +
> +#define to_jalr_imm(offset)                                            \
> +       ((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF)
> +
> +#define to_auipc_imm(offset)                                           \
> +       ((offset & JALR_SIGN_MASK) ?                                    \
> +       ((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) :    \
> +       (offset & AUIPC_OFFSET_MASK))
> +
> +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
> +                                            unsigned int len, int patch_offset)
> +{

I am yet to test this with my ASM code yet, but maybe can we move this
to [0] so that other erratas can make use of it too?

[0] arch/riscv/kernel/patch.c

Cheers,
Prabhakar
Heiko Stübner Nov. 17, 2022, 11:51 a.m. UTC | #10
Am Dienstag, 15. November 2022, 15:28:27 CET schrieb Lad, Prabhakar:
> Hi Heiko,
> 
> Thank you for the patch.
> 
> On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote:
> >
> > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> >
> > Alternatives live in a different section, so addresses used by call
> > functions will point to wrong locations after the patch got applied.
> >
> > Similar to arm64, adjust the location to consider that offset.
> >
> > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > ---
> >  arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++-
> >  1 file changed, 77 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > index 694267d1fe81..026512ca9c4c 100644
> > --- a/arch/riscv/kernel/cpufeature.c
> > +++ b/arch/riscv/kernel/cpufeature.c
> > @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
> >         return cpu_req_feature;
> >  }
> >
> > +#include <asm/parse_asm.h>
> > +
> > +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR)
> > +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC)
> > +
> > +static inline bool is_auipc_jalr_pair(long insn1, long insn2)
> > +{
> > +       return is_auipc_insn(insn1) && is_jalr_insn(insn2);
> > +}
> > +
> > +#define JALR_SIGN_MASK         BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF)
> > +#define JALR_OFFSET_MASK       I_IMM_11_0_MASK
> > +#define AUIPC_OFFSET_MASK      U_IMM_31_12_MASK
> > +#define AUIPC_PAD              (0x00001000)
> > +#define JALR_SHIFT             I_IMM_11_0_OPOFF
> > +
> > +#define to_jalr_imm(offset)                                            \
> > +       ((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF)
> > +
> > +#define to_auipc_imm(offset)                                           \
> > +       ((offset & JALR_SIGN_MASK) ?                                    \
> > +       ((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) :    \
> > +       (offset & AUIPC_OFFSET_MASK))
> > +
> > +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
> > +                                            unsigned int len, int patch_offset)
> > +{
> 
> I am yet to test this with my ASM code yet, but maybe can we move this
> to [0] so that other erratas can make use of it too?
> 
> [0] arch/riscv/kernel/patch.c

yeah, that sounds like a very good plan.

I also want to make the to_foo_imm macros shared.
I.e. right now they're just duplicated from the ftrace patching code.

Heiko
Lad, Prabhakar Nov. 21, 2022, 9:50 a.m. UTC | #11
Hi Heiko,

On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote:
>
> From: Heiko Stuebner <heiko.stuebner@vrull.eu>
>
> Alternatives live in a different section, so addresses used by call
> functions will point to wrong locations after the patch got applied.
>
> Similar to arm64, adjust the location to consider that offset.
>
> Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> ---
>  arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++-
>  1 file changed, 77 insertions(+), 2 deletions(-)
>
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 694267d1fe81..026512ca9c4c 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
>         return cpu_req_feature;
>  }
>
> +#include <asm/parse_asm.h>
> +
> +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR)
> +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC)
> +
> +static inline bool is_auipc_jalr_pair(long insn1, long insn2)
> +{
> +       return is_auipc_insn(insn1) && is_jalr_insn(insn2);
> +}
> +
> +#define JALR_SIGN_MASK         BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF)
> +#define JALR_OFFSET_MASK       I_IMM_11_0_MASK
> +#define AUIPC_OFFSET_MASK      U_IMM_31_12_MASK
> +#define AUIPC_PAD              (0x00001000)
> +#define JALR_SHIFT             I_IMM_11_0_OPOFF
> +
> +#define to_jalr_imm(offset)                                            \
> +       ((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF)
> +
> +#define to_auipc_imm(offset)                                           \
> +       ((offset & JALR_SIGN_MASK) ?                                    \
> +       ((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) :    \
> +       (offset & AUIPC_OFFSET_MASK))
> +
> +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
> +                                            unsigned int len, int patch_offset)
> +{
> +       int num_instr = len / sizeof(u32);
> +       unsigned int call[2];
> +       int i;
> +       int imm1;
> +       u32 rd1;
> +
> +       for (i = 0; i < num_instr; i++) {
> +               /* is there a further instruction? */
> +               if (i + 1 >= num_instr)
> +                       continue;
> +
> +               if (!is_auipc_jalr_pair(*(alt_ptr + i), *(alt_ptr + i + 1)))
> +                       continue;
> +
> +               /* call will use ra register */
> +               rd1 = EXTRACT_RD_REG(*(alt_ptr + i));
> +               if (rd1 != 1)
> +                       continue;
> +
> +               /* get and adjust new target address */
> +               imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i));
> +               imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1));
> +               imm1 -= patch_offset;
> +
> +               /* pick the original auipc + jalr */
> +               call[0] = *(alt_ptr + i);
> +               call[1] = *(alt_ptr + i + 1);
> +
> +               /* drop the old IMMs */
> +               call[0] &= ~(U_IMM_31_12_MASK);
> +               call[1] &= ~(I_IMM_11_0_MASK << I_IMM_11_0_OPOFF);
> +
> +               /* add the adapted IMMs */
> +               call[0] |= to_auipc_imm(imm1);
> +               call[1] |= to_jalr_imm(imm1);
> +
> +               /* patch the call place again */
> +               patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8);
> +       }
> +}
> +

I have the below assembly code which I have tested without the
alternatives for the RZ/Five CMO,

#define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)        \
asm volatile(".option push\n\t\n\t"                    \
         ".option norvc\n\t"                    \
         ".option norelax\n\t"                    \
         "addi sp,sp,-16\n\t"                    \
         "sd    s0,0(sp)\n\t"                    \
         "sd    ra,8(sp)\n\t"                    \
         "addi    s0,sp,16\n\t"                    \
         "mv a4,%6\n\t"                        \
         "mv a3,%5\n\t"                        \
         "mv a2,%4\n\t"                        \
         "mv a1,%3\n\t"                        \
         "mv a0,%0\n\t"                        \
         "call rzfive_cmo\n\t"                    \
         "ld    ra,8(sp)\n\t"                    \
         "ld    s0,0(sp)\n\t"                    \
         "addi    sp,sp,16\n\t"                    \
         ".option pop\n\t"                        \
         : : "r"(_cachesize),                    \
         "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),    \
         "r"((unsigned long)(_start) + (_size)),            \
         "r"((unsigned long) (_start)),                \
         "r"((unsigned long) (_size)),                \
         "r"((unsigned long) (_dir)),                \
         "r"((unsigned long) (_ops))                \
         : "a0", "a1", "a2", "a3", "a4", "memory")

Now when integrate this with ALTERNATIVE_2() as below,

#define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)        \
asm volatile(ALTERNATIVE_2(                        \
    __nops(14),                            \
    "mv a0, %1\n\t"                            \
    "j 2f\n\t"                            \
    "3:\n\t"                            \
    "cbo." __stringify(_op) " (a0)\n\t"                \
    "add a0, a0, %0\n\t"                        \
    "2:\n\t"                            \
    "bltu a0, %2, 3b\n\t"                        \
    __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,    \
    ".option push\n\t\n\t"                        \
    ".option norvc\n\t"                        \
    ".option norelax\n\t"                        \
    "addi sp,sp,-16\n\t"                        \
    "sd    s0,0(sp)\n\t"                        \
    "sd    ra,8(sp)\n\t"                        \
    "addi    s0,sp,16\n\t"                        \
    "mv a4,%6\n\t"                            \
    "mv a3,%5\n\t"                            \
    "mv a2,%4\n\t"                            \
    "mv a1,%3\n\t"                            \
    "mv a0,%0\n\t"                            \
    "call rzfive_cmo\n\t"                \
    "ld    ra,8(sp)\n\t"                        \
    "ld    s0,0(sp)\n\t"                        \
    "addi    sp,sp,16\n\t"                        \
    ".option pop\n\t"                        \
    , ANDESTECH_VENDOR_ID,                        \
            ERRATA_ANDESTECH_NO_IOCP, CONFIG_ERRATA_RZFIVE_CMO)    \
    : : "r"(_cachesize),                        \
    "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),    \
    "r"((unsigned long)(_start) + (_size)),            \
    "r"((unsigned long) (_start)),                \
    "r"((unsigned long) (_size)),                \
    "r"((unsigned long) (_dir)),                \
    "r"((unsigned long) (_ops))                \
    : "a0", "a1", "a2", "a3", "a4", "memory")

I am seeing kernel panic with this change. Looking at the
riscv_alternative_fix_auipc_jalr() implementation it assumes the rest
of the alternative options are calls too. Is my understanding correct
here?

Do you think this is the correct approach in my case?

Note, I wanted to test with ALTERNATIVE_2() first to make sure
everything is okay and then later test my ALTERNATIVE_3()
implementation.

Cheers,
Prabhakar
Heiko Stübner Nov. 21, 2022, 11:27 a.m. UTC | #12
Hi,

Am Montag, 21. November 2022, 10:50:09 CET schrieb Lad, Prabhakar:
> On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote:
> >
> > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> >
> > Alternatives live in a different section, so addresses used by call
> > functions will point to wrong locations after the patch got applied.
> >
> > Similar to arm64, adjust the location to consider that offset.
> >
> > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > ---

[...]

> I have the below assembly code which I have tested without the
> alternatives for the RZ/Five CMO,
> 
> #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)        \
> asm volatile(".option push\n\t\n\t"                    \
>          ".option norvc\n\t"                    \
>          ".option norelax\n\t"                    \
>          "addi sp,sp,-16\n\t"                    \
>          "sd    s0,0(sp)\n\t"                    \
>          "sd    ra,8(sp)\n\t"                    \
>          "addi    s0,sp,16\n\t"                    \
>          "mv a4,%6\n\t"                        \
>          "mv a3,%5\n\t"                        \
>          "mv a2,%4\n\t"                        \
>          "mv a1,%3\n\t"                        \
>          "mv a0,%0\n\t"                        \
>          "call rzfive_cmo\n\t"                    \
>          "ld    ra,8(sp)\n\t"                    \
>          "ld    s0,0(sp)\n\t"                    \
>          "addi    sp,sp,16\n\t"                    \
>          ".option pop\n\t"                        \
>          : : "r"(_cachesize),                    \
>          "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),    \
>          "r"((unsigned long)(_start) + (_size)),            \
>          "r"((unsigned long) (_start)),                \
>          "r"((unsigned long) (_size)),                \
>          "r"((unsigned long) (_dir)),                \
>          "r"((unsigned long) (_ops))                \
>          : "a0", "a1", "a2", "a3", "a4", "memory")
>
> Now when integrate this with ALTERNATIVE_2() as below,
> 
> #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)        \
> asm volatile(ALTERNATIVE_2(                        \
>     __nops(14),                            \
>     "mv a0, %1\n\t"                            \
>     "j 2f\n\t"                            \
>     "3:\n\t"                            \
>     "cbo." __stringify(_op) " (a0)\n\t"                \
>     "add a0, a0, %0\n\t"                        \
>     "2:\n\t"                            \
>     "bltu a0, %2, 3b\n\t"                        \
>     __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,    \
>     ".option push\n\t\n\t"                        \
>     ".option norvc\n\t"                        \
>     ".option norelax\n\t"                        \
>     "addi sp,sp,-16\n\t"                        \
>     "sd    s0,0(sp)\n\t"                        \
>     "sd    ra,8(sp)\n\t"                        \
>     "addi    s0,sp,16\n\t"                        \
>     "mv a4,%6\n\t"                            \
>     "mv a3,%5\n\t"                            \
>     "mv a2,%4\n\t"                            \
>     "mv a1,%3\n\t"                            \
>     "mv a0,%0\n\t"                            \
>     "call rzfive_cmo\n\t"                \
>     "ld    ra,8(sp)\n\t"                        \
>     "ld    s0,0(sp)\n\t"                        \
>     "addi    sp,sp,16\n\t"                        \
>     ".option pop\n\t"                        \
>     , ANDESTECH_VENDOR_ID,                        \
>             ERRATA_ANDESTECH_NO_IOCP, CONFIG_ERRATA_RZFIVE_CMO)    \
>     : : "r"(_cachesize),                        \
>     "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),    \
>     "r"((unsigned long)(_start) + (_size)),            \
>     "r"((unsigned long) (_start)),                \
>     "r"((unsigned long) (_size)),                \
>     "r"((unsigned long) (_dir)),                \
>     "r"((unsigned long) (_ops))                \
>     : "a0", "a1", "a2", "a3", "a4", "memory")
> 
> I am seeing kernel panic with this change. Looking at the
> riscv_alternative_fix_auipc_jalr() implementation it assumes the rest
> of the alternative options are calls too. Is my understanding correct
> here?

The loop walks through the instructions after the location got patched and
checks if an instruction is an auipc and the next one is a jalr and only then
adjusts the address accordingly.

So it _should_ leave all other (non auipc+jalr) instructions alone.
(hopefully)


> Do you think this is the correct approach in my case?

It does look correct on first glance.

As I had that passing thought, are you actually calling
	riscv_alternative_fix_auipc_jalr()
from your errata/.../foo.c after doing the patching?

I.e. with the current patchset, that function is only called from the
cpufeature part, but for example not from the other patching locations.
[and a future revision should probably change that :-) ]


After making sure that function actually runs, the next thing you could try
is to have both the "original" code and the patch be identical, i.e.
replace the cbo* part with your code as well and then just output the
instructions via printk to check what the addresses do in both.

After riscv_alternative_fix_auipc_jalr() ran then both code variants
should be identical when using the same code in both areas.


> Note, I wanted to test with ALTERNATIVE_2() first to make sure
> everything is okay and then later test my ALTERNATIVE_3()
> implementation.

sounds like a very sensible idea to use the existing macros
first for verification :-)


Heiko
Lad, Prabhakar Nov. 21, 2022, 3:06 p.m. UTC | #13
Hi Heiko,

On Mon, Nov 21, 2022 at 11:27 AM Heiko Stübner <heiko@sntech.de> wrote:
>
> Hi,
>
> Am Montag, 21. November 2022, 10:50:09 CET schrieb Lad, Prabhakar:
> > On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote:
> > >
> > > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > >
> > > Alternatives live in a different section, so addresses used by call
> > > functions will point to wrong locations after the patch got applied.
> > >
> > > Similar to arm64, adjust the location to consider that offset.
> > >
> > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > > ---
>
> [...]
>
> > I have the below assembly code which I have tested without the
> > alternatives for the RZ/Five CMO,
> >
> > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)        \
> > asm volatile(".option push\n\t\n\t"                    \
> >          ".option norvc\n\t"                    \
> >          ".option norelax\n\t"                    \
> >          "addi sp,sp,-16\n\t"                    \
> >          "sd    s0,0(sp)\n\t"                    \
> >          "sd    ra,8(sp)\n\t"                    \
> >          "addi    s0,sp,16\n\t"                    \
> >          "mv a4,%6\n\t"                        \
> >          "mv a3,%5\n\t"                        \
> >          "mv a2,%4\n\t"                        \
> >          "mv a1,%3\n\t"                        \
> >          "mv a0,%0\n\t"                        \
> >          "call rzfive_cmo\n\t"                    \
> >          "ld    ra,8(sp)\n\t"                    \
> >          "ld    s0,0(sp)\n\t"                    \
> >          "addi    sp,sp,16\n\t"                    \
> >          ".option pop\n\t"                        \
> >          : : "r"(_cachesize),                    \
> >          "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),    \
> >          "r"((unsigned long)(_start) + (_size)),            \
> >          "r"((unsigned long) (_start)),                \
> >          "r"((unsigned long) (_size)),                \
> >          "r"((unsigned long) (_dir)),                \
> >          "r"((unsigned long) (_ops))                \
> >          : "a0", "a1", "a2", "a3", "a4", "memory")
> >
> > Now when integrate this with ALTERNATIVE_2() as below,
> >
> > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)        \
> > asm volatile(ALTERNATIVE_2(                        \
> >     __nops(14),                            \
> >     "mv a0, %1\n\t"                            \
> >     "j 2f\n\t"                            \
> >     "3:\n\t"                            \
> >     "cbo." __stringify(_op) " (a0)\n\t"                \
> >     "add a0, a0, %0\n\t"                        \
> >     "2:\n\t"                            \
> >     "bltu a0, %2, 3b\n\t"                        \
> >     __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,    \
> >     ".option push\n\t\n\t"                        \
> >     ".option norvc\n\t"                        \
> >     ".option norelax\n\t"                        \
> >     "addi sp,sp,-16\n\t"                        \
> >     "sd    s0,0(sp)\n\t"                        \
> >     "sd    ra,8(sp)\n\t"                        \
> >     "addi    s0,sp,16\n\t"                        \
> >     "mv a4,%6\n\t"                            \
> >     "mv a3,%5\n\t"                            \
> >     "mv a2,%4\n\t"                            \
> >     "mv a1,%3\n\t"                            \
> >     "mv a0,%0\n\t"                            \
> >     "call rzfive_cmo\n\t"                \
> >     "ld    ra,8(sp)\n\t"                        \
> >     "ld    s0,0(sp)\n\t"                        \
> >     "addi    sp,sp,16\n\t"                        \
> >     ".option pop\n\t"                        \
> >     , ANDESTECH_VENDOR_ID,                        \
> >             ERRATA_ANDESTECH_NO_IOCP, CONFIG_ERRATA_RZFIVE_CMO)    \
> >     : : "r"(_cachesize),                        \
> >     "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),    \
> >     "r"((unsigned long)(_start) + (_size)),            \
> >     "r"((unsigned long) (_start)),                \
> >     "r"((unsigned long) (_size)),                \
> >     "r"((unsigned long) (_dir)),                \
> >     "r"((unsigned long) (_ops))                \
> >     : "a0", "a1", "a2", "a3", "a4", "memory")
> >
> > I am seeing kernel panic with this change. Looking at the
> > riscv_alternative_fix_auipc_jalr() implementation it assumes the rest
> > of the alternative options are calls too. Is my understanding correct
> > here?
>
> The loop walks through the instructions after the location got patched and
> checks if an instruction is an auipc and the next one is a jalr and only then
> adjusts the address accordingly.
>
Ok so my understanding was wrong here.

> So it _should_ leave all other (non auipc+jalr) instructions alone.
> (hopefully)
>
Agreed.

>
> > Do you think this is the correct approach in my case?
>
> It does look correct on first glance.
>
\o/

> As I had that passing thought, are you actually calling
>         riscv_alternative_fix_auipc_jalr()
> from your errata/.../foo.c after doing the patching?
>
> I.e. with the current patchset, that function is only called from the
> cpufeature part, but for example not from the other patching locations.
> [and a future revision should probably change that :-) ]
>
>
I have made a local copy of riscv_alternative_fix_auipc_jalr() and
then calling it after patch_text_nosync() referring to your patch for
str functions.

> After making sure that function actually runs, the next thing you could try
> is to have both the "original" code and the patch be identical, i.e.
> replace the cbo* part with your code as well and then just output the
> instructions via printk to check what the addresses do in both.
>
> After riscv_alternative_fix_auipc_jalr() ran then both code variants
> should be identical when using the same code in both areas.
>
So I have added debug prints to match the instructions as below after
and before patching:

static void riscv_alternative_print_inst(unsigned int *alt_ptr,
                     unsigned int len)
{
    int num_instr = len / sizeof(u32);
    int i;

    for (i = 0; i < num_instr; i++)
        pr_err("%s instruction: 0x%x\n", __func__, *(alt_ptr + i));

}

void __init_or_module andes_errata_patch_func(struct alt_entry *begin,
struct alt_entry *end,
                          unsigned long archid, unsigned long impid,
                          unsigned int stage)
{
....
    if (cpu_req_errata & tmp) {
        pr_err("stage: %x -> %px--> %x %x %x\n", stage, alt, tmp,
cpu_req_errata, alt->errata_id);
        pr_err("old:%ps alt:%ps len:%lx\n", alt->old_ptr,
alt->alt_ptr, alt->alt_len);
        pr_err("Print old start\n");
        riscv_alternative_print_inst(alt->old_ptr, alt->alt_len);
        pr_err("Print old end\n");
        patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);

        riscv_alternative_fix_auipc_jalr(alt->old_ptr, alt->alt_len,
                        alt->old_ptr - alt->alt_ptr);
        pr_err("Print patch start\n");
        riscv_alternative_print_inst(alt->alt_ptr, alt->alt_len);
        pr_err("Print patch end\n");
    }
.....
}

Below is the log:
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] Print new old end
[    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
[    0.000000] Print patch start
[    0.000000] riscv_alternative_print_inst instruction: 0xff010113
[    0.000000] riscv_alternative_print_inst instruction: 0x813023
[    0.000000] riscv_alternative_print_inst instruction: 0x113423
[    0.000000] riscv_alternative_print_inst instruction: 0x1010413
[    0.000000] riscv_alternative_print_inst instruction: 0xf0713
[    0.000000] riscv_alternative_print_inst instruction: 0x78693
[    0.000000] riscv_alternative_print_inst instruction: 0x88613
[    0.000000] riscv_alternative_print_inst instruction: 0x80593
[    0.000000] riscv_alternative_print_inst instruction: 0xe0513
[    0.000000] riscv_alternative_print_inst instruction: 0x97
[    0.000000] riscv_alternative_print_inst instruction: 0xcba080e7
[    0.000000] riscv_alternative_print_inst instruction: 0x813083
[    0.000000] riscv_alternative_print_inst instruction: 0x13403
[    0.000000] riscv_alternative_print_inst instruction: 0x1010113
[    0.000000] Print patch end
[    0.000000] stage: 0 -> ffffffff80a2492c--> 1 1 0
[    0.000000] old:arch_sync_dma_for_device
alt:riscv_noncoherent_supported len:38
[    0.000000] Print  old start
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x970013
    ====================> This instruction doesn't look correct it
should be 0x13?
[    0.000000] Print  old end
[    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
[    0.000000] Print patch start
[    0.000000] riscv_alternative_print_inst instruction: 0xff010113
[    0.000000] riscv_alternative_print_inst instruction: 0x813023
[    0.000000] riscv_alternative_print_inst instruction: 0x113423
[    0.000000] riscv_alternative_print_inst instruction: 0x1010413
[    0.000000] riscv_alternative_print_inst instruction: 0x78713
[    0.000000] riscv_alternative_print_inst instruction: 0x78693
[    0.000000] riscv_alternative_print_inst instruction: 0x88613
[    0.000000] riscv_alternative_print_inst instruction: 0x80593
[    0.000000] riscv_alternative_print_inst instruction: 0xe0513
[    0.000000] riscv_alternative_print_inst instruction: 0x97
[    0.000000] riscv_alternative_print_inst instruction: 0xc82080e7
====================> This instruction doesn't look correct comparing
to objdump output this should be 000080e7 or does it require the
offset too?
[    0.000000] riscv_alternative_print_inst instruction: 0x813083
[    0.000000] riscv_alternative_print_inst instruction: 0x13403
[    0.000000] riscv_alternative_print_inst instruction: 0x1010113
[    0.000000] Print patch end
[    0.000000] stage: 0 -> ffffffff80a24950--> 1 1 0
[    0.000000] old:arch_sync_dma_for_cpu alt:riscv_noncoherent_supported len:38
[    0.000000] Print old start
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x97
====================> This instruction doesn't look correct it should
be 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0xeee080e7
      ====================> This instruction doesn't look correct it
should be 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] Print old end
[    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
[    0.000000] Print patch start
[    0.000000] riscv_alternative_print_inst instruction: 0xff010113
[    0.000000] riscv_alternative_print_inst instruction: 0x813023
[    0.000000] riscv_alternative_print_inst instruction: 0x113423
[    0.000000] riscv_alternative_print_inst instruction: 0x1010413
[    0.000000] riscv_alternative_print_inst instruction: 0xf0713
[    0.000000] riscv_alternative_print_inst instruction: 0x80693
[    0.000000] riscv_alternative_print_inst instruction: 0x88613
[    0.000000] riscv_alternative_print_inst instruction: 0x78593
[    0.000000] riscv_alternative_print_inst instruction: 0xe0513
[    0.000000] riscv_alternative_print_inst instruction: 0x97
[    0.000000] riscv_alternative_print_inst instruction: 0xc4a080e7
====================> This instruction doesn't look correct comparing
to objdump output this should be 000080e7 or does it require the
offset too?
[    0.000000] riscv_alternative_print_inst instruction: 0x813083
[    0.000000] riscv_alternative_print_inst instruction: 0x13403
[    0.000000] riscv_alternative_print_inst instruction: 0x1010113
[    0.000000] Print patch end
[    0.000000] stage: 0 -> ffffffff80a24974--> 1 1 0
[    0.000000] old:arch_dma_prep_coherent alt:riscv_noncoherent_supported len:38
[    0.000000] Print old start
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x970013
====================> This instruction doesn't look correct it should
be 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x80e70000
====================> This instruction doesn't look correct it should
be 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0xe720
====================> This instruction doesn't look correct it should
be 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] riscv_alternative_print_inst instruction: 0x13
[    0.000000] Print old end
[    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
[    0.000000] Print patch start
[    0.000000] riscv_alternative_print_inst instruction: 0xff010113
[    0.000000] riscv_alternative_print_inst instruction: 0x813023
[    0.000000] riscv_alternative_print_inst instruction: 0x113423
[    0.000000] riscv_alternative_print_inst instruction: 0x1010413
[    0.000000] riscv_alternative_print_inst instruction: 0xf0713
[    0.000000] riscv_alternative_print_inst instruction: 0xe8693
[    0.000000] riscv_alternative_print_inst instruction: 0x88613
[    0.000000] riscv_alternative_print_inst instruction: 0x78593
[    0.000000] riscv_alternative_print_inst instruction: 0x30513
[    0.000000] riscv_alternative_print_inst instruction: 0x97
[    0.000000] riscv_alternative_print_inst instruction: 0xc12080e7
====================> This instruction doesn't look correct comparing
to objdump output this should be 000080e7 + offset?
[    0.000000] riscv_alternative_print_inst instruction: 0x813083
[    0.000000] riscv_alternative_print_inst instruction: 0x13403
[    0.000000] riscv_alternative_print_inst instruction: 0x1010113
[    0.000000] Print patch end

Here is the output from objdump of the file (dma-noncoherent.o):

000000000000032e <.L888^B1>:
 32e:    ff010113              addi    sp,sp,-16
 332:    00813023              sd    s0,0(sp)
 336:    00113423              sd    ra,8(sp)
 33a:    01010413              addi    s0,sp,16
 33e:    000f0713              mv    a4,t5
 342:    00078693              mv    a3,a5
 346:    00088613              mv    a2,a7
 34a:    00080593              mv    a1,a6
 34e:    000e0513              mv    a0,t3
 352:    00000097              auipc    ra,0x0
 356:    000080e7              jalr    ra # 352 <.L888^B1+0x24>
 35a:    00813083              ld    ra,8(sp)
 35e:    00013403              ld    s0,0(sp)
 362:    01010113              addi    sp,sp,16

0000000000000366 <.L888^B2>:
 366:    ff010113              addi    sp,sp,-16
 36a:    00813023              sd    s0,0(sp)
 36e:    00113423              sd    ra,8(sp)
 372:    01010413              addi    s0,sp,16
 376:    00078713              mv    a4,a5
 37a:    00078693              mv    a3,a5
 37e:    00088613              mv    a2,a7
 382:    00080593              mv    a1,a6
 386:    000e0513              mv    a0,t3
 38a:    00000097              auipc    ra,0x0
 38e:    000080e7              jalr    ra # 38a <.L888^B2+0x24>
 392:    00813083              ld    ra,8(sp)
 396:    00013403              ld    s0,0(sp)
 39a:    01010113              addi    sp,sp,16

000000000000039e <.L888^B3>:
 39e:    ff010113              addi    sp,sp,-16
 3a2:    00813023              sd    s0,0(sp)
 3a6:    00113423              sd    ra,8(sp)
 3aa:    01010413              addi    s0,sp,16
 3ae:    000f0713              mv    a4,t5
 3b2:    00080693              mv    a3,a6
 3b6:    00088613              mv    a2,a7
 3ba:    00078593              mv    a1,a5
 3be:    000e0513              mv    a0,t3
 3c2:    00000097              auipc    ra,0x0
 3c6:    000080e7              jalr    ra # 3c2 <.L888^B3+0x24>
 3ca:    00813083              ld    ra,8(sp)
 3ce:    00013403              ld    s0,0(sp)
 3d2:    01010113              addi    sp,sp,16

00000000000003d6 <.L888^B4>:
 3d6:    ff010113              addi    sp,sp,-16
 3da:    00813023              sd    s0,0(sp)
 3de:    00113423              sd    ra,8(sp)
 3e2:    01010413              addi    s0,sp,16
 3e6:    000f0713              mv    a4,t5
 3ea:    000e8693              mv    a3,t4
 3ee:    00088613              mv    a2,a7
 3f2:    00078593              mv    a1,a5
 3f6:    00030513              mv    a0,t1
 3fa:    00000097              auipc    ra,0x0
 3fe:    000080e7              jalr    ra # 3fa <.L888^B4+0x24>
 402:    00813083              ld    ra,8(sp)
 406:    00013403              ld    s0,0(sp)
 40a:    01010113              addi    sp,sp,16

Disassembly of section __ksymtab_strings:

Any pointers what could be happening?

>
> > Note, I wanted to test with ALTERNATIVE_2() first to make sure
> > everything is okay and then later test my ALTERNATIVE_3()
> > implementation.
>
> sounds like a very sensible idea to use the existing macros
> first for verification :-)
>
:)

Cheers,
Prabhakar
Lad, Prabhakar Nov. 21, 2022, 9:31 p.m. UTC | #14
Hi Heiko,

On Mon, Nov 21, 2022 at 3:06 PM Lad, Prabhakar
<prabhakar.csengg@gmail.com> wrote:
>
> Hi Heiko,
>
> On Mon, Nov 21, 2022 at 11:27 AM Heiko Stübner <heiko@sntech.de> wrote:
> >
> > Hi,
> >
> > Am Montag, 21. November 2022, 10:50:09 CET schrieb Lad, Prabhakar:
> > > On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote:
> > > >
> > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > > >
> > > > Alternatives live in a different section, so addresses used by call
> > > > functions will point to wrong locations after the patch got applied.
> > > >
> > > > Similar to arm64, adjust the location to consider that offset.
> > > >
> > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > > > ---
> >
> > [...]
> >
> > > I have the below assembly code which I have tested without the
> > > alternatives for the RZ/Five CMO,
> > >
> > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)        \
> > > asm volatile(".option push\n\t\n\t"                    \
> > >          ".option norvc\n\t"                    \
> > >          ".option norelax\n\t"                    \
> > >          "addi sp,sp,-16\n\t"                    \
> > >          "sd    s0,0(sp)\n\t"                    \
> > >          "sd    ra,8(sp)\n\t"                    \
> > >          "addi    s0,sp,16\n\t"                    \
> > >          "mv a4,%6\n\t"                        \
> > >          "mv a3,%5\n\t"                        \
> > >          "mv a2,%4\n\t"                        \
> > >          "mv a1,%3\n\t"                        \
> > >          "mv a0,%0\n\t"                        \
> > >          "call rzfive_cmo\n\t"                    \
> > >          "ld    ra,8(sp)\n\t"                    \
> > >          "ld    s0,0(sp)\n\t"                    \
> > >          "addi    sp,sp,16\n\t"                    \
> > >          ".option pop\n\t"                        \
> > >          : : "r"(_cachesize),                    \
> > >          "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),    \
> > >          "r"((unsigned long)(_start) + (_size)),            \
> > >          "r"((unsigned long) (_start)),                \
> > >          "r"((unsigned long) (_size)),                \
> > >          "r"((unsigned long) (_dir)),                \
> > >          "r"((unsigned long) (_ops))                \
> > >          : "a0", "a1", "a2", "a3", "a4", "memory")
> > >
> > > Now when integrate this with ALTERNATIVE_2() as below,
> > >
> > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)        \
> > > asm volatile(ALTERNATIVE_2(                        \
> > >     __nops(14),                            \
> > >     "mv a0, %1\n\t"                            \
> > >     "j 2f\n\t"                            \
> > >     "3:\n\t"                            \
> > >     "cbo." __stringify(_op) " (a0)\n\t"                \
> > >     "add a0, a0, %0\n\t"                        \
> > >     "2:\n\t"                            \
> > >     "bltu a0, %2, 3b\n\t"                        \
> > >     __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,    \
> > >     ".option push\n\t\n\t"                        \
> > >     ".option norvc\n\t"                        \
> > >     ".option norelax\n\t"                        \
> > >     "addi sp,sp,-16\n\t"                        \
> > >     "sd    s0,0(sp)\n\t"                        \
> > >     "sd    ra,8(sp)\n\t"                        \
> > >     "addi    s0,sp,16\n\t"                        \
> > >     "mv a4,%6\n\t"                            \
> > >     "mv a3,%5\n\t"                            \
> > >     "mv a2,%4\n\t"                            \
> > >     "mv a1,%3\n\t"                            \
> > >     "mv a0,%0\n\t"                            \
> > >     "call rzfive_cmo\n\t"                \
> > >     "ld    ra,8(sp)\n\t"                        \
> > >     "ld    s0,0(sp)\n\t"                        \
> > >     "addi    sp,sp,16\n\t"                        \
> > >     ".option pop\n\t"                        \
> > >     , ANDESTECH_VENDOR_ID,                        \
> > >             ERRATA_ANDESTECH_NO_IOCP, CONFIG_ERRATA_RZFIVE_CMO)    \
> > >     : : "r"(_cachesize),                        \
> > >     "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),    \
> > >     "r"((unsigned long)(_start) + (_size)),            \
> > >     "r"((unsigned long) (_start)),                \
> > >     "r"((unsigned long) (_size)),                \
> > >     "r"((unsigned long) (_dir)),                \
> > >     "r"((unsigned long) (_ops))                \
> > >     : "a0", "a1", "a2", "a3", "a4", "memory")
> > >
> > > I am seeing kernel panic with this change. Looking at the
> > > riscv_alternative_fix_auipc_jalr() implementation it assumes the rest
> > > of the alternative options are calls too. Is my understanding correct
> > > here?
> >
> > The loop walks through the instructions after the location got patched and
> > checks if an instruction is an auipc and the next one is a jalr and only then
> > adjusts the address accordingly.
> >
> Ok so my understanding was wrong here.
>
> > So it _should_ leave all other (non auipc+jalr) instructions alone.
> > (hopefully)
> >
> Agreed.
>
> >
> > > Do you think this is the correct approach in my case?
> >
> > It does look correct on first glance.
> >
> \o/
>
> > As I had that passing thought, are you actually calling
> >         riscv_alternative_fix_auipc_jalr()
> > from your errata/.../foo.c after doing the patching?
> >
> > I.e. with the current patchset, that function is only called from the
> > cpufeature part, but for example not from the other patching locations.
> > [and a future revision should probably change that :-) ]
> >
> >
> I have made a local copy of riscv_alternative_fix_auipc_jalr() and
> then calling it after patch_text_nosync() referring to your patch for
> str functions.
>
> > After making sure that function actually runs, the next thing you could try
> > is to have both the "original" code and the patch be identical, i.e.
> > replace the cbo* part with your code as well and then just output the
> > instructions via printk to check what the addresses do in both.
> >
> > After riscv_alternative_fix_auipc_jalr() ran then both code variants
> > should be identical when using the same code in both areas.
> >
> So I have added debug prints to match the instructions as below after
> and before patching:
>
> static void riscv_alternative_print_inst(unsigned int *alt_ptr,
>                      unsigned int len)
> {
>     int num_instr = len / sizeof(u32);
>     int i;
>
>     for (i = 0; i < num_instr; i++)
>         pr_err("%s instruction: 0x%x\n", __func__, *(alt_ptr + i));
>
> }
>
> void __init_or_module andes_errata_patch_func(struct alt_entry *begin,
> struct alt_entry *end,
>                           unsigned long archid, unsigned long impid,
>                           unsigned int stage)
> {
> ....
>     if (cpu_req_errata & tmp) {
>         pr_err("stage: %x -> %px--> %x %x %x\n", stage, alt, tmp,
> cpu_req_errata, alt->errata_id);
>         pr_err("old:%ps alt:%ps len:%lx\n", alt->old_ptr,
> alt->alt_ptr, alt->alt_len);
>         pr_err("Print old start\n");
>         riscv_alternative_print_inst(alt->old_ptr, alt->alt_len);
>         pr_err("Print old end\n");
>         patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
>
>         riscv_alternative_fix_auipc_jalr(alt->old_ptr, alt->alt_len,
>                         alt->old_ptr - alt->alt_ptr);
>         pr_err("Print patch start\n");
>         riscv_alternative_print_inst(alt->alt_ptr, alt->alt_len);
>         pr_err("Print patch end\n");
>     }
> .....
> }
>
> Below is the log:
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] Print new old end
> [    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
> [    0.000000] Print patch start
> [    0.000000] riscv_alternative_print_inst instruction: 0xff010113
> [    0.000000] riscv_alternative_print_inst instruction: 0x813023
> [    0.000000] riscv_alternative_print_inst instruction: 0x113423
> [    0.000000] riscv_alternative_print_inst instruction: 0x1010413
> [    0.000000] riscv_alternative_print_inst instruction: 0xf0713
> [    0.000000] riscv_alternative_print_inst instruction: 0x78693
> [    0.000000] riscv_alternative_print_inst instruction: 0x88613
> [    0.000000] riscv_alternative_print_inst instruction: 0x80593
> [    0.000000] riscv_alternative_print_inst instruction: 0xe0513
> [    0.000000] riscv_alternative_print_inst instruction: 0x97
> [    0.000000] riscv_alternative_print_inst instruction: 0xcba080e7
> [    0.000000] riscv_alternative_print_inst instruction: 0x813083
> [    0.000000] riscv_alternative_print_inst instruction: 0x13403
> [    0.000000] riscv_alternative_print_inst instruction: 0x1010113
> [    0.000000] Print patch end
> [    0.000000] stage: 0 -> ffffffff80a2492c--> 1 1 0
> [    0.000000] old:arch_sync_dma_for_device
> alt:riscv_noncoherent_supported len:38
> [    0.000000] Print  old start
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x970013
>     ====================> This instruction doesn't look correct it
> should be 0x13?
> [    0.000000] Print  old end
> [    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
> [    0.000000] Print patch start
> [    0.000000] riscv_alternative_print_inst instruction: 0xff010113
> [    0.000000] riscv_alternative_print_inst instruction: 0x813023
> [    0.000000] riscv_alternative_print_inst instruction: 0x113423
> [    0.000000] riscv_alternative_print_inst instruction: 0x1010413
> [    0.000000] riscv_alternative_print_inst instruction: 0x78713
> [    0.000000] riscv_alternative_print_inst instruction: 0x78693
> [    0.000000] riscv_alternative_print_inst instruction: 0x88613
> [    0.000000] riscv_alternative_print_inst instruction: 0x80593
> [    0.000000] riscv_alternative_print_inst instruction: 0xe0513
> [    0.000000] riscv_alternative_print_inst instruction: 0x97
> [    0.000000] riscv_alternative_print_inst instruction: 0xc82080e7
> ====================> This instruction doesn't look correct comparing
> to objdump output this should be 000080e7 or does it require the
> offset too?
> [    0.000000] riscv_alternative_print_inst instruction: 0x813083
> [    0.000000] riscv_alternative_print_inst instruction: 0x13403
> [    0.000000] riscv_alternative_print_inst instruction: 0x1010113
> [    0.000000] Print patch end
> [    0.000000] stage: 0 -> ffffffff80a24950--> 1 1 0
> [    0.000000] old:arch_sync_dma_for_cpu alt:riscv_noncoherent_supported len:38
> [    0.000000] Print old start
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x97
> ====================> This instruction doesn't look correct it should
> be 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0xeee080e7
>       ====================> This instruction doesn't look correct it
> should be 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] Print old end
> [    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
> [    0.000000] Print patch start
> [    0.000000] riscv_alternative_print_inst instruction: 0xff010113
> [    0.000000] riscv_alternative_print_inst instruction: 0x813023
> [    0.000000] riscv_alternative_print_inst instruction: 0x113423
> [    0.000000] riscv_alternative_print_inst instruction: 0x1010413
> [    0.000000] riscv_alternative_print_inst instruction: 0xf0713
> [    0.000000] riscv_alternative_print_inst instruction: 0x80693
> [    0.000000] riscv_alternative_print_inst instruction: 0x88613
> [    0.000000] riscv_alternative_print_inst instruction: 0x78593
> [    0.000000] riscv_alternative_print_inst instruction: 0xe0513
> [    0.000000] riscv_alternative_print_inst instruction: 0x97
> [    0.000000] riscv_alternative_print_inst instruction: 0xc4a080e7
> ====================> This instruction doesn't look correct comparing
> to objdump output this should be 000080e7 or does it require the
> offset too?
> [    0.000000] riscv_alternative_print_inst instruction: 0x813083
> [    0.000000] riscv_alternative_print_inst instruction: 0x13403
> [    0.000000] riscv_alternative_print_inst instruction: 0x1010113
> [    0.000000] Print patch end
> [    0.000000] stage: 0 -> ffffffff80a24974--> 1 1 0
> [    0.000000] old:arch_dma_prep_coherent alt:riscv_noncoherent_supported len:38
> [    0.000000] Print old start
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x970013
> ====================> This instruction doesn't look correct it should
> be 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x80e70000
> ====================> This instruction doesn't look correct it should
> be 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0xe720
> ====================> This instruction doesn't look correct it should
> be 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] riscv_alternative_print_inst instruction: 0x13
> [    0.000000] Print old end
> [    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
> [    0.000000] Print patch start
> [    0.000000] riscv_alternative_print_inst instruction: 0xff010113
> [    0.000000] riscv_alternative_print_inst instruction: 0x813023
> [    0.000000] riscv_alternative_print_inst instruction: 0x113423
> [    0.000000] riscv_alternative_print_inst instruction: 0x1010413
> [    0.000000] riscv_alternative_print_inst instruction: 0xf0713
> [    0.000000] riscv_alternative_print_inst instruction: 0xe8693
> [    0.000000] riscv_alternative_print_inst instruction: 0x88613
> [    0.000000] riscv_alternative_print_inst instruction: 0x78593
> [    0.000000] riscv_alternative_print_inst instruction: 0x30513
> [    0.000000] riscv_alternative_print_inst instruction: 0x97
> [    0.000000] riscv_alternative_print_inst instruction: 0xc12080e7
> ====================> This instruction doesn't look correct comparing
> to objdump output this should be 000080e7 + offset?
> [    0.000000] riscv_alternative_print_inst instruction: 0x813083
> [    0.000000] riscv_alternative_print_inst instruction: 0x13403
> [    0.000000] riscv_alternative_print_inst instruction: 0x1010113
> [    0.000000] Print patch end
>
> Here is the output from objdump of the file (dma-noncoherent.o):
>
> 000000000000032e <.L888^B1>:
>  32e:    ff010113              addi    sp,sp,-16
>  332:    00813023              sd    s0,0(sp)
>  336:    00113423              sd    ra,8(sp)
>  33a:    01010413              addi    s0,sp,16
>  33e:    000f0713              mv    a4,t5
>  342:    00078693              mv    a3,a5
>  346:    00088613              mv    a2,a7
>  34a:    00080593              mv    a1,a6
>  34e:    000e0513              mv    a0,t3
>  352:    00000097              auipc    ra,0x0
>  356:    000080e7              jalr    ra # 352 <.L888^B1+0x24>
>  35a:    00813083              ld    ra,8(sp)
>  35e:    00013403              ld    s0,0(sp)
>  362:    01010113              addi    sp,sp,16
>
> 0000000000000366 <.L888^B2>:
>  366:    ff010113              addi    sp,sp,-16
>  36a:    00813023              sd    s0,0(sp)
>  36e:    00113423              sd    ra,8(sp)
>  372:    01010413              addi    s0,sp,16
>  376:    00078713              mv    a4,a5
>  37a:    00078693              mv    a3,a5
>  37e:    00088613              mv    a2,a7
>  382:    00080593              mv    a1,a6
>  386:    000e0513              mv    a0,t3
>  38a:    00000097              auipc    ra,0x0
>  38e:    000080e7              jalr    ra # 38a <.L888^B2+0x24>
>  392:    00813083              ld    ra,8(sp)
>  396:    00013403              ld    s0,0(sp)
>  39a:    01010113              addi    sp,sp,16
>
> 000000000000039e <.L888^B3>:
>  39e:    ff010113              addi    sp,sp,-16
>  3a2:    00813023              sd    s0,0(sp)
>  3a6:    00113423              sd    ra,8(sp)
>  3aa:    01010413              addi    s0,sp,16
>  3ae:    000f0713              mv    a4,t5
>  3b2:    00080693              mv    a3,a6
>  3b6:    00088613              mv    a2,a7
>  3ba:    00078593              mv    a1,a5
>  3be:    000e0513              mv    a0,t3
>  3c2:    00000097              auipc    ra,0x0
>  3c6:    000080e7              jalr    ra # 3c2 <.L888^B3+0x24>
>  3ca:    00813083              ld    ra,8(sp)
>  3ce:    00013403              ld    s0,0(sp)
>  3d2:    01010113              addi    sp,sp,16
>
> 00000000000003d6 <.L888^B4>:
>  3d6:    ff010113              addi    sp,sp,-16
>  3da:    00813023              sd    s0,0(sp)
>  3de:    00113423              sd    ra,8(sp)
>  3e2:    01010413              addi    s0,sp,16
>  3e6:    000f0713              mv    a4,t5
>  3ea:    000e8693              mv    a3,t4
>  3ee:    00088613              mv    a2,a7
>  3f2:    00078593              mv    a1,a5
>  3f6:    00030513              mv    a0,t1
>  3fa:    00000097              auipc    ra,0x0
>  3fe:    000080e7              jalr    ra # 3fa <.L888^B4+0x24>
>  402:    00813083              ld    ra,8(sp)
>  406:    00013403              ld    s0,0(sp)
>  40a:    01010113              addi    sp,sp,16
>
> Disassembly of section __ksymtab_strings:
>
> Any pointers what could be happening?
>

Some more information,

- If I drop the riscv_alternative_fix_auipc_jalr() call after
patch_text_nosync() and then print the alt->old_ptr instructions
before patching I can see the instructions as 0x13 (nop) which is
correct.

- if I call riscv_alternative_fix_auipc_jalr() call after
patch_text_nosync() and then print the alt->old_ptr instructions
before patching I dont see 0x13 (nop) consistently for old
instructions.

- If I replace the nop's in the old instructions with my assembly code
of rz/five cmo and then just use patch_text_nosync() I can see the
correct actual instruction being printed apart from jalr (is some sort
of offset added to it as I see last 4 bits match?) and then is
replaced correctly by the same alt instructions apart from the jalr
(log [0]).

- If I replace the nop's in the old instructions with my assembly code
of rz/five cmo and then use patch_text_nosync() and
riscv_alternative_fix_auipc_jalr() I can see the actual old
instructions differs a bit and again the jalr instruction differs too
in the patched code (log [1]).

[0] https://paste.debian.net/1261412/
[1] https://paste.debian.net/1261413/

Attached is the objump of dma-noncoherent.o for reference.

Note, if I replace the old/orignal instruction to my asm code for
rz/five cmo and replace the errata id's to deadbeef the code works OK.

Cheers,
Prabhakar
Heiko Stübner Nov. 21, 2022, 10:17 p.m. UTC | #15
Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar:
> Hi Heiko,
> 
> On Mon, Nov 21, 2022 at 3:06 PM Lad, Prabhakar
> <prabhakar.csengg@gmail.com> wrote:
> >
> > Hi Heiko,
> >
> > On Mon, Nov 21, 2022 at 11:27 AM Heiko Stübner <heiko@sntech.de> wrote:
> > >
> > > Hi,
> > >
> > > Am Montag, 21. November 2022, 10:50:09 CET schrieb Lad, Prabhakar:
> > > > On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote:
> > > > >
> > > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > > > >
> > > > > Alternatives live in a different section, so addresses used by call
> > > > > functions will point to wrong locations after the patch got applied.
> > > > >
> > > > > Similar to arm64, adjust the location to consider that offset.
> > > > >
> > > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > > > > ---
> > >
> > > [...]
> > >
> > > > I have the below assembly code which I have tested without the
> > > > alternatives for the RZ/Five CMO,
> > > >
> > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)        \
> > > > asm volatile(".option push\n\t\n\t"                    \
> > > >          ".option norvc\n\t"                    \
> > > >          ".option norelax\n\t"                    \
> > > >          "addi sp,sp,-16\n\t"                    \
> > > >          "sd    s0,0(sp)\n\t"                    \
> > > >          "sd    ra,8(sp)\n\t"                    \
> > > >          "addi    s0,sp,16\n\t"                    \
> > > >          "mv a4,%6\n\t"                        \
> > > >          "mv a3,%5\n\t"                        \
> > > >          "mv a2,%4\n\t"                        \
> > > >          "mv a1,%3\n\t"                        \
> > > >          "mv a0,%0\n\t"                        \
> > > >          "call rzfive_cmo\n\t"                    \
> > > >          "ld    ra,8(sp)\n\t"                    \
> > > >          "ld    s0,0(sp)\n\t"                    \
> > > >          "addi    sp,sp,16\n\t"                    \
> > > >          ".option pop\n\t"                        \
> > > >          : : "r"(_cachesize),                    \
> > > >          "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),    \
> > > >          "r"((unsigned long)(_start) + (_size)),            \
> > > >          "r"((unsigned long) (_start)),                \
> > > >          "r"((unsigned long) (_size)),                \
> > > >          "r"((unsigned long) (_dir)),                \
> > > >          "r"((unsigned long) (_ops))                \
> > > >          : "a0", "a1", "a2", "a3", "a4", "memory")
> > > >
> > > > Now when integrate this with ALTERNATIVE_2() as below,
> > > >
> > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops)        \
> > > > asm volatile(ALTERNATIVE_2(                        \
> > > >     __nops(14),                            \
> > > >     "mv a0, %1\n\t"                            \
> > > >     "j 2f\n\t"                            \
> > > >     "3:\n\t"                            \
> > > >     "cbo." __stringify(_op) " (a0)\n\t"                \
> > > >     "add a0, a0, %0\n\t"                        \
> > > >     "2:\n\t"                            \
> > > >     "bltu a0, %2, 3b\n\t"                        \
> > > >     __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM,    \
> > > >     ".option push\n\t\n\t"                        \
> > > >     ".option norvc\n\t"                        \
> > > >     ".option norelax\n\t"                        \
> > > >     "addi sp,sp,-16\n\t"                        \
> > > >     "sd    s0,0(sp)\n\t"                        \
> > > >     "sd    ra,8(sp)\n\t"                        \
> > > >     "addi    s0,sp,16\n\t"                        \
> > > >     "mv a4,%6\n\t"                            \
> > > >     "mv a3,%5\n\t"                            \
> > > >     "mv a2,%4\n\t"                            \
> > > >     "mv a1,%3\n\t"                            \
> > > >     "mv a0,%0\n\t"                            \
> > > >     "call rzfive_cmo\n\t"                \
> > > >     "ld    ra,8(sp)\n\t"                        \
> > > >     "ld    s0,0(sp)\n\t"                        \
> > > >     "addi    sp,sp,16\n\t"                        \
> > > >     ".option pop\n\t"                        \
> > > >     , ANDESTECH_VENDOR_ID,                        \
> > > >             ERRATA_ANDESTECH_NO_IOCP, CONFIG_ERRATA_RZFIVE_CMO)    \
> > > >     : : "r"(_cachesize),                        \
> > > >     "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)),    \
> > > >     "r"((unsigned long)(_start) + (_size)),            \
> > > >     "r"((unsigned long) (_start)),                \
> > > >     "r"((unsigned long) (_size)),                \
> > > >     "r"((unsigned long) (_dir)),                \
> > > >     "r"((unsigned long) (_ops))                \
> > > >     : "a0", "a1", "a2", "a3", "a4", "memory")
> > > >
> > > > I am seeing kernel panic with this change. Looking at the
> > > > riscv_alternative_fix_auipc_jalr() implementation it assumes the rest
> > > > of the alternative options are calls too. Is my understanding correct
> > > > here?
> > >
> > > The loop walks through the instructions after the location got patched and
> > > checks if an instruction is an auipc and the next one is a jalr and only then
> > > adjusts the address accordingly.
> > >
> > Ok so my understanding was wrong here.
> >
> > > So it _should_ leave all other (non auipc+jalr) instructions alone.
> > > (hopefully)
> > >
> > Agreed.
> >
> > >
> > > > Do you think this is the correct approach in my case?
> > >
> > > It does look correct on first glance.
> > >
> > \o/
> >
> > > As I had that passing thought, are you actually calling
> > >         riscv_alternative_fix_auipc_jalr()
> > > from your errata/.../foo.c after doing the patching?
> > >
> > > I.e. with the current patchset, that function is only called from the
> > > cpufeature part, but for example not from the other patching locations.
> > > [and a future revision should probably change that :-) ]
> > >
> > >
> > I have made a local copy of riscv_alternative_fix_auipc_jalr() and
> > then calling it after patch_text_nosync() referring to your patch for
> > str functions.
> >
> > > After making sure that function actually runs, the next thing you could try
> > > is to have both the "original" code and the patch be identical, i.e.
> > > replace the cbo* part with your code as well and then just output the
> > > instructions via printk to check what the addresses do in both.
> > >
> > > After riscv_alternative_fix_auipc_jalr() ran then both code variants
> > > should be identical when using the same code in both areas.
> > >
> > So I have added debug prints to match the instructions as below after
> > and before patching:
> >
> > static void riscv_alternative_print_inst(unsigned int *alt_ptr,
> >                      unsigned int len)
> > {
> >     int num_instr = len / sizeof(u32);
> >     int i;
> >
> >     for (i = 0; i < num_instr; i++)
> >         pr_err("%s instruction: 0x%x\n", __func__, *(alt_ptr + i));
> >
> > }
> >
> > void __init_or_module andes_errata_patch_func(struct alt_entry *begin,
> > struct alt_entry *end,
> >                           unsigned long archid, unsigned long impid,
> >                           unsigned int stage)
> > {
> > ....
> >     if (cpu_req_errata & tmp) {
> >         pr_err("stage: %x -> %px--> %x %x %x\n", stage, alt, tmp,
> > cpu_req_errata, alt->errata_id);
> >         pr_err("old:%ps alt:%ps len:%lx\n", alt->old_ptr,
> > alt->alt_ptr, alt->alt_len);
> >         pr_err("Print old start\n");
> >         riscv_alternative_print_inst(alt->old_ptr, alt->alt_len);
> >         pr_err("Print old end\n");
> >         patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
> >
> >         riscv_alternative_fix_auipc_jalr(alt->old_ptr, alt->alt_len,
> >                         alt->old_ptr - alt->alt_ptr);
> >         pr_err("Print patch start\n");
> >         riscv_alternative_print_inst(alt->alt_ptr, alt->alt_len);
> >         pr_err("Print patch end\n");
> >     }
> > .....
> > }
> >
> > Below is the log:
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] Print new old end
> > [    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
> > [    0.000000] Print patch start
> > [    0.000000] riscv_alternative_print_inst instruction: 0xff010113
> > [    0.000000] riscv_alternative_print_inst instruction: 0x813023
> > [    0.000000] riscv_alternative_print_inst instruction: 0x113423
> > [    0.000000] riscv_alternative_print_inst instruction: 0x1010413
> > [    0.000000] riscv_alternative_print_inst instruction: 0xf0713
> > [    0.000000] riscv_alternative_print_inst instruction: 0x78693
> > [    0.000000] riscv_alternative_print_inst instruction: 0x88613
> > [    0.000000] riscv_alternative_print_inst instruction: 0x80593
> > [    0.000000] riscv_alternative_print_inst instruction: 0xe0513
> > [    0.000000] riscv_alternative_print_inst instruction: 0x97
> > [    0.000000] riscv_alternative_print_inst instruction: 0xcba080e7
> > [    0.000000] riscv_alternative_print_inst instruction: 0x813083
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13403
> > [    0.000000] riscv_alternative_print_inst instruction: 0x1010113
> > [    0.000000] Print patch end
> > [    0.000000] stage: 0 -> ffffffff80a2492c--> 1 1 0
> > [    0.000000] old:arch_sync_dma_for_device
> > alt:riscv_noncoherent_supported len:38
> > [    0.000000] Print  old start
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x970013
> >     ====================> This instruction doesn't look correct it
> > should be 0x13?
> > [    0.000000] Print  old end
> > [    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
> > [    0.000000] Print patch start
> > [    0.000000] riscv_alternative_print_inst instruction: 0xff010113
> > [    0.000000] riscv_alternative_print_inst instruction: 0x813023
> > [    0.000000] riscv_alternative_print_inst instruction: 0x113423
> > [    0.000000] riscv_alternative_print_inst instruction: 0x1010413
> > [    0.000000] riscv_alternative_print_inst instruction: 0x78713
> > [    0.000000] riscv_alternative_print_inst instruction: 0x78693
> > [    0.000000] riscv_alternative_print_inst instruction: 0x88613
> > [    0.000000] riscv_alternative_print_inst instruction: 0x80593
> > [    0.000000] riscv_alternative_print_inst instruction: 0xe0513
> > [    0.000000] riscv_alternative_print_inst instruction: 0x97
> > [    0.000000] riscv_alternative_print_inst instruction: 0xc82080e7
> > ====================> This instruction doesn't look correct comparing
> > to objdump output this should be 000080e7 or does it require the
> > offset too?
> > [    0.000000] riscv_alternative_print_inst instruction: 0x813083
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13403
> > [    0.000000] riscv_alternative_print_inst instruction: 0x1010113
> > [    0.000000] Print patch end
> > [    0.000000] stage: 0 -> ffffffff80a24950--> 1 1 0
> > [    0.000000] old:arch_sync_dma_for_cpu alt:riscv_noncoherent_supported len:38
> > [    0.000000] Print old start
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x97
> > ====================> This instruction doesn't look correct it should
> > be 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0xeee080e7
> >       ====================> This instruction doesn't look correct it
> > should be 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] Print old end
> > [    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
> > [    0.000000] Print patch start
> > [    0.000000] riscv_alternative_print_inst instruction: 0xff010113
> > [    0.000000] riscv_alternative_print_inst instruction: 0x813023
> > [    0.000000] riscv_alternative_print_inst instruction: 0x113423
> > [    0.000000] riscv_alternative_print_inst instruction: 0x1010413
> > [    0.000000] riscv_alternative_print_inst instruction: 0xf0713
> > [    0.000000] riscv_alternative_print_inst instruction: 0x80693
> > [    0.000000] riscv_alternative_print_inst instruction: 0x88613
> > [    0.000000] riscv_alternative_print_inst instruction: 0x78593
> > [    0.000000] riscv_alternative_print_inst instruction: 0xe0513
> > [    0.000000] riscv_alternative_print_inst instruction: 0x97
> > [    0.000000] riscv_alternative_print_inst instruction: 0xc4a080e7
> > ====================> This instruction doesn't look correct comparing
> > to objdump output this should be 000080e7 or does it require the
> > offset too?
> > [    0.000000] riscv_alternative_print_inst instruction: 0x813083
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13403
> > [    0.000000] riscv_alternative_print_inst instruction: 0x1010113
> > [    0.000000] Print patch end
> > [    0.000000] stage: 0 -> ffffffff80a24974--> 1 1 0
> > [    0.000000] old:arch_dma_prep_coherent alt:riscv_noncoherent_supported len:38
> > [    0.000000] Print old start
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x970013
> > ====================> This instruction doesn't look correct it should
> > be 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x80e70000
> > ====================> This instruction doesn't look correct it should
> > be 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0xe720
> > ====================> This instruction doesn't look correct it should
> > be 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13
> > [    0.000000] Print old end
> > [    0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14
> > [    0.000000] Print patch start
> > [    0.000000] riscv_alternative_print_inst instruction: 0xff010113
> > [    0.000000] riscv_alternative_print_inst instruction: 0x813023
> > [    0.000000] riscv_alternative_print_inst instruction: 0x113423
> > [    0.000000] riscv_alternative_print_inst instruction: 0x1010413
> > [    0.000000] riscv_alternative_print_inst instruction: 0xf0713
> > [    0.000000] riscv_alternative_print_inst instruction: 0xe8693
> > [    0.000000] riscv_alternative_print_inst instruction: 0x88613
> > [    0.000000] riscv_alternative_print_inst instruction: 0x78593
> > [    0.000000] riscv_alternative_print_inst instruction: 0x30513
> > [    0.000000] riscv_alternative_print_inst instruction: 0x97
> > [    0.000000] riscv_alternative_print_inst instruction: 0xc12080e7
> > ====================> This instruction doesn't look correct comparing
> > to objdump output this should be 000080e7 + offset?
> > [    0.000000] riscv_alternative_print_inst instruction: 0x813083
> > [    0.000000] riscv_alternative_print_inst instruction: 0x13403
> > [    0.000000] riscv_alternative_print_inst instruction: 0x1010113
> > [    0.000000] Print patch end
> >
> > Here is the output from objdump of the file (dma-noncoherent.o):
> >
> > 000000000000032e <.L888^B1>:
> >  32e:    ff010113              addi    sp,sp,-16
> >  332:    00813023              sd    s0,0(sp)
> >  336:    00113423              sd    ra,8(sp)
> >  33a:    01010413              addi    s0,sp,16
> >  33e:    000f0713              mv    a4,t5
> >  342:    00078693              mv    a3,a5
> >  346:    00088613              mv    a2,a7
> >  34a:    00080593              mv    a1,a6
> >  34e:    000e0513              mv    a0,t3
> >  352:    00000097              auipc    ra,0x0
> >  356:    000080e7              jalr    ra # 352 <.L888^B1+0x24>
> >  35a:    00813083              ld    ra,8(sp)
> >  35e:    00013403              ld    s0,0(sp)
> >  362:    01010113              addi    sp,sp,16
> >
> > 0000000000000366 <.L888^B2>:
> >  366:    ff010113              addi    sp,sp,-16
> >  36a:    00813023              sd    s0,0(sp)
> >  36e:    00113423              sd    ra,8(sp)
> >  372:    01010413              addi    s0,sp,16
> >  376:    00078713              mv    a4,a5
> >  37a:    00078693              mv    a3,a5
> >  37e:    00088613              mv    a2,a7
> >  382:    00080593              mv    a1,a6
> >  386:    000e0513              mv    a0,t3
> >  38a:    00000097              auipc    ra,0x0
> >  38e:    000080e7              jalr    ra # 38a <.L888^B2+0x24>
> >  392:    00813083              ld    ra,8(sp)
> >  396:    00013403              ld    s0,0(sp)
> >  39a:    01010113              addi    sp,sp,16
> >
> > 000000000000039e <.L888^B3>:
> >  39e:    ff010113              addi    sp,sp,-16
> >  3a2:    00813023              sd    s0,0(sp)
> >  3a6:    00113423              sd    ra,8(sp)
> >  3aa:    01010413              addi    s0,sp,16
> >  3ae:    000f0713              mv    a4,t5
> >  3b2:    00080693              mv    a3,a6
> >  3b6:    00088613              mv    a2,a7
> >  3ba:    00078593              mv    a1,a5
> >  3be:    000e0513              mv    a0,t3
> >  3c2:    00000097              auipc    ra,0x0
> >  3c6:    000080e7              jalr    ra # 3c2 <.L888^B3+0x24>
> >  3ca:    00813083              ld    ra,8(sp)
> >  3ce:    00013403              ld    s0,0(sp)
> >  3d2:    01010113              addi    sp,sp,16
> >
> > 00000000000003d6 <.L888^B4>:
> >  3d6:    ff010113              addi    sp,sp,-16
> >  3da:    00813023              sd    s0,0(sp)
> >  3de:    00113423              sd    ra,8(sp)
> >  3e2:    01010413              addi    s0,sp,16
> >  3e6:    000f0713              mv    a4,t5
> >  3ea:    000e8693              mv    a3,t4
> >  3ee:    00088613              mv    a2,a7
> >  3f2:    00078593              mv    a1,a5
> >  3f6:    00030513              mv    a0,t1
> >  3fa:    00000097              auipc    ra,0x0
> >  3fe:    000080e7              jalr    ra # 3fa <.L888^B4+0x24>
> >  402:    00813083              ld    ra,8(sp)
> >  406:    00013403              ld    s0,0(sp)
> >  40a:    01010113              addi    sp,sp,16
> >
> > Disassembly of section __ksymtab_strings:
> >
> > Any pointers what could be happening?
> >
> 
> Some more information,
> 
> - If I drop the riscv_alternative_fix_auipc_jalr() call after
> patch_text_nosync() and then print the alt->old_ptr instructions
> before patching I can see the instructions as 0x13 (nop) which is
> correct.
> 
> - if I call riscv_alternative_fix_auipc_jalr() call after
> patch_text_nosync() and then print the alt->old_ptr instructions
> before patching I dont see 0x13 (nop) consistently for old
> instructions.

which is to be expected I guess.

alt->old_ptr points to the memory location where the live kernel code
lives.

I.e. the code at this location is the thing the kernel actually runs.
The code at this location then gets overwritten by the alternative
assembly.


> - If I replace the nop's in the old instructions with my assembly code
> of rz/five cmo and then just use patch_text_nosync() I can see the
> correct actual instruction being printed apart from jalr (is some sort
> of offset added to it as I see last 4 bits match?) and then is
> replaced correctly by the same alt instructions apart from the jalr
> (log [0]).
> 
> - If I replace the nop's in the old instructions with my assembly code
> of rz/five cmo and then use patch_text_nosync() and
> riscv_alternative_fix_auipc_jalr() I can see the actual old
> instructions differs a bit and again the jalr instruction differs too
> in the patched code (log [1]).
> 
> [0] https://paste.debian.net/1261412/
> [1] https://paste.debian.net/1261413/
> 
> Attached is the objump of dma-noncoherent.o for reference.

I did read that objdumps are not really conclusive when looking
at auipc + jalr instructions, hence the printing of the actual instructions.

As either manually or with a helper like

	https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7

you can then decode the actual instruction and compare.

In your log the two jalr instructions decode to different offsets,
	jalr x1, x1, -180
vs
	jalr x1, x1, -834

Can you check what the patch_offset value is in your case?

Interestingly the
	auipc x1, 0
is 0 for both cases.

I'll try to build a real test-setup mimicing what you're doing
tomorrow (european tomorrow).


Heiko
Heiko Stübner Nov. 21, 2022, 10:38 p.m. UTC | #16
Am Montag, 21. November 2022, 23:17:11 CET schrieb Heiko Stübner:
> Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar:
> > Some more information,
> > 
> > - If I drop the riscv_alternative_fix_auipc_jalr() call after
> > patch_text_nosync() and then print the alt->old_ptr instructions
> > before patching I can see the instructions as 0x13 (nop) which is
> > correct.
> > 
> > - if I call riscv_alternative_fix_auipc_jalr() call after
> > patch_text_nosync() and then print the alt->old_ptr instructions
> > before patching I dont see 0x13 (nop) consistently for old
> > instructions.
> 
> which is to be expected I guess.
> 
> alt->old_ptr points to the memory location where the live kernel code
> lives.
> 
> I.e. the code at this location is the thing the kernel actually runs.
> The code at this location then gets overwritten by the alternative
> assembly.
> 
> 
> > - If I replace the nop's in the old instructions with my assembly code
> > of rz/five cmo and then just use patch_text_nosync() I can see the
> > correct actual instruction being printed apart from jalr (is some sort
> > of offset added to it as I see last 4 bits match?) and then is
> > replaced correctly by the same alt instructions apart from the jalr
> > (log [0]).
> > 
> > - If I replace the nop's in the old instructions with my assembly code
> > of rz/five cmo and then use patch_text_nosync() and
> > riscv_alternative_fix_auipc_jalr() I can see the actual old
> > instructions differs a bit and again the jalr instruction differs too
> > in the patched code (log [1]).
> > 
> > [0] https://paste.debian.net/1261412/
> > [1] https://paste.debian.net/1261413/
> > 
> > Attached is the objump of dma-noncoherent.o for reference.
> 
> I did read that objdumps are not really conclusive when looking
> at auipc + jalr instructions, hence the printing of the actual instructions.
> 
> As either manually or with a helper like
> 
> 	https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7
> 
> you can then decode the actual instruction and compare.
> 
> In your log the two jalr instructions decode to different offsets,
> 	jalr x1, x1, -180
> vs
> 	jalr x1, x1, -834
> 
> Can you check what the patch_offset value is in your case?
> 
> Interestingly the
> 	auipc x1, 0
> is 0 for both cases.
> 
> I'll try to build a real test-setup mimicing what you're doing
> tomorrow (european tomorrow).

also, is it possible for you to put your code on some github
or so? Sometimes looking at the actual code makes things
a lot easier :-)

Thanks
Heiko
Lad, Prabhakar Nov. 21, 2022, 11:59 p.m. UTC | #17
Hi Heiko,

On Mon, Nov 21, 2022 at 10:17 PM Heiko Stübner <heiko@sntech.de> wrote:
>
> Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar:
> > Hi Heiko,
> >
> > On Mon, Nov 21, 2022 at 3:06 PM Lad, Prabhakar
> > <prabhakar.csengg@gmail.com> wrote:
> > >
<snip>
> > Some more information,
> >
> > - If I drop the riscv_alternative_fix_auipc_jalr() call after
> > patch_text_nosync() and then print the alt->old_ptr instructions
> > before patching I can see the instructions as 0x13 (nop) which is
> > correct.
> >
> > - if I call riscv_alternative_fix_auipc_jalr() call after
> > patch_text_nosync() and then print the alt->old_ptr instructions
> > before patching I dont see 0x13 (nop) consistently for old
> > instructions.
>
> which is to be expected I guess.
>
> alt->old_ptr points to the memory location where the live kernel code
> lives.
>
Agreed.

> I.e. the code at this location is the thing the kernel actually runs.
> The code at this location then gets overwritten by the alternative
> assembly.
>
But shouldn't the actual code be nops before patching?

>
> > - If I replace the nop's in the old instructions with my assembly code
> > of rz/five cmo and then just use patch_text_nosync() I can see the
> > correct actual instruction being printed apart from jalr (is some sort
> > of offset added to it as I see last 4 bits match?) and then is
> > replaced correctly by the same alt instructions apart from the jalr
> > (log [0]).
> >
> > - If I replace the nop's in the old instructions with my assembly code
> > of rz/five cmo and then use patch_text_nosync() and
> > riscv_alternative_fix_auipc_jalr() I can see the actual old
> > instructions differs a bit and again the jalr instruction differs too
> > in the patched code (log [1]).
> >
> > [0] https://paste.debian.net/1261412/
> > [1] https://paste.debian.net/1261413/
> >
> > Attached is the objump of dma-noncoherent.o for reference.
>
> I did read that objdumps are not really conclusive when looking
> at auipc + jalr instructions, hence the printing of the actual instructions.
>
> As either manually or with a helper like
>
>         https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7
>
> you can then decode the actual instruction and compare.
>
OK, I will give it a try.


> In your log the two jalr instructions decode to different offsets,
>         jalr x1, x1, -180
> vs
>         jalr x1, x1, -834
>
> Can you check what the patch_offset value is in your case?
>
I'll check that and let you know.

> Interestingly the
>         auipc x1, 0
> is 0 for both cases.
>
> I'll try to build a real test-setup mimicing what you're doing
> tomorrow (european tomorrow).
>
Thank you!

Cheers,
Prabhakar
Lad, Prabhakar Nov. 22, 2022, 12:16 a.m. UTC | #18
Hi Heiko,

On Mon, Nov 21, 2022 at 10:38 PM Heiko Stübner <heiko@sntech.de> wrote:
>
> Am Montag, 21. November 2022, 23:17:11 CET schrieb Heiko Stübner:
> > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar:
> > > Some more information,
> > >
> > > - If I drop the riscv_alternative_fix_auipc_jalr() call after
> > > patch_text_nosync() and then print the alt->old_ptr instructions
> > > before patching I can see the instructions as 0x13 (nop) which is
> > > correct.
> > >
> > > - if I call riscv_alternative_fix_auipc_jalr() call after
> > > patch_text_nosync() and then print the alt->old_ptr instructions
> > > before patching I dont see 0x13 (nop) consistently for old
> > > instructions.
> >
> > which is to be expected I guess.
> >
> > alt->old_ptr points to the memory location where the live kernel code
> > lives.
> >
> > I.e. the code at this location is the thing the kernel actually runs.
> > The code at this location then gets overwritten by the alternative
> > assembly.
> >
> >
> > > - If I replace the nop's in the old instructions with my assembly code
> > > of rz/five cmo and then just use patch_text_nosync() I can see the
> > > correct actual instruction being printed apart from jalr (is some sort
> > > of offset added to it as I see last 4 bits match?) and then is
> > > replaced correctly by the same alt instructions apart from the jalr
> > > (log [0]).
> > >
> > > - If I replace the nop's in the old instructions with my assembly code
> > > of rz/five cmo and then use patch_text_nosync() and
> > > riscv_alternative_fix_auipc_jalr() I can see the actual old
> > > instructions differs a bit and again the jalr instruction differs too
> > > in the patched code (log [1]).
> > >
> > > [0] https://paste.debian.net/1261412/
> > > [1] https://paste.debian.net/1261413/
> > >
> > > Attached is the objump of dma-noncoherent.o for reference.
> >
> > I did read that objdumps are not really conclusive when looking
> > at auipc + jalr instructions, hence the printing of the actual instructions.
> >
> > As either manually or with a helper like
> >
> >       https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7
> >
> > you can then decode the actual instruction and compare.
> >
> > In your log the two jalr instructions decode to different offsets,
> >       jalr x1, x1, -180
> > vs
> >       jalr x1, x1, -834
> >
> > Can you check what the patch_offset value is in your case?
> >
> > Interestingly the
> >       auipc x1, 0
> > is 0 for both cases.
> >
> > I'll try to build a real test-setup mimicing what you're doing
> > tomorrow (european tomorrow).
>
> also, is it possible for you to put your code on some github
> or so? Sometimes looking at the actual code makes things
> a lot easier :-)
>
I have pushed my changes here
https://github.com/prabhakarlad/linux/tree/rzfive-cmo

Cheers,
Prabhakar
Lad, Prabhakar Nov. 22, 2022, 10:59 a.m. UTC | #19
Hi Heiko,

On Mon, Nov 21, 2022 at 10:17 PM Heiko Stübner <heiko@sntech.de> wrote:
>
> Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar:
> > Hi Heiko,
> >
<snip>
> As either manually or with a helper like
>
>         https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7
>
> you can then decode the actual instruction and compare.
>
> In your log the two jalr instructions decode to different offsets,
>         jalr x1, x1, -180
> vs
>         jalr x1, x1, -834
>
> Can you check what the patch_offset value is in your case?
>
patch_offset for the above case is -654.

Cheers,
Prabhakar
Heiko Stübner Nov. 22, 2022, 11:19 a.m. UTC | #20
Am Dienstag, 22. November 2022, 11:59:57 CET schrieb Lad, Prabhakar:
> Hi Heiko,
> 
> On Mon, Nov 21, 2022 at 10:17 PM Heiko Stübner <heiko@sntech.de> wrote:
> >
> > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar:
> > > Hi Heiko,
> > >
> <snip>
> > As either manually or with a helper like
> >
> >         https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7
> >
> > you can then decode the actual instruction and compare.
> >
> > In your log the two jalr instructions decode to different offsets,
> >         jalr x1, x1, -180
> > vs
> >         jalr x1, x1, -834
> >
> > Can you check what the patch_offset value is in your case?
> >
> patch_offset for the above case is -654.

which is a big indicator that the auipc-jalr-fixup function is not catching
the instruction ... i.e. -180 - 654 = -834.

I managed to reproduce that issue with your branch now
(after hacking up stuff a bit to run in qemu :-) ).

I'll try to find out where the fixup fails.


Heiko
Heiko Stübner Nov. 22, 2022, 11:37 a.m. UTC | #21
Am Dienstag, 22. November 2022, 12:19:40 CET schrieb Heiko Stübner:
> Am Dienstag, 22. November 2022, 11:59:57 CET schrieb Lad, Prabhakar:
> > Hi Heiko,
> > 
> > On Mon, Nov 21, 2022 at 10:17 PM Heiko Stübner <heiko@sntech.de> wrote:
> > >
> > > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar:
> > > > Hi Heiko,
> > > >
> > <snip>
> > > As either manually or with a helper like
> > >
> > >         https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7
> > >
> > > you can then decode the actual instruction and compare.
> > >
> > > In your log the two jalr instructions decode to different offsets,
> > >         jalr x1, x1, -180
> > > vs
> > >         jalr x1, x1, -834
> > >
> > > Can you check what the patch_offset value is in your case?
> > >
> > patch_offset for the above case is -654.
> 
> which is a big indicator that the auipc-jalr-fixup function is not catching
> the instruction ... i.e. -180 - 654 = -834.
> 
> I managed to reproduce that issue with your branch now
> (after hacking up stuff a bit to run in qemu :-) ).
> 
> I'll try to find out where the fixup fails.

imagine me with a slightly red head now ... as there is a slightly
embarrassing mistake in the fixup function ;-) .


When going from void* to unsigned int* pointers I have missed
adjusting the actual patch-location.

The call needs to be
	patch_text_nosync(alt_ptr + i, call, 8);

instead of the current
	patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8);

In my str* cases this didn't matter because "i" was 0 there, but in your
longer assembly it actually patched the wrong location.


Heiko

============
For reference, my debug prints to find where the patching fails was:

diff --git a/arch/riscv/errata/renesas/errata.c b/arch/riscv/errata/renesas/errata.c
index 986f1c762d72..a5a47c5e9ff8 100644
--- a/arch/riscv/errata/renesas/errata.c
+++ b/arch/riscv/errata/renesas/errata.c
@@ -72,6 +72,7 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
        u32 rd1;
 
        for (i = 0; i < num_instr; i++) {
+printk("%s: looking at inst 0x%x\n", __func__, *(alt_ptr + i));
                /* is there a further instruction? */
                if (i + 1 >= num_instr)
                        continue;
@@ -84,6 +85,7 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
                if (rd1 != 1)
                        continue;
 
+printk("%s: -> found a auipc + jalr pair\n", __func__);
                /* get and adjust new target address */
                imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i));
                imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1));
@@ -101,8 +103,10 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
                call[0] |= to_auipc_imm(imm1);
                call[1] |= to_jalr_imm(imm1);
 
+printk("%s: patching to 0x%x and 0x%x\n", __func__, call[0], call[1]);
                /* patch the call place again */
-               patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8);
+               patch_text_nosync(alt_ptr + i, call, 8);
+printk("%s: patched to 0x%x and 0x%x\n", __func__, *(alt_ptr + i), *(alt_ptr + i + 1));
        }
 }
 
and then realizing that the "patching to" and "patched to" where different.
Lad, Prabhakar Nov. 22, 2022, 12:28 p.m. UTC | #22
Hi Heiko,

On Tue, Nov 22, 2022 at 11:37 AM Heiko Stübner <heiko@sntech.de> wrote:
>
> Am Dienstag, 22. November 2022, 12:19:40 CET schrieb Heiko Stübner:
> > Am Dienstag, 22. November 2022, 11:59:57 CET schrieb Lad, Prabhakar:
> > > Hi Heiko,
> > >
> > > On Mon, Nov 21, 2022 at 10:17 PM Heiko Stübner <heiko@sntech.de> wrote:
> > > >
> > > > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar:
> > > > > Hi Heiko,
> > > > >
> > > <snip>
> > > > As either manually or with a helper like
> > > >
> > > >         https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7
> > > >
> > > > you can then decode the actual instruction and compare.
> > > >
> > > > In your log the two jalr instructions decode to different offsets,
> > > >         jalr x1, x1, -180
> > > > vs
> > > >         jalr x1, x1, -834
> > > >
> > > > Can you check what the patch_offset value is in your case?
> > > >
> > > patch_offset for the above case is -654.
> >
> > which is a big indicator that the auipc-jalr-fixup function is not catching
> > the instruction ... i.e. -180 - 654 = -834.
> >
> > I managed to reproduce that issue with your branch now
> > (after hacking up stuff a bit to run in qemu :-) ).
> >
> > I'll try to find out where the fixup fails.
>
> imagine me with a slightly red head now ... as there is a slightly
> embarrassing mistake in the fixup function ;-) .
>
Cheer up now :-)

>
> When going from void* to unsigned int* pointers I have missed
> adjusting the actual patch-location.
>
> The call needs to be
>         patch_text_nosync(alt_ptr + i, call, 8);
>
That did the trick! I have done some limited testing on the board
(even replaced orignal instructions back to nop's even with its
working too).

> instead of the current
>         patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8);
>
> In my str* cases this didn't matter because "i" was 0 there, but in your
> longer assembly it actually patched the wrong location.
>
Ahaa right the alt macro just had calls.

>
> Heiko
>
> ============
> For reference, my debug prints to find where the patching fails was:
>
> diff --git a/arch/riscv/errata/renesas/errata.c b/arch/riscv/errata/renesas/errata.c
> index 986f1c762d72..a5a47c5e9ff8 100644
> --- a/arch/riscv/errata/renesas/errata.c
> +++ b/arch/riscv/errata/renesas/errata.c
> @@ -72,6 +72,7 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
>         u32 rd1;
>
>         for (i = 0; i < num_instr; i++) {
> +printk("%s: looking at inst 0x%x\n", __func__, *(alt_ptr + i));
>                 /* is there a further instruction? */
>                 if (i + 1 >= num_instr)
>                         continue;
> @@ -84,6 +85,7 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
>                 if (rd1 != 1)
>                         continue;
>
> +printk("%s: -> found a auipc + jalr pair\n", __func__);
>                 /* get and adjust new target address */
>                 imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i));
>                 imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1));
> @@ -101,8 +103,10 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
>                 call[0] |= to_auipc_imm(imm1);
>                 call[1] |= to_jalr_imm(imm1);
>
> +printk("%s: patching to 0x%x and 0x%x\n", __func__, call[0], call[1]);
>                 /* patch the call place again */
> -               patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8);
> +               patch_text_nosync(alt_ptr + i, call, 8);
> +printk("%s: patched to 0x%x and 0x%x\n", __func__, *(alt_ptr + i), *(alt_ptr + i + 1));
>         }
>  }
>
> and then realizing that the "patching to" and "patched to" where different.
>
Thanks for the hunk.
>

Now waiting for your v3. Meanwhile, I'll look into the ALT3() macro.

Cheers,
Prabhakar
diff mbox series

Patch

diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 694267d1fe81..026512ca9c4c 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -298,6 +298,74 @@  static u32 __init_or_module cpufeature_probe(unsigned int stage)
 	return cpu_req_feature;
 }
 
+#include <asm/parse_asm.h>
+
+DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR)
+DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC)
+
+static inline bool is_auipc_jalr_pair(long insn1, long insn2)
+{
+	return is_auipc_insn(insn1) && is_jalr_insn(insn2);
+}
+
+#define JALR_SIGN_MASK		BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF)
+#define JALR_OFFSET_MASK	I_IMM_11_0_MASK
+#define AUIPC_OFFSET_MASK	U_IMM_31_12_MASK
+#define AUIPC_PAD		(0x00001000)
+#define JALR_SHIFT		I_IMM_11_0_OPOFF
+
+#define to_jalr_imm(offset)						\
+	((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF)
+
+#define to_auipc_imm(offset)						\
+	((offset & JALR_SIGN_MASK) ?					\
+	((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) :	\
+	(offset & AUIPC_OFFSET_MASK))
+
+static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr,
+					     unsigned int len, int patch_offset)
+{
+	int num_instr = len / sizeof(u32);
+	unsigned int call[2];
+	int i;
+	int imm1;
+	u32 rd1;
+
+	for (i = 0; i < num_instr; i++) {
+		/* is there a further instruction? */
+		if (i + 1 >= num_instr)
+			continue;
+
+		if (!is_auipc_jalr_pair(*(alt_ptr + i), *(alt_ptr + i + 1)))
+			continue;
+
+		/* call will use ra register */
+		rd1 = EXTRACT_RD_REG(*(alt_ptr + i));
+		if (rd1 != 1)
+			continue;
+
+		/* get and adjust new target address */
+		imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i));
+		imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1));
+		imm1 -= patch_offset;
+
+		/* pick the original auipc + jalr */
+		call[0] = *(alt_ptr + i);
+		call[1] = *(alt_ptr + i + 1);
+
+		/* drop the old IMMs */
+		call[0] &= ~(U_IMM_31_12_MASK);
+		call[1] &= ~(I_IMM_11_0_MASK << I_IMM_11_0_OPOFF);
+
+		/* add the adapted IMMs */
+		call[0] |= to_auipc_imm(imm1);
+		call[1] |= to_jalr_imm(imm1);
+
+		/* patch the call place again */
+		patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8);
+	}
+}
+
 void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
 						  struct alt_entry *end,
 						  unsigned int stage)
@@ -316,8 +384,15 @@  void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
 		}
 
 		tmp = (1U << alt->errata_id);
-		if (cpu_req_feature & tmp)
-			patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
+		if (cpu_req_feature & tmp) {
+			/* do the basic patching */
+			patch_text_nosync(alt->old_ptr, alt->alt_ptr,
+					  alt->alt_len);
+
+			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
+							 alt->alt_len,
+							 alt->old_ptr - alt->alt_ptr);
+		}
 	}
 }
 #endif