Message ID | 20221110164924.529386-6-heiko@sntech.de (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Zbb string optimizations and call support in alternatives | expand |
Context | Check | Description |
---|---|---|
conchuod/tree_selection | fail | Guessing tree name failed |
On Thu, Nov 10, 2022 at 05:49:22PM +0100, Heiko Stuebner wrote: > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > Alternatives live in a different section, so addresses used by call > functions will point to wrong locations after the patch got applied. > > Similar to arm64, adjust the location to consider that offset. > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> What a lovely function you've got here, seems to make sense though.. Reviewed-by: Conor Dooley <conor.dooley@microchip.com> > --- > arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++- > 1 file changed, 77 insertions(+), 2 deletions(-) > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > index 694267d1fe81..026512ca9c4c 100644 > --- a/arch/riscv/kernel/cpufeature.c > +++ b/arch/riscv/kernel/cpufeature.c > @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage) > return cpu_req_feature; > } > > +#include <asm/parse_asm.h> > + > +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) > +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) > + > +static inline bool is_auipc_jalr_pair(long insn1, long insn2) > +{ > + return is_auipc_insn(insn1) && is_jalr_insn(insn2); > +} > + > +#define JALR_SIGN_MASK BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF) > +#define JALR_OFFSET_MASK I_IMM_11_0_MASK > +#define AUIPC_OFFSET_MASK U_IMM_31_12_MASK > +#define AUIPC_PAD (0x00001000) > +#define JALR_SHIFT I_IMM_11_0_OPOFF > + > +#define to_jalr_imm(offset) \ > + ((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF) > + > +#define to_auipc_imm(offset) \ > + ((offset & JALR_SIGN_MASK) ? \ > + ((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) : \ > + (offset & AUIPC_OFFSET_MASK)) > + > +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, > + unsigned int len, int patch_offset) > +{ > + int num_instr = len / sizeof(u32); > + unsigned int call[2]; > + int i; > + int imm1; > + u32 rd1; > + > + for (i = 0; i < num_instr; i++) { > + /* is there a further instruction? */ > + if (i + 1 >= num_instr) > + continue; > + > + if (!is_auipc_jalr_pair(*(alt_ptr + i), *(alt_ptr + i + 1))) > + continue; > + > + /* call will use ra register */ > + rd1 = EXTRACT_RD_REG(*(alt_ptr + i)); > + if (rd1 != 1) > + continue; > + > + /* get and adjust new target address */ > + imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i)); > + imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1)); > + imm1 -= patch_offset; > + > + /* pick the original auipc + jalr */ > + call[0] = *(alt_ptr + i); > + call[1] = *(alt_ptr + i + 1); > + > + /* drop the old IMMs */ > + call[0] &= ~(U_IMM_31_12_MASK); > + call[1] &= ~(I_IMM_11_0_MASK << I_IMM_11_0_OPOFF); > + > + /* add the adapted IMMs */ > + call[0] |= to_auipc_imm(imm1); > + call[1] |= to_jalr_imm(imm1); > + > + /* patch the call place again */ > + patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8); > + } > +} > + > void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, > struct alt_entry *end, > unsigned int stage) > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, > } > > tmp = (1U << alt->errata_id); > - if (cpu_req_feature & tmp) > - patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); > + if (cpu_req_feature & tmp) { > + /* do the basic patching */ > + patch_text_nosync(alt->old_ptr, alt->alt_ptr, > + alt->alt_len); > + > + riscv_alternative_fix_auipc_jalr(alt->old_ptr, > + alt->alt_len, > + alt->old_ptr - alt->alt_ptr); > + } > } > } > #endif > -- > 2.35.1 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote: > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > Alternatives live in a different section, so addresses used by call > functions will point to wrong locations after the patch got applied. > > Similar to arm64, adjust the location to consider that offset. > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > --- > arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++- > 1 file changed, 77 insertions(+), 2 deletions(-) > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > index 694267d1fe81..026512ca9c4c 100644 > --- a/arch/riscv/kernel/cpufeature.c > +++ b/arch/riscv/kernel/cpufeature.c > @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage) > return cpu_req_feature; > } > > +#include <asm/parse_asm.h> > + > +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) > +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) > + > +static inline bool is_auipc_jalr_pair(long insn1, long insn2) > +{ > + return is_auipc_insn(insn1) && is_jalr_insn(insn2); > +} > + > +#define JALR_SIGN_MASK BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF) > +#define JALR_OFFSET_MASK I_IMM_11_0_MASK > +#define AUIPC_OFFSET_MASK U_IMM_31_12_MASK > +#define AUIPC_PAD (0x00001000) > +#define JALR_SHIFT I_IMM_11_0_OPOFF > + > +#define to_jalr_imm(offset) \ > + ((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF) > + > +#define to_auipc_imm(offset) \ > + ((offset & JALR_SIGN_MASK) ? \ > + ((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) : \ > + (offset & AUIPC_OFFSET_MASK)) > + > +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, > + unsigned int len, int patch_offset) > +{ > + int num_instr = len / sizeof(u32); > + unsigned int call[2]; > + int i; > + int imm1; > + u32 rd1; > + > + for (i = 0; i < num_instr; i++) { > + /* is there a further instruction? */ > + if (i + 1 >= num_instr) > + continue; Isn't this the same as for (i = 0; i < num_instr - 1; i++) ? > + > + if (!is_auipc_jalr_pair(*(alt_ptr + i), *(alt_ptr + i + 1))) > + continue; > + > + /* call will use ra register */ > + rd1 = EXTRACT_RD_REG(*(alt_ptr + i)); > + if (rd1 != 1) > + continue; > + > + /* get and adjust new target address */ > + imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i)); > + imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1)); > + imm1 -= patch_offset; > + > + /* pick the original auipc + jalr */ > + call[0] = *(alt_ptr + i); > + call[1] = *(alt_ptr + i + 1); > + > + /* drop the old IMMs */ > + call[0] &= ~(U_IMM_31_12_MASK); > + call[1] &= ~(I_IMM_11_0_MASK << I_IMM_11_0_OPOFF); > + > + /* add the adapted IMMs */ > + call[0] |= to_auipc_imm(imm1); > + call[1] |= to_jalr_imm(imm1); > + > + /* patch the call place again */ > + patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8); > + } > +} > + > void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, > struct alt_entry *end, > unsigned int stage) > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, > } > > tmp = (1U << alt->errata_id); > - if (cpu_req_feature & tmp) > - patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); > + if (cpu_req_feature & tmp) { > + /* do the basic patching */ > + patch_text_nosync(alt->old_ptr, alt->alt_ptr, > + alt->alt_len); > + > + riscv_alternative_fix_auipc_jalr(alt->old_ptr, > + alt->alt_len, > + alt->old_ptr - alt->alt_ptr); Here you're casting a void pointer to an instruction to an unsigned int pointer, but since we enable compressed instructions this may result in an unaligned pointer. Using this pointer will work, but may be slow. Eg. fault to m-mode to be patched up. We already do that in other places in the arch/riscv, but I'd prefer not to add new instances of this. > + } > } > } > #endif > -- > 2.35.1
On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote: > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote: ... > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, > > } > > > > tmp = (1U << alt->errata_id); > > - if (cpu_req_feature & tmp) > > - patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); > > + if (cpu_req_feature & tmp) { > > + /* do the basic patching */ > > + patch_text_nosync(alt->old_ptr, alt->alt_ptr, > > + alt->alt_len); > > + > > + riscv_alternative_fix_auipc_jalr(alt->old_ptr, > > + alt->alt_len, > > + alt->old_ptr - alt->alt_ptr); > > Here you're casting a void pointer to an instruction to an unsigned > int pointer, but since we enable compressed instructions this may > result in an unaligned pointer. Using this pointer will work, but may > be slow. Eg. fault to m-mode to be patched up. We already do that in > other places in the arch/riscv, but I'd prefer not to add new > instances of this. Alternative instruction sequences (old and new) have compression disabled. Thanks, drew
On Mon, 14 Nov 2022 at 12:35, Andrew Jones <ajones@ventanamicro.com> wrote: > > On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote: > > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote: > ... > > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, > > > } > > > > > > tmp = (1U << alt->errata_id); > > > - if (cpu_req_feature & tmp) > > > - patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); > > > + if (cpu_req_feature & tmp) { > > > + /* do the basic patching */ > > > + patch_text_nosync(alt->old_ptr, alt->alt_ptr, > > > + alt->alt_len); > > > + > > > + riscv_alternative_fix_auipc_jalr(alt->old_ptr, > > > + alt->alt_len, > > > + alt->old_ptr - alt->alt_ptr); > > > > Here you're casting a void pointer to an instruction to an unsigned > > int pointer, but since we enable compressed instructions this may > > result in an unaligned pointer. Using this pointer will work, but may > > be slow. Eg. fault to m-mode to be patched up. We already do that in > > other places in the arch/riscv, but I'd prefer not to add new > > instances of this. > > Alternative instruction sequences (old and new) have compression disabled. I see, but if the instructions before the alternative sequence ends on an unaligned address will there be inserted 16bit NOPs to make sure the alternative sequence will be aligned? > Thanks, > drew
Am Montag, 14. November 2022, 12:35:53 CET schrieb Andrew Jones: > On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote: > > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote: > ... > > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, > > > } > > > > > > tmp = (1U << alt->errata_id); > > > - if (cpu_req_feature & tmp) > > > - patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); > > > + if (cpu_req_feature & tmp) { > > > + /* do the basic patching */ > > > + patch_text_nosync(alt->old_ptr, alt->alt_ptr, > > > + alt->alt_len); > > > + > > > + riscv_alternative_fix_auipc_jalr(alt->old_ptr, > > > + alt->alt_len, > > > + alt->old_ptr - alt->alt_ptr); > > > > Here you're casting a void pointer to an instruction to an unsigned > > int pointer, but since we enable compressed instructions this may > > result in an unaligned pointer. Using this pointer will work, but may > > be slow. Eg. fault to m-mode to be patched up. We already do that in > > other places in the arch/riscv, but I'd prefer not to add new > > instances of this. > > Alternative instruction sequences (old and new) have compression disabled. That was my first thought as well, but I think Emil was talking more about the placement of the alternative block inside the running kernel. i.e. I guess the starting point of an alternative sequence could also be unaligned. Though I don't _yet_ see how an improvement could look like.
On Mon, Nov 14, 2022 at 12:38:39PM +0100, Heiko Stübner wrote: > Am Montag, 14. November 2022, 12:35:53 CET schrieb Andrew Jones: > > On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote: > > > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote: > > ... > > > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, > > > > } > > > > > > > > tmp = (1U << alt->errata_id); > > > > - if (cpu_req_feature & tmp) > > > > - patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); > > > > + if (cpu_req_feature & tmp) { > > > > + /* do the basic patching */ > > > > + patch_text_nosync(alt->old_ptr, alt->alt_ptr, > > > > + alt->alt_len); > > > > + > > > > + riscv_alternative_fix_auipc_jalr(alt->old_ptr, > > > > + alt->alt_len, > > > > + alt->old_ptr - alt->alt_ptr); > > > > > > Here you're casting a void pointer to an instruction to an unsigned > > > int pointer, but since we enable compressed instructions this may > > > result in an unaligned pointer. Using this pointer will work, but may > > > be slow. Eg. fault to m-mode to be patched up. We already do that in > > > other places in the arch/riscv, but I'd prefer not to add new > > > instances of this. > > > > Alternative instruction sequences (old and new) have compression disabled. > > That was my first thought as well, but I think Emil was talking more about the > placement of the alternative block inside the running kernel. > > i.e. I guess the starting point of an alternative sequence could also be unaligned. Oh, I see. > > Though I don't _yet_ see how an improvement could look like. I think we can patch the alternative macros to add alignment. Something like diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index ec2f3f1b836f..3c330a9066f7 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -20,6 +20,7 @@ ALT_ENTRY 886b, 888f, \vendor_id, \errata_id, 889f - 888f .popsection .subsection 1 + .balign 4 888 : .option push .option norvc @@ -34,6 +35,7 @@ .endm .macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable + .balign 4 886 : .option push .option norvc @@ -49,6 +51,7 @@ .macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ new_c_2, vendor_id_2, errata_id_2, enable_2 + .balign 4 886 : .option push .option norvc @@ -87,6 +90,7 @@ ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \ ".popsection\n" \ ".subsection 1\n" \ + ".balign 4\n" \ "888 :\n" \ ".option push\n" \ ".option norvc\n" \ @@ -100,6 +104,7 @@ ".endif\n" #define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \ + ".balign 4\n" \ "886 :\n" \ ".option push\n" \ ".option norvc\n" \ @@ -116,6 +121,7 @@ enable_1, \ new_c_2, vendor_id_2, errata_id_2, \ enable_2) \ + ".balign 4\n" \ "886 :\n" \ ".option push\n" \ ".option norvc\n" \
On Mon, 14 Nov 2022 at 13:15, Andrew Jones <ajones@ventanamicro.com> wrote: > > On Mon, Nov 14, 2022 at 12:38:39PM +0100, Heiko Stübner wrote: > > Am Montag, 14. November 2022, 12:35:53 CET schrieb Andrew Jones: > > > On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote: > > > > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote: > > > ... > > > > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, > > > > > } > > > > > > > > > > tmp = (1U << alt->errata_id); > > > > > - if (cpu_req_feature & tmp) > > > > > - patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); > > > > > + if (cpu_req_feature & tmp) { > > > > > + /* do the basic patching */ > > > > > + patch_text_nosync(alt->old_ptr, alt->alt_ptr, > > > > > + alt->alt_len); > > > > > + > > > > > + riscv_alternative_fix_auipc_jalr(alt->old_ptr, > > > > > + alt->alt_len, > > > > > + alt->old_ptr - alt->alt_ptr); > > > > > > > > Here you're casting a void pointer to an instruction to an unsigned > > > > int pointer, but since we enable compressed instructions this may > > > > result in an unaligned pointer. Using this pointer will work, but may > > > > be slow. Eg. fault to m-mode to be patched up. We already do that in > > > > other places in the arch/riscv, but I'd prefer not to add new > > > > instances of this. > > > > > > Alternative instruction sequences (old and new) have compression disabled. > > > > That was my first thought as well, but I think Emil was talking more about the > > placement of the alternative block inside the running kernel. > > > > i.e. I guess the starting point of an alternative sequence could also be unaligned. > > Oh, I see. > > > > > Though I don't _yet_ see how an improvement could look like. > > I think we can patch the alternative macros to add alignment. Something > like > > diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h > index ec2f3f1b836f..3c330a9066f7 100644 > --- a/arch/riscv/include/asm/alternative-macros.h > +++ b/arch/riscv/include/asm/alternative-macros.h > @@ -20,6 +20,7 @@ > ALT_ENTRY 886b, 888f, \vendor_id, \errata_id, 889f - 888f > .popsection > .subsection 1 > + .balign 4 > 888 : > .option push > .option norvc > @@ -34,6 +35,7 @@ > .endm > > .macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable > + .balign 4 > 886 : > .option push > .option norvc > @@ -49,6 +51,7 @@ > > .macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ > new_c_2, vendor_id_2, errata_id_2, enable_2 > + .balign 4 > 886 : > .option push > .option norvc > @@ -87,6 +90,7 @@ > ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \ > ".popsection\n" \ > ".subsection 1\n" \ > + ".balign 4\n" \ > "888 :\n" \ > ".option push\n" \ > ".option norvc\n" \ > @@ -100,6 +104,7 @@ > ".endif\n" > > #define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \ > + ".balign 4\n" \ > "886 :\n" \ > ".option push\n" \ > ".option norvc\n" \ > @@ -116,6 +121,7 @@ > enable_1, \ > new_c_2, vendor_id_2, errata_id_2, \ > enable_2) \ > + ".balign 4\n" \ > "886 :\n" \ > ".option push\n" \ > ".option norvc\n" \ Why not just use accessors? Eg. unsigned int riscv_instruction_at(void *p) { u16 *parcel = p; return (unsigned int)parcel[0] | (unsigned int)parcel[1] << 16; }
On Mon, 14 Nov 2022 at 12:38, Heiko Stübner <heiko@sntech.de> wrote: > > Am Montag, 14. November 2022, 12:35:53 CET schrieb Andrew Jones: > > On Mon, Nov 14, 2022 at 11:57:29AM +0100, Emil Renner Berthing wrote: > > > On Thu, 10 Nov 2022 at 17:50, Heiko Stuebner <heiko@sntech.de> wrote: > > ... > > > > @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, > > > > } > > > > > > > > tmp = (1U << alt->errata_id); > > > > - if (cpu_req_feature & tmp) > > > > - patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); > > > > + if (cpu_req_feature & tmp) { > > > > + /* do the basic patching */ > > > > + patch_text_nosync(alt->old_ptr, alt->alt_ptr, > > > > + alt->alt_len); > > > > + > > > > + riscv_alternative_fix_auipc_jalr(alt->old_ptr, > > > > + alt->alt_len, > > > > + alt->old_ptr - alt->alt_ptr); > > > > > > Here you're casting a void pointer to an instruction to an unsigned > > > int pointer, but since we enable compressed instructions this may > > > result in an unaligned pointer. Using this pointer will work, but may > > > be slow. Eg. fault to m-mode to be patched up. We already do that in > > > other places in the arch/riscv, but I'd prefer not to add new > > > instances of this. > > > > Alternative instruction sequences (old and new) have compression disabled. > > That was my first thought as well, but I think Emil was talking more about the > placement of the alternative block inside the running kernel. > > i.e. I guess the starting point of an alternative sequence could also be unaligned. Indeed. Instruction alignment is only guaranteed to be 16bits, even for larger instructions. > Though I don't _yet_ see how an improvement could look like. The general strategy would be multiple smaller accesses that are then stitched together. This will require (for a 32bit opcode) at least 2 loads, 1 shift and 1 or — for a critical path of 3 instructions. Given that we are running on RV64, you can handle up to 48bit by aligning down, performing a 64bit load and (if needed) shifting. Finally, profiles looks like it will give us support for misaligned loads (see https://github.com/riscv/riscv-profiles/blob/main/profiles.adoc#rva22-profiles and where the Zicclsm extension is mandated). Philipp.
Hi Heiko, Thank you for the patch. On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote: > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > Alternatives live in a different section, so addresses used by call > functions will point to wrong locations after the patch got applied. > > Similar to arm64, adjust the location to consider that offset. > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > --- > arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++- > 1 file changed, 77 insertions(+), 2 deletions(-) > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > index 694267d1fe81..026512ca9c4c 100644 > --- a/arch/riscv/kernel/cpufeature.c > +++ b/arch/riscv/kernel/cpufeature.c > @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage) > return cpu_req_feature; > } > > +#include <asm/parse_asm.h> > + > +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) > +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) > + > +static inline bool is_auipc_jalr_pair(long insn1, long insn2) > +{ > + return is_auipc_insn(insn1) && is_jalr_insn(insn2); > +} > + > +#define JALR_SIGN_MASK BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF) > +#define JALR_OFFSET_MASK I_IMM_11_0_MASK > +#define AUIPC_OFFSET_MASK U_IMM_31_12_MASK > +#define AUIPC_PAD (0x00001000) > +#define JALR_SHIFT I_IMM_11_0_OPOFF > + > +#define to_jalr_imm(offset) \ > + ((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF) > + > +#define to_auipc_imm(offset) \ > + ((offset & JALR_SIGN_MASK) ? \ > + ((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) : \ > + (offset & AUIPC_OFFSET_MASK)) > + > +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, > + unsigned int len, int patch_offset) > +{ I am yet to test this with my ASM code yet, but maybe can we move this to [0] so that other erratas can make use of it too? [0] arch/riscv/kernel/patch.c Cheers, Prabhakar
Am Dienstag, 15. November 2022, 15:28:27 CET schrieb Lad, Prabhakar: > Hi Heiko, > > Thank you for the patch. > > On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote: > > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > Alternatives live in a different section, so addresses used by call > > functions will point to wrong locations after the patch got applied. > > > > Similar to arm64, adjust the location to consider that offset. > > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > > --- > > arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++- > > 1 file changed, 77 insertions(+), 2 deletions(-) > > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > > index 694267d1fe81..026512ca9c4c 100644 > > --- a/arch/riscv/kernel/cpufeature.c > > +++ b/arch/riscv/kernel/cpufeature.c > > @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage) > > return cpu_req_feature; > > } > > > > +#include <asm/parse_asm.h> > > + > > +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) > > +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) > > + > > +static inline bool is_auipc_jalr_pair(long insn1, long insn2) > > +{ > > + return is_auipc_insn(insn1) && is_jalr_insn(insn2); > > +} > > + > > +#define JALR_SIGN_MASK BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF) > > +#define JALR_OFFSET_MASK I_IMM_11_0_MASK > > +#define AUIPC_OFFSET_MASK U_IMM_31_12_MASK > > +#define AUIPC_PAD (0x00001000) > > +#define JALR_SHIFT I_IMM_11_0_OPOFF > > + > > +#define to_jalr_imm(offset) \ > > + ((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF) > > + > > +#define to_auipc_imm(offset) \ > > + ((offset & JALR_SIGN_MASK) ? \ > > + ((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) : \ > > + (offset & AUIPC_OFFSET_MASK)) > > + > > +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, > > + unsigned int len, int patch_offset) > > +{ > > I am yet to test this with my ASM code yet, but maybe can we move this > to [0] so that other erratas can make use of it too? > > [0] arch/riscv/kernel/patch.c yeah, that sounds like a very good plan. I also want to make the to_foo_imm macros shared. I.e. right now they're just duplicated from the ftrace patching code. Heiko
Hi Heiko, On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote: > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > Alternatives live in a different section, so addresses used by call > functions will point to wrong locations after the patch got applied. > > Similar to arm64, adjust the location to consider that offset. > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > --- > arch/riscv/kernel/cpufeature.c | 79 +++++++++++++++++++++++++++++++++- > 1 file changed, 77 insertions(+), 2 deletions(-) > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > index 694267d1fe81..026512ca9c4c 100644 > --- a/arch/riscv/kernel/cpufeature.c > +++ b/arch/riscv/kernel/cpufeature.c > @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage) > return cpu_req_feature; > } > > +#include <asm/parse_asm.h> > + > +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) > +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) > + > +static inline bool is_auipc_jalr_pair(long insn1, long insn2) > +{ > + return is_auipc_insn(insn1) && is_jalr_insn(insn2); > +} > + > +#define JALR_SIGN_MASK BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF) > +#define JALR_OFFSET_MASK I_IMM_11_0_MASK > +#define AUIPC_OFFSET_MASK U_IMM_31_12_MASK > +#define AUIPC_PAD (0x00001000) > +#define JALR_SHIFT I_IMM_11_0_OPOFF > + > +#define to_jalr_imm(offset) \ > + ((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF) > + > +#define to_auipc_imm(offset) \ > + ((offset & JALR_SIGN_MASK) ? \ > + ((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) : \ > + (offset & AUIPC_OFFSET_MASK)) > + > +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, > + unsigned int len, int patch_offset) > +{ > + int num_instr = len / sizeof(u32); > + unsigned int call[2]; > + int i; > + int imm1; > + u32 rd1; > + > + for (i = 0; i < num_instr; i++) { > + /* is there a further instruction? */ > + if (i + 1 >= num_instr) > + continue; > + > + if (!is_auipc_jalr_pair(*(alt_ptr + i), *(alt_ptr + i + 1))) > + continue; > + > + /* call will use ra register */ > + rd1 = EXTRACT_RD_REG(*(alt_ptr + i)); > + if (rd1 != 1) > + continue; > + > + /* get and adjust new target address */ > + imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i)); > + imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1)); > + imm1 -= patch_offset; > + > + /* pick the original auipc + jalr */ > + call[0] = *(alt_ptr + i); > + call[1] = *(alt_ptr + i + 1); > + > + /* drop the old IMMs */ > + call[0] &= ~(U_IMM_31_12_MASK); > + call[1] &= ~(I_IMM_11_0_MASK << I_IMM_11_0_OPOFF); > + > + /* add the adapted IMMs */ > + call[0] |= to_auipc_imm(imm1); > + call[1] |= to_jalr_imm(imm1); > + > + /* patch the call place again */ > + patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8); > + } > +} > + I have the below assembly code which I have tested without the alternatives for the RZ/Five CMO, #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ asm volatile(".option push\n\t\n\t" \ ".option norvc\n\t" \ ".option norelax\n\t" \ "addi sp,sp,-16\n\t" \ "sd s0,0(sp)\n\t" \ "sd ra,8(sp)\n\t" \ "addi s0,sp,16\n\t" \ "mv a4,%6\n\t" \ "mv a3,%5\n\t" \ "mv a2,%4\n\t" \ "mv a1,%3\n\t" \ "mv a0,%0\n\t" \ "call rzfive_cmo\n\t" \ "ld ra,8(sp)\n\t" \ "ld s0,0(sp)\n\t" \ "addi sp,sp,16\n\t" \ ".option pop\n\t" \ : : "r"(_cachesize), \ "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ "r"((unsigned long)(_start) + (_size)), \ "r"((unsigned long) (_start)), \ "r"((unsigned long) (_size)), \ "r"((unsigned long) (_dir)), \ "r"((unsigned long) (_ops)) \ : "a0", "a1", "a2", "a3", "a4", "memory") Now when integrate this with ALTERNATIVE_2() as below, #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ asm volatile(ALTERNATIVE_2( \ __nops(14), \ "mv a0, %1\n\t" \ "j 2f\n\t" \ "3:\n\t" \ "cbo." __stringify(_op) " (a0)\n\t" \ "add a0, a0, %0\n\t" \ "2:\n\t" \ "bltu a0, %2, 3b\n\t" \ __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ ".option push\n\t\n\t" \ ".option norvc\n\t" \ ".option norelax\n\t" \ "addi sp,sp,-16\n\t" \ "sd s0,0(sp)\n\t" \ "sd ra,8(sp)\n\t" \ "addi s0,sp,16\n\t" \ "mv a4,%6\n\t" \ "mv a3,%5\n\t" \ "mv a2,%4\n\t" \ "mv a1,%3\n\t" \ "mv a0,%0\n\t" \ "call rzfive_cmo\n\t" \ "ld ra,8(sp)\n\t" \ "ld s0,0(sp)\n\t" \ "addi sp,sp,16\n\t" \ ".option pop\n\t" \ , ANDESTECH_VENDOR_ID, \ ERRATA_ANDESTECH_NO_IOCP, CONFIG_ERRATA_RZFIVE_CMO) \ : : "r"(_cachesize), \ "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ "r"((unsigned long)(_start) + (_size)), \ "r"((unsigned long) (_start)), \ "r"((unsigned long) (_size)), \ "r"((unsigned long) (_dir)), \ "r"((unsigned long) (_ops)) \ : "a0", "a1", "a2", "a3", "a4", "memory") I am seeing kernel panic with this change. Looking at the riscv_alternative_fix_auipc_jalr() implementation it assumes the rest of the alternative options are calls too. Is my understanding correct here? Do you think this is the correct approach in my case? Note, I wanted to test with ALTERNATIVE_2() first to make sure everything is okay and then later test my ALTERNATIVE_3() implementation. Cheers, Prabhakar
Hi, Am Montag, 21. November 2022, 10:50:09 CET schrieb Lad, Prabhakar: > On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote: > > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > Alternatives live in a different section, so addresses used by call > > functions will point to wrong locations after the patch got applied. > > > > Similar to arm64, adjust the location to consider that offset. > > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > > --- [...] > I have the below assembly code which I have tested without the > alternatives for the RZ/Five CMO, > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > asm volatile(".option push\n\t\n\t" \ > ".option norvc\n\t" \ > ".option norelax\n\t" \ > "addi sp,sp,-16\n\t" \ > "sd s0,0(sp)\n\t" \ > "sd ra,8(sp)\n\t" \ > "addi s0,sp,16\n\t" \ > "mv a4,%6\n\t" \ > "mv a3,%5\n\t" \ > "mv a2,%4\n\t" \ > "mv a1,%3\n\t" \ > "mv a0,%0\n\t" \ > "call rzfive_cmo\n\t" \ > "ld ra,8(sp)\n\t" \ > "ld s0,0(sp)\n\t" \ > "addi sp,sp,16\n\t" \ > ".option pop\n\t" \ > : : "r"(_cachesize), \ > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > "r"((unsigned long)(_start) + (_size)), \ > "r"((unsigned long) (_start)), \ > "r"((unsigned long) (_size)), \ > "r"((unsigned long) (_dir)), \ > "r"((unsigned long) (_ops)) \ > : "a0", "a1", "a2", "a3", "a4", "memory") > > Now when integrate this with ALTERNATIVE_2() as below, > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > asm volatile(ALTERNATIVE_2( \ > __nops(14), \ > "mv a0, %1\n\t" \ > "j 2f\n\t" \ > "3:\n\t" \ > "cbo." __stringify(_op) " (a0)\n\t" \ > "add a0, a0, %0\n\t" \ > "2:\n\t" \ > "bltu a0, %2, 3b\n\t" \ > __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > ".option push\n\t\n\t" \ > ".option norvc\n\t" \ > ".option norelax\n\t" \ > "addi sp,sp,-16\n\t" \ > "sd s0,0(sp)\n\t" \ > "sd ra,8(sp)\n\t" \ > "addi s0,sp,16\n\t" \ > "mv a4,%6\n\t" \ > "mv a3,%5\n\t" \ > "mv a2,%4\n\t" \ > "mv a1,%3\n\t" \ > "mv a0,%0\n\t" \ > "call rzfive_cmo\n\t" \ > "ld ra,8(sp)\n\t" \ > "ld s0,0(sp)\n\t" \ > "addi sp,sp,16\n\t" \ > ".option pop\n\t" \ > , ANDESTECH_VENDOR_ID, \ > ERRATA_ANDESTECH_NO_IOCP, CONFIG_ERRATA_RZFIVE_CMO) \ > : : "r"(_cachesize), \ > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > "r"((unsigned long)(_start) + (_size)), \ > "r"((unsigned long) (_start)), \ > "r"((unsigned long) (_size)), \ > "r"((unsigned long) (_dir)), \ > "r"((unsigned long) (_ops)) \ > : "a0", "a1", "a2", "a3", "a4", "memory") > > I am seeing kernel panic with this change. Looking at the > riscv_alternative_fix_auipc_jalr() implementation it assumes the rest > of the alternative options are calls too. Is my understanding correct > here? The loop walks through the instructions after the location got patched and checks if an instruction is an auipc and the next one is a jalr and only then adjusts the address accordingly. So it _should_ leave all other (non auipc+jalr) instructions alone. (hopefully) > Do you think this is the correct approach in my case? It does look correct on first glance. As I had that passing thought, are you actually calling riscv_alternative_fix_auipc_jalr() from your errata/.../foo.c after doing the patching? I.e. with the current patchset, that function is only called from the cpufeature part, but for example not from the other patching locations. [and a future revision should probably change that :-) ] After making sure that function actually runs, the next thing you could try is to have both the "original" code and the patch be identical, i.e. replace the cbo* part with your code as well and then just output the instructions via printk to check what the addresses do in both. After riscv_alternative_fix_auipc_jalr() ran then both code variants should be identical when using the same code in both areas. > Note, I wanted to test with ALTERNATIVE_2() first to make sure > everything is okay and then later test my ALTERNATIVE_3() > implementation. sounds like a very sensible idea to use the existing macros first for verification :-) Heiko
Hi Heiko, On Mon, Nov 21, 2022 at 11:27 AM Heiko Stübner <heiko@sntech.de> wrote: > > Hi, > > Am Montag, 21. November 2022, 10:50:09 CET schrieb Lad, Prabhakar: > > On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote: > > > > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > > > Alternatives live in a different section, so addresses used by call > > > functions will point to wrong locations after the patch got applied. > > > > > > Similar to arm64, adjust the location to consider that offset. > > > > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > --- > > [...] > > > I have the below assembly code which I have tested without the > > alternatives for the RZ/Five CMO, > > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > > asm volatile(".option push\n\t\n\t" \ > > ".option norvc\n\t" \ > > ".option norelax\n\t" \ > > "addi sp,sp,-16\n\t" \ > > "sd s0,0(sp)\n\t" \ > > "sd ra,8(sp)\n\t" \ > > "addi s0,sp,16\n\t" \ > > "mv a4,%6\n\t" \ > > "mv a3,%5\n\t" \ > > "mv a2,%4\n\t" \ > > "mv a1,%3\n\t" \ > > "mv a0,%0\n\t" \ > > "call rzfive_cmo\n\t" \ > > "ld ra,8(sp)\n\t" \ > > "ld s0,0(sp)\n\t" \ > > "addi sp,sp,16\n\t" \ > > ".option pop\n\t" \ > > : : "r"(_cachesize), \ > > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > > "r"((unsigned long)(_start) + (_size)), \ > > "r"((unsigned long) (_start)), \ > > "r"((unsigned long) (_size)), \ > > "r"((unsigned long) (_dir)), \ > > "r"((unsigned long) (_ops)) \ > > : "a0", "a1", "a2", "a3", "a4", "memory") > > > > Now when integrate this with ALTERNATIVE_2() as below, > > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > > asm volatile(ALTERNATIVE_2( \ > > __nops(14), \ > > "mv a0, %1\n\t" \ > > "j 2f\n\t" \ > > "3:\n\t" \ > > "cbo." __stringify(_op) " (a0)\n\t" \ > > "add a0, a0, %0\n\t" \ > > "2:\n\t" \ > > "bltu a0, %2, 3b\n\t" \ > > __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > > ".option push\n\t\n\t" \ > > ".option norvc\n\t" \ > > ".option norelax\n\t" \ > > "addi sp,sp,-16\n\t" \ > > "sd s0,0(sp)\n\t" \ > > "sd ra,8(sp)\n\t" \ > > "addi s0,sp,16\n\t" \ > > "mv a4,%6\n\t" \ > > "mv a3,%5\n\t" \ > > "mv a2,%4\n\t" \ > > "mv a1,%3\n\t" \ > > "mv a0,%0\n\t" \ > > "call rzfive_cmo\n\t" \ > > "ld ra,8(sp)\n\t" \ > > "ld s0,0(sp)\n\t" \ > > "addi sp,sp,16\n\t" \ > > ".option pop\n\t" \ > > , ANDESTECH_VENDOR_ID, \ > > ERRATA_ANDESTECH_NO_IOCP, CONFIG_ERRATA_RZFIVE_CMO) \ > > : : "r"(_cachesize), \ > > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > > "r"((unsigned long)(_start) + (_size)), \ > > "r"((unsigned long) (_start)), \ > > "r"((unsigned long) (_size)), \ > > "r"((unsigned long) (_dir)), \ > > "r"((unsigned long) (_ops)) \ > > : "a0", "a1", "a2", "a3", "a4", "memory") > > > > I am seeing kernel panic with this change. Looking at the > > riscv_alternative_fix_auipc_jalr() implementation it assumes the rest > > of the alternative options are calls too. Is my understanding correct > > here? > > The loop walks through the instructions after the location got patched and > checks if an instruction is an auipc and the next one is a jalr and only then > adjusts the address accordingly. > Ok so my understanding was wrong here. > So it _should_ leave all other (non auipc+jalr) instructions alone. > (hopefully) > Agreed. > > > Do you think this is the correct approach in my case? > > It does look correct on first glance. > \o/ > As I had that passing thought, are you actually calling > riscv_alternative_fix_auipc_jalr() > from your errata/.../foo.c after doing the patching? > > I.e. with the current patchset, that function is only called from the > cpufeature part, but for example not from the other patching locations. > [and a future revision should probably change that :-) ] > > I have made a local copy of riscv_alternative_fix_auipc_jalr() and then calling it after patch_text_nosync() referring to your patch for str functions. > After making sure that function actually runs, the next thing you could try > is to have both the "original" code and the patch be identical, i.e. > replace the cbo* part with your code as well and then just output the > instructions via printk to check what the addresses do in both. > > After riscv_alternative_fix_auipc_jalr() ran then both code variants > should be identical when using the same code in both areas. > So I have added debug prints to match the instructions as below after and before patching: static void riscv_alternative_print_inst(unsigned int *alt_ptr, unsigned int len) { int num_instr = len / sizeof(u32); int i; for (i = 0; i < num_instr; i++) pr_err("%s instruction: 0x%x\n", __func__, *(alt_ptr + i)); } void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned long archid, unsigned long impid, unsigned int stage) { .... if (cpu_req_errata & tmp) { pr_err("stage: %x -> %px--> %x %x %x\n", stage, alt, tmp, cpu_req_errata, alt->errata_id); pr_err("old:%ps alt:%ps len:%lx\n", alt->old_ptr, alt->alt_ptr, alt->alt_len); pr_err("Print old start\n"); riscv_alternative_print_inst(alt->old_ptr, alt->alt_len); pr_err("Print old end\n"); patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); riscv_alternative_fix_auipc_jalr(alt->old_ptr, alt->alt_len, alt->old_ptr - alt->alt_ptr); pr_err("Print patch start\n"); riscv_alternative_print_inst(alt->alt_ptr, alt->alt_len); pr_err("Print patch end\n"); } ..... } Below is the log: [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] Print new old end [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 [ 0.000000] Print patch start [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 [ 0.000000] riscv_alternative_print_inst instruction: 0xf0713 [ 0.000000] riscv_alternative_print_inst instruction: 0x78693 [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 [ 0.000000] riscv_alternative_print_inst instruction: 0x80593 [ 0.000000] riscv_alternative_print_inst instruction: 0xe0513 [ 0.000000] riscv_alternative_print_inst instruction: 0x97 [ 0.000000] riscv_alternative_print_inst instruction: 0xcba080e7 [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 [ 0.000000] Print patch end [ 0.000000] stage: 0 -> ffffffff80a2492c--> 1 1 0 [ 0.000000] old:arch_sync_dma_for_device alt:riscv_noncoherent_supported len:38 [ 0.000000] Print old start [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x970013 ====================> This instruction doesn't look correct it should be 0x13? [ 0.000000] Print old end [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 [ 0.000000] Print patch start [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 [ 0.000000] riscv_alternative_print_inst instruction: 0x78713 [ 0.000000] riscv_alternative_print_inst instruction: 0x78693 [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 [ 0.000000] riscv_alternative_print_inst instruction: 0x80593 [ 0.000000] riscv_alternative_print_inst instruction: 0xe0513 [ 0.000000] riscv_alternative_print_inst instruction: 0x97 [ 0.000000] riscv_alternative_print_inst instruction: 0xc82080e7 ====================> This instruction doesn't look correct comparing to objdump output this should be 000080e7 or does it require the offset too? [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 [ 0.000000] Print patch end [ 0.000000] stage: 0 -> ffffffff80a24950--> 1 1 0 [ 0.000000] old:arch_sync_dma_for_cpu alt:riscv_noncoherent_supported len:38 [ 0.000000] Print old start [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x97 ====================> This instruction doesn't look correct it should be 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0xeee080e7 ====================> This instruction doesn't look correct it should be 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] Print old end [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 [ 0.000000] Print patch start [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 [ 0.000000] riscv_alternative_print_inst instruction: 0xf0713 [ 0.000000] riscv_alternative_print_inst instruction: 0x80693 [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 [ 0.000000] riscv_alternative_print_inst instruction: 0x78593 [ 0.000000] riscv_alternative_print_inst instruction: 0xe0513 [ 0.000000] riscv_alternative_print_inst instruction: 0x97 [ 0.000000] riscv_alternative_print_inst instruction: 0xc4a080e7 ====================> This instruction doesn't look correct comparing to objdump output this should be 000080e7 or does it require the offset too? [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 [ 0.000000] Print patch end [ 0.000000] stage: 0 -> ffffffff80a24974--> 1 1 0 [ 0.000000] old:arch_dma_prep_coherent alt:riscv_noncoherent_supported len:38 [ 0.000000] Print old start [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x970013 ====================> This instruction doesn't look correct it should be 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x80e70000 ====================> This instruction doesn't look correct it should be 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0xe720 ====================> This instruction doesn't look correct it should be 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] riscv_alternative_print_inst instruction: 0x13 [ 0.000000] Print old end [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 [ 0.000000] Print patch start [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 [ 0.000000] riscv_alternative_print_inst instruction: 0xf0713 [ 0.000000] riscv_alternative_print_inst instruction: 0xe8693 [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 [ 0.000000] riscv_alternative_print_inst instruction: 0x78593 [ 0.000000] riscv_alternative_print_inst instruction: 0x30513 [ 0.000000] riscv_alternative_print_inst instruction: 0x97 [ 0.000000] riscv_alternative_print_inst instruction: 0xc12080e7 ====================> This instruction doesn't look correct comparing to objdump output this should be 000080e7 + offset? [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 [ 0.000000] Print patch end Here is the output from objdump of the file (dma-noncoherent.o): 000000000000032e <.L888^B1>: 32e: ff010113 addi sp,sp,-16 332: 00813023 sd s0,0(sp) 336: 00113423 sd ra,8(sp) 33a: 01010413 addi s0,sp,16 33e: 000f0713 mv a4,t5 342: 00078693 mv a3,a5 346: 00088613 mv a2,a7 34a: 00080593 mv a1,a6 34e: 000e0513 mv a0,t3 352: 00000097 auipc ra,0x0 356: 000080e7 jalr ra # 352 <.L888^B1+0x24> 35a: 00813083 ld ra,8(sp) 35e: 00013403 ld s0,0(sp) 362: 01010113 addi sp,sp,16 0000000000000366 <.L888^B2>: 366: ff010113 addi sp,sp,-16 36a: 00813023 sd s0,0(sp) 36e: 00113423 sd ra,8(sp) 372: 01010413 addi s0,sp,16 376: 00078713 mv a4,a5 37a: 00078693 mv a3,a5 37e: 00088613 mv a2,a7 382: 00080593 mv a1,a6 386: 000e0513 mv a0,t3 38a: 00000097 auipc ra,0x0 38e: 000080e7 jalr ra # 38a <.L888^B2+0x24> 392: 00813083 ld ra,8(sp) 396: 00013403 ld s0,0(sp) 39a: 01010113 addi sp,sp,16 000000000000039e <.L888^B3>: 39e: ff010113 addi sp,sp,-16 3a2: 00813023 sd s0,0(sp) 3a6: 00113423 sd ra,8(sp) 3aa: 01010413 addi s0,sp,16 3ae: 000f0713 mv a4,t5 3b2: 00080693 mv a3,a6 3b6: 00088613 mv a2,a7 3ba: 00078593 mv a1,a5 3be: 000e0513 mv a0,t3 3c2: 00000097 auipc ra,0x0 3c6: 000080e7 jalr ra # 3c2 <.L888^B3+0x24> 3ca: 00813083 ld ra,8(sp) 3ce: 00013403 ld s0,0(sp) 3d2: 01010113 addi sp,sp,16 00000000000003d6 <.L888^B4>: 3d6: ff010113 addi sp,sp,-16 3da: 00813023 sd s0,0(sp) 3de: 00113423 sd ra,8(sp) 3e2: 01010413 addi s0,sp,16 3e6: 000f0713 mv a4,t5 3ea: 000e8693 mv a3,t4 3ee: 00088613 mv a2,a7 3f2: 00078593 mv a1,a5 3f6: 00030513 mv a0,t1 3fa: 00000097 auipc ra,0x0 3fe: 000080e7 jalr ra # 3fa <.L888^B4+0x24> 402: 00813083 ld ra,8(sp) 406: 00013403 ld s0,0(sp) 40a: 01010113 addi sp,sp,16 Disassembly of section __ksymtab_strings: Any pointers what could be happening? > > > Note, I wanted to test with ALTERNATIVE_2() first to make sure > > everything is okay and then later test my ALTERNATIVE_3() > > implementation. > > sounds like a very sensible idea to use the existing macros > first for verification :-) > :) Cheers, Prabhakar
Hi Heiko, On Mon, Nov 21, 2022 at 3:06 PM Lad, Prabhakar <prabhakar.csengg@gmail.com> wrote: > > Hi Heiko, > > On Mon, Nov 21, 2022 at 11:27 AM Heiko Stübner <heiko@sntech.de> wrote: > > > > Hi, > > > > Am Montag, 21. November 2022, 10:50:09 CET schrieb Lad, Prabhakar: > > > On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote: > > > > > > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > > > > > Alternatives live in a different section, so addresses used by call > > > > functions will point to wrong locations after the patch got applied. > > > > > > > > Similar to arm64, adjust the location to consider that offset. > > > > > > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > --- > > > > [...] > > > > > I have the below assembly code which I have tested without the > > > alternatives for the RZ/Five CMO, > > > > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > > > asm volatile(".option push\n\t\n\t" \ > > > ".option norvc\n\t" \ > > > ".option norelax\n\t" \ > > > "addi sp,sp,-16\n\t" \ > > > "sd s0,0(sp)\n\t" \ > > > "sd ra,8(sp)\n\t" \ > > > "addi s0,sp,16\n\t" \ > > > "mv a4,%6\n\t" \ > > > "mv a3,%5\n\t" \ > > > "mv a2,%4\n\t" \ > > > "mv a1,%3\n\t" \ > > > "mv a0,%0\n\t" \ > > > "call rzfive_cmo\n\t" \ > > > "ld ra,8(sp)\n\t" \ > > > "ld s0,0(sp)\n\t" \ > > > "addi sp,sp,16\n\t" \ > > > ".option pop\n\t" \ > > > : : "r"(_cachesize), \ > > > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > > > "r"((unsigned long)(_start) + (_size)), \ > > > "r"((unsigned long) (_start)), \ > > > "r"((unsigned long) (_size)), \ > > > "r"((unsigned long) (_dir)), \ > > > "r"((unsigned long) (_ops)) \ > > > : "a0", "a1", "a2", "a3", "a4", "memory") > > > > > > Now when integrate this with ALTERNATIVE_2() as below, > > > > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > > > asm volatile(ALTERNATIVE_2( \ > > > __nops(14), \ > > > "mv a0, %1\n\t" \ > > > "j 2f\n\t" \ > > > "3:\n\t" \ > > > "cbo." __stringify(_op) " (a0)\n\t" \ > > > "add a0, a0, %0\n\t" \ > > > "2:\n\t" \ > > > "bltu a0, %2, 3b\n\t" \ > > > __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > > > ".option push\n\t\n\t" \ > > > ".option norvc\n\t" \ > > > ".option norelax\n\t" \ > > > "addi sp,sp,-16\n\t" \ > > > "sd s0,0(sp)\n\t" \ > > > "sd ra,8(sp)\n\t" \ > > > "addi s0,sp,16\n\t" \ > > > "mv a4,%6\n\t" \ > > > "mv a3,%5\n\t" \ > > > "mv a2,%4\n\t" \ > > > "mv a1,%3\n\t" \ > > > "mv a0,%0\n\t" \ > > > "call rzfive_cmo\n\t" \ > > > "ld ra,8(sp)\n\t" \ > > > "ld s0,0(sp)\n\t" \ > > > "addi sp,sp,16\n\t" \ > > > ".option pop\n\t" \ > > > , ANDESTECH_VENDOR_ID, \ > > > ERRATA_ANDESTECH_NO_IOCP, CONFIG_ERRATA_RZFIVE_CMO) \ > > > : : "r"(_cachesize), \ > > > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > > > "r"((unsigned long)(_start) + (_size)), \ > > > "r"((unsigned long) (_start)), \ > > > "r"((unsigned long) (_size)), \ > > > "r"((unsigned long) (_dir)), \ > > > "r"((unsigned long) (_ops)) \ > > > : "a0", "a1", "a2", "a3", "a4", "memory") > > > > > > I am seeing kernel panic with this change. Looking at the > > > riscv_alternative_fix_auipc_jalr() implementation it assumes the rest > > > of the alternative options are calls too. Is my understanding correct > > > here? > > > > The loop walks through the instructions after the location got patched and > > checks if an instruction is an auipc and the next one is a jalr and only then > > adjusts the address accordingly. > > > Ok so my understanding was wrong here. > > > So it _should_ leave all other (non auipc+jalr) instructions alone. > > (hopefully) > > > Agreed. > > > > > > Do you think this is the correct approach in my case? > > > > It does look correct on first glance. > > > \o/ > > > As I had that passing thought, are you actually calling > > riscv_alternative_fix_auipc_jalr() > > from your errata/.../foo.c after doing the patching? > > > > I.e. with the current patchset, that function is only called from the > > cpufeature part, but for example not from the other patching locations. > > [and a future revision should probably change that :-) ] > > > > > I have made a local copy of riscv_alternative_fix_auipc_jalr() and > then calling it after patch_text_nosync() referring to your patch for > str functions. > > > After making sure that function actually runs, the next thing you could try > > is to have both the "original" code and the patch be identical, i.e. > > replace the cbo* part with your code as well and then just output the > > instructions via printk to check what the addresses do in both. > > > > After riscv_alternative_fix_auipc_jalr() ran then both code variants > > should be identical when using the same code in both areas. > > > So I have added debug prints to match the instructions as below after > and before patching: > > static void riscv_alternative_print_inst(unsigned int *alt_ptr, > unsigned int len) > { > int num_instr = len / sizeof(u32); > int i; > > for (i = 0; i < num_instr; i++) > pr_err("%s instruction: 0x%x\n", __func__, *(alt_ptr + i)); > > } > > void __init_or_module andes_errata_patch_func(struct alt_entry *begin, > struct alt_entry *end, > unsigned long archid, unsigned long impid, > unsigned int stage) > { > .... > if (cpu_req_errata & tmp) { > pr_err("stage: %x -> %px--> %x %x %x\n", stage, alt, tmp, > cpu_req_errata, alt->errata_id); > pr_err("old:%ps alt:%ps len:%lx\n", alt->old_ptr, > alt->alt_ptr, alt->alt_len); > pr_err("Print old start\n"); > riscv_alternative_print_inst(alt->old_ptr, alt->alt_len); > pr_err("Print old end\n"); > patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); > > riscv_alternative_fix_auipc_jalr(alt->old_ptr, alt->alt_len, > alt->old_ptr - alt->alt_ptr); > pr_err("Print patch start\n"); > riscv_alternative_print_inst(alt->alt_ptr, alt->alt_len); > pr_err("Print patch end\n"); > } > ..... > } > > Below is the log: > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] Print new old end > [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 > [ 0.000000] Print patch start > [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 > [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 > [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 > [ 0.000000] riscv_alternative_print_inst instruction: 0xf0713 > [ 0.000000] riscv_alternative_print_inst instruction: 0x78693 > [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 > [ 0.000000] riscv_alternative_print_inst instruction: 0x80593 > [ 0.000000] riscv_alternative_print_inst instruction: 0xe0513 > [ 0.000000] riscv_alternative_print_inst instruction: 0x97 > [ 0.000000] riscv_alternative_print_inst instruction: 0xcba080e7 > [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 > [ 0.000000] Print patch end > [ 0.000000] stage: 0 -> ffffffff80a2492c--> 1 1 0 > [ 0.000000] old:arch_sync_dma_for_device > alt:riscv_noncoherent_supported len:38 > [ 0.000000] Print old start > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x970013 > ====================> This instruction doesn't look correct it > should be 0x13? > [ 0.000000] Print old end > [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 > [ 0.000000] Print patch start > [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 > [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 > [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 > [ 0.000000] riscv_alternative_print_inst instruction: 0x78713 > [ 0.000000] riscv_alternative_print_inst instruction: 0x78693 > [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 > [ 0.000000] riscv_alternative_print_inst instruction: 0x80593 > [ 0.000000] riscv_alternative_print_inst instruction: 0xe0513 > [ 0.000000] riscv_alternative_print_inst instruction: 0x97 > [ 0.000000] riscv_alternative_print_inst instruction: 0xc82080e7 > ====================> This instruction doesn't look correct comparing > to objdump output this should be 000080e7 or does it require the > offset too? > [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 > [ 0.000000] Print patch end > [ 0.000000] stage: 0 -> ffffffff80a24950--> 1 1 0 > [ 0.000000] old:arch_sync_dma_for_cpu alt:riscv_noncoherent_supported len:38 > [ 0.000000] Print old start > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x97 > ====================> This instruction doesn't look correct it should > be 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0xeee080e7 > ====================> This instruction doesn't look correct it > should be 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] Print old end > [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 > [ 0.000000] Print patch start > [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 > [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 > [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 > [ 0.000000] riscv_alternative_print_inst instruction: 0xf0713 > [ 0.000000] riscv_alternative_print_inst instruction: 0x80693 > [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 > [ 0.000000] riscv_alternative_print_inst instruction: 0x78593 > [ 0.000000] riscv_alternative_print_inst instruction: 0xe0513 > [ 0.000000] riscv_alternative_print_inst instruction: 0x97 > [ 0.000000] riscv_alternative_print_inst instruction: 0xc4a080e7 > ====================> This instruction doesn't look correct comparing > to objdump output this should be 000080e7 or does it require the > offset too? > [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 > [ 0.000000] Print patch end > [ 0.000000] stage: 0 -> ffffffff80a24974--> 1 1 0 > [ 0.000000] old:arch_dma_prep_coherent alt:riscv_noncoherent_supported len:38 > [ 0.000000] Print old start > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x970013 > ====================> This instruction doesn't look correct it should > be 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x80e70000 > ====================> This instruction doesn't look correct it should > be 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0xe720 > ====================> This instruction doesn't look correct it should > be 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > [ 0.000000] Print old end > [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 > [ 0.000000] Print patch start > [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 > [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 > [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 > [ 0.000000] riscv_alternative_print_inst instruction: 0xf0713 > [ 0.000000] riscv_alternative_print_inst instruction: 0xe8693 > [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 > [ 0.000000] riscv_alternative_print_inst instruction: 0x78593 > [ 0.000000] riscv_alternative_print_inst instruction: 0x30513 > [ 0.000000] riscv_alternative_print_inst instruction: 0x97 > [ 0.000000] riscv_alternative_print_inst instruction: 0xc12080e7 > ====================> This instruction doesn't look correct comparing > to objdump output this should be 000080e7 + offset? > [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 > [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 > [ 0.000000] Print patch end > > Here is the output from objdump of the file (dma-noncoherent.o): > > 000000000000032e <.L888^B1>: > 32e: ff010113 addi sp,sp,-16 > 332: 00813023 sd s0,0(sp) > 336: 00113423 sd ra,8(sp) > 33a: 01010413 addi s0,sp,16 > 33e: 000f0713 mv a4,t5 > 342: 00078693 mv a3,a5 > 346: 00088613 mv a2,a7 > 34a: 00080593 mv a1,a6 > 34e: 000e0513 mv a0,t3 > 352: 00000097 auipc ra,0x0 > 356: 000080e7 jalr ra # 352 <.L888^B1+0x24> > 35a: 00813083 ld ra,8(sp) > 35e: 00013403 ld s0,0(sp) > 362: 01010113 addi sp,sp,16 > > 0000000000000366 <.L888^B2>: > 366: ff010113 addi sp,sp,-16 > 36a: 00813023 sd s0,0(sp) > 36e: 00113423 sd ra,8(sp) > 372: 01010413 addi s0,sp,16 > 376: 00078713 mv a4,a5 > 37a: 00078693 mv a3,a5 > 37e: 00088613 mv a2,a7 > 382: 00080593 mv a1,a6 > 386: 000e0513 mv a0,t3 > 38a: 00000097 auipc ra,0x0 > 38e: 000080e7 jalr ra # 38a <.L888^B2+0x24> > 392: 00813083 ld ra,8(sp) > 396: 00013403 ld s0,0(sp) > 39a: 01010113 addi sp,sp,16 > > 000000000000039e <.L888^B3>: > 39e: ff010113 addi sp,sp,-16 > 3a2: 00813023 sd s0,0(sp) > 3a6: 00113423 sd ra,8(sp) > 3aa: 01010413 addi s0,sp,16 > 3ae: 000f0713 mv a4,t5 > 3b2: 00080693 mv a3,a6 > 3b6: 00088613 mv a2,a7 > 3ba: 00078593 mv a1,a5 > 3be: 000e0513 mv a0,t3 > 3c2: 00000097 auipc ra,0x0 > 3c6: 000080e7 jalr ra # 3c2 <.L888^B3+0x24> > 3ca: 00813083 ld ra,8(sp) > 3ce: 00013403 ld s0,0(sp) > 3d2: 01010113 addi sp,sp,16 > > 00000000000003d6 <.L888^B4>: > 3d6: ff010113 addi sp,sp,-16 > 3da: 00813023 sd s0,0(sp) > 3de: 00113423 sd ra,8(sp) > 3e2: 01010413 addi s0,sp,16 > 3e6: 000f0713 mv a4,t5 > 3ea: 000e8693 mv a3,t4 > 3ee: 00088613 mv a2,a7 > 3f2: 00078593 mv a1,a5 > 3f6: 00030513 mv a0,t1 > 3fa: 00000097 auipc ra,0x0 > 3fe: 000080e7 jalr ra # 3fa <.L888^B4+0x24> > 402: 00813083 ld ra,8(sp) > 406: 00013403 ld s0,0(sp) > 40a: 01010113 addi sp,sp,16 > > Disassembly of section __ksymtab_strings: > > Any pointers what could be happening? > Some more information, - If I drop the riscv_alternative_fix_auipc_jalr() call after patch_text_nosync() and then print the alt->old_ptr instructions before patching I can see the instructions as 0x13 (nop) which is correct. - if I call riscv_alternative_fix_auipc_jalr() call after patch_text_nosync() and then print the alt->old_ptr instructions before patching I dont see 0x13 (nop) consistently for old instructions. - If I replace the nop's in the old instructions with my assembly code of rz/five cmo and then just use patch_text_nosync() I can see the correct actual instruction being printed apart from jalr (is some sort of offset added to it as I see last 4 bits match?) and then is replaced correctly by the same alt instructions apart from the jalr (log [0]). - If I replace the nop's in the old instructions with my assembly code of rz/five cmo and then use patch_text_nosync() and riscv_alternative_fix_auipc_jalr() I can see the actual old instructions differs a bit and again the jalr instruction differs too in the patched code (log [1]). [0] https://paste.debian.net/1261412/ [1] https://paste.debian.net/1261413/ Attached is the objump of dma-noncoherent.o for reference. Note, if I replace the old/orignal instruction to my asm code for rz/five cmo and replace the errata id's to deadbeef the code works OK. Cheers, Prabhakar
Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar: > Hi Heiko, > > On Mon, Nov 21, 2022 at 3:06 PM Lad, Prabhakar > <prabhakar.csengg@gmail.com> wrote: > > > > Hi Heiko, > > > > On Mon, Nov 21, 2022 at 11:27 AM Heiko Stübner <heiko@sntech.de> wrote: > > > > > > Hi, > > > > > > Am Montag, 21. November 2022, 10:50:09 CET schrieb Lad, Prabhakar: > > > > On Thu, Nov 10, 2022 at 4:50 PM Heiko Stuebner <heiko@sntech.de> wrote: > > > > > > > > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > > > > > > > Alternatives live in a different section, so addresses used by call > > > > > functions will point to wrong locations after the patch got applied. > > > > > > > > > > Similar to arm64, adjust the location to consider that offset. > > > > > > > > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > > --- > > > > > > [...] > > > > > > > I have the below assembly code which I have tested without the > > > > alternatives for the RZ/Five CMO, > > > > > > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > > > > asm volatile(".option push\n\t\n\t" \ > > > > ".option norvc\n\t" \ > > > > ".option norelax\n\t" \ > > > > "addi sp,sp,-16\n\t" \ > > > > "sd s0,0(sp)\n\t" \ > > > > "sd ra,8(sp)\n\t" \ > > > > "addi s0,sp,16\n\t" \ > > > > "mv a4,%6\n\t" \ > > > > "mv a3,%5\n\t" \ > > > > "mv a2,%4\n\t" \ > > > > "mv a1,%3\n\t" \ > > > > "mv a0,%0\n\t" \ > > > > "call rzfive_cmo\n\t" \ > > > > "ld ra,8(sp)\n\t" \ > > > > "ld s0,0(sp)\n\t" \ > > > > "addi sp,sp,16\n\t" \ > > > > ".option pop\n\t" \ > > > > : : "r"(_cachesize), \ > > > > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > > > > "r"((unsigned long)(_start) + (_size)), \ > > > > "r"((unsigned long) (_start)), \ > > > > "r"((unsigned long) (_size)), \ > > > > "r"((unsigned long) (_dir)), \ > > > > "r"((unsigned long) (_ops)) \ > > > > : "a0", "a1", "a2", "a3", "a4", "memory") > > > > > > > > Now when integrate this with ALTERNATIVE_2() as below, > > > > > > > > #define ALT_CMO_OP(_op, _start, _size, _cachesize, _dir, _ops) \ > > > > asm volatile(ALTERNATIVE_2( \ > > > > __nops(14), \ > > > > "mv a0, %1\n\t" \ > > > > "j 2f\n\t" \ > > > > "3:\n\t" \ > > > > "cbo." __stringify(_op) " (a0)\n\t" \ > > > > "add a0, a0, %0\n\t" \ > > > > "2:\n\t" \ > > > > "bltu a0, %2, 3b\n\t" \ > > > > __nops(8), 0, CPUFEATURE_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ > > > > ".option push\n\t\n\t" \ > > > > ".option norvc\n\t" \ > > > > ".option norelax\n\t" \ > > > > "addi sp,sp,-16\n\t" \ > > > > "sd s0,0(sp)\n\t" \ > > > > "sd ra,8(sp)\n\t" \ > > > > "addi s0,sp,16\n\t" \ > > > > "mv a4,%6\n\t" \ > > > > "mv a3,%5\n\t" \ > > > > "mv a2,%4\n\t" \ > > > > "mv a1,%3\n\t" \ > > > > "mv a0,%0\n\t" \ > > > > "call rzfive_cmo\n\t" \ > > > > "ld ra,8(sp)\n\t" \ > > > > "ld s0,0(sp)\n\t" \ > > > > "addi sp,sp,16\n\t" \ > > > > ".option pop\n\t" \ > > > > , ANDESTECH_VENDOR_ID, \ > > > > ERRATA_ANDESTECH_NO_IOCP, CONFIG_ERRATA_RZFIVE_CMO) \ > > > > : : "r"(_cachesize), \ > > > > "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ > > > > "r"((unsigned long)(_start) + (_size)), \ > > > > "r"((unsigned long) (_start)), \ > > > > "r"((unsigned long) (_size)), \ > > > > "r"((unsigned long) (_dir)), \ > > > > "r"((unsigned long) (_ops)) \ > > > > : "a0", "a1", "a2", "a3", "a4", "memory") > > > > > > > > I am seeing kernel panic with this change. Looking at the > > > > riscv_alternative_fix_auipc_jalr() implementation it assumes the rest > > > > of the alternative options are calls too. Is my understanding correct > > > > here? > > > > > > The loop walks through the instructions after the location got patched and > > > checks if an instruction is an auipc and the next one is a jalr and only then > > > adjusts the address accordingly. > > > > > Ok so my understanding was wrong here. > > > > > So it _should_ leave all other (non auipc+jalr) instructions alone. > > > (hopefully) > > > > > Agreed. > > > > > > > > > Do you think this is the correct approach in my case? > > > > > > It does look correct on first glance. > > > > > \o/ > > > > > As I had that passing thought, are you actually calling > > > riscv_alternative_fix_auipc_jalr() > > > from your errata/.../foo.c after doing the patching? > > > > > > I.e. with the current patchset, that function is only called from the > > > cpufeature part, but for example not from the other patching locations. > > > [and a future revision should probably change that :-) ] > > > > > > > > I have made a local copy of riscv_alternative_fix_auipc_jalr() and > > then calling it after patch_text_nosync() referring to your patch for > > str functions. > > > > > After making sure that function actually runs, the next thing you could try > > > is to have both the "original" code and the patch be identical, i.e. > > > replace the cbo* part with your code as well and then just output the > > > instructions via printk to check what the addresses do in both. > > > > > > After riscv_alternative_fix_auipc_jalr() ran then both code variants > > > should be identical when using the same code in both areas. > > > > > So I have added debug prints to match the instructions as below after > > and before patching: > > > > static void riscv_alternative_print_inst(unsigned int *alt_ptr, > > unsigned int len) > > { > > int num_instr = len / sizeof(u32); > > int i; > > > > for (i = 0; i < num_instr; i++) > > pr_err("%s instruction: 0x%x\n", __func__, *(alt_ptr + i)); > > > > } > > > > void __init_or_module andes_errata_patch_func(struct alt_entry *begin, > > struct alt_entry *end, > > unsigned long archid, unsigned long impid, > > unsigned int stage) > > { > > .... > > if (cpu_req_errata & tmp) { > > pr_err("stage: %x -> %px--> %x %x %x\n", stage, alt, tmp, > > cpu_req_errata, alt->errata_id); > > pr_err("old:%ps alt:%ps len:%lx\n", alt->old_ptr, > > alt->alt_ptr, alt->alt_len); > > pr_err("Print old start\n"); > > riscv_alternative_print_inst(alt->old_ptr, alt->alt_len); > > pr_err("Print old end\n"); > > patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); > > > > riscv_alternative_fix_auipc_jalr(alt->old_ptr, alt->alt_len, > > alt->old_ptr - alt->alt_ptr); > > pr_err("Print patch start\n"); > > riscv_alternative_print_inst(alt->alt_ptr, alt->alt_len); > > pr_err("Print patch end\n"); > > } > > ..... > > } > > > > Below is the log: > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] Print new old end > > [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 > > [ 0.000000] Print patch start > > [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xf0713 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x78693 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x80593 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xe0513 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x97 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xcba080e7 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 > > [ 0.000000] Print patch end > > [ 0.000000] stage: 0 -> ffffffff80a2492c--> 1 1 0 > > [ 0.000000] old:arch_sync_dma_for_device > > alt:riscv_noncoherent_supported len:38 > > [ 0.000000] Print old start > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x970013 > > ====================> This instruction doesn't look correct it > > should be 0x13? > > [ 0.000000] Print old end > > [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 > > [ 0.000000] Print patch start > > [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x78713 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x78693 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x80593 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xe0513 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x97 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xc82080e7 > > ====================> This instruction doesn't look correct comparing > > to objdump output this should be 000080e7 or does it require the > > offset too? > > [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 > > [ 0.000000] Print patch end > > [ 0.000000] stage: 0 -> ffffffff80a24950--> 1 1 0 > > [ 0.000000] old:arch_sync_dma_for_cpu alt:riscv_noncoherent_supported len:38 > > [ 0.000000] Print old start > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x97 > > ====================> This instruction doesn't look correct it should > > be 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xeee080e7 > > ====================> This instruction doesn't look correct it > > should be 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] Print old end > > [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 > > [ 0.000000] Print patch start > > [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xf0713 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x80693 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x78593 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xe0513 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x97 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xc4a080e7 > > ====================> This instruction doesn't look correct comparing > > to objdump output this should be 000080e7 or does it require the > > offset too? > > [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 > > [ 0.000000] Print patch end > > [ 0.000000] stage: 0 -> ffffffff80a24974--> 1 1 0 > > [ 0.000000] old:arch_dma_prep_coherent alt:riscv_noncoherent_supported len:38 > > [ 0.000000] Print old start > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x970013 > > ====================> This instruction doesn't look correct it should > > be 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x80e70000 > > ====================> This instruction doesn't look correct it should > > be 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xe720 > > ====================> This instruction doesn't look correct it should > > be 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13 > > [ 0.000000] Print old end > > [ 0.000000] riscv_alternative_fix_auipc_jalr num instruction: 14 > > [ 0.000000] Print patch start > > [ 0.000000] riscv_alternative_print_inst instruction: 0xff010113 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x813023 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x113423 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010413 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xf0713 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xe8693 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x88613 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x78593 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x30513 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x97 > > [ 0.000000] riscv_alternative_print_inst instruction: 0xc12080e7 > > ====================> This instruction doesn't look correct comparing > > to objdump output this should be 000080e7 + offset? > > [ 0.000000] riscv_alternative_print_inst instruction: 0x813083 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x13403 > > [ 0.000000] riscv_alternative_print_inst instruction: 0x1010113 > > [ 0.000000] Print patch end > > > > Here is the output from objdump of the file (dma-noncoherent.o): > > > > 000000000000032e <.L888^B1>: > > 32e: ff010113 addi sp,sp,-16 > > 332: 00813023 sd s0,0(sp) > > 336: 00113423 sd ra,8(sp) > > 33a: 01010413 addi s0,sp,16 > > 33e: 000f0713 mv a4,t5 > > 342: 00078693 mv a3,a5 > > 346: 00088613 mv a2,a7 > > 34a: 00080593 mv a1,a6 > > 34e: 000e0513 mv a0,t3 > > 352: 00000097 auipc ra,0x0 > > 356: 000080e7 jalr ra # 352 <.L888^B1+0x24> > > 35a: 00813083 ld ra,8(sp) > > 35e: 00013403 ld s0,0(sp) > > 362: 01010113 addi sp,sp,16 > > > > 0000000000000366 <.L888^B2>: > > 366: ff010113 addi sp,sp,-16 > > 36a: 00813023 sd s0,0(sp) > > 36e: 00113423 sd ra,8(sp) > > 372: 01010413 addi s0,sp,16 > > 376: 00078713 mv a4,a5 > > 37a: 00078693 mv a3,a5 > > 37e: 00088613 mv a2,a7 > > 382: 00080593 mv a1,a6 > > 386: 000e0513 mv a0,t3 > > 38a: 00000097 auipc ra,0x0 > > 38e: 000080e7 jalr ra # 38a <.L888^B2+0x24> > > 392: 00813083 ld ra,8(sp) > > 396: 00013403 ld s0,0(sp) > > 39a: 01010113 addi sp,sp,16 > > > > 000000000000039e <.L888^B3>: > > 39e: ff010113 addi sp,sp,-16 > > 3a2: 00813023 sd s0,0(sp) > > 3a6: 00113423 sd ra,8(sp) > > 3aa: 01010413 addi s0,sp,16 > > 3ae: 000f0713 mv a4,t5 > > 3b2: 00080693 mv a3,a6 > > 3b6: 00088613 mv a2,a7 > > 3ba: 00078593 mv a1,a5 > > 3be: 000e0513 mv a0,t3 > > 3c2: 00000097 auipc ra,0x0 > > 3c6: 000080e7 jalr ra # 3c2 <.L888^B3+0x24> > > 3ca: 00813083 ld ra,8(sp) > > 3ce: 00013403 ld s0,0(sp) > > 3d2: 01010113 addi sp,sp,16 > > > > 00000000000003d6 <.L888^B4>: > > 3d6: ff010113 addi sp,sp,-16 > > 3da: 00813023 sd s0,0(sp) > > 3de: 00113423 sd ra,8(sp) > > 3e2: 01010413 addi s0,sp,16 > > 3e6: 000f0713 mv a4,t5 > > 3ea: 000e8693 mv a3,t4 > > 3ee: 00088613 mv a2,a7 > > 3f2: 00078593 mv a1,a5 > > 3f6: 00030513 mv a0,t1 > > 3fa: 00000097 auipc ra,0x0 > > 3fe: 000080e7 jalr ra # 3fa <.L888^B4+0x24> > > 402: 00813083 ld ra,8(sp) > > 406: 00013403 ld s0,0(sp) > > 40a: 01010113 addi sp,sp,16 > > > > Disassembly of section __ksymtab_strings: > > > > Any pointers what could be happening? > > > > Some more information, > > - If I drop the riscv_alternative_fix_auipc_jalr() call after > patch_text_nosync() and then print the alt->old_ptr instructions > before patching I can see the instructions as 0x13 (nop) which is > correct. > > - if I call riscv_alternative_fix_auipc_jalr() call after > patch_text_nosync() and then print the alt->old_ptr instructions > before patching I dont see 0x13 (nop) consistently for old > instructions. which is to be expected I guess. alt->old_ptr points to the memory location where the live kernel code lives. I.e. the code at this location is the thing the kernel actually runs. The code at this location then gets overwritten by the alternative assembly. > - If I replace the nop's in the old instructions with my assembly code > of rz/five cmo and then just use patch_text_nosync() I can see the > correct actual instruction being printed apart from jalr (is some sort > of offset added to it as I see last 4 bits match?) and then is > replaced correctly by the same alt instructions apart from the jalr > (log [0]). > > - If I replace the nop's in the old instructions with my assembly code > of rz/five cmo and then use patch_text_nosync() and > riscv_alternative_fix_auipc_jalr() I can see the actual old > instructions differs a bit and again the jalr instruction differs too > in the patched code (log [1]). > > [0] https://paste.debian.net/1261412/ > [1] https://paste.debian.net/1261413/ > > Attached is the objump of dma-noncoherent.o for reference. I did read that objdumps are not really conclusive when looking at auipc + jalr instructions, hence the printing of the actual instructions. As either manually or with a helper like https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7 you can then decode the actual instruction and compare. In your log the two jalr instructions decode to different offsets, jalr x1, x1, -180 vs jalr x1, x1, -834 Can you check what the patch_offset value is in your case? Interestingly the auipc x1, 0 is 0 for both cases. I'll try to build a real test-setup mimicing what you're doing tomorrow (european tomorrow). Heiko
Am Montag, 21. November 2022, 23:17:11 CET schrieb Heiko Stübner: > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar: > > Some more information, > > > > - If I drop the riscv_alternative_fix_auipc_jalr() call after > > patch_text_nosync() and then print the alt->old_ptr instructions > > before patching I can see the instructions as 0x13 (nop) which is > > correct. > > > > - if I call riscv_alternative_fix_auipc_jalr() call after > > patch_text_nosync() and then print the alt->old_ptr instructions > > before patching I dont see 0x13 (nop) consistently for old > > instructions. > > which is to be expected I guess. > > alt->old_ptr points to the memory location where the live kernel code > lives. > > I.e. the code at this location is the thing the kernel actually runs. > The code at this location then gets overwritten by the alternative > assembly. > > > > - If I replace the nop's in the old instructions with my assembly code > > of rz/five cmo and then just use patch_text_nosync() I can see the > > correct actual instruction being printed apart from jalr (is some sort > > of offset added to it as I see last 4 bits match?) and then is > > replaced correctly by the same alt instructions apart from the jalr > > (log [0]). > > > > - If I replace the nop's in the old instructions with my assembly code > > of rz/five cmo and then use patch_text_nosync() and > > riscv_alternative_fix_auipc_jalr() I can see the actual old > > instructions differs a bit and again the jalr instruction differs too > > in the patched code (log [1]). > > > > [0] https://paste.debian.net/1261412/ > > [1] https://paste.debian.net/1261413/ > > > > Attached is the objump of dma-noncoherent.o for reference. > > I did read that objdumps are not really conclusive when looking > at auipc + jalr instructions, hence the printing of the actual instructions. > > As either manually or with a helper like > > https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7 > > you can then decode the actual instruction and compare. > > In your log the two jalr instructions decode to different offsets, > jalr x1, x1, -180 > vs > jalr x1, x1, -834 > > Can you check what the patch_offset value is in your case? > > Interestingly the > auipc x1, 0 > is 0 for both cases. > > I'll try to build a real test-setup mimicing what you're doing > tomorrow (european tomorrow). also, is it possible for you to put your code on some github or so? Sometimes looking at the actual code makes things a lot easier :-) Thanks Heiko
Hi Heiko, On Mon, Nov 21, 2022 at 10:17 PM Heiko Stübner <heiko@sntech.de> wrote: > > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar: > > Hi Heiko, > > > > On Mon, Nov 21, 2022 at 3:06 PM Lad, Prabhakar > > <prabhakar.csengg@gmail.com> wrote: > > > <snip> > > Some more information, > > > > - If I drop the riscv_alternative_fix_auipc_jalr() call after > > patch_text_nosync() and then print the alt->old_ptr instructions > > before patching I can see the instructions as 0x13 (nop) which is > > correct. > > > > - if I call riscv_alternative_fix_auipc_jalr() call after > > patch_text_nosync() and then print the alt->old_ptr instructions > > before patching I dont see 0x13 (nop) consistently for old > > instructions. > > which is to be expected I guess. > > alt->old_ptr points to the memory location where the live kernel code > lives. > Agreed. > I.e. the code at this location is the thing the kernel actually runs. > The code at this location then gets overwritten by the alternative > assembly. > But shouldn't the actual code be nops before patching? > > > - If I replace the nop's in the old instructions with my assembly code > > of rz/five cmo and then just use patch_text_nosync() I can see the > > correct actual instruction being printed apart from jalr (is some sort > > of offset added to it as I see last 4 bits match?) and then is > > replaced correctly by the same alt instructions apart from the jalr > > (log [0]). > > > > - If I replace the nop's in the old instructions with my assembly code > > of rz/five cmo and then use patch_text_nosync() and > > riscv_alternative_fix_auipc_jalr() I can see the actual old > > instructions differs a bit and again the jalr instruction differs too > > in the patched code (log [1]). > > > > [0] https://paste.debian.net/1261412/ > > [1] https://paste.debian.net/1261413/ > > > > Attached is the objump of dma-noncoherent.o for reference. > > I did read that objdumps are not really conclusive when looking > at auipc + jalr instructions, hence the printing of the actual instructions. > > As either manually or with a helper like > > https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7 > > you can then decode the actual instruction and compare. > OK, I will give it a try. > In your log the two jalr instructions decode to different offsets, > jalr x1, x1, -180 > vs > jalr x1, x1, -834 > > Can you check what the patch_offset value is in your case? > I'll check that and let you know. > Interestingly the > auipc x1, 0 > is 0 for both cases. > > I'll try to build a real test-setup mimicing what you're doing > tomorrow (european tomorrow). > Thank you! Cheers, Prabhakar
Hi Heiko, On Mon, Nov 21, 2022 at 10:38 PM Heiko Stübner <heiko@sntech.de> wrote: > > Am Montag, 21. November 2022, 23:17:11 CET schrieb Heiko Stübner: > > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar: > > > Some more information, > > > > > > - If I drop the riscv_alternative_fix_auipc_jalr() call after > > > patch_text_nosync() and then print the alt->old_ptr instructions > > > before patching I can see the instructions as 0x13 (nop) which is > > > correct. > > > > > > - if I call riscv_alternative_fix_auipc_jalr() call after > > > patch_text_nosync() and then print the alt->old_ptr instructions > > > before patching I dont see 0x13 (nop) consistently for old > > > instructions. > > > > which is to be expected I guess. > > > > alt->old_ptr points to the memory location where the live kernel code > > lives. > > > > I.e. the code at this location is the thing the kernel actually runs. > > The code at this location then gets overwritten by the alternative > > assembly. > > > > > > > - If I replace the nop's in the old instructions with my assembly code > > > of rz/five cmo and then just use patch_text_nosync() I can see the > > > correct actual instruction being printed apart from jalr (is some sort > > > of offset added to it as I see last 4 bits match?) and then is > > > replaced correctly by the same alt instructions apart from the jalr > > > (log [0]). > > > > > > - If I replace the nop's in the old instructions with my assembly code > > > of rz/five cmo and then use patch_text_nosync() and > > > riscv_alternative_fix_auipc_jalr() I can see the actual old > > > instructions differs a bit and again the jalr instruction differs too > > > in the patched code (log [1]). > > > > > > [0] https://paste.debian.net/1261412/ > > > [1] https://paste.debian.net/1261413/ > > > > > > Attached is the objump of dma-noncoherent.o for reference. > > > > I did read that objdumps are not really conclusive when looking > > at auipc + jalr instructions, hence the printing of the actual instructions. > > > > As either manually or with a helper like > > > > https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7 > > > > you can then decode the actual instruction and compare. > > > > In your log the two jalr instructions decode to different offsets, > > jalr x1, x1, -180 > > vs > > jalr x1, x1, -834 > > > > Can you check what the patch_offset value is in your case? > > > > Interestingly the > > auipc x1, 0 > > is 0 for both cases. > > > > I'll try to build a real test-setup mimicing what you're doing > > tomorrow (european tomorrow). > > also, is it possible for you to put your code on some github > or so? Sometimes looking at the actual code makes things > a lot easier :-) > I have pushed my changes here https://github.com/prabhakarlad/linux/tree/rzfive-cmo Cheers, Prabhakar
Hi Heiko, On Mon, Nov 21, 2022 at 10:17 PM Heiko Stübner <heiko@sntech.de> wrote: > > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar: > > Hi Heiko, > > <snip> > As either manually or with a helper like > > https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7 > > you can then decode the actual instruction and compare. > > In your log the two jalr instructions decode to different offsets, > jalr x1, x1, -180 > vs > jalr x1, x1, -834 > > Can you check what the patch_offset value is in your case? > patch_offset for the above case is -654. Cheers, Prabhakar
Am Dienstag, 22. November 2022, 11:59:57 CET schrieb Lad, Prabhakar: > Hi Heiko, > > On Mon, Nov 21, 2022 at 10:17 PM Heiko Stübner <heiko@sntech.de> wrote: > > > > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar: > > > Hi Heiko, > > > > <snip> > > As either manually or with a helper like > > > > https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7 > > > > you can then decode the actual instruction and compare. > > > > In your log the two jalr instructions decode to different offsets, > > jalr x1, x1, -180 > > vs > > jalr x1, x1, -834 > > > > Can you check what the patch_offset value is in your case? > > > patch_offset for the above case is -654. which is a big indicator that the auipc-jalr-fixup function is not catching the instruction ... i.e. -180 - 654 = -834. I managed to reproduce that issue with your branch now (after hacking up stuff a bit to run in qemu :-) ). I'll try to find out where the fixup fails. Heiko
Am Dienstag, 22. November 2022, 12:19:40 CET schrieb Heiko Stübner: > Am Dienstag, 22. November 2022, 11:59:57 CET schrieb Lad, Prabhakar: > > Hi Heiko, > > > > On Mon, Nov 21, 2022 at 10:17 PM Heiko Stübner <heiko@sntech.de> wrote: > > > > > > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar: > > > > Hi Heiko, > > > > > > <snip> > > > As either manually or with a helper like > > > > > > https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7 > > > > > > you can then decode the actual instruction and compare. > > > > > > In your log the two jalr instructions decode to different offsets, > > > jalr x1, x1, -180 > > > vs > > > jalr x1, x1, -834 > > > > > > Can you check what the patch_offset value is in your case? > > > > > patch_offset for the above case is -654. > > which is a big indicator that the auipc-jalr-fixup function is not catching > the instruction ... i.e. -180 - 654 = -834. > > I managed to reproduce that issue with your branch now > (after hacking up stuff a bit to run in qemu :-) ). > > I'll try to find out where the fixup fails. imagine me with a slightly red head now ... as there is a slightly embarrassing mistake in the fixup function ;-) . When going from void* to unsigned int* pointers I have missed adjusting the actual patch-location. The call needs to be patch_text_nosync(alt_ptr + i, call, 8); instead of the current patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8); In my str* cases this didn't matter because "i" was 0 there, but in your longer assembly it actually patched the wrong location. Heiko ============ For reference, my debug prints to find where the patching fails was: diff --git a/arch/riscv/errata/renesas/errata.c b/arch/riscv/errata/renesas/errata.c index 986f1c762d72..a5a47c5e9ff8 100644 --- a/arch/riscv/errata/renesas/errata.c +++ b/arch/riscv/errata/renesas/errata.c @@ -72,6 +72,7 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, u32 rd1; for (i = 0; i < num_instr; i++) { +printk("%s: looking at inst 0x%x\n", __func__, *(alt_ptr + i)); /* is there a further instruction? */ if (i + 1 >= num_instr) continue; @@ -84,6 +85,7 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, if (rd1 != 1) continue; +printk("%s: -> found a auipc + jalr pair\n", __func__); /* get and adjust new target address */ imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i)); imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1)); @@ -101,8 +103,10 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, call[0] |= to_auipc_imm(imm1); call[1] |= to_jalr_imm(imm1); +printk("%s: patching to 0x%x and 0x%x\n", __func__, call[0], call[1]); /* patch the call place again */ - patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8); + patch_text_nosync(alt_ptr + i, call, 8); +printk("%s: patched to 0x%x and 0x%x\n", __func__, *(alt_ptr + i), *(alt_ptr + i + 1)); } } and then realizing that the "patching to" and "patched to" where different.
Hi Heiko, On Tue, Nov 22, 2022 at 11:37 AM Heiko Stübner <heiko@sntech.de> wrote: > > Am Dienstag, 22. November 2022, 12:19:40 CET schrieb Heiko Stübner: > > Am Dienstag, 22. November 2022, 11:59:57 CET schrieb Lad, Prabhakar: > > > Hi Heiko, > > > > > > On Mon, Nov 21, 2022 at 10:17 PM Heiko Stübner <heiko@sntech.de> wrote: > > > > > > > > Am Montag, 21. November 2022, 22:31:36 CET schrieb Lad, Prabhakar: > > > > > Hi Heiko, > > > > > > > > <snip> > > > > As either manually or with a helper like > > > > > > > > https://luplab.gitlab.io/rvcodecjs/#q=0xf4c080e7 > > > > > > > > you can then decode the actual instruction and compare. > > > > > > > > In your log the two jalr instructions decode to different offsets, > > > > jalr x1, x1, -180 > > > > vs > > > > jalr x1, x1, -834 > > > > > > > > Can you check what the patch_offset value is in your case? > > > > > > > patch_offset for the above case is -654. > > > > which is a big indicator that the auipc-jalr-fixup function is not catching > > the instruction ... i.e. -180 - 654 = -834. > > > > I managed to reproduce that issue with your branch now > > (after hacking up stuff a bit to run in qemu :-) ). > > > > I'll try to find out where the fixup fails. > > imagine me with a slightly red head now ... as there is a slightly > embarrassing mistake in the fixup function ;-) . > Cheer up now :-) > > When going from void* to unsigned int* pointers I have missed > adjusting the actual patch-location. > > The call needs to be > patch_text_nosync(alt_ptr + i, call, 8); > That did the trick! I have done some limited testing on the board (even replaced orignal instructions back to nop's even with its working too). > instead of the current > patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8); > > In my str* cases this didn't matter because "i" was 0 there, but in your > longer assembly it actually patched the wrong location. > Ahaa right the alt macro just had calls. > > Heiko > > ============ > For reference, my debug prints to find where the patching fails was: > > diff --git a/arch/riscv/errata/renesas/errata.c b/arch/riscv/errata/renesas/errata.c > index 986f1c762d72..a5a47c5e9ff8 100644 > --- a/arch/riscv/errata/renesas/errata.c > +++ b/arch/riscv/errata/renesas/errata.c > @@ -72,6 +72,7 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, > u32 rd1; > > for (i = 0; i < num_instr; i++) { > +printk("%s: looking at inst 0x%x\n", __func__, *(alt_ptr + i)); > /* is there a further instruction? */ > if (i + 1 >= num_instr) > continue; > @@ -84,6 +85,7 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, > if (rd1 != 1) > continue; > > +printk("%s: -> found a auipc + jalr pair\n", __func__); > /* get and adjust new target address */ > imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i)); > imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1)); > @@ -101,8 +103,10 @@ static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, > call[0] |= to_auipc_imm(imm1); > call[1] |= to_jalr_imm(imm1); > > +printk("%s: patching to 0x%x and 0x%x\n", __func__, call[0], call[1]); > /* patch the call place again */ > - patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8); > + patch_text_nosync(alt_ptr + i, call, 8); > +printk("%s: patched to 0x%x and 0x%x\n", __func__, *(alt_ptr + i), *(alt_ptr + i + 1)); > } > } > > and then realizing that the "patching to" and "patched to" where different. > Thanks for the hunk. > Now waiting for your v3. Meanwhile, I'll look into the ALT3() macro. Cheers, Prabhakar
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 694267d1fe81..026512ca9c4c 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -298,6 +298,74 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage) return cpu_req_feature; } +#include <asm/parse_asm.h> + +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) +DECLARE_INSN(auipc, MATCH_AUIPC, MASK_AUIPC) + +static inline bool is_auipc_jalr_pair(long insn1, long insn2) +{ + return is_auipc_insn(insn1) && is_jalr_insn(insn2); +} + +#define JALR_SIGN_MASK BIT(I_IMM_SIGN_OPOFF - I_IMM_11_0_OPOFF) +#define JALR_OFFSET_MASK I_IMM_11_0_MASK +#define AUIPC_OFFSET_MASK U_IMM_31_12_MASK +#define AUIPC_PAD (0x00001000) +#define JALR_SHIFT I_IMM_11_0_OPOFF + +#define to_jalr_imm(offset) \ + ((offset & I_IMM_11_0_MASK) << I_IMM_11_0_OPOFF) + +#define to_auipc_imm(offset) \ + ((offset & JALR_SIGN_MASK) ? \ + ((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) : \ + (offset & AUIPC_OFFSET_MASK)) + +static void riscv_alternative_fix_auipc_jalr(unsigned int *alt_ptr, + unsigned int len, int patch_offset) +{ + int num_instr = len / sizeof(u32); + unsigned int call[2]; + int i; + int imm1; + u32 rd1; + + for (i = 0; i < num_instr; i++) { + /* is there a further instruction? */ + if (i + 1 >= num_instr) + continue; + + if (!is_auipc_jalr_pair(*(alt_ptr + i), *(alt_ptr + i + 1))) + continue; + + /* call will use ra register */ + rd1 = EXTRACT_RD_REG(*(alt_ptr + i)); + if (rd1 != 1) + continue; + + /* get and adjust new target address */ + imm1 = EXTRACT_UTYPE_IMM(*(alt_ptr + i)); + imm1 += EXTRACT_ITYPE_IMM(*(alt_ptr + i + 1)); + imm1 -= patch_offset; + + /* pick the original auipc + jalr */ + call[0] = *(alt_ptr + i); + call[1] = *(alt_ptr + i + 1); + + /* drop the old IMMs */ + call[0] &= ~(U_IMM_31_12_MASK); + call[1] &= ~(I_IMM_11_0_MASK << I_IMM_11_0_OPOFF); + + /* add the adapted IMMs */ + call[0] |= to_auipc_imm(imm1); + call[1] |= to_jalr_imm(imm1); + + /* patch the call place again */ + patch_text_nosync(alt_ptr + i * sizeof(u32), call, 8); + } +} + void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned int stage) @@ -316,8 +384,15 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, } tmp = (1U << alt->errata_id); - if (cpu_req_feature & tmp) - patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); + if (cpu_req_feature & tmp) { + /* do the basic patching */ + patch_text_nosync(alt->old_ptr, alt->alt_ptr, + alt->alt_len); + + riscv_alternative_fix_auipc_jalr(alt->old_ptr, + alt->alt_len, + alt->old_ptr - alt->alt_ptr); + } } } #endif