diff mbox series

[v1] RISC-V: take text_mutex during alternative patching

Message ID 20230212194735.491785-1-conor@kernel.org (mailing list archive)
State Accepted
Commit 9493e6f3ce02f44c21aa19f3cbf3b9aa05479d06
Delegated to: Palmer Dabbelt
Headers show
Series [v1] RISC-V: take text_mutex during alternative patching | expand

Checks

Context Check Description
conchuod/tree_selection fail Failed to apply to next/pending-fixes or riscv/for-next

Commit Message

Conor Dooley Feb. 12, 2023, 7:47 p.m. UTC
From: Conor Dooley <conor.dooley@microchip.com>

Guenter reported a splat during boot, that Samuel pointed out was the
lockdep assertion failing in patch_insn_write():

WARNING: CPU: 0 PID: 0 at arch/riscv/kernel/patch.c:63 patch_insn_write+0x222/0x2f6
epc : patch_insn_write+0x222/0x2f6
 ra : patch_insn_write+0x21e/0x2f6
epc : ffffffff800068c6 ra : ffffffff800068c2 sp : ffffffff81803df0
 gp : ffffffff81a1ab78 tp : ffffffff81814f80 t0 : ffffffffffffe000
 t1 : 0000000000000001 t2 : 4c45203a76637369 s0 : ffffffff81803e40
 s1 : 0000000000000004 a0 : 0000000000000000 a1 : ffffffffffffffff
 a2 : 0000000000000004 a3 : 0000000000000000 a4 : 0000000000000001
 a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000052464e43
 s2 : ffffffff80b4889c s3 : 000000000000082c s4 : ffffffff80b48828
 s5 : 0000000000000828 s6 : ffffffff8131a0a0 s7 : 0000000000000fff
 s8 : 0000000008000200 s9 : ffffffff8131a520 s10: 0000000000000018
 s11: 000000000000000b t3 : 0000000000000001 t4 : 000000000000000d
 t5 : ffffffffd8180000 t6 : ffffffff81803bc8
status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003
[<ffffffff800068c6>] patch_insn_write+0x222/0x2f6
[<ffffffff80006a36>] patch_text_nosync+0xc/0x2a
[<ffffffff80003b86>] riscv_cpufeature_patch_func+0x52/0x98
[<ffffffff80003348>] _apply_alternatives+0x46/0x86
[<ffffffff80c02d36>] apply_boot_alternatives+0x3c/0xfa
[<ffffffff80c03ad8>] setup_arch+0x584/0x5b8
[<ffffffff80c0075a>] start_kernel+0xa2/0x8f8

This issue was exposed by 702e64550b12 ("riscv: fpu: switch has_fpu() to
riscv_has_extension_likely()"), as it is the patching in has_fpu() that
triggers the splats in Guenter's report.

Take the text_mutex before doing any code patching to satisfy lockdep.

Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support")
Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head")
Fixes: 1a0e5dbd3723 ("riscv: sifive: Add SiFive alternative ports")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/all/20230212154333.GA3760469@roeck-us.net/
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
---
Applies on top of Samuel's critical fixes in his "Fix alternatives
issues on for-next":
https://lore.kernel.org/all/20230212021534.59121-1-samuel@sholland.org/
---
 arch/riscv/errata/sifive/errata.c | 3 +++
 arch/riscv/errata/thead/errata.c  | 8 ++++++--
 arch/riscv/kernel/cpufeature.c    | 4 ++++
 3 files changed, 13 insertions(+), 2 deletions(-)

Comments

Samuel Holland Feb. 12, 2023, 7:58 p.m. UTC | #1
On 2/12/23 13:47, Conor Dooley wrote:
> From: Conor Dooley <conor.dooley@microchip.com>
> 
> Guenter reported a splat during boot, that Samuel pointed out was the
> lockdep assertion failing in patch_insn_write():
> 
> WARNING: CPU: 0 PID: 0 at arch/riscv/kernel/patch.c:63 patch_insn_write+0x222/0x2f6
> epc : patch_insn_write+0x222/0x2f6
>  ra : patch_insn_write+0x21e/0x2f6
> epc : ffffffff800068c6 ra : ffffffff800068c2 sp : ffffffff81803df0
>  gp : ffffffff81a1ab78 tp : ffffffff81814f80 t0 : ffffffffffffe000
>  t1 : 0000000000000001 t2 : 4c45203a76637369 s0 : ffffffff81803e40
>  s1 : 0000000000000004 a0 : 0000000000000000 a1 : ffffffffffffffff
>  a2 : 0000000000000004 a3 : 0000000000000000 a4 : 0000000000000001
>  a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000052464e43
>  s2 : ffffffff80b4889c s3 : 000000000000082c s4 : ffffffff80b48828
>  s5 : 0000000000000828 s6 : ffffffff8131a0a0 s7 : 0000000000000fff
>  s8 : 0000000008000200 s9 : ffffffff8131a520 s10: 0000000000000018
>  s11: 000000000000000b t3 : 0000000000000001 t4 : 000000000000000d
>  t5 : ffffffffd8180000 t6 : ffffffff81803bc8
> status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003
> [<ffffffff800068c6>] patch_insn_write+0x222/0x2f6
> [<ffffffff80006a36>] patch_text_nosync+0xc/0x2a
> [<ffffffff80003b86>] riscv_cpufeature_patch_func+0x52/0x98
> [<ffffffff80003348>] _apply_alternatives+0x46/0x86
> [<ffffffff80c02d36>] apply_boot_alternatives+0x3c/0xfa
> [<ffffffff80c03ad8>] setup_arch+0x584/0x5b8
> [<ffffffff80c0075a>] start_kernel+0xa2/0x8f8
> 
> This issue was exposed by 702e64550b12 ("riscv: fpu: switch has_fpu() to
> riscv_has_extension_likely()"), as it is the patching in has_fpu() that
> triggers the splats in Guenter's report.
> 
> Take the text_mutex before doing any code patching to satisfy lockdep.
> 
> Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support")
> Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head")
> Fixes: 1a0e5dbd3723 ("riscv: sifive: Add SiFive alternative ports")
> Reported-by: Guenter Roeck <linux@roeck-us.net>
> Link: https://lore.kernel.org/all/20230212154333.GA3760469@roeck-us.net/
> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> ---
> Applies on top of Samuel's critical fixes in his "Fix alternatives
> issues on for-next":
> https://lore.kernel.org/all/20230212021534.59121-1-samuel@sholland.org/
> ---
>  arch/riscv/errata/sifive/errata.c | 3 +++
>  arch/riscv/errata/thead/errata.c  | 8 ++++++--
>  arch/riscv/kernel/cpufeature.c    | 4 ++++
>  3 files changed, 13 insertions(+), 2 deletions(-)

Reviewed-by: Samuel Holland <samuel@sholland.org>
Guenter Roeck Feb. 12, 2023, 11:08 p.m. UTC | #2
On 2/12/23 11:47, Conor Dooley wrote:
> From: Conor Dooley <conor.dooley@microchip.com>
> 
> Guenter reported a splat during boot, that Samuel pointed out was the
> lockdep assertion failing in patch_insn_write():
> 
> WARNING: CPU: 0 PID: 0 at arch/riscv/kernel/patch.c:63 patch_insn_write+0x222/0x2f6
> epc : patch_insn_write+0x222/0x2f6
>   ra : patch_insn_write+0x21e/0x2f6
> epc : ffffffff800068c6 ra : ffffffff800068c2 sp : ffffffff81803df0
>   gp : ffffffff81a1ab78 tp : ffffffff81814f80 t0 : ffffffffffffe000
>   t1 : 0000000000000001 t2 : 4c45203a76637369 s0 : ffffffff81803e40
>   s1 : 0000000000000004 a0 : 0000000000000000 a1 : ffffffffffffffff
>   a2 : 0000000000000004 a3 : 0000000000000000 a4 : 0000000000000001
>   a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000052464e43
>   s2 : ffffffff80b4889c s3 : 000000000000082c s4 : ffffffff80b48828
>   s5 : 0000000000000828 s6 : ffffffff8131a0a0 s7 : 0000000000000fff
>   s8 : 0000000008000200 s9 : ffffffff8131a520 s10: 0000000000000018
>   s11: 000000000000000b t3 : 0000000000000001 t4 : 000000000000000d
>   t5 : ffffffffd8180000 t6 : ffffffff81803bc8
> status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003
> [<ffffffff800068c6>] patch_insn_write+0x222/0x2f6
> [<ffffffff80006a36>] patch_text_nosync+0xc/0x2a
> [<ffffffff80003b86>] riscv_cpufeature_patch_func+0x52/0x98
> [<ffffffff80003348>] _apply_alternatives+0x46/0x86
> [<ffffffff80c02d36>] apply_boot_alternatives+0x3c/0xfa
> [<ffffffff80c03ad8>] setup_arch+0x584/0x5b8
> [<ffffffff80c0075a>] start_kernel+0xa2/0x8f8
> 
> This issue was exposed by 702e64550b12 ("riscv: fpu: switch has_fpu() to
> riscv_has_extension_likely()"), as it is the patching in has_fpu() that
> triggers the splats in Guenter's report.
> 
> Take the text_mutex before doing any code patching to satisfy lockdep.
> 
> Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support")
> Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head")
> Fixes: 1a0e5dbd3723 ("riscv: sifive: Add SiFive alternative ports")
> Reported-by: Guenter Roeck <linux@roeck-us.net>
> Link: https://lore.kernel.org/all/20230212154333.GA3760469@roeck-us.net/
> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>


Applying this patch together with

riscv: Fix early alternative patching
riscv: Fix Zbb alternative IDs

_and_ disabling CONFIG_RISCV_ISA_ZBB fixes all problems observed with
riscv64 for me. With that,

Tested-by: Guenter Roeck <linux@roeck-us.net>

Unfortunately I still see boot failures when trying to boot riscv32
images, so something is still broken. I don't know yet if that is
a generic problem or if it is related to a riscv patch.
I am still trying to track that down ...

Guenter

> ---
> Applies on top of Samuel's critical fixes in his "Fix alternatives
> issues on for-next":
> https://lore.kernel.org/all/20230212021534.59121-1-samuel@sholland.org/
> ---
>   arch/riscv/errata/sifive/errata.c | 3 +++
>   arch/riscv/errata/thead/errata.c  | 8 ++++++--
>   arch/riscv/kernel/cpufeature.c    | 4 ++++
>   3 files changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c
> index ef9a4eec0dba..da55cb247e89 100644
> --- a/arch/riscv/errata/sifive/errata.c
> +++ b/arch/riscv/errata/sifive/errata.c
> @@ -4,6 +4,7 @@
>    */
>   
>   #include <linux/kernel.h>
> +#include <linux/memory.h>
>   #include <linux/module.h>
>   #include <linux/string.h>
>   #include <linux/bug.h>
> @@ -107,8 +108,10 @@ void __init_or_module sifive_errata_patch_func(struct alt_entry *begin,
>   
>   		tmp = (1U << alt->errata_id);
>   		if (cpu_req_errata & tmp) {
> +			mutex_lock(&text_mutex);
>   			patch_text_nosync(ALT_OLD_PTR(alt), ALT_ALT_PTR(alt),
>   					  alt->alt_len);
> +			mutex_lock(&text_mutex);
>   			cpu_apply_errata |= tmp;
>   		}
>   	}
> diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> index 1dd90a5f86f0..3b96a06d3c54 100644
> --- a/arch/riscv/errata/thead/errata.c
> +++ b/arch/riscv/errata/thead/errata.c
> @@ -5,6 +5,7 @@
>   
>   #include <linux/bug.h>
>   #include <linux/kernel.h>
> +#include <linux/memory.h>
>   #include <linux/module.h>
>   #include <linux/string.h>
>   #include <linux/uaccess.h>
> @@ -101,10 +102,13 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
>   			altptr = ALT_ALT_PTR(alt);
>   
>   			/* On vm-alternatives, the mmu isn't running yet */
> -			if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
> +			if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
>   				memcpy(oldptr, altptr, alt->alt_len);
> -			else
> +			} else {
> +				mutex_lock(&text_mutex);
>   				patch_text_nosync(oldptr, altptr, alt->alt_len);
> +				mutex_unlock(&text_mutex);
> +			}
>   		}
>   	}
>   
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 21fb567e1b22..59d58ee0f68d 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -10,6 +10,7 @@
>   #include <linux/ctype.h>
>   #include <linux/libfdt.h>
>   #include <linux/log2.h>
> +#include <linux/memory.h>
>   #include <linux/module.h>
>   #include <linux/of.h>
>   #include <asm/alternative.h>
> @@ -292,8 +293,11 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
>   
>   		oldptr = ALT_OLD_PTR(alt);
>   		altptr = ALT_ALT_PTR(alt);
> +
> +		mutex_lock(&text_mutex);
>   		patch_text_nosync(oldptr, altptr, alt->alt_len);
>   		riscv_alternative_fix_offsets(oldptr, alt->alt_len, oldptr - altptr);
> +		mutex_unlock(&text_mutex);
>   	}
>   }
>   #endif
Conor Dooley Feb. 13, 2023, 10:40 p.m. UTC | #3
+CC Heiko

On Sun, Feb 12, 2023 at 03:08:55PM -0800, Guenter Roeck wrote:
> On 2/12/23 11:47, Conor Dooley wrote:
> > From: Conor Dooley <conor.dooley@microchip.com>
> > 
> > Guenter reported a splat during boot, that Samuel pointed out was the
> > lockdep assertion failing in patch_insn_write():
> > 
> > WARNING: CPU: 0 PID: 0 at arch/riscv/kernel/patch.c:63 patch_insn_write+0x222/0x2f6
> > epc : patch_insn_write+0x222/0x2f6
> >   ra : patch_insn_write+0x21e/0x2f6
> > epc : ffffffff800068c6 ra : ffffffff800068c2 sp : ffffffff81803df0
> >   gp : ffffffff81a1ab78 tp : ffffffff81814f80 t0 : ffffffffffffe000
> >   t1 : 0000000000000001 t2 : 4c45203a76637369 s0 : ffffffff81803e40
> >   s1 : 0000000000000004 a0 : 0000000000000000 a1 : ffffffffffffffff
> >   a2 : 0000000000000004 a3 : 0000000000000000 a4 : 0000000000000001
> >   a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000052464e43
> >   s2 : ffffffff80b4889c s3 : 000000000000082c s4 : ffffffff80b48828
> >   s5 : 0000000000000828 s6 : ffffffff8131a0a0 s7 : 0000000000000fff
> >   s8 : 0000000008000200 s9 : ffffffff8131a520 s10: 0000000000000018
> >   s11: 000000000000000b t3 : 0000000000000001 t4 : 000000000000000d
> >   t5 : ffffffffd8180000 t6 : ffffffff81803bc8
> > status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003
> > [<ffffffff800068c6>] patch_insn_write+0x222/0x2f6
> > [<ffffffff80006a36>] patch_text_nosync+0xc/0x2a
> > [<ffffffff80003b86>] riscv_cpufeature_patch_func+0x52/0x98
> > [<ffffffff80003348>] _apply_alternatives+0x46/0x86
> > [<ffffffff80c02d36>] apply_boot_alternatives+0x3c/0xfa
> > [<ffffffff80c03ad8>] setup_arch+0x584/0x5b8
> > [<ffffffff80c0075a>] start_kernel+0xa2/0x8f8
> > 
> > This issue was exposed by 702e64550b12 ("riscv: fpu: switch has_fpu() to
> > riscv_has_extension_likely()"), as it is the patching in has_fpu() that
> > triggers the splats in Guenter's report.
> > 
> > Take the text_mutex before doing any code patching to satisfy lockdep.
> > 
> > Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support")
> > Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head")
> > Fixes: 1a0e5dbd3723 ("riscv: sifive: Add SiFive alternative ports")
> > Reported-by: Guenter Roeck <linux@roeck-us.net>
> > Link: https://lore.kernel.org/all/20230212154333.GA3760469@roeck-us.net/
> > Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> 
> 
> Applying this patch together with
> 
> riscv: Fix early alternative patching
> riscv: Fix Zbb alternative IDs
> 
> _and_ disabling CONFIG_RISCV_ISA_ZBB fixes all problems observed with
> riscv64 for me. With that,
> 
> Tested-by: Guenter Roeck <linux@roeck-us.net>
> 
> Unfortunately I still see boot failures when trying to boot riscv32
> images, so something is still broken. I don't know yet if that is
> a generic problem or if it is related to a riscv patch.
> I am still trying to track that down ...

Heiko, would you be able to look at this please?
There's some more information in the Link: above.

Thanks
Conor.
Heiko Stuebner Feb. 15, 2023, 2:43 p.m. UTC | #4
Hi Conor,

Am Montag, 13. Februar 2023, 23:40:38 CET schrieb Conor Dooley:
> +CC Heiko
> 
> On Sun, Feb 12, 2023 at 03:08:55PM -0800, Guenter Roeck wrote:
> > On 2/12/23 11:47, Conor Dooley wrote:
> > > From: Conor Dooley <conor.dooley@microchip.com>
> > > 
> > > Guenter reported a splat during boot, that Samuel pointed out was the
> > > lockdep assertion failing in patch_insn_write():
> > > 
> > > WARNING: CPU: 0 PID: 0 at arch/riscv/kernel/patch.c:63 patch_insn_write+0x222/0x2f6
> > > epc : patch_insn_write+0x222/0x2f6
> > >   ra : patch_insn_write+0x21e/0x2f6
> > > epc : ffffffff800068c6 ra : ffffffff800068c2 sp : ffffffff81803df0
> > >   gp : ffffffff81a1ab78 tp : ffffffff81814f80 t0 : ffffffffffffe000
> > >   t1 : 0000000000000001 t2 : 4c45203a76637369 s0 : ffffffff81803e40
> > >   s1 : 0000000000000004 a0 : 0000000000000000 a1 : ffffffffffffffff
> > >   a2 : 0000000000000004 a3 : 0000000000000000 a4 : 0000000000000001
> > >   a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000052464e43
> > >   s2 : ffffffff80b4889c s3 : 000000000000082c s4 : ffffffff80b48828
> > >   s5 : 0000000000000828 s6 : ffffffff8131a0a0 s7 : 0000000000000fff
> > >   s8 : 0000000008000200 s9 : ffffffff8131a520 s10: 0000000000000018
> > >   s11: 000000000000000b t3 : 0000000000000001 t4 : 000000000000000d
> > >   t5 : ffffffffd8180000 t6 : ffffffff81803bc8
> > > status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003
> > > [<ffffffff800068c6>] patch_insn_write+0x222/0x2f6
> > > [<ffffffff80006a36>] patch_text_nosync+0xc/0x2a
> > > [<ffffffff80003b86>] riscv_cpufeature_patch_func+0x52/0x98
> > > [<ffffffff80003348>] _apply_alternatives+0x46/0x86
> > > [<ffffffff80c02d36>] apply_boot_alternatives+0x3c/0xfa
> > > [<ffffffff80c03ad8>] setup_arch+0x584/0x5b8
> > > [<ffffffff80c0075a>] start_kernel+0xa2/0x8f8
> > > 
> > > This issue was exposed by 702e64550b12 ("riscv: fpu: switch has_fpu() to
> > > riscv_has_extension_likely()"), as it is the patching in has_fpu() that
> > > triggers the splats in Guenter's report.
> > > 
> > > Take the text_mutex before doing any code patching to satisfy lockdep.
> > > 
> > > Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support")
> > > Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head")
> > > Fixes: 1a0e5dbd3723 ("riscv: sifive: Add SiFive alternative ports")
> > > Reported-by: Guenter Roeck <linux@roeck-us.net>
> > > Link: https://lore.kernel.org/all/20230212154333.GA3760469@roeck-us.net/
> > > Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> > 
> > 
> > Applying this patch together with
> > 
> > riscv: Fix early alternative patching
> > riscv: Fix Zbb alternative IDs
> > 
> > _and_ disabling CONFIG_RISCV_ISA_ZBB fixes all problems observed with
> > riscv64 for me. With that,
> > 
> > Tested-by: Guenter Roeck <linux@roeck-us.net>
> > 
> > Unfortunately I still see boot failures when trying to boot riscv32
> > images, so something is still broken. I don't know yet if that is
> > a generic problem or if it is related to a riscv patch.
> > I am still trying to track that down ...
> 
> Heiko, would you be able to look at this please?
> There's some more information in the Link: above.

just for the record, I started looking into this now.

Starting with my own code [0] on top of 6.2-rc2 I was able to verify
- qemu-riscv32 without zbb
- qemu-riscv32 with zbb
- qemu-riscv64 without zbb
- qemu-riscv64 with zbb
to work.

So next up, I'll move over to the merged for-next branch to hopefully
see what changed between the above and the for-next code.


Heiko


[0] https://github.com/mmind/linux-riscv/tree/upstream/strcmp-v5
Heiko Stuebner Feb. 15, 2023, 10 p.m. UTC | #5
Am Mittwoch, 15. Februar 2023, 15:43:10 CET schrieb Heiko Stübner:
> Hi Conor,
> 
> Am Montag, 13. Februar 2023, 23:40:38 CET schrieb Conor Dooley:
> > +CC Heiko
> > 
> > On Sun, Feb 12, 2023 at 03:08:55PM -0800, Guenter Roeck wrote:
> > > On 2/12/23 11:47, Conor Dooley wrote:
> > > > From: Conor Dooley <conor.dooley@microchip.com>
> > > > 
> > > > Guenter reported a splat during boot, that Samuel pointed out was the
> > > > lockdep assertion failing in patch_insn_write():
> > > > 
> > > > WARNING: CPU: 0 PID: 0 at arch/riscv/kernel/patch.c:63 patch_insn_write+0x222/0x2f6
> > > > epc : patch_insn_write+0x222/0x2f6
> > > >   ra : patch_insn_write+0x21e/0x2f6
> > > > epc : ffffffff800068c6 ra : ffffffff800068c2 sp : ffffffff81803df0
> > > >   gp : ffffffff81a1ab78 tp : ffffffff81814f80 t0 : ffffffffffffe000
> > > >   t1 : 0000000000000001 t2 : 4c45203a76637369 s0 : ffffffff81803e40
> > > >   s1 : 0000000000000004 a0 : 0000000000000000 a1 : ffffffffffffffff
> > > >   a2 : 0000000000000004 a3 : 0000000000000000 a4 : 0000000000000001
> > > >   a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000052464e43
> > > >   s2 : ffffffff80b4889c s3 : 000000000000082c s4 : ffffffff80b48828
> > > >   s5 : 0000000000000828 s6 : ffffffff8131a0a0 s7 : 0000000000000fff
> > > >   s8 : 0000000008000200 s9 : ffffffff8131a520 s10: 0000000000000018
> > > >   s11: 000000000000000b t3 : 0000000000000001 t4 : 000000000000000d
> > > >   t5 : ffffffffd8180000 t6 : ffffffff81803bc8
> > > > status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003
> > > > [<ffffffff800068c6>] patch_insn_write+0x222/0x2f6
> > > > [<ffffffff80006a36>] patch_text_nosync+0xc/0x2a
> > > > [<ffffffff80003b86>] riscv_cpufeature_patch_func+0x52/0x98
> > > > [<ffffffff80003348>] _apply_alternatives+0x46/0x86
> > > > [<ffffffff80c02d36>] apply_boot_alternatives+0x3c/0xfa
> > > > [<ffffffff80c03ad8>] setup_arch+0x584/0x5b8
> > > > [<ffffffff80c0075a>] start_kernel+0xa2/0x8f8
> > > > 
> > > > This issue was exposed by 702e64550b12 ("riscv: fpu: switch has_fpu() to
> > > > riscv_has_extension_likely()"), as it is the patching in has_fpu() that
> > > > triggers the splats in Guenter's report.
> > > > 
> > > > Take the text_mutex before doing any code patching to satisfy lockdep.
> > > > 
> > > > Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support")
> > > > Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head")
> > > > Fixes: 1a0e5dbd3723 ("riscv: sifive: Add SiFive alternative ports")
> > > > Reported-by: Guenter Roeck <linux@roeck-us.net>
> > > > Link: https://lore.kernel.org/all/20230212154333.GA3760469@roeck-us.net/
> > > > Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
> > > 
> > > 
> > > Applying this patch together with
> > > 
> > > riscv: Fix early alternative patching
> > > riscv: Fix Zbb alternative IDs
> > > 
> > > _and_ disabling CONFIG_RISCV_ISA_ZBB fixes all problems observed with
> > > riscv64 for me. With that,
> > > 
> > > Tested-by: Guenter Roeck <linux@roeck-us.net>
> > > 
> > > Unfortunately I still see boot failures when trying to boot riscv32
> > > images, so something is still broken. I don't know yet if that is
> > > a generic problem or if it is related to a riscv patch.
> > > I am still trying to track that down ...
> > 
> > Heiko, would you be able to look at this please?
> > There's some more information in the Link: above.
> 
> just for the record, I started looking into this now.
> 
> Starting with my own code [0] on top of 6.2-rc2 I was able to verify
> - qemu-riscv32 without zbb
> - qemu-riscv32 with zbb
> - qemu-riscv64 without zbb
> - qemu-riscv64 with zbb
> to work.
> 
> So next up, I'll move over to the merged for-next branch to hopefully
> see what changed between the above and the for-next code.

So now I've also tested Palmer's for-next at
  commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")

again with the same variants 
- qemu-riscv32 without zbb
- qemu-riscv32 with zbb
- qemu-riscv64 without zbb
- qemu-riscv64 with zbb

And all of them booted fine into a nfs-root (debian for riscv64 and a
buildroot for riscv32).

I even forced a bug into the zbb code to make sure the patching worked
correctly (where the kernel failed as expected).

Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)

I did try the one from Debian-stable (qemu-5.2) but that was too old and
didn't support Zbb yet.

One thing of note, the "active" 32bit config I had, somehow didn't produce
working images and I needed to start a new build using the rv32_defconfig.

So right now, I'm not sure what more to test though.

Heiko
Guenter Roeck Feb. 16, 2023, 12:22 a.m. UTC | #6
On 2/15/23 14:00, Heiko Stübner wrote:
> Am Mittwoch, 15. Februar 2023, 15:43:10 CET schrieb Heiko Stübner:
>> Hi Conor,
>>
>> Am Montag, 13. Februar 2023, 23:40:38 CET schrieb Conor Dooley:
>>> +CC Heiko
>>>
>>> On Sun, Feb 12, 2023 at 03:08:55PM -0800, Guenter Roeck wrote:
>>>> On 2/12/23 11:47, Conor Dooley wrote:
>>>>> From: Conor Dooley <conor.dooley@microchip.com>
>>>>>
>>>>> Guenter reported a splat during boot, that Samuel pointed out was the
>>>>> lockdep assertion failing in patch_insn_write():
>>>>>
>>>>> WARNING: CPU: 0 PID: 0 at arch/riscv/kernel/patch.c:63 patch_insn_write+0x222/0x2f6
>>>>> epc : patch_insn_write+0x222/0x2f6
>>>>>    ra : patch_insn_write+0x21e/0x2f6
>>>>> epc : ffffffff800068c6 ra : ffffffff800068c2 sp : ffffffff81803df0
>>>>>    gp : ffffffff81a1ab78 tp : ffffffff81814f80 t0 : ffffffffffffe000
>>>>>    t1 : 0000000000000001 t2 : 4c45203a76637369 s0 : ffffffff81803e40
>>>>>    s1 : 0000000000000004 a0 : 0000000000000000 a1 : ffffffffffffffff
>>>>>    a2 : 0000000000000004 a3 : 0000000000000000 a4 : 0000000000000001
>>>>>    a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000052464e43
>>>>>    s2 : ffffffff80b4889c s3 : 000000000000082c s4 : ffffffff80b48828
>>>>>    s5 : 0000000000000828 s6 : ffffffff8131a0a0 s7 : 0000000000000fff
>>>>>    s8 : 0000000008000200 s9 : ffffffff8131a520 s10: 0000000000000018
>>>>>    s11: 000000000000000b t3 : 0000000000000001 t4 : 000000000000000d
>>>>>    t5 : ffffffffd8180000 t6 : ffffffff81803bc8
>>>>> status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003
>>>>> [<ffffffff800068c6>] patch_insn_write+0x222/0x2f6
>>>>> [<ffffffff80006a36>] patch_text_nosync+0xc/0x2a
>>>>> [<ffffffff80003b86>] riscv_cpufeature_patch_func+0x52/0x98
>>>>> [<ffffffff80003348>] _apply_alternatives+0x46/0x86
>>>>> [<ffffffff80c02d36>] apply_boot_alternatives+0x3c/0xfa
>>>>> [<ffffffff80c03ad8>] setup_arch+0x584/0x5b8
>>>>> [<ffffffff80c0075a>] start_kernel+0xa2/0x8f8
>>>>>
>>>>> This issue was exposed by 702e64550b12 ("riscv: fpu: switch has_fpu() to
>>>>> riscv_has_extension_likely()"), as it is the patching in has_fpu() that
>>>>> triggers the splats in Guenter's report.
>>>>>
>>>>> Take the text_mutex before doing any code patching to satisfy lockdep.
>>>>>
>>>>> Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support")
>>>>> Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head")
>>>>> Fixes: 1a0e5dbd3723 ("riscv: sifive: Add SiFive alternative ports")
>>>>> Reported-by: Guenter Roeck <linux@roeck-us.net>
>>>>> Link: https://lore.kernel.org/all/20230212154333.GA3760469@roeck-us.net/
>>>>> Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
>>>>
>>>>
>>>> Applying this patch together with
>>>>
>>>> riscv: Fix early alternative patching
>>>> riscv: Fix Zbb alternative IDs
>>>>
>>>> _and_ disabling CONFIG_RISCV_ISA_ZBB fixes all problems observed with
>>>> riscv64 for me. With that,
>>>>
>>>> Tested-by: Guenter Roeck <linux@roeck-us.net>
>>>>
>>>> Unfortunately I still see boot failures when trying to boot riscv32
>>>> images, so something is still broken. I don't know yet if that is
>>>> a generic problem or if it is related to a riscv patch.
>>>> I am still trying to track that down ...
>>>
>>> Heiko, would you be able to look at this please?
>>> There's some more information in the Link: above.
>>
>> just for the record, I started looking into this now.
>>
>> Starting with my own code [0] on top of 6.2-rc2 I was able to verify
>> - qemu-riscv32 without zbb
>> - qemu-riscv32 with zbb
>> - qemu-riscv64 without zbb
>> - qemu-riscv64 with zbb
>> to work.
>>
>> So next up, I'll move over to the merged for-next branch to hopefully
>> see what changed between the above and the for-next code.
> 
> So now I've also tested Palmer's for-next at
>    commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
> 
> again with the same variants
> - qemu-riscv32 without zbb
> - qemu-riscv32 with zbb
> - qemu-riscv64 without zbb
> - qemu-riscv64 with zbb
> 
> And all of them booted fine into a nfs-root (debian for riscv64 and a
> buildroot for riscv32).
> 
> I even forced a bug into the zbb code to make sure the patching worked
> correctly (where the kernel failed as expected).
> 
> Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
> 

Where did you get this version of qemu from ? Does it posssibly include
zbb related fixes/changes on top of v7.2.0 ? I used v7.2.0. I also tried
the master branch as of today, but there was no difference.

Also, what C compiler and binutils version did you use ?

> I did try the one from Debian-stable (qemu-5.2) but that was too old and
> didn't support Zbb yet.
> 
> One thing of note, the "active" 32bit config I had, somehow didn't produce
> working images and I needed to start a new build using the rv32_defconfig.
> 

My tests use rv32_defconfig as base for 32-bit tests, with various debug
options enabled on top.

Ok, I gave it another try. The backtraces do _not_ appear if I just
use defconfig as base. I'll try to find out what exactly triggers the
problem.

Thanks,
Guenter

> So right now, I'm not sure what more to test though.
> 
> Heiko
> 
>
Guenter Roeck Feb. 16, 2023, 7:07 a.m. UTC | #7
On 2/15/23 14:00, Heiko Stübner wrote:
[ ... ]
> again with the same variants
> - qemu-riscv32 without zbb
> - qemu-riscv32 with zbb
> - qemu-riscv64 without zbb
> - qemu-riscv64 with zbb
> 
> And all of them booted fine into a nfs-root (debian for riscv64 and a
> buildroot for riscv32).
> 
> I even forced a bug into the zbb code to make sure the patching worked
> correctly (where the kernel failed as expected).
> 
> Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
> 
> I did try the one from Debian-stable (qemu-5.2) but that was too old and
> didn't support Zbb yet.
> 
> One thing of note, the "active" 32bit config I had, somehow didn't produce
> working images and I needed to start a new build using the rv32_defconfig.
> 
> So right now, I'm not sure what more to test though.
> 

Try this:

- Use next-20230216 as base.
- Apply
     RISC-V: take text_mutex during alternative patching
     RISC-V: Don't check text_mutex during stop_machine
   on top of it.
- Configure defconfig + CONFIG_PROVE_LOCKING.
- Run something like
   qemu-system-riscv64 -M virt -m 512M -no-reboot \
     -kernel arch/riscv/boot/Image -initrd rootfs.cpio \
     -append "rdinit=/sbin/init console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
     -nographic -monitor none

That should give you

WARNING: CPU: 0 PID: 0 at kernel/trace/trace_events.c:448 trace_event_raw_init+0xde/0x642
...
event neigh_update has unsafe dereference of argument 9
and lots of follow-up messages.

I tried with qemu v7.2.0 and qemu master (v7.2.0-1434-g6a50f64ca0).

Guenter
Guenter Roeck Feb. 16, 2023, 3:33 p.m. UTC | #8
On 2/15/23 14:00, Heiko Stübner wrote:
[ ... ]
> 
> So now I've also tested Palmer's for-next at
>    commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
> 
> again with the same variants
> - qemu-riscv32 without zbb
> - qemu-riscv32 with zbb
> - qemu-riscv64 without zbb
> - qemu-riscv64 with zbb
> 
> And all of them booted fine into a nfs-root (debian for riscv64 and a
> buildroot for riscv32).
> 
> I even forced a bug into the zbb code to make sure the patching worked
> correctly (where the kernel failed as expected).
> 
> Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
> 
> I did try the one from Debian-stable (qemu-5.2) but that was too old and
> didn't support Zbb yet.
> 
> One thing of note, the "active" 32bit config I had, somehow didn't produce
> working images and I needed to start a new build using the rv32_defconfig.
> 
> So right now, I'm not sure what more to test though.
> 

Another example:

- build defconfig
- run
   qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
     -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
     -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
     -nographic -monitor none

With CONFIG_RISCV_ISA_ZBB=y, that results in

[    0.818263] /dev/root: Can't open blockdev
[    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
[    0.819177] Please append a correct "root=" boot option; here are the available partitions:
[    0.819808] fe00           16384 vda
[    0.819944]  driver: virtio_blk
[    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
[    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
[    0.821672] Hardware name: riscv-virtio,qemu (DT)
[    0.822050] Call Trace:
[    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
[    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
[    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
[    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
[    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
[    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
[    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
[    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
[    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
[    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
[    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
[    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---

This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.

Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.

Guenter
Andrew Jones Feb. 22, 2023, 2:27 p.m. UTC | #9
On Thu, Feb 16, 2023 at 07:33:55AM -0800, Guenter Roeck wrote:
> On 2/15/23 14:00, Heiko Stübner wrote:
> [ ... ]
> > 
> > So now I've also tested Palmer's for-next at
> >    commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
> > 
> > again with the same variants
> > - qemu-riscv32 without zbb
> > - qemu-riscv32 with zbb
> > - qemu-riscv64 without zbb
> > - qemu-riscv64 with zbb
> > 
> > And all of them booted fine into a nfs-root (debian for riscv64 and a
> > buildroot for riscv32).
> > 
> > I even forced a bug into the zbb code to make sure the patching worked
> > correctly (where the kernel failed as expected).
> > 
> > Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
> > 
> > I did try the one from Debian-stable (qemu-5.2) but that was too old and
> > didn't support Zbb yet.
> > 
> > One thing of note, the "active" 32bit config I had, somehow didn't produce
> > working images and I needed to start a new build using the rv32_defconfig.
> > 
> > So right now, I'm not sure what more to test though.
> > 
> 
> Another example:
> 
> - build defconfig
> - run
>   qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
>     -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
>     -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
>     -nographic -monitor none
> 
> With CONFIG_RISCV_ISA_ZBB=y, that results in
> 
> [    0.818263] /dev/root: Can't open blockdev
> [    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
> [    0.819177] Please append a correct "root=" boot option; here are the available partitions:
> [    0.819808] fe00           16384 vda
> [    0.819944]  driver: virtio_blk
> [    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
> [    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
> [    0.821672] Hardware name: riscv-virtio,qemu (DT)
> [    0.822050] Call Trace:
> [    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
> [    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
> [    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
> [    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
> [    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
> [    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
> [    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
> [    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
> [    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
> [    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
> [    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
> [    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---
> 
> This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.
> 
> Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.
>

Just to +1 this, I get the same result (unable to mount root fs) with

$QEMU -cpu rv64,zbb=on \
        -nographic \
        -machine virt \
        -kernel $KERNEL \
        -append 'root=/dev/vda console=ttyS0' \
        -drive file=disk.ext4,format=raw,id=hd0 \
        -device virtio-blk-pci,drive=hd0

kernel:   latest riscv-linux/for-next (8658db0a4a0f), defconfig
gcc:      riscv-gnu-toolchain (12.1.0)
binutils: riscv-gnu-toolchain (2.39)
qemu:     latest master (79b677d658d3)

Flipping the QEMU cpu zbb property off allows boot to succeed, i.e. it's
not necessary to compile out the CONFIG_RISCV_ISA_ZBB code from the
kernel, it's just necessary to avoid using it.

Thanks,
drew
patchwork-bot+linux-riscv@kernel.org Feb. 22, 2023, 3 p.m. UTC | #10
Hello:

This patch was applied to riscv/linux.git (for-next)
by Palmer Dabbelt <palmer@rivosinc.com>:

On Sun, 12 Feb 2023 19:47:36 +0000 you wrote:
> From: Conor Dooley <conor.dooley@microchip.com>
> 
> Guenter reported a splat during boot, that Samuel pointed out was the
> lockdep assertion failing in patch_insn_write():
> 
> WARNING: CPU: 0 PID: 0 at arch/riscv/kernel/patch.c:63 patch_insn_write+0x222/0x2f6
> epc : patch_insn_write+0x222/0x2f6
>  ra : patch_insn_write+0x21e/0x2f6
> epc : ffffffff800068c6 ra : ffffffff800068c2 sp : ffffffff81803df0
>  gp : ffffffff81a1ab78 tp : ffffffff81814f80 t0 : ffffffffffffe000
>  t1 : 0000000000000001 t2 : 4c45203a76637369 s0 : ffffffff81803e40
>  s1 : 0000000000000004 a0 : 0000000000000000 a1 : ffffffffffffffff
>  a2 : 0000000000000004 a3 : 0000000000000000 a4 : 0000000000000001
>  a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000052464e43
>  s2 : ffffffff80b4889c s3 : 000000000000082c s4 : ffffffff80b48828
>  s5 : 0000000000000828 s6 : ffffffff8131a0a0 s7 : 0000000000000fff
>  s8 : 0000000008000200 s9 : ffffffff8131a520 s10: 0000000000000018
>  s11: 000000000000000b t3 : 0000000000000001 t4 : 000000000000000d
>  t5 : ffffffffd8180000 t6 : ffffffff81803bc8
> status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003
> [<ffffffff800068c6>] patch_insn_write+0x222/0x2f6
> [<ffffffff80006a36>] patch_text_nosync+0xc/0x2a
> [<ffffffff80003b86>] riscv_cpufeature_patch_func+0x52/0x98
> [<ffffffff80003348>] _apply_alternatives+0x46/0x86
> [<ffffffff80c02d36>] apply_boot_alternatives+0x3c/0xfa
> [<ffffffff80c03ad8>] setup_arch+0x584/0x5b8
> [<ffffffff80c0075a>] start_kernel+0xa2/0x8f8
> 
> [...]

Here is the summary with links:
  - [v1] RISC-V: take text_mutex during alternative patching
    https://git.kernel.org/riscv/c/9493e6f3ce02

You are awesome, thank you!
Andrew Jones Feb. 22, 2023, 3:31 p.m. UTC | #11
On Wed, Feb 22, 2023 at 03:27:43PM +0100, Andrew Jones wrote:
> On Thu, Feb 16, 2023 at 07:33:55AM -0800, Guenter Roeck wrote:
> > On 2/15/23 14:00, Heiko Stübner wrote:
> > [ ... ]
> > > 
> > > So now I've also tested Palmer's for-next at
> > >    commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
> > > 
> > > again with the same variants
> > > - qemu-riscv32 without zbb
> > > - qemu-riscv32 with zbb
> > > - qemu-riscv64 without zbb
> > > - qemu-riscv64 with zbb
> > > 
> > > And all of them booted fine into a nfs-root (debian for riscv64 and a
> > > buildroot for riscv32).
> > > 
> > > I even forced a bug into the zbb code to make sure the patching worked
> > > correctly (where the kernel failed as expected).
> > > 
> > > Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
> > > 
> > > I did try the one from Debian-stable (qemu-5.2) but that was too old and
> > > didn't support Zbb yet.
> > > 
> > > One thing of note, the "active" 32bit config I had, somehow didn't produce
> > > working images and I needed to start a new build using the rv32_defconfig.
> > > 
> > > So right now, I'm not sure what more to test though.
> > > 
> > 
> > Another example:
> > 
> > - build defconfig
> > - run
> >   qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
> >     -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
> >     -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
> >     -nographic -monitor none
> > 
> > With CONFIG_RISCV_ISA_ZBB=y, that results in
> > 
> > [    0.818263] /dev/root: Can't open blockdev
> > [    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
> > [    0.819177] Please append a correct "root=" boot option; here are the available partitions:
> > [    0.819808] fe00           16384 vda
> > [    0.819944]  driver: virtio_blk
> > [    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
> > [    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
> > [    0.821672] Hardware name: riscv-virtio,qemu (DT)
> > [    0.822050] Call Trace:
> > [    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
> > [    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
> > [    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
> > [    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
> > [    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
> > [    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
> > [    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
> > [    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
> > [    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
> > [    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
> > [    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
> > [    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---
> > 
> > This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.
> > 
> > Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.
> >
> 
> Just to +1 this, I get the same result (unable to mount root fs) with
> 
> $QEMU -cpu rv64,zbb=on \
>         -nographic \
>         -machine virt \
>         -kernel $KERNEL \
>         -append 'root=/dev/vda console=ttyS0' \
>         -drive file=disk.ext4,format=raw,id=hd0 \
>         -device virtio-blk-pci,drive=hd0
> 
> kernel:   latest riscv-linux/for-next (8658db0a4a0f), defconfig
> gcc:      riscv-gnu-toolchain (12.1.0)
> binutils: riscv-gnu-toolchain (2.39)
> qemu:     latest master (79b677d658d3)
> 
> Flipping the QEMU cpu zbb property off allows boot to succeed, i.e. it's
> not necessary to compile out the CONFIG_RISCV_ISA_ZBB code from the
> kernel, it's just necessary to avoid using it.
>

Looks like something in the strncmp implementation. Only commenting it
out allows boot to succeed.

diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
index ee49595075be..0bf03abe1ced 100644
--- a/arch/riscv/lib/strncmp.S
+++ b/arch/riscv/lib/strncmp.S
@@ -9,7 +9,7 @@
 /* int strncmp(const char *cs, const char *ct, size_t count) */
 SYM_FUNC_START(strncmp)

-       ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB)
+//     ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB)

        /*
         * Returns


Thanks,
drew
Heiko Stuebner Feb. 22, 2023, 3:39 p.m. UTC | #12
Am Mittwoch, 22. Februar 2023, 16:31:25 CET schrieb Andrew Jones:
> On Wed, Feb 22, 2023 at 03:27:43PM +0100, Andrew Jones wrote:
> > On Thu, Feb 16, 2023 at 07:33:55AM -0800, Guenter Roeck wrote:
> > > On 2/15/23 14:00, Heiko Stübner wrote:
> > > [ ... ]
> > > > 
> > > > So now I've also tested Palmer's for-next at
> > > >    commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
> > > > 
> > > > again with the same variants
> > > > - qemu-riscv32 without zbb
> > > > - qemu-riscv32 with zbb
> > > > - qemu-riscv64 without zbb
> > > > - qemu-riscv64 with zbb
> > > > 
> > > > And all of them booted fine into a nfs-root (debian for riscv64 and a
> > > > buildroot for riscv32).
> > > > 
> > > > I even forced a bug into the zbb code to make sure the patching worked
> > > > correctly (where the kernel failed as expected).
> > > > 
> > > > Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
> > > > 
> > > > I did try the one from Debian-stable (qemu-5.2) but that was too old and
> > > > didn't support Zbb yet.
> > > > 
> > > > One thing of note, the "active" 32bit config I had, somehow didn't produce
> > > > working images and I needed to start a new build using the rv32_defconfig.
> > > > 
> > > > So right now, I'm not sure what more to test though.
> > > > 
> > > 
> > > Another example:
> > > 
> > > - build defconfig
> > > - run
> > >   qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
> > >     -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
> > >     -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
> > >     -nographic -monitor none
> > > 
> > > With CONFIG_RISCV_ISA_ZBB=y, that results in
> > > 
> > > [    0.818263] /dev/root: Can't open blockdev
> > > [    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
> > > [    0.819177] Please append a correct "root=" boot option; here are the available partitions:
> > > [    0.819808] fe00           16384 vda
> > > [    0.819944]  driver: virtio_blk
> > > [    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
> > > [    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
> > > [    0.821672] Hardware name: riscv-virtio,qemu (DT)
> > > [    0.822050] Call Trace:
> > > [    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
> > > [    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
> > > [    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
> > > [    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
> > > [    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
> > > [    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
> > > [    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
> > > [    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
> > > [    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
> > > [    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
> > > [    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
> > > [    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---
> > > 
> > > This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.
> > > 
> > > Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.
> > >
> > 
> > Just to +1 this, I get the same result (unable to mount root fs) with
> > 
> > $QEMU -cpu rv64,zbb=on \
> >         -nographic \
> >         -machine virt \
> >         -kernel $KERNEL \
> >         -append 'root=/dev/vda console=ttyS0' \
> >         -drive file=disk.ext4,format=raw,id=hd0 \
> >         -device virtio-blk-pci,drive=hd0
> > 
> > kernel:   latest riscv-linux/for-next (8658db0a4a0f), defconfig
> > gcc:      riscv-gnu-toolchain (12.1.0)
> > binutils: riscv-gnu-toolchain (2.39)
> > qemu:     latest master (79b677d658d3)
> > 
> > Flipping the QEMU cpu zbb property off allows boot to succeed, i.e. it's
> > not necessary to compile out the CONFIG_RISCV_ISA_ZBB code from the
> > kernel, it's just necessary to avoid using it.
> >
> 
> Looks like something in the strncmp implementation. Only commenting it
> out allows boot to succeed.

and interestingly it seems to be something very specific. I.e. my setup is
nfsroot-based (qemu is "just" another board in my boardfarm) and booting
into an nfs-root works quite nicely.

I guess I need to look into how to get an actual disk-image in there.


> diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
> index ee49595075be..0bf03abe1ced 100644
> --- a/arch/riscv/lib/strncmp.S
> +++ b/arch/riscv/lib/strncmp.S
> @@ -9,7 +9,7 @@
>  /* int strncmp(const char *cs, const char *ct, size_t count) */
>  SYM_FUNC_START(strncmp)
> 
> -       ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB)
> +//     ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB)
> 
>         /*
>          * Returns
> 
> 
> Thanks,
> drew
>
Palmer Dabbelt Feb. 22, 2023, 4:17 p.m. UTC | #13
On Wed, 22 Feb 2023 06:27:43 PST (-0800), ajones@ventanamicro.com wrote:
> On Thu, Feb 16, 2023 at 07:33:55AM -0800, Guenter Roeck wrote:
>> On 2/15/23 14:00, Heiko Stübner wrote:
>> [ ... ]
>> >
>> > So now I've also tested Palmer's for-next at
>> >    commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
>> >
>> > again with the same variants
>> > - qemu-riscv32 without zbb
>> > - qemu-riscv32 with zbb
>> > - qemu-riscv64 without zbb
>> > - qemu-riscv64 with zbb
>> >
>> > And all of them booted fine into a nfs-root (debian for riscv64 and a
>> > buildroot for riscv32).
>> >
>> > I even forced a bug into the zbb code to make sure the patching worked
>> > correctly (where the kernel failed as expected).
>> >
>> > Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
>> >
>> > I did try the one from Debian-stable (qemu-5.2) but that was too old and
>> > didn't support Zbb yet.
>> >
>> > One thing of note, the "active" 32bit config I had, somehow didn't produce
>> > working images and I needed to start a new build using the rv32_defconfig.
>> >
>> > So right now, I'm not sure what more to test though.
>> >
>>
>> Another example:
>>
>> - build defconfig
>> - run
>>   qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
>>     -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
>>     -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
>>     -nographic -monitor none
>>
>> With CONFIG_RISCV_ISA_ZBB=y, that results in
>>
>> [    0.818263] /dev/root: Can't open blockdev
>> [    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
>> [    0.819177] Please append a correct "root=" boot option; here are the available partitions:
>> [    0.819808] fe00           16384 vda
>> [    0.819944]  driver: virtio_blk
>> [    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
>> [    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
>> [    0.821672] Hardware name: riscv-virtio,qemu (DT)
>> [    0.822050] Call Trace:
>> [    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
>> [    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
>> [    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
>> [    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
>> [    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
>> [    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
>> [    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
>> [    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
>> [    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
>> [    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
>> [    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
>> [    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---
>>
>> This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.
>>
>> Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.
>>
>
> Just to +1 this, I get the same result (unable to mount root fs) with
>
> $QEMU -cpu rv64,zbb=on \
>         -nographic \
>         -machine virt \
>         -kernel $KERNEL \
>         -append 'root=/dev/vda console=ttyS0' \
>         -drive file=disk.ext4,format=raw,id=hd0 \
>         -device virtio-blk-pci,drive=hd0
>
> kernel:   latest riscv-linux/for-next (8658db0a4a0f), defconfig
> gcc:      riscv-gnu-toolchain (12.1.0)
> binutils: riscv-gnu-toolchain (2.39)
> qemu:     latest master (79b677d658d3)
>
> Flipping the QEMU cpu zbb property off allows boot to succeed, i.e. it's
> not necessary to compile out the CONFIG_RISCV_ISA_ZBB code from the
> kernel, it's just necessary to avoid using it.

Thanks for finding this.  I'm changing the subject line and adding Paul, 
as it came up during the patchwork call.  I added zbb=on for rv32 and 
rv64 to my tests and they pass, but I'm not mounting a rootfs...

>
> Thanks,
> drew
Palmer Dabbelt Feb. 22, 2023, 4:47 p.m. UTC | #14
On Wed, 22 Feb 2023 07:39:11 PST (-0800), heiko@sntech.de wrote:
> Am Mittwoch, 22. Februar 2023, 16:31:25 CET schrieb Andrew Jones:
>> On Wed, Feb 22, 2023 at 03:27:43PM +0100, Andrew Jones wrote:
>> > On Thu, Feb 16, 2023 at 07:33:55AM -0800, Guenter Roeck wrote:
>> > > On 2/15/23 14:00, Heiko Stübner wrote:
>> > > [ ... ]
>> > > > 
>> > > > So now I've also tested Palmer's for-next at
>> > > >    commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
>> > > > 
>> > > > again with the same variants
>> > > > - qemu-riscv32 without zbb
>> > > > - qemu-riscv32 with zbb
>> > > > - qemu-riscv64 without zbb
>> > > > - qemu-riscv64 with zbb
>> > > > 
>> > > > And all of them booted fine into a nfs-root (debian for riscv64 and a
>> > > > buildroot for riscv32).
>> > > > 
>> > > > I even forced a bug into the zbb code to make sure the patching worked
>> > > > correctly (where the kernel failed as expected).
>> > > > 
>> > > > Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
>> > > > 
>> > > > I did try the one from Debian-stable (qemu-5.2) but that was too old and
>> > > > didn't support Zbb yet.
>> > > > 
>> > > > One thing of note, the "active" 32bit config I had, somehow didn't produce
>> > > > working images and I needed to start a new build using the rv32_defconfig.
>> > > > 
>> > > > So right now, I'm not sure what more to test though.
>> > > > 
>> > > 
>> > > Another example:
>> > > 
>> > > - build defconfig
>> > > - run
>> > >   qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
>> > >     -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
>> > >     -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
>> > >     -nographic -monitor none
>> > > 
>> > > With CONFIG_RISCV_ISA_ZBB=y, that results in
>> > > 
>> > > [    0.818263] /dev/root: Can't open blockdev
>> > > [    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
>> > > [    0.819177] Please append a correct "root=" boot option; here are the available partitions:
>> > > [    0.819808] fe00           16384 vda
>> > > [    0.819944]  driver: virtio_blk
>> > > [    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
>> > > [    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
>> > > [    0.821672] Hardware name: riscv-virtio,qemu (DT)
>> > > [    0.822050] Call Trace:
>> > > [    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
>> > > [    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
>> > > [    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
>> > > [    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
>> > > [    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
>> > > [    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
>> > > [    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
>> > > [    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
>> > > [    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
>> > > [    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
>> > > [    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
>> > > [    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---
>> > > 
>> > > This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.
>> > > 
>> > > Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.
>> > >
>> > 
>> > Just to +1 this, I get the same result (unable to mount root fs) with
>> > 
>> > $QEMU -cpu rv64,zbb=on \
>> >         -nographic \
>> >         -machine virt \
>> >         -kernel $KERNEL \
>> >         -append 'root=/dev/vda console=ttyS0' \
>> >         -drive file=disk.ext4,format=raw,id=hd0 \
>> >         -device virtio-blk-pci,drive=hd0
>> > 
>> > kernel:   latest riscv-linux/for-next (8658db0a4a0f), defconfig
>> > gcc:      riscv-gnu-toolchain (12.1.0)
>> > binutils: riscv-gnu-toolchain (2.39)
>> > qemu:     latest master (79b677d658d3)
>> > 
>> > Flipping the QEMU cpu zbb property off allows boot to succeed, i.e. it's
>> > not necessary to compile out the CONFIG_RISCV_ISA_ZBB code from the
>> > kernel, it's just necessary to avoid using it.
>> >
>> 
>> Looks like something in the strncmp implementation. Only commenting it
>> out allows boot to succeed.
>
> and interestingly it seems to be something very specific. I.e. my setup is
> nfsroot-based (qemu is "just" another board in my boardfarm) and booting
> into an nfs-root works quite nicely.
>
> I guess I need to look into how to get an actual disk-image in there.

It looks like Drew isn't using an initrd (and with NFS, presumably you 
are)?  That's probably a big difference as well.

>
>
>> diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
>> index ee49595075be..0bf03abe1ced 100644
>> --- a/arch/riscv/lib/strncmp.S
>> +++ b/arch/riscv/lib/strncmp.S
>> @@ -9,7 +9,7 @@
>>  /* int strncmp(const char *cs, const char *ct, size_t count) */
>>  SYM_FUNC_START(strncmp)
>> 
>> -       ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB)
>> +//     ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB)
>> 
>>         /*
>>          * Returns
>> 
>> 
>> Thanks,
>> drew
>> 
>
>
>
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Heiko Stuebner Feb. 22, 2023, 5:03 p.m. UTC | #15
Am Mittwoch, 22. Februar 2023, 17:47:10 CET schrieb Palmer Dabbelt:
> On Wed, 22 Feb 2023 07:39:11 PST (-0800), heiko@sntech.de wrote:
> > Am Mittwoch, 22. Februar 2023, 16:31:25 CET schrieb Andrew Jones:
> >> On Wed, Feb 22, 2023 at 03:27:43PM +0100, Andrew Jones wrote:
> >> > On Thu, Feb 16, 2023 at 07:33:55AM -0800, Guenter Roeck wrote:
> >> > > On 2/15/23 14:00, Heiko Stübner wrote:
> >> > > [ ... ]
> >> > > > 
> >> > > > So now I've also tested Palmer's for-next at
> >> > > >    commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
> >> > > > 
> >> > > > again with the same variants
> >> > > > - qemu-riscv32 without zbb
> >> > > > - qemu-riscv32 with zbb
> >> > > > - qemu-riscv64 without zbb
> >> > > > - qemu-riscv64 with zbb
> >> > > > 
> >> > > > And all of them booted fine into a nfs-root (debian for riscv64 and a
> >> > > > buildroot for riscv32).
> >> > > > 
> >> > > > I even forced a bug into the zbb code to make sure the patching worked
> >> > > > correctly (where the kernel failed as expected).
> >> > > > 
> >> > > > Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
> >> > > > 
> >> > > > I did try the one from Debian-stable (qemu-5.2) but that was too old and
> >> > > > didn't support Zbb yet.
> >> > > > 
> >> > > > One thing of note, the "active" 32bit config I had, somehow didn't produce
> >> > > > working images and I needed to start a new build using the rv32_defconfig.
> >> > > > 
> >> > > > So right now, I'm not sure what more to test though.
> >> > > > 
> >> > > 
> >> > > Another example:
> >> > > 
> >> > > - build defconfig
> >> > > - run
> >> > >   qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
> >> > >     -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
> >> > >     -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
> >> > >     -nographic -monitor none
> >> > > 
> >> > > With CONFIG_RISCV_ISA_ZBB=y, that results in
> >> > > 
> >> > > [    0.818263] /dev/root: Can't open blockdev
> >> > > [    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
> >> > > [    0.819177] Please append a correct "root=" boot option; here are the available partitions:
> >> > > [    0.819808] fe00           16384 vda
> >> > > [    0.819944]  driver: virtio_blk
> >> > > [    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
> >> > > [    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
> >> > > [    0.821672] Hardware name: riscv-virtio,qemu (DT)
> >> > > [    0.822050] Call Trace:
> >> > > [    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
> >> > > [    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
> >> > > [    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
> >> > > [    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
> >> > > [    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
> >> > > [    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
> >> > > [    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
> >> > > [    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
> >> > > [    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
> >> > > [    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
> >> > > [    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
> >> > > [    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---
> >> > > 
> >> > > This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.
> >> > > 
> >> > > Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.
> >> > >
> >> > 
> >> > Just to +1 this, I get the same result (unable to mount root fs) with
> >> > 
> >> > $QEMU -cpu rv64,zbb=on \
> >> >         -nographic \
> >> >         -machine virt \
> >> >         -kernel $KERNEL \
> >> >         -append 'root=/dev/vda console=ttyS0' \
> >> >         -drive file=disk.ext4,format=raw,id=hd0 \
> >> >         -device virtio-blk-pci,drive=hd0
> >> > 
> >> > kernel:   latest riscv-linux/for-next (8658db0a4a0f), defconfig
> >> > gcc:      riscv-gnu-toolchain (12.1.0)
> >> > binutils: riscv-gnu-toolchain (2.39)
> >> > qemu:     latest master (79b677d658d3)
> >> > 
> >> > Flipping the QEMU cpu zbb property off allows boot to succeed, i.e. it's
> >> > not necessary to compile out the CONFIG_RISCV_ISA_ZBB code from the
> >> > kernel, it's just necessary to avoid using it.
> >> >
> >> 
> >> Looks like something in the strncmp implementation. Only commenting it
> >> out allows boot to succeed.
> >
> > and interestingly it seems to be something very specific. I.e. my setup is
> > nfsroot-based (qemu is "just" another board in my boardfarm) and booting
> > into an nfs-root works quite nicely.
> >
> > I guess I need to look into how to get an actual disk-image in there.
> 
> It looks like Drew isn't using an initrd (and with NFS, presumably you 
> are)?  That's probably a big difference as well.

There is no initrd involved in my qemu setup ;-) .
Just a kernel built from current for-next + defconfig and a nfs-server
holding a full Debian-riscv64 system.


The magic qemu incantation is also pretty standard:

/usr/local/bin/qemu-system-riscv64 -M virt -smp 2 -m 1G -display none \
 -cpu rv64,zbb=true,zbc=true,svpbmt=true,Zicbom=true,Zawrs=true,sscofpmf=true,v=true \
 -serial telnet:localhost:5500,server,nowait -kernel /home/devel/nfs/kernel/riscv64/Image \
 -append earlycon=sbi root=/dev/nfs nfsroot=10.0.2.2:/home/devel/nfs/rootfs-riscv64virt ip=dhcp rw \
 -netdev user,id=n1 -device virtio-net-pci,netdev=n1 -name boardfarm-0,process=boardfarm-vm-0 -daemonize


And I also checked that my kernel really lands in the Zbb-code by simply
adding a "ret" [0] and making sure it breaks vs. runs without it


Next I'll try to move my rootfs into a real image-file and switch over to
the commandline Drew calls, to see if it'll reproduce then.



[0] 
diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
index ee49595075be..6b35933c5949 100644
--- a/arch/riscv/lib/strncmp.S
+++ b/arch/riscv/lib/strncmp.S
@@ -53,6 +53,7 @@ strncmp_zbb:
 .option push
 .option arch,+zbb
 
+       ret
        /*
         * Returns
         *   a0 - comparison result, like strncmp
Heiko Stuebner Feb. 22, 2023, 5:43 p.m. UTC | #16
Am Mittwoch, 22. Februar 2023, 18:03:55 CET schrieb Heiko Stübner:
> Am Mittwoch, 22. Februar 2023, 17:47:10 CET schrieb Palmer Dabbelt:
> > On Wed, 22 Feb 2023 07:39:11 PST (-0800), heiko@sntech.de wrote:
> > > Am Mittwoch, 22. Februar 2023, 16:31:25 CET schrieb Andrew Jones:
> > >> On Wed, Feb 22, 2023 at 03:27:43PM +0100, Andrew Jones wrote:
> > >> > On Thu, Feb 16, 2023 at 07:33:55AM -0800, Guenter Roeck wrote:
> > >> > > On 2/15/23 14:00, Heiko Stübner wrote:
> > >> > > [ ... ]
> > >> > > > 
> > >> > > > So now I've also tested Palmer's for-next at
> > >> > > >    commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
> > >> > > > 
> > >> > > > again with the same variants
> > >> > > > - qemu-riscv32 without zbb
> > >> > > > - qemu-riscv32 with zbb
> > >> > > > - qemu-riscv64 without zbb
> > >> > > > - qemu-riscv64 with zbb
> > >> > > > 
> > >> > > > And all of them booted fine into a nfs-root (debian for riscv64 and a
> > >> > > > buildroot for riscv32).
> > >> > > > 
> > >> > > > I even forced a bug into the zbb code to make sure the patching worked
> > >> > > > correctly (where the kernel failed as expected).
> > >> > > > 
> > >> > > > Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
> > >> > > > 
> > >> > > > I did try the one from Debian-stable (qemu-5.2) but that was too old and
> > >> > > > didn't support Zbb yet.
> > >> > > > 
> > >> > > > One thing of note, the "active" 32bit config I had, somehow didn't produce
> > >> > > > working images and I needed to start a new build using the rv32_defconfig.
> > >> > > > 
> > >> > > > So right now, I'm not sure what more to test though.
> > >> > > > 
> > >> > > 
> > >> > > Another example:
> > >> > > 
> > >> > > - build defconfig
> > >> > > - run
> > >> > >   qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
> > >> > >     -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
> > >> > >     -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
> > >> > >     -nographic -monitor none
> > >> > > 
> > >> > > With CONFIG_RISCV_ISA_ZBB=y, that results in
> > >> > > 
> > >> > > [    0.818263] /dev/root: Can't open blockdev
> > >> > > [    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
> > >> > > [    0.819177] Please append a correct "root=" boot option; here are the available partitions:
> > >> > > [    0.819808] fe00           16384 vda
> > >> > > [    0.819944]  driver: virtio_blk
> > >> > > [    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
> > >> > > [    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
> > >> > > [    0.821672] Hardware name: riscv-virtio,qemu (DT)
> > >> > > [    0.822050] Call Trace:
> > >> > > [    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
> > >> > > [    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
> > >> > > [    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
> > >> > > [    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
> > >> > > [    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
> > >> > > [    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
> > >> > > [    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
> > >> > > [    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
> > >> > > [    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
> > >> > > [    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
> > >> > > [    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
> > >> > > [    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---
> > >> > > 
> > >> > > This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.
> > >> > > 
> > >> > > Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.
> > >> > >
> > >> > 
> > >> > Just to +1 this, I get the same result (unable to mount root fs) with
> > >> > 
> > >> > $QEMU -cpu rv64,zbb=on \
> > >> >         -nographic \
> > >> >         -machine virt \
> > >> >         -kernel $KERNEL \
> > >> >         -append 'root=/dev/vda console=ttyS0' \
> > >> >         -drive file=disk.ext4,format=raw,id=hd0 \
> > >> >         -device virtio-blk-pci,drive=hd0
> > >> > 
> > >> > kernel:   latest riscv-linux/for-next (8658db0a4a0f), defconfig
> > >> > gcc:      riscv-gnu-toolchain (12.1.0)
> > >> > binutils: riscv-gnu-toolchain (2.39)
> > >> > qemu:     latest master (79b677d658d3)
> > >> > 
> > >> > Flipping the QEMU cpu zbb property off allows boot to succeed, i.e. it's
> > >> > not necessary to compile out the CONFIG_RISCV_ISA_ZBB code from the
> > >> > kernel, it's just necessary to avoid using it.
> > >> >
> > >> 
> > >> Looks like something in the strncmp implementation. Only commenting it
> > >> out allows boot to succeed.
> > >
> > > and interestingly it seems to be something very specific. I.e. my setup is
> > > nfsroot-based (qemu is "just" another board in my boardfarm) and booting
> > > into an nfs-root works quite nicely.
> > >
> > > I guess I need to look into how to get an actual disk-image in there.
> > 
> > It looks like Drew isn't using an initrd (and with NFS, presumably you 
> > are)?  That's probably a big difference as well.
> 
> There is no initrd involved in my qemu setup ;-) .
> Just a kernel built from current for-next + defconfig and a nfs-server
> holding a full Debian-riscv64 system.
> 
> 
> The magic qemu incantation is also pretty standard:
> 
> /usr/local/bin/qemu-system-riscv64 -M virt -smp 2 -m 1G -display none \
>  -cpu rv64,zbb=true,zbc=true,svpbmt=true,Zicbom=true,Zawrs=true,sscofpmf=true,v=true \
>  -serial telnet:localhost:5500,server,nowait -kernel /home/devel/nfs/kernel/riscv64/Image \
>  -append earlycon=sbi root=/dev/nfs nfsroot=10.0.2.2:/home/devel/nfs/rootfs-riscv64virt ip=dhcp rw \
>  -netdev user,id=n1 -device virtio-net-pci,netdev=n1 -name boardfarm-0,process=boardfarm-vm-0 -daemonize
> 
> 
> And I also checked that my kernel really lands in the Zbb-code by simply
> adding a "ret" [0] and making sure it breaks vs. runs without it
> 
> 
> Next I'll try to move my rootfs into a real image-file and switch over to
> the commandline Drew calls, to see if it'll reproduce then.

With the rootfs in an image I could reproduce the issue now.

I hacked in a very crude change to find the invocation that acts differently
and voila it's a strncmp of the "/dev/vda" against "/dev/" [0].

It's really strange that all the other invocations produce correct results.




[0]
non-working - with zbb-strncmp:
[    2.619129] strncmp: comparing /dev/vda <-> mtd, count 3 => -1
[    2.620451] strncmp: comparing /dev/vda <-> ubi, count 3 => -1
[    2.621163] strncmp: comparing /dev/vda <-> PARTUUID=, count 9 => -1
[    2.621703] strncmp: comparing /dev/vda <-> PARTLABEL=, count 10 => -1
------ below is the difference
[    2.622255] strncmp: comparing /dev/vda <-> /dev/, count 5 => 1
[    2.623446] strncmp: comparing /dev/vda <-> /dev/, count 5 => 1
------ above is the difference
[    2.627064] strncmp: comparing sysfs <-> ext3, count 4 => 14
[    2.627646] strncmp: comparing tmpfs <-> ext3, count 4 => 15
[    2.628476] strncmp: comparing bdev <-> ext3, count 4 => -3
[    2.628990] strncmp: comparing proc <-> ext3, count 4 => 11
[    2.629422] strncmp: comparing cgroup <-> ext3, count 4 => -2
[    2.629856] strncmp: comparing cgroup2 <-> ext3, count 4 => -2
[    2.630345] strncmp: comparing cpuset <-> ext3, count 4 => -2
[    2.630785] strncmp: comparing devtmpfs <-> ext3, count 4 => -1
[    2.631267] strncmp: comparing debugfs <-> ext3, count 4 => -1
[    2.631696] strncmp: comparing securityfs <-> ext3, count 4 => 1
[    2.632596] strncmp: comparing sockfs <-> ext3, count 4 => 14
[    2.633095] strncmp: comparing bpf <-> ext3, count 4 => -3
[    2.633600] strncmp: comparing pipefs <-> ext3, count 4 => 11
[    2.634071] strncmp: comparing ramfs <-> ext3, count 4 => 13
[    2.634501] strncmp: comparing hugetlbfs <-> ext3, count 4 => 1
[    2.634955] strncmp: comparing rpc_pipefs <-> ext3, count 4 => 1
[    2.635407] strncmp: comparing devpts <-> ext3, count 4 => -1
[    2.638788] strncmp: comparing ext3 <-> ext3, count 4 => 0


working - with normal strncmp:
[    2.416438] strncmp: comparing /dev/vda <-> mtd, count 3 => -62
[    2.417312] strncmp: comparing /dev/vda <-> ubi, count 3 => -70
[    2.418296] strncmp: comparing /dev/vda <-> PARTUUID=, count 9 => -33
[    2.418921] strncmp: comparing /dev/vda <-> PARTLABEL=, count 10 => -33
------ below is the difference
[    2.419450] strncmp: comparing /dev/vda <-> /dev/, count 5 => 0
[    2.420698] strncmp: comparing /dev/vda <-> /dev/, count 5 => 0
------ above is the difference
[    2.424086] strncmp: comparing sysfs <-> ext3, count 4 => 14
[    2.424663] strncmp: comparing tmpfs <-> ext3, count 4 => 15
[    2.425111] strncmp: comparing bdev <-> ext3, count 4 => -3
[    2.425563] strncmp: comparing proc <-> ext3, count 4 => 11
[    2.426022] strncmp: comparing cgroup <-> ext3, count 4 => -2
[    2.426717] strncmp: comparing cgroup2 <-> ext3, count 4 => -2
[    2.427175] strncmp: comparing cpuset <-> ext3, count 4 => -2
[    2.427608] strncmp: comparing devtmpfs <-> ext3, count 4 => -1
[    2.428061] strncmp: comparing debugfs <-> ext3, count 4 => -1
[    2.428526] strncmp: comparing securityfs <-> ext3, count 4 => 14
[    2.428985] strncmp: comparing sockfs <-> ext3, count 4 => 14
[    2.429423] strncmp: comparing bpf <-> ext3, count 4 => -3
[    2.429863] strncmp: comparing pipefs <-> ext3, count 4 => 11
[    2.430433] strncmp: comparing ramfs <-> ext3, count 4 => 13
[    2.431202] strncmp: comparing hugetlbfs <-> ext3, count 4 => 3
[    2.431721] strncmp: comparing rpc_pipefs <-> ext3, count 4 => 13
[    2.432222] strncmp: comparing devpts <-> ext3, count 4 => -1
[    2.432685] strncmp: comparing ext3 <-> ext3, count 4 => 0
Guenter Roeck Feb. 22, 2023, 7:06 p.m. UTC | #17
On 2/22/23 09:43, Heiko Stübner wrote:
> Am Mittwoch, 22. Februar 2023, 18:03:55 CET schrieb Heiko Stübner:
>> Am Mittwoch, 22. Februar 2023, 17:47:10 CET schrieb Palmer Dabbelt:
>>> On Wed, 22 Feb 2023 07:39:11 PST (-0800), heiko@sntech.de wrote:
>>>> Am Mittwoch, 22. Februar 2023, 16:31:25 CET schrieb Andrew Jones:
>>>>> On Wed, Feb 22, 2023 at 03:27:43PM +0100, Andrew Jones wrote:
>>>>>> On Thu, Feb 16, 2023 at 07:33:55AM -0800, Guenter Roeck wrote:
>>>>>>> On 2/15/23 14:00, Heiko Stübner wrote:
>>>>>>> [ ... ]
>>>>>>>>
>>>>>>>> So now I've also tested Palmer's for-next at
>>>>>>>>     commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
>>>>>>>>
>>>>>>>> again with the same variants
>>>>>>>> - qemu-riscv32 without zbb
>>>>>>>> - qemu-riscv32 with zbb
>>>>>>>> - qemu-riscv64 without zbb
>>>>>>>> - qemu-riscv64 with zbb
>>>>>>>>
>>>>>>>> And all of them booted fine into a nfs-root (debian for riscv64 and a
>>>>>>>> buildroot for riscv32).
>>>>>>>>
>>>>>>>> I even forced a bug into the zbb code to make sure the patching worked
>>>>>>>> correctly (where the kernel failed as expected).
>>>>>>>>
>>>>>>>> Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
>>>>>>>>
>>>>>>>> I did try the one from Debian-stable (qemu-5.2) but that was too old and
>>>>>>>> didn't support Zbb yet.
>>>>>>>>
>>>>>>>> One thing of note, the "active" 32bit config I had, somehow didn't produce
>>>>>>>> working images and I needed to start a new build using the rv32_defconfig.
>>>>>>>>
>>>>>>>> So right now, I'm not sure what more to test though.
>>>>>>>>
>>>>>>>
>>>>>>> Another example:
>>>>>>>
>>>>>>> - build defconfig
>>>>>>> - run
>>>>>>>    qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
>>>>>>>      -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
>>>>>>>      -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
>>>>>>>      -nographic -monitor none
>>>>>>>
>>>>>>> With CONFIG_RISCV_ISA_ZBB=y, that results in
>>>>>>>
>>>>>>> [    0.818263] /dev/root: Can't open blockdev
>>>>>>> [    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
>>>>>>> [    0.819177] Please append a correct "root=" boot option; here are the available partitions:
>>>>>>> [    0.819808] fe00           16384 vda
>>>>>>> [    0.819944]  driver: virtio_blk
>>>>>>> [    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
>>>>>>> [    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
>>>>>>> [    0.821672] Hardware name: riscv-virtio,qemu (DT)
>>>>>>> [    0.822050] Call Trace:
>>>>>>> [    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
>>>>>>> [    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
>>>>>>> [    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
>>>>>>> [    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
>>>>>>> [    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
>>>>>>> [    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
>>>>>>> [    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
>>>>>>> [    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
>>>>>>> [    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
>>>>>>> [    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
>>>>>>> [    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
>>>>>>> [    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---
>>>>>>>
>>>>>>> This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.
>>>>>>>
>>>>>>> Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.
>>>>>>>
>>>>>>
>>>>>> Just to +1 this, I get the same result (unable to mount root fs) with
>>>>>>
>>>>>> $QEMU -cpu rv64,zbb=on \
>>>>>>          -nographic \
>>>>>>          -machine virt \
>>>>>>          -kernel $KERNEL \
>>>>>>          -append 'root=/dev/vda console=ttyS0' \
>>>>>>          -drive file=disk.ext4,format=raw,id=hd0 \
>>>>>>          -device virtio-blk-pci,drive=hd0
>>>>>>
>>>>>> kernel:   latest riscv-linux/for-next (8658db0a4a0f), defconfig
>>>>>> gcc:      riscv-gnu-toolchain (12.1.0)
>>>>>> binutils: riscv-gnu-toolchain (2.39)
>>>>>> qemu:     latest master (79b677d658d3)
>>>>>>
>>>>>> Flipping the QEMU cpu zbb property off allows boot to succeed, i.e. it's
>>>>>> not necessary to compile out the CONFIG_RISCV_ISA_ZBB code from the
>>>>>> kernel, it's just necessary to avoid using it.
>>>>>>
>>>>>
>>>>> Looks like something in the strncmp implementation. Only commenting it
>>>>> out allows boot to succeed.
>>>>
>>>> and interestingly it seems to be something very specific. I.e. my setup is
>>>> nfsroot-based (qemu is "just" another board in my boardfarm) and booting
>>>> into an nfs-root works quite nicely.
>>>>
>>>> I guess I need to look into how to get an actual disk-image in there.
>>>
>>> It looks like Drew isn't using an initrd (and with NFS, presumably you
>>> are)?  That's probably a big difference as well.
>>
>> There is no initrd involved in my qemu setup ;-) .
>> Just a kernel built from current for-next + defconfig and a nfs-server
>> holding a full Debian-riscv64 system.
>>
>>
>> The magic qemu incantation is also pretty standard:
>>
>> /usr/local/bin/qemu-system-riscv64 -M virt -smp 2 -m 1G -display none \
>>   -cpu rv64,zbb=true,zbc=true,svpbmt=true,Zicbom=true,Zawrs=true,sscofpmf=true,v=true \
>>   -serial telnet:localhost:5500,server,nowait -kernel /home/devel/nfs/kernel/riscv64/Image \
>>   -append earlycon=sbi root=/dev/nfs nfsroot=10.0.2.2:/home/devel/nfs/rootfs-riscv64virt ip=dhcp rw \
>>   -netdev user,id=n1 -device virtio-net-pci,netdev=n1 -name boardfarm-0,process=boardfarm-vm-0 -daemonize
>>
>>
>> And I also checked that my kernel really lands in the Zbb-code by simply
>> adding a "ret" [0] and making sure it breaks vs. runs without it
>>
>>
>> Next I'll try to move my rootfs into a real image-file and switch over to
>> the commandline Drew calls, to see if it'll reproduce then.
> 
> With the rootfs in an image I could reproduce the issue now.
> 
> I hacked in a very crude change to find the invocation that acts differently
> and voila it's a strncmp of the "/dev/vda" against "/dev/" [0].
> 
> It's really strange that all the other invocations produce correct results.
> 

Not really; the failures are the only comparisons which are expected to
return 0 and are not a complete string match.

I'd suspect that strncmp(s1, s2, strlen(s2)) will always return
a bad result if s1 starts with s2 but is not identical to s2.

Guenter

> 
> 
> 
> [0]
> non-working - with zbb-strncmp:
> [    2.619129] strncmp: comparing /dev/vda <-> mtd, count 3 => -1
> [    2.620451] strncmp: comparing /dev/vda <-> ubi, count 3 => -1
> [    2.621163] strncmp: comparing /dev/vda <-> PARTUUID=, count 9 => -1
> [    2.621703] strncmp: comparing /dev/vda <-> PARTLABEL=, count 10 => -1
> ------ below is the difference
> [    2.622255] strncmp: comparing /dev/vda <-> /dev/, count 5 => 1
> [    2.623446] strncmp: comparing /dev/vda <-> /dev/, count 5 => 1
> ------ above is the difference
> [    2.627064] strncmp: comparing sysfs <-> ext3, count 4 => 14
> [    2.627646] strncmp: comparing tmpfs <-> ext3, count 4 => 15
> [    2.628476] strncmp: comparing bdev <-> ext3, count 4 => -3
> [    2.628990] strncmp: comparing proc <-> ext3, count 4 => 11
> [    2.629422] strncmp: comparing cgroup <-> ext3, count 4 => -2
> [    2.629856] strncmp: comparing cgroup2 <-> ext3, count 4 => -2
> [    2.630345] strncmp: comparing cpuset <-> ext3, count 4 => -2
> [    2.630785] strncmp: comparing devtmpfs <-> ext3, count 4 => -1
> [    2.631267] strncmp: comparing debugfs <-> ext3, count 4 => -1
> [    2.631696] strncmp: comparing securityfs <-> ext3, count 4 => 1
> [    2.632596] strncmp: comparing sockfs <-> ext3, count 4 => 14
> [    2.633095] strncmp: comparing bpf <-> ext3, count 4 => -3
> [    2.633600] strncmp: comparing pipefs <-> ext3, count 4 => 11
> [    2.634071] strncmp: comparing ramfs <-> ext3, count 4 => 13
> [    2.634501] strncmp: comparing hugetlbfs <-> ext3, count 4 => 1
> [    2.634955] strncmp: comparing rpc_pipefs <-> ext3, count 4 => 1
> [    2.635407] strncmp: comparing devpts <-> ext3, count 4 => -1
> [    2.638788] strncmp: comparing ext3 <-> ext3, count 4 => 0
> 
> 
> working - with normal strncmp:
> [    2.416438] strncmp: comparing /dev/vda <-> mtd, count 3 => -62
> [    2.417312] strncmp: comparing /dev/vda <-> ubi, count 3 => -70
> [    2.418296] strncmp: comparing /dev/vda <-> PARTUUID=, count 9 => -33
> [    2.418921] strncmp: comparing /dev/vda <-> PARTLABEL=, count 10 => -33
> ------ below is the difference
> [    2.419450] strncmp: comparing /dev/vda <-> /dev/, count 5 => 0
> [    2.420698] strncmp: comparing /dev/vda <-> /dev/, count 5 => 0
> ------ above is the difference
> [    2.424086] strncmp: comparing sysfs <-> ext3, count 4 => 14
> [    2.424663] strncmp: comparing tmpfs <-> ext3, count 4 => 15
> [    2.425111] strncmp: comparing bdev <-> ext3, count 4 => -3
> [    2.425563] strncmp: comparing proc <-> ext3, count 4 => 11
> [    2.426022] strncmp: comparing cgroup <-> ext3, count 4 => -2
> [    2.426717] strncmp: comparing cgroup2 <-> ext3, count 4 => -2
> [    2.427175] strncmp: comparing cpuset <-> ext3, count 4 => -2
> [    2.427608] strncmp: comparing devtmpfs <-> ext3, count 4 => -1
> [    2.428061] strncmp: comparing debugfs <-> ext3, count 4 => -1
> [    2.428526] strncmp: comparing securityfs <-> ext3, count 4 => 14
> [    2.428985] strncmp: comparing sockfs <-> ext3, count 4 => 14
> [    2.429423] strncmp: comparing bpf <-> ext3, count 4 => -3
> [    2.429863] strncmp: comparing pipefs <-> ext3, count 4 => 11
> [    2.430433] strncmp: comparing ramfs <-> ext3, count 4 => 13
> [    2.431202] strncmp: comparing hugetlbfs <-> ext3, count 4 => 3
> [    2.431721] strncmp: comparing rpc_pipefs <-> ext3, count 4 => 13
> [    2.432222] strncmp: comparing devpts <-> ext3, count 4 => -1
> [    2.432685] strncmp: comparing ext3 <-> ext3, count 4 => 0
> 
> 
>
Heiko Stuebner Feb. 22, 2023, 8:33 p.m. UTC | #18
Am Mittwoch, 22. Februar 2023, 20:06:56 CET schrieb Guenter Roeck:
> On 2/22/23 09:43, Heiko Stübner wrote:
> > Am Mittwoch, 22. Februar 2023, 18:03:55 CET schrieb Heiko Stübner:
> >> Am Mittwoch, 22. Februar 2023, 17:47:10 CET schrieb Palmer Dabbelt:
> >>> On Wed, 22 Feb 2023 07:39:11 PST (-0800), heiko@sntech.de wrote:
> >>>> Am Mittwoch, 22. Februar 2023, 16:31:25 CET schrieb Andrew Jones:
> >>>>> On Wed, Feb 22, 2023 at 03:27:43PM +0100, Andrew Jones wrote:
> >>>>>> On Thu, Feb 16, 2023 at 07:33:55AM -0800, Guenter Roeck wrote:
> >>>>>>> On 2/15/23 14:00, Heiko Stübner wrote:
> >>>>>>> [ ... ]
> >>>>>>>>
> >>>>>>>> So now I've also tested Palmer's for-next at
> >>>>>>>>     commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
> >>>>>>>>
> >>>>>>>> again with the same variants
> >>>>>>>> - qemu-riscv32 without zbb
> >>>>>>>> - qemu-riscv32 with zbb
> >>>>>>>> - qemu-riscv64 without zbb
> >>>>>>>> - qemu-riscv64 with zbb
> >>>>>>>>
> >>>>>>>> And all of them booted fine into a nfs-root (debian for riscv64 and a
> >>>>>>>> buildroot for riscv32).
> >>>>>>>>
> >>>>>>>> I even forced a bug into the zbb code to make sure the patching worked
> >>>>>>>> correctly (where the kernel failed as expected).
> >>>>>>>>
> >>>>>>>> Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
> >>>>>>>>
> >>>>>>>> I did try the one from Debian-stable (qemu-5.2) but that was too old and
> >>>>>>>> didn't support Zbb yet.
> >>>>>>>>
> >>>>>>>> One thing of note, the "active" 32bit config I had, somehow didn't produce
> >>>>>>>> working images and I needed to start a new build using the rv32_defconfig.
> >>>>>>>>
> >>>>>>>> So right now, I'm not sure what more to test though.
> >>>>>>>>
> >>>>>>>
> >>>>>>> Another example:
> >>>>>>>
> >>>>>>> - build defconfig
> >>>>>>> - run
> >>>>>>>    qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
> >>>>>>>      -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
> >>>>>>>      -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
> >>>>>>>      -nographic -monitor none
> >>>>>>>
> >>>>>>> With CONFIG_RISCV_ISA_ZBB=y, that results in
> >>>>>>>
> >>>>>>> [    0.818263] /dev/root: Can't open blockdev
> >>>>>>> [    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
> >>>>>>> [    0.819177] Please append a correct "root=" boot option; here are the available partitions:
> >>>>>>> [    0.819808] fe00           16384 vda
> >>>>>>> [    0.819944]  driver: virtio_blk
> >>>>>>> [    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
> >>>>>>> [    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
> >>>>>>> [    0.821672] Hardware name: riscv-virtio,qemu (DT)
> >>>>>>> [    0.822050] Call Trace:
> >>>>>>> [    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
> >>>>>>> [    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
> >>>>>>> [    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
> >>>>>>> [    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
> >>>>>>> [    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
> >>>>>>> [    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
> >>>>>>> [    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
> >>>>>>> [    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
> >>>>>>> [    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
> >>>>>>> [    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
> >>>>>>> [    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
> >>>>>>> [    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---
> >>>>>>>
> >>>>>>> This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.
> >>>>>>>
> >>>>>>> Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.
> >>>>>>>
> >>>>>>
> >>>>>> Just to +1 this, I get the same result (unable to mount root fs) with
> >>>>>>
> >>>>>> $QEMU -cpu rv64,zbb=on \
> >>>>>>          -nographic \
> >>>>>>          -machine virt \
> >>>>>>          -kernel $KERNEL \
> >>>>>>          -append 'root=/dev/vda console=ttyS0' \
> >>>>>>          -drive file=disk.ext4,format=raw,id=hd0 \
> >>>>>>          -device virtio-blk-pci,drive=hd0
> >>>>>>
> >>>>>> kernel:   latest riscv-linux/for-next (8658db0a4a0f), defconfig
> >>>>>> gcc:      riscv-gnu-toolchain (12.1.0)
> >>>>>> binutils: riscv-gnu-toolchain (2.39)
> >>>>>> qemu:     latest master (79b677d658d3)
> >>>>>>
> >>>>>> Flipping the QEMU cpu zbb property off allows boot to succeed, i.e. it's
> >>>>>> not necessary to compile out the CONFIG_RISCV_ISA_ZBB code from the
> >>>>>> kernel, it's just necessary to avoid using it.
> >>>>>>
> >>>>>
> >>>>> Looks like something in the strncmp implementation. Only commenting it
> >>>>> out allows boot to succeed.
> >>>>
> >>>> and interestingly it seems to be something very specific. I.e. my setup is
> >>>> nfsroot-based (qemu is "just" another board in my boardfarm) and booting
> >>>> into an nfs-root works quite nicely.
> >>>>
> >>>> I guess I need to look into how to get an actual disk-image in there.
> >>>
> >>> It looks like Drew isn't using an initrd (and with NFS, presumably you
> >>> are)?  That's probably a big difference as well.
> >>
> >> There is no initrd involved in my qemu setup ;-) .
> >> Just a kernel built from current for-next + defconfig and a nfs-server
> >> holding a full Debian-riscv64 system.
> >>
> >>
> >> The magic qemu incantation is also pretty standard:
> >>
> >> /usr/local/bin/qemu-system-riscv64 -M virt -smp 2 -m 1G -display none \
> >>   -cpu rv64,zbb=true,zbc=true,svpbmt=true,Zicbom=true,Zawrs=true,sscofpmf=true,v=true \
> >>   -serial telnet:localhost:5500,server,nowait -kernel /home/devel/nfs/kernel/riscv64/Image \
> >>   -append earlycon=sbi root=/dev/nfs nfsroot=10.0.2.2:/home/devel/nfs/rootfs-riscv64virt ip=dhcp rw \
> >>   -netdev user,id=n1 -device virtio-net-pci,netdev=n1 -name boardfarm-0,process=boardfarm-vm-0 -daemonize
> >>
> >>
> >> And I also checked that my kernel really lands in the Zbb-code by simply
> >> adding a "ret" [0] and making sure it breaks vs. runs without it
> >>
> >>
> >> Next I'll try to move my rootfs into a real image-file and switch over to
> >> the commandline Drew calls, to see if it'll reproduce then.
> > 
> > With the rootfs in an image I could reproduce the issue now.
> > 
> > I hacked in a very crude change to find the invocation that acts differently
> > and voila it's a strncmp of the "/dev/vda" against "/dev/" [0].
> > 
> > It's really strange that all the other invocations produce correct results.
> > 
> 
> Not really; the failures are the only comparisons which are expected to
> return 0 and are not a complete string match.
> 
> I'd suspect that strncmp(s1, s2, strlen(s2)) will always return
> a bad result if s1 starts with s2 but is not identical to s2.

The issue seems to come into play when s2 is shorter than the stepsize
for the bitops-accelerated part (reg-size being 8) here.

I.e. 

	strncmp("/dev/vda", "/dev/", 5): 1
	strncmp("/dev/vda", "/dev", 4): 1
	strncmp("/dev/vda", "/de", 3): 1
	...
but
	strncmp("/dev/vda12", "/dev/vda", 8): 0
	strncmp("/dev/vda12", "/dev/vda1", 9): 0

For size-9 this succeeds because the first step is "/dev/vda" and the
0byte in "12" is detected and jumps to processing the rest individually.

Duplicating the 0-byte check for s2 [1] - i.e. jumping to the byte-wise
path when a 0-byte is detected in the second string - solves the issue.

Not sure if there is a more performant solution for this, because we do
have the length available. I'll think some more about that part.


Heiko


[1]
@@ -83,6 +83,8 @@ strncmp_zbb:
        REG_L   t1, 0(a1)
        orc.b   t3, t0
        bne     t3, t5, 2f
+       orc.b   t3, t1
+       bne     t3, t5, 2f
        addi    a0, a0, SZREG
        addi    a1, a1, SZREG
        beq     t0, t1, 1b



> > [0]
> > non-working - with zbb-strncmp:
> > [    2.619129] strncmp: comparing /dev/vda <-> mtd, count 3 => -1
> > [    2.620451] strncmp: comparing /dev/vda <-> ubi, count 3 => -1
> > [    2.621163] strncmp: comparing /dev/vda <-> PARTUUID=, count 9 => -1
> > [    2.621703] strncmp: comparing /dev/vda <-> PARTLABEL=, count 10 => -1
> > ------ below is the difference
> > [    2.622255] strncmp: comparing /dev/vda <-> /dev/, count 5 => 1
> > [    2.623446] strncmp: comparing /dev/vda <-> /dev/, count 5 => 1
> > ------ above is the difference
> > [    2.627064] strncmp: comparing sysfs <-> ext3, count 4 => 14
> > [    2.627646] strncmp: comparing tmpfs <-> ext3, count 4 => 15
> > [    2.628476] strncmp: comparing bdev <-> ext3, count 4 => -3
> > [    2.628990] strncmp: comparing proc <-> ext3, count 4 => 11
> > [    2.629422] strncmp: comparing cgroup <-> ext3, count 4 => -2
> > [    2.629856] strncmp: comparing cgroup2 <-> ext3, count 4 => -2
> > [    2.630345] strncmp: comparing cpuset <-> ext3, count 4 => -2
> > [    2.630785] strncmp: comparing devtmpfs <-> ext3, count 4 => -1
> > [    2.631267] strncmp: comparing debugfs <-> ext3, count 4 => -1
> > [    2.631696] strncmp: comparing securityfs <-> ext3, count 4 => 1
> > [    2.632596] strncmp: comparing sockfs <-> ext3, count 4 => 14
> > [    2.633095] strncmp: comparing bpf <-> ext3, count 4 => -3
> > [    2.633600] strncmp: comparing pipefs <-> ext3, count 4 => 11
> > [    2.634071] strncmp: comparing ramfs <-> ext3, count 4 => 13
> > [    2.634501] strncmp: comparing hugetlbfs <-> ext3, count 4 => 1
> > [    2.634955] strncmp: comparing rpc_pipefs <-> ext3, count 4 => 1
> > [    2.635407] strncmp: comparing devpts <-> ext3, count 4 => -1
> > [    2.638788] strncmp: comparing ext3 <-> ext3, count 4 => 0
> > 
> > 
> > working - with normal strncmp:
> > [    2.416438] strncmp: comparing /dev/vda <-> mtd, count 3 => -62
> > [    2.417312] strncmp: comparing /dev/vda <-> ubi, count 3 => -70
> > [    2.418296] strncmp: comparing /dev/vda <-> PARTUUID=, count 9 => -33
> > [    2.418921] strncmp: comparing /dev/vda <-> PARTLABEL=, count 10 => -33
> > ------ below is the difference
> > [    2.419450] strncmp: comparing /dev/vda <-> /dev/, count 5 => 0
> > [    2.420698] strncmp: comparing /dev/vda <-> /dev/, count 5 => 0
> > ------ above is the difference
> > [    2.424086] strncmp: comparing sysfs <-> ext3, count 4 => 14
> > [    2.424663] strncmp: comparing tmpfs <-> ext3, count 4 => 15
> > [    2.425111] strncmp: comparing bdev <-> ext3, count 4 => -3
> > [    2.425563] strncmp: comparing proc <-> ext3, count 4 => 11
> > [    2.426022] strncmp: comparing cgroup <-> ext3, count 4 => -2
> > [    2.426717] strncmp: comparing cgroup2 <-> ext3, count 4 => -2
> > [    2.427175] strncmp: comparing cpuset <-> ext3, count 4 => -2
> > [    2.427608] strncmp: comparing devtmpfs <-> ext3, count 4 => -1
> > [    2.428061] strncmp: comparing debugfs <-> ext3, count 4 => -1
> > [    2.428526] strncmp: comparing securityfs <-> ext3, count 4 => 14
> > [    2.428985] strncmp: comparing sockfs <-> ext3, count 4 => 14
> > [    2.429423] strncmp: comparing bpf <-> ext3, count 4 => -3
> > [    2.429863] strncmp: comparing pipefs <-> ext3, count 4 => 11
> > [    2.430433] strncmp: comparing ramfs <-> ext3, count 4 => 13
> > [    2.431202] strncmp: comparing hugetlbfs <-> ext3, count 4 => 3
> > [    2.431721] strncmp: comparing rpc_pipefs <-> ext3, count 4 => 13
> > [    2.432222] strncmp: comparing devpts <-> ext3, count 4 => -1
> > [    2.432685] strncmp: comparing ext3 <-> ext3, count 4 => 0
> > 
> > 
> > 
> 
>
Palmer Dabbelt Feb. 22, 2023, 10:10 p.m. UTC | #19
On Wed, 22 Feb 2023 12:33:47 PST (-0800), heiko@sntech.de wrote:
> Am Mittwoch, 22. Februar 2023, 20:06:56 CET schrieb Guenter Roeck:
>> On 2/22/23 09:43, Heiko Stübner wrote:
>> > Am Mittwoch, 22. Februar 2023, 18:03:55 CET schrieb Heiko Stübner:
>> >> Am Mittwoch, 22. Februar 2023, 17:47:10 CET schrieb Palmer Dabbelt:
>> >>> On Wed, 22 Feb 2023 07:39:11 PST (-0800), heiko@sntech.de wrote:
>> >>>> Am Mittwoch, 22. Februar 2023, 16:31:25 CET schrieb Andrew Jones:
>> >>>>> On Wed, Feb 22, 2023 at 03:27:43PM +0100, Andrew Jones wrote:
>> >>>>>> On Thu, Feb 16, 2023 at 07:33:55AM -0800, Guenter Roeck wrote:
>> >>>>>>> On 2/15/23 14:00, Heiko Stübner wrote:
>> >>>>>>> [ ... ]
>> >>>>>>>>
>> >>>>>>>> So now I've also tested Palmer's for-next at
>> >>>>>>>>     commit ec6311919ea6 ("Merge patch series "riscv: Optimize function trace"")
>> >>>>>>>>
>> >>>>>>>> again with the same variants
>> >>>>>>>> - qemu-riscv32 without zbb
>> >>>>>>>> - qemu-riscv32 with zbb
>> >>>>>>>> - qemu-riscv64 without zbb
>> >>>>>>>> - qemu-riscv64 with zbb
>> >>>>>>>>
>> >>>>>>>> And all of them booted fine into a nfs-root (debian for riscv64 and a
>> >>>>>>>> buildroot for riscv32).
>> >>>>>>>>
>> >>>>>>>> I even forced a bug into the zbb code to make sure the patching worked
>> >>>>>>>> correctly (where the kernel failed as expected).
>> >>>>>>>>
>> >>>>>>>> Qemu-version for me was 7.2.50 (v7.2.0-744-g5a3633929a-dirty)
>> >>>>>>>>
>> >>>>>>>> I did try the one from Debian-stable (qemu-5.2) but that was too old and
>> >>>>>>>> didn't support Zbb yet.
>> >>>>>>>>
>> >>>>>>>> One thing of note, the "active" 32bit config I had, somehow didn't produce
>> >>>>>>>> working images and I needed to start a new build using the rv32_defconfig.
>> >>>>>>>>
>> >>>>>>>> So right now, I'm not sure what more to test though.
>> >>>>>>>>
>> >>>>>>>
>> >>>>>>> Another example:
>> >>>>>>>
>> >>>>>>> - build defconfig
>> >>>>>>> - run
>> >>>>>>>    qemu-system-riscv64 -M virt -m 512M -no-reboot -kernel arch/riscv/boot/Image \
>> >>>>>>>      -snapshot -device virtio-blk-device,drive=d0 -drive file=rootfs.ext2,if=none,id=d0,format=raw \
>> >>>>>>>      -append "root=/dev/vda console=ttyS0,115200 earlycon=uart8250,mmio,0x10000000,115200" \
>> >>>>>>>      -nographic -monitor none
>> >>>>>>>
>> >>>>>>> With CONFIG_RISCV_ISA_ZBB=y, that results in
>> >>>>>>>
>> >>>>>>> [    0.818263] /dev/root: Can't open blockdev
>> >>>>>>> [    0.818856] VFS: Cannot open root device "/dev/vda" or unknown-block(0,0): error -6
>> >>>>>>> [    0.819177] Please append a correct "root=" boot option; here are the available partitions:
>> >>>>>>> [    0.819808] fe00           16384 vda
>> >>>>>>> [    0.819944]  driver: virtio_blk
>> >>>>>>> [    0.820534] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0)
>> >>>>>>> [    0.821101] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.2.0-rc8-next-20230216-00002-g80332825e240 #4
>> >>>>>>> [    0.821672] Hardware name: riscv-virtio,qemu (DT)
>> >>>>>>> [    0.822050] Call Trace:
>> >>>>>>> [    0.822427] [<ffffffff800053e4>] dump_backtrace+0x1c/0x24
>> >>>>>>> [    0.822834] [<ffffffff807f90e4>] show_stack+0x2c/0x38
>> >>>>>>> [    0.823085] [<ffffffff80803aea>] dump_stack_lvl+0x3c/0x54
>> >>>>>>> [    0.823351] [<ffffffff80803b16>] dump_stack+0x14/0x1c
>> >>>>>>> [    0.823601] [<ffffffff807f944c>] panic+0x102/0x29e
>> >>>>>>> [    0.823834] [<ffffffff80a015e2>] mount_block_root+0x18c/0x23e
>> >>>>>>> [    0.824148] [<ffffffff80a0187c>] mount_root+0x1e8/0x218
>> >>>>>>> [    0.824398] [<ffffffff80a019ee>] prepare_namespace+0x142/0x184
>> >>>>>>> [    0.824655] [<ffffffff80a01182>] kernel_init_freeable+0x236/0x25a
>> >>>>>>> [    0.824934] [<ffffffff80804602>] kernel_init+0x1e/0x10a
>> >>>>>>> [    0.825201] [<ffffffff80003560>] ret_from_exception+0x0/0x16
>> >>>>>>> [    0.826180] ---[ end Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) ]---
>> >>>>>>>
>> >>>>>>> This works fine if CONFIG_RISCV_ISA_ZBB is not enabled.
>> >>>>>>>
>> >>>>>>> Tested with gcc 11.3, binutils 2.39, qemu v7.2.0 and qemu built from mainline.
>> >>>>>>>
>> >>>>>>
>> >>>>>> Just to +1 this, I get the same result (unable to mount root fs) with
>> >>>>>>
>> >>>>>> $QEMU -cpu rv64,zbb=on \
>> >>>>>>          -nographic \
>> >>>>>>          -machine virt \
>> >>>>>>          -kernel $KERNEL \
>> >>>>>>          -append 'root=/dev/vda console=ttyS0' \
>> >>>>>>          -drive file=disk.ext4,format=raw,id=hd0 \
>> >>>>>>          -device virtio-blk-pci,drive=hd0
>> >>>>>>
>> >>>>>> kernel:   latest riscv-linux/for-next (8658db0a4a0f), defconfig
>> >>>>>> gcc:      riscv-gnu-toolchain (12.1.0)
>> >>>>>> binutils: riscv-gnu-toolchain (2.39)
>> >>>>>> qemu:     latest master (79b677d658d3)
>> >>>>>>
>> >>>>>> Flipping the QEMU cpu zbb property off allows boot to succeed, i.e. it's
>> >>>>>> not necessary to compile out the CONFIG_RISCV_ISA_ZBB code from the
>> >>>>>> kernel, it's just necessary to avoid using it.
>> >>>>>>
>> >>>>>
>> >>>>> Looks like something in the strncmp implementation. Only commenting it
>> >>>>> out allows boot to succeed.
>> >>>>
>> >>>> and interestingly it seems to be something very specific. I.e. my setup is
>> >>>> nfsroot-based (qemu is "just" another board in my boardfarm) and booting
>> >>>> into an nfs-root works quite nicely.
>> >>>>
>> >>>> I guess I need to look into how to get an actual disk-image in there.
>> >>>
>> >>> It looks like Drew isn't using an initrd (and with NFS, presumably you
>> >>> are)?  That's probably a big difference as well.
>> >>
>> >> There is no initrd involved in my qemu setup ;-) .
>> >> Just a kernel built from current for-next + defconfig and a nfs-server
>> >> holding a full Debian-riscv64 system.
>> >>
>> >>
>> >> The magic qemu incantation is also pretty standard:
>> >>
>> >> /usr/local/bin/qemu-system-riscv64 -M virt -smp 2 -m 1G -display none \
>> >>   -cpu rv64,zbb=true,zbc=true,svpbmt=true,Zicbom=true,Zawrs=true,sscofpmf=true,v=true \
>> >>   -serial telnet:localhost:5500,server,nowait -kernel /home/devel/nfs/kernel/riscv64/Image \
>> >>   -append earlycon=sbi root=/dev/nfs nfsroot=10.0.2.2:/home/devel/nfs/rootfs-riscv64virt ip=dhcp rw \
>> >>   -netdev user,id=n1 -device virtio-net-pci,netdev=n1 -name boardfarm-0,process=boardfarm-vm-0 -daemonize
>> >>
>> >>
>> >> And I also checked that my kernel really lands in the Zbb-code by simply
>> >> adding a "ret" [0] and making sure it breaks vs. runs without it
>> >>
>> >>
>> >> Next I'll try to move my rootfs into a real image-file and switch over to
>> >> the commandline Drew calls, to see if it'll reproduce then.
>> > 
>> > With the rootfs in an image I could reproduce the issue now.
>> > 
>> > I hacked in a very crude change to find the invocation that acts differently
>> > and voila it's a strncmp of the "/dev/vda" against "/dev/" [0].
>> > 
>> > It's really strange that all the other invocations produce correct results.
>> > 
>> 
>> Not really; the failures are the only comparisons which are expected to
>> return 0 and are not a complete string match.
>> 
>> I'd suspect that strncmp(s1, s2, strlen(s2)) will always return
>> a bad result if s1 starts with s2 but is not identical to s2.
>
> The issue seems to come into play when s2 is shorter than the stepsize
> for the bitops-accelerated part (reg-size being 8) here.
>
> I.e. 
>
> 	strncmp("/dev/vda", "/dev/", 5): 1

I'd tried pulling out your failures from the previous message into the 
selftests, turns out this one happens to fail for me.  From looking at 
the code I'd figured it was this fetch-past-nul issue so I'd added some 
alignment skewing, but it looks like it happens to trigger for me 
without any.

I sent a patch to add the test here 
<https://lore.kernel.org/r/20230222220435.10688-1-palmer@rivosinc.com/>

> 	strncmp("/dev/vda", "/dev", 4): 1
> 	strncmp("/dev/vda", "/de", 3): 1
> 	...
> but
> 	strncmp("/dev/vda12", "/dev/vda", 8): 0
> 	strncmp("/dev/vda12", "/dev/vda1", 9): 0
>
> For size-9 this succeeds because the first step is "/dev/vda" and the
> 0byte in "12" is detected and jumps to processing the rest individually.
>
> Duplicating the 0-byte check for s2 [1] - i.e. jumping to the byte-wise
> path when a 0-byte is detected in the second string - solves the issue.
>
> Not sure if there is a more performant solution for this, because we do
> have the length available. I'll think some more about that part.

The only way I could come up with was to check the length and fall back 
to the byte routines.  That may or may not be faster depending on loop 
counts and such, I doubt it matters until we get to proper 
uarch-optimized routines.

>
>
> Heiko
>
>
> [1]
> @@ -83,6 +83,8 @@ strncmp_zbb:
>         REG_L   t1, 0(a1)
>         orc.b   t3, t0
>         bne     t3, t5, 2f
> +       orc.b   t3, t1
> +       bne     t3, t5, 2f
>         addi    a0, a0, SZREG
>         addi    a1, a1, SZREG
>         beq     t0, t1, 1b
>
>
>
>> > [0]
>> > non-working - with zbb-strncmp:
>> > [    2.619129] strncmp: comparing /dev/vda <-> mtd, count 3 => -1
>> > [    2.620451] strncmp: comparing /dev/vda <-> ubi, count 3 => -1
>> > [    2.621163] strncmp: comparing /dev/vda <-> PARTUUID=, count 9 => -1
>> > [    2.621703] strncmp: comparing /dev/vda <-> PARTLABEL=, count 10 => -1
>> > ------ below is the difference
>> > [    2.622255] strncmp: comparing /dev/vda <-> /dev/, count 5 => 1
>> > [    2.623446] strncmp: comparing /dev/vda <-> /dev/, count 5 => 1
>> > ------ above is the difference
>> > [    2.627064] strncmp: comparing sysfs <-> ext3, count 4 => 14
>> > [    2.627646] strncmp: comparing tmpfs <-> ext3, count 4 => 15
>> > [    2.628476] strncmp: comparing bdev <-> ext3, count 4 => -3
>> > [    2.628990] strncmp: comparing proc <-> ext3, count 4 => 11
>> > [    2.629422] strncmp: comparing cgroup <-> ext3, count 4 => -2
>> > [    2.629856] strncmp: comparing cgroup2 <-> ext3, count 4 => -2
>> > [    2.630345] strncmp: comparing cpuset <-> ext3, count 4 => -2
>> > [    2.630785] strncmp: comparing devtmpfs <-> ext3, count 4 => -1
>> > [    2.631267] strncmp: comparing debugfs <-> ext3, count 4 => -1
>> > [    2.631696] strncmp: comparing securityfs <-> ext3, count 4 => 1
>> > [    2.632596] strncmp: comparing sockfs <-> ext3, count 4 => 14
>> > [    2.633095] strncmp: comparing bpf <-> ext3, count 4 => -3
>> > [    2.633600] strncmp: comparing pipefs <-> ext3, count 4 => 11
>> > [    2.634071] strncmp: comparing ramfs <-> ext3, count 4 => 13
>> > [    2.634501] strncmp: comparing hugetlbfs <-> ext3, count 4 => 1
>> > [    2.634955] strncmp: comparing rpc_pipefs <-> ext3, count 4 => 1
>> > [    2.635407] strncmp: comparing devpts <-> ext3, count 4 => -1
>> > [    2.638788] strncmp: comparing ext3 <-> ext3, count 4 => 0
>> > 
>> > 
>> > working - with normal strncmp:
>> > [    2.416438] strncmp: comparing /dev/vda <-> mtd, count 3 => -62
>> > [    2.417312] strncmp: comparing /dev/vda <-> ubi, count 3 => -70
>> > [    2.418296] strncmp: comparing /dev/vda <-> PARTUUID=, count 9 => -33
>> > [    2.418921] strncmp: comparing /dev/vda <-> PARTLABEL=, count 10 => -33
>> > ------ below is the difference
>> > [    2.419450] strncmp: comparing /dev/vda <-> /dev/, count 5 => 0
>> > [    2.420698] strncmp: comparing /dev/vda <-> /dev/, count 5 => 0
>> > ------ above is the difference
>> > [    2.424086] strncmp: comparing sysfs <-> ext3, count 4 => 14
>> > [    2.424663] strncmp: comparing tmpfs <-> ext3, count 4 => 15
>> > [    2.425111] strncmp: comparing bdev <-> ext3, count 4 => -3
>> > [    2.425563] strncmp: comparing proc <-> ext3, count 4 => 11
>> > [    2.426022] strncmp: comparing cgroup <-> ext3, count 4 => -2
>> > [    2.426717] strncmp: comparing cgroup2 <-> ext3, count 4 => -2
>> > [    2.427175] strncmp: comparing cpuset <-> ext3, count 4 => -2
>> > [    2.427608] strncmp: comparing devtmpfs <-> ext3, count 4 => -1
>> > [    2.428061] strncmp: comparing debugfs <-> ext3, count 4 => -1
>> > [    2.428526] strncmp: comparing securityfs <-> ext3, count 4 => 14
>> > [    2.428985] strncmp: comparing sockfs <-> ext3, count 4 => 14
>> > [    2.429423] strncmp: comparing bpf <-> ext3, count 4 => -3
>> > [    2.429863] strncmp: comparing pipefs <-> ext3, count 4 => 11
>> > [    2.430433] strncmp: comparing ramfs <-> ext3, count 4 => 13
>> > [    2.431202] strncmp: comparing hugetlbfs <-> ext3, count 4 => 3
>> > [    2.431721] strncmp: comparing rpc_pipefs <-> ext3, count 4 => 13
>> > [    2.432222] strncmp: comparing devpts <-> ext3, count 4 => -1
>> > [    2.432685] strncmp: comparing ext3 <-> ext3, count 4 => 0
>> > 
>> > 
>> > 
>> 
>>
diff mbox series

Patch

diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c
index ef9a4eec0dba..da55cb247e89 100644
--- a/arch/riscv/errata/sifive/errata.c
+++ b/arch/riscv/errata/sifive/errata.c
@@ -4,6 +4,7 @@ 
  */
 
 #include <linux/kernel.h>
+#include <linux/memory.h>
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/bug.h>
@@ -107,8 +108,10 @@  void __init_or_module sifive_errata_patch_func(struct alt_entry *begin,
 
 		tmp = (1U << alt->errata_id);
 		if (cpu_req_errata & tmp) {
+			mutex_lock(&text_mutex);
 			patch_text_nosync(ALT_OLD_PTR(alt), ALT_ALT_PTR(alt),
 					  alt->alt_len);
+			mutex_lock(&text_mutex);
 			cpu_apply_errata |= tmp;
 		}
 	}
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index 1dd90a5f86f0..3b96a06d3c54 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -5,6 +5,7 @@ 
 
 #include <linux/bug.h>
 #include <linux/kernel.h>
+#include <linux/memory.h>
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
@@ -101,10 +102,13 @@  void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
 			altptr = ALT_ALT_PTR(alt);
 
 			/* On vm-alternatives, the mmu isn't running yet */
-			if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
+			if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
 				memcpy(oldptr, altptr, alt->alt_len);
-			else
+			} else {
+				mutex_lock(&text_mutex);
 				patch_text_nosync(oldptr, altptr, alt->alt_len);
+				mutex_unlock(&text_mutex);
+			}
 		}
 	}
 
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 21fb567e1b22..59d58ee0f68d 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -10,6 +10,7 @@ 
 #include <linux/ctype.h>
 #include <linux/libfdt.h>
 #include <linux/log2.h>
+#include <linux/memory.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <asm/alternative.h>
@@ -292,8 +293,11 @@  void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
 
 		oldptr = ALT_OLD_PTR(alt);
 		altptr = ALT_ALT_PTR(alt);
+
+		mutex_lock(&text_mutex);
 		patch_text_nosync(oldptr, altptr, alt->alt_len);
 		riscv_alternative_fix_offsets(oldptr, alt->alt_len, oldptr - altptr);
+		mutex_unlock(&text_mutex);
 	}
 }
 #endif