diff mbox series

[1/2] riscv: allow case-insensitive ISA string parsing

Message ID tencent_63090269FF399AE30AC774848C344EF2F10A@qq.com (mailing list archive)
State Superseded
Headers show
Series [1/2] riscv: allow case-insensitive ISA string parsing | expand

Checks

Context Check Description
conchuod/cover_letter success Single patches do not need cover letters
conchuod/tree_selection success Guessed tree name to be for-next at HEAD 310c33dc7a12
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 1 and now 1
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig success Errors and warnings before: 18 this patch: 18
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig success Errors and warnings before: 20 this patch: 20
conchuod/build_rv32_defconfig success Build OK
conchuod/dtb_warn_rv64 success Errors and warnings before: 3 this patch: 3
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch success total: 0 errors, 0 warnings, 0 checks, 100 lines checked
conchuod/source_inline success Was 0 now: 0
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Yangyu Chen April 25, 2023, noon UTC
According to RISC-V ISA specification, the ISA naming strings are case
insensitive. The kernel docs require the riscv,isa string must be all
lowercase to simplify parsing currently. However, this limitation is not
consistent with RISC-V ISA Spec.

This patch modifies the ISA string parser in the kernel to support
case-insensitive ISA string parsing. It replaces `strncmp` with
`strncasecmp`, replaces `islower` with `isalpha`, and wraps the
dereferenced char in the parser with `tolower`.

Signed-off-by: Yangyu Chen <cyy@cyyself.name>
---
 arch/riscv/kernel/cpu.c        |  6 ++++--
 arch/riscv/kernel/cpufeature.c | 20 ++++++++++----------
 2 files changed, 14 insertions(+), 12 deletions(-)

Comments

Yangyu Chen April 25, 2023, 12:45 p.m. UTC | #1
The cover letter is at:

https://lore.kernel.org/linux-riscv/tencent_1647475C9618C390BEC601BE2CC1206D0C07@qq.com/
Conor Dooley April 26, 2023, 6:54 p.m. UTC | #2
(+CC Drew)

Hey Yangyu,

One meta-level comment - can you submit this patch + my dt-bindings
patch as a v2?
Some comments below.

On Tue, Apr 25, 2023 at 08:00:15PM +0800, Yangyu Chen wrote:
> According to RISC-V ISA specification, the ISA naming strings are case
> insensitive. The kernel docs require the riscv,isa string must be all
> lowercase to simplify parsing currently. However, this limitation is not
> consistent with RISC-V ISA Spec.

Please remove the above and cite ACPI's case-insensitivity as the
rationale for this change.

> This patch modifies the ISA string parser in the kernel to support
> case-insensitive ISA string parsing. It replaces `strncmp` with
> `strncasecmp`, replaces `islower` with `isalpha`, and wraps the
> dereferenced char in the parser with `tolower`.
> 
> Signed-off-by: Yangyu Chen <cyy@cyyself.name>
> ---
>  arch/riscv/kernel/cpu.c        |  6 ++++--
>  arch/riscv/kernel/cpufeature.c | 20 ++++++++++----------
>  2 files changed, 14 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> index 8400f0cc9704..531c76079b73 100644
> --- a/arch/riscv/kernel/cpu.c
> +++ b/arch/riscv/kernel/cpu.c
> @@ -4,6 +4,7 @@
>   */
>  
>  #include <linux/cpu.h>
> +#include <linux/ctype.h>
>  #include <linux/init.h>
>  #include <linux/seq_file.h>
>  #include <linux/of.h>
> @@ -41,7 +42,7 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
>  		pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart);
>  		return -ENODEV;
>  	}
> -	if (isa[0] != 'r' || isa[1] != 'v') {
> +	if (tolower(isa[0]) != 'r' || tolower(isa[1]) != 'v') {
>  		pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa);
>  		return -ENODEV;

I don't understand why this is even here in the first place. I'd be
inclined to advocate for it's entire removal. Checking *only* that there
is an "rv" in that string seems pointless to me. If you're on a 64-bit
kernel and the node has riscv,isa = "rv32ima" it's gonna say it is okay?
Drew what do you think?

>  	}
> @@ -228,7 +229,8 @@ static void print_isa(struct seq_file *f, const char *isa)
>  
>  	seq_puts(f, "isa\t\t: ");
>  	/* Print the rv[64/32] part */
> -	seq_write(f, isa, 4);
> +	for (i = 0; i < 4; i++)
> +		seq_putc(f, tolower(isa[i]));

As was pointed out elsewhere, we shouldn't parse the dt to construct
this in the first place. We know what kernel we are running on, so we
should instead do write "rv" into the string & do:
	string = "rv"
	if IS_ENABLED(32-bit):
		isa.append("32")
	else
		isa.append("64")

See the link below, and Drew's comment on that:
https://lore.kernel.org/all/20230424194911.264850-3-heiko.stuebner@vrull.eu/
I'm fine with this change for now in isolation, but it ideally will be
replaced with something that doesn't touch the DT/ACPI for this
information.

>  	for (i = 0; i < sizeof(base_riscv_exts); i++) {
>  		if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a'))
>  			/* Print only enabled the base ISA extensions */
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 59d58ee0f68d..c01dd144addc 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -120,10 +120,10 @@ void __init riscv_fill_hwcap(void)
>  
>  		temp = isa;
>  #if IS_ENABLED(CONFIG_32BIT)
> -		if (!strncmp(isa, "rv32", 4))
> +		if (!strncasecmp(isa, "rv32", 4))
>  			isa += 4;
>  #elif IS_ENABLED(CONFIG_64BIT)
> -		if (!strncmp(isa, "rv64", 4))
> +		if (!strncasecmp(isa, "rv64", 4))
>  			isa += 4;
>  #endif

If you're already modifying these lines, why not convert the ifdeffery
into something like
		if (IS_ENABLED(CONFIG_32BIT) && !strncasecmp(isa, "rv32", 4))
				isa += 4;
		else if (!strncasecmp(isa, "rv64", 4))
				isa += 4;

>  		/* The riscv,isa DT property must start with rv64 or rv32 */
> @@ -135,7 +135,7 @@ void __init riscv_fill_hwcap(void)
>  			const char *ext_end = isa;
>  			bool ext_long = false, ext_err = false;
>  
> -			switch (*ext) {
> +			switch (tolower(*ext)) {

Is there merit in just converting the entire string to lower-case in the
first place rather than having to fiddle with every single time we want
to do a comparison here?

>  			case 's':
>  				/**
>  				 * Workaround for invalid single-letter 's' & 'u'(QEMU).
> @@ -143,7 +143,7 @@ void __init riscv_fill_hwcap(void)
>  				 * not valid ISA extensions. It works until multi-letter
>  				 * extension starting with "Su" appears.
>  				 */
> -				if (ext[-1] != '_' && ext[1] == 'u') {
> +				if (ext[-1] != '_' && tolower(ext[1]) == 'u') {
>  					++isa;
>  					ext_err = true;
>  					break;
> @@ -154,7 +154,7 @@ void __init riscv_fill_hwcap(void)
>  				ext_long = true;
>  				/* Multi-letter extension must be delimited */
>  				for (; *isa && *isa != '_'; ++isa)
> -					if (unlikely(!islower(*isa)
> +					if (unlikely(!isalpha(*isa)
>  						     && !isdigit(*isa)))

This collapses to isalnum() I think, no?

Cheers,
Conor.

>  						ext_err = true;
>  				/* Parse backwards */
> @@ -166,7 +166,7 @@ void __init riscv_fill_hwcap(void)
>  				/* Skip the minor version */
>  				while (isdigit(*--ext_end))
>  					;
> -				if (ext_end[0] != 'p'
> +				if (tolower(ext_end[0]) != 'p'
>  				    || !isdigit(ext_end[-1])) {
>  					/* Advance it to offset the pre-decrement */
>  					++ext_end;
> @@ -178,7 +178,7 @@ void __init riscv_fill_hwcap(void)
>  				++ext_end;
>  				break;
>  			default:
> -				if (unlikely(!islower(*ext))) {
> +				if (unlikely(!isalpha(*ext))) {
>  					ext_err = true;
>  					break;
>  				}
> @@ -188,7 +188,7 @@ void __init riscv_fill_hwcap(void)
>  				/* Skip the minor version */
>  				while (isdigit(*++isa))
>  					;
> -				if (*isa != 'p')
> +				if (tolower(*isa) != 'p')
>  					break;
>  				if (!isdigit(*++isa)) {
>  					--isa;
> @@ -205,7 +205,7 @@ void __init riscv_fill_hwcap(void)
>  #define SET_ISA_EXT_MAP(name, bit)						\
>  			do {							\
>  				if ((ext_end - ext == sizeof(name) - 1) &&	\
> -				     !memcmp(ext, name, sizeof(name) - 1) &&	\
> +				     !strncasecmp(ext, name, sizeof(name) - 1) &&	\
>  				     riscv_isa_extension_check(bit))		\
>  					set_bit(bit, this_isa);			\
>  			} while (false)						\
> @@ -213,7 +213,7 @@ void __init riscv_fill_hwcap(void)
>  			if (unlikely(ext_err))
>  				continue;
>  			if (!ext_long) {
> -				int nr = *ext - 'a';
> +				int nr = tolower(*ext) - 'a';
>  
>  				if (riscv_isa_extension_check(nr)) {
>  					this_hwcap |= isa2hwcap[nr];
> -- 
> 2.40.0
>
Andrew Jones April 27, 2023, 7:53 a.m. UTC | #3
On Wed, Apr 26, 2023 at 07:54:39PM +0100, Conor Dooley wrote:
> (+CC Drew)
> 
> Hey Yangyu,
> 
> One meta-level comment - can you submit this patch + my dt-bindings
> patch as a v2?
> Some comments below.
> 
> On Tue, Apr 25, 2023 at 08:00:15PM +0800, Yangyu Chen wrote:
> > According to RISC-V ISA specification, the ISA naming strings are case
> > insensitive. The kernel docs require the riscv,isa string must be all
> > lowercase to simplify parsing currently. However, this limitation is not
> > consistent with RISC-V ISA Spec.
> 
> Please remove the above and cite ACPI's case-insensitivity as the
> rationale for this change.
> 
> > This patch modifies the ISA string parser in the kernel to support
> > case-insensitive ISA string parsing. It replaces `strncmp` with
> > `strncasecmp`, replaces `islower` with `isalpha`, and wraps the
> > dereferenced char in the parser with `tolower`.
> > 
> > Signed-off-by: Yangyu Chen <cyy@cyyself.name>
> > ---
> >  arch/riscv/kernel/cpu.c        |  6 ++++--
> >  arch/riscv/kernel/cpufeature.c | 20 ++++++++++----------
> >  2 files changed, 14 insertions(+), 12 deletions(-)
> > 
> > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > index 8400f0cc9704..531c76079b73 100644
> > --- a/arch/riscv/kernel/cpu.c
> > +++ b/arch/riscv/kernel/cpu.c
> > @@ -4,6 +4,7 @@
> >   */
> >  
> >  #include <linux/cpu.h>
> > +#include <linux/ctype.h>
> >  #include <linux/init.h>
> >  #include <linux/seq_file.h>
> >  #include <linux/of.h>
> > @@ -41,7 +42,7 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
> >  		pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart);
> >  		return -ENODEV;
> >  	}
> > -	if (isa[0] != 'r' || isa[1] != 'v') {
> > +	if (tolower(isa[0]) != 'r' || tolower(isa[1]) != 'v') {
> >  		pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa);
> >  		return -ENODEV;
> 
> I don't understand why this is even here in the first place. I'd be
> inclined to advocate for it's entire removal. Checking *only* that there
> is an "rv" in that string seems pointless to me. If you're on a 64-bit
> kernel and the node has riscv,isa = "rv32ima" it's gonna say it is okay?
> Drew what do you think?

It makes some sense to me as a garbage detector. It's unlikely the first
two bytes will be "rv" if the string is random junk. I think it should
also do a strlen(isa) >= 4 check first, though. of_property_read_string()
will succeed even when the string is "".

Thanks,
drew
Conor Dooley April 27, 2023, 9:04 a.m. UTC | #4
On Thu, Apr 27, 2023 at 09:53:19AM +0200, Andrew Jones wrote:
> On Wed, Apr 26, 2023 at 07:54:39PM +0100, Conor Dooley wrote:
> > (+CC Drew)
> > 
> > Hey Yangyu,
> > 
> > One meta-level comment - can you submit this patch + my dt-bindings
> > patch as a v2?
> > Some comments below.
> > 
> > On Tue, Apr 25, 2023 at 08:00:15PM +0800, Yangyu Chen wrote:
> > > According to RISC-V ISA specification, the ISA naming strings are case
> > > insensitive. The kernel docs require the riscv,isa string must be all
> > > lowercase to simplify parsing currently. However, this limitation is not
> > > consistent with RISC-V ISA Spec.
> > 
> > Please remove the above and cite ACPI's case-insensitivity as the
> > rationale for this change.
> > 
> > > This patch modifies the ISA string parser in the kernel to support
> > > case-insensitive ISA string parsing. It replaces `strncmp` with
> > > `strncasecmp`, replaces `islower` with `isalpha`, and wraps the
> > > dereferenced char in the parser with `tolower`.
> > > 
> > > Signed-off-by: Yangyu Chen <cyy@cyyself.name>
> > > ---
> > >  arch/riscv/kernel/cpu.c        |  6 ++++--
> > >  arch/riscv/kernel/cpufeature.c | 20 ++++++++++----------
> > >  2 files changed, 14 insertions(+), 12 deletions(-)
> > > 
> > > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > > index 8400f0cc9704..531c76079b73 100644
> > > --- a/arch/riscv/kernel/cpu.c
> > > +++ b/arch/riscv/kernel/cpu.c
> > > @@ -4,6 +4,7 @@
> > >   */
> > >  
> > >  #include <linux/cpu.h>
> > > +#include <linux/ctype.h>
> > >  #include <linux/init.h>
> > >  #include <linux/seq_file.h>
> > >  #include <linux/of.h>
> > > @@ -41,7 +42,7 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
> > >  		pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart);
> > >  		return -ENODEV;
> > >  	}
> > > -	if (isa[0] != 'r' || isa[1] != 'v') {
> > > +	if (tolower(isa[0]) != 'r' || tolower(isa[1]) != 'v') {
> > >  		pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa);
> > >  		return -ENODEV;
> > 
> > I don't understand why this is even here in the first place. I'd be
> > inclined to advocate for it's entire removal. Checking *only* that there
> > is an "rv" in that string seems pointless to me. If you're on a 64-bit
> > kernel and the node has riscv,isa = "rv32ima" it's gonna say it is okay?
> > Drew what do you think?
> 
> It makes some sense to me as a garbage detector. It's unlikely the first
> two bytes will be "rv" if the string is random junk.

Preventing the input of absolute rubbish is dt-validate's job & if the dtb
itself has been corrupted somehow I suspect that we have bigger problems
than checking for "rv" will solve.

> also do a strlen(isa) >= 4 check first, though. of_property_read_string()
> will succeed even when the string is "".

I don't think that checking that there are at least 4 characters isn't
even sufficient. Either we should confirm that this is a valid riscv,isa
to run on (so rv##ima w/ ## matching the kernel) or not bother at all.

It's a different issue though, and I'd be inclined to revisit it in the
future when the ACPI stuff is in, along with perhaps the cleanup parts
of Heiko's series too.
Andrew Jones April 27, 2023, 9:25 a.m. UTC | #5
On Thu, Apr 27, 2023 at 10:04:34AM +0100, Conor Dooley wrote:
> On Thu, Apr 27, 2023 at 09:53:19AM +0200, Andrew Jones wrote:
> > On Wed, Apr 26, 2023 at 07:54:39PM +0100, Conor Dooley wrote:
> > > (+CC Drew)
> > > 
> > > Hey Yangyu,
> > > 
> > > One meta-level comment - can you submit this patch + my dt-bindings
> > > patch as a v2?
> > > Some comments below.
> > > 
> > > On Tue, Apr 25, 2023 at 08:00:15PM +0800, Yangyu Chen wrote:
> > > > According to RISC-V ISA specification, the ISA naming strings are case
> > > > insensitive. The kernel docs require the riscv,isa string must be all
> > > > lowercase to simplify parsing currently. However, this limitation is not
> > > > consistent with RISC-V ISA Spec.
> > > 
> > > Please remove the above and cite ACPI's case-insensitivity as the
> > > rationale for this change.
> > > 
> > > > This patch modifies the ISA string parser in the kernel to support
> > > > case-insensitive ISA string parsing. It replaces `strncmp` with
> > > > `strncasecmp`, replaces `islower` with `isalpha`, and wraps the
> > > > dereferenced char in the parser with `tolower`.
> > > > 
> > > > Signed-off-by: Yangyu Chen <cyy@cyyself.name>
> > > > ---
> > > >  arch/riscv/kernel/cpu.c        |  6 ++++--
> > > >  arch/riscv/kernel/cpufeature.c | 20 ++++++++++----------
> > > >  2 files changed, 14 insertions(+), 12 deletions(-)
> > > > 
> > > > diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> > > > index 8400f0cc9704..531c76079b73 100644
> > > > --- a/arch/riscv/kernel/cpu.c
> > > > +++ b/arch/riscv/kernel/cpu.c
> > > > @@ -4,6 +4,7 @@
> > > >   */
> > > >  
> > > >  #include <linux/cpu.h>
> > > > +#include <linux/ctype.h>
> > > >  #include <linux/init.h>
> > > >  #include <linux/seq_file.h>
> > > >  #include <linux/of.h>
> > > > @@ -41,7 +42,7 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
> > > >  		pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart);
> > > >  		return -ENODEV;
> > > >  	}
> > > > -	if (isa[0] != 'r' || isa[1] != 'v') {
> > > > +	if (tolower(isa[0]) != 'r' || tolower(isa[1]) != 'v') {
> > > >  		pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa);
> > > >  		return -ENODEV;
> > > 
> > > I don't understand why this is even here in the first place. I'd be
> > > inclined to advocate for it's entire removal. Checking *only* that there
> > > is an "rv" in that string seems pointless to me. If you're on a 64-bit
> > > kernel and the node has riscv,isa = "rv32ima" it's gonna say it is okay?
> > > Drew what do you think?
> > 
> > It makes some sense to me as a garbage detector. It's unlikely the first
> > two bytes will be "rv" if the string is random junk.
> 
> Preventing the input of absolute rubbish is dt-validate's job & if the dtb
> itself has been corrupted somehow I suspect that we have bigger problems
> than checking for "rv" will solve.

We would, but would they be as easy to debug as this very early sanity
check? I agree, though, that doing the sanity checking in
riscv_of_processor_hartid(), which gets called from several different
places, seems a bit much. It'd be better to do that once, early, and
never again.

> 
> > also do a strlen(isa) >= 4 check first, though. of_property_read_string()
> > will succeed even when the string is "".
> 
> I don't think that checking that there are at least 4 characters isn't
> even sufficient. Either we should confirm that this is a valid riscv,isa
> to run on (so rv##ima w/ ## matching the kernel) or not bother at all.

Extending the check makes sense, but even more reason to do it outside
riscv_of_processor_hartid().

> 
> It's a different issue though, and I'd be inclined to revisit it in the
> future when the ACPI stuff is in, along with perhaps the cleanup parts
> of Heiko's series too.

Agreed.

Thanks,
drew
Yangyu Chen April 27, 2023, 9:36 a.m. UTC | #6
Hi, Conor

I have a different opinion about whether the isa string length should be
checked.

On Thu, 27 Apr 2023 10:04:34 +0100, Conor Dooley wrote:
> Preventing the input of absolute rubbish is dt-validate's job & if the dtb
> itself has been corrupted somehow I suspect that we have bigger problems
> than checking for "rv" will solve.

> > also do a strlen(isa) >= 4 check first, though. of_property_read_string()
> > will succeed even when the string is "".

> I don't think that checking that there are at least 4 characters isn't
> even sufficient. Either we should confirm that this is a valid riscv,isa
> to run on (so rv##ima w/ ## matching the kernel) or not bother at all.

What will happen if we have a bootloader in the future which allows
overriding isa string in the DT or ACPI table, the memory corruption could
happen if we didn't check it first.

Although the kernel will not boot in this case, anything about the user
input string should be parse carefuly that you never know what the future
code will be but leave a checker here will remind someone who will change
the parse in the future to check the length carefully.

So I agree with drew, we should do check strlen before check the first
two characters.

On Thu, 27 Apr 2023 10:04:34 +0100, Conor Dooley wrote:
> It's a different issue though, and I'd be inclined to revisit it in the
> future when the ACPI stuff is in, along with perhaps the cleanup parts
> of Heiko's series too.

Agreed.

Thanks,
Yangyu Chen
Conor Dooley April 27, 2023, 10 a.m. UTC | #7
On Thu, Apr 27, 2023 at 05:36:25PM +0800, Yangyu Chen wrote:
> Hi, Conor

> 
> On Thu, 27 Apr 2023 10:04:34 +0100, Conor Dooley wrote:
> > Preventing the input of absolute rubbish is dt-validate's job & if the dtb
> > itself has been corrupted somehow I suspect that we have bigger problems
> > than checking for "rv" will solve.
> 
> > > also do a strlen(isa) >= 4 check first, though. of_property_read_string()
> > > will succeed even when the string is "".
> 
> > I don't think that checking that there are at least 4 characters isn't
> > even sufficient. Either we should confirm that this is a valid riscv,isa
> > to run on (so rv##ima w/ ## matching the kernel) or not bother at all.
> 
> What will happen if we have a bootloader in the future which allows
> overriding isa string in the DT or ACPI table, the memory corruption could
> happen if we didn't check it first.

You can do this right now, no?
You can also overwrite the memory nodes and all sorts of other things
that'll cause your system to crash too. The isa string is nothing
special in that regard ;)

> Although the kernel will not boot in this case, anything about the user
> input string should be parse carefuly that you never know what the future
> code will be but leave a checker here will remind someone who will change
> the parse in the future to check the length carefully.

of_property_read_string() will always return something that is null
terminated on success, so we can just call strncmp() to make sure that
the hart supports something usable, no?

> I have a different opinion about whether the isa string length should be
> checked.

> So I agree with drew, we should do check strlen before check the first
> two characters.

In case it was lost in translation, I was never disputing checking that
there is a string before accessing it like this, but rather questioning
why we do such a limited check here at all.

Cheers,
Conor.
Yangyu Chen April 27, 2023, 12:47 p.m. UTC | #8
Hi, Conor

Thanks for your meaningful reviews. I agree with most of your advice but
have a question about the code about checking the first 2 characters are
"rv" in `arch/riscv/kernel/cpu.c`.

On Wed, 26 Apr 2023 19:54:39 +0100, Conor Dooley wrote:
> > @@ -41,7 +42,7 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
> >  		pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart);
> >  		return -ENODEV;
> >  	}
> > -	if (isa[0] != 'r' || isa[1] != 'v') {
> > +	if (tolower(isa[0]) != 'r' || tolower(isa[1]) != 'v') {
> >  		pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa);
> >  		return -ENODEV;
> 
> I don't understand why this is even here in the first place. I'd be
> inclined to advocate for it's entire removal. Checking *only* that there
> is an "rv" in that string seems pointless to me. If you're on a 64-bit
> kernel and the node has riscv,isa = "rv32ima" it's gonna say it is okay?
> Drew what do you think?

I think this code could be a workaround for running rv32 S-Mode on rv64
CPU without changing the DT, although the proper way should be to change
this field in DT by bootloader or any other software.

I have tested a simple rv64imac CPU core and left the `riscv,isa` string
empty in the DT and removed the above 3 lines check from the kernel, and
the kernel boots successfully, and using busybox as init is also ok. 
However, if this check exists, the kernel will panic at `setup_smp` due to
`BUG_ON(!found_boot_cpu)` in `setup_smp`.

I am wondering whether this should remove or add a more sufficient
validation. Although this function will not be called in ACPI as I
reviewed the recent ACPI patches[1], it will not be a problem if I submit
this patch for better ACPI support. However, If I just simply remove it
from my patch and submit patch v2 directly, the ISA string in ACPI mode
with all uppercase letters will be OK. But in DT mode, the kernel behavior
will accept the ISA string with all uppercase letters except the first two
"rv". Do you think this behavior is different between DT and ACPI can be
OK?

After some investigation, I suggest removing this validation since the
validation is useless for a proper DT and the recent ACPI patches[1] do
not validate the ISA strings, so we will have the same behavior between
DT and ACPI.

[1] https://lore.kernel.org/linux-riscv/20230404182037.863533-1-sunilvl@ventanamicro.com/

Thanks,
Yangyu Chen
Conor Dooley April 27, 2023, 5:28 p.m. UTC | #9
On Thu, Apr 27, 2023 at 08:47:18PM +0800, Yangyu Chen wrote:
> Hi, Conor
> 
> Thanks for your meaningful reviews. I agree with most of your advice but
> have a question about the code about checking the first 2 characters are
> "rv" in `arch/riscv/kernel/cpu.c`.
> 
> On Wed, 26 Apr 2023 19:54:39 +0100, Conor Dooley wrote:
> > > @@ -41,7 +42,7 @@ int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
> > >  		pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart);
> > >  		return -ENODEV;
> > >  	}
> > > -	if (isa[0] != 'r' || isa[1] != 'v') {
> > > +	if (tolower(isa[0]) != 'r' || tolower(isa[1]) != 'v') {
> > >  		pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa);
> > >  		return -ENODEV;
> > 
> > I don't understand why this is even here in the first place. I'd be
> > inclined to advocate for it's entire removal. Checking *only* that there
> > is an "rv" in that string seems pointless to me. If you're on a 64-bit
> > kernel and the node has riscv,isa = "rv32ima" it's gonna say it is okay?
> > Drew what do you think?
> 
> I think this code could be a workaround for running rv32 S-Mode on rv64
> CPU without changing the DT, although the proper way should be to change
> this field in DT by bootloader or any other software.
> 
> I have tested a simple rv64imac CPU core and left the `riscv,isa` string
> empty in the DT and removed the above 3 lines check from the kernel, and
> the kernel boots successfully, and using busybox as init is also ok. 
> However, if this check exists, the kernel will panic at `setup_smp` due to
> `BUG_ON(!found_boot_cpu)` in `setup_smp`.

The initramfs I have fails to boot because it is build with FP support.
Out of curiosity, what shows up in /proc/cpuinfo in that case?

> I am wondering whether this should remove or add a more sufficient
> validation. Although this function will not be called in ACPI as I
> reviewed the recent ACPI patches[1], it will not be a problem if I submit
> this patch for better ACPI support. However, If I just simply remove it
> from my patch and submit patch v2 directly, the ISA string in ACPI mode
> with all uppercase letters will be OK. But in DT mode, the kernel behavior
> will accept the ISA string with all uppercase letters except the first two
> "rv". Do you think this behavior is different between DT and ACPI can be
> OK?

A difference would be fine, if it was that ACPI allowed caps and DT
didn't. But allowing caps everywhere other than the RV just seems a bit
silly to me, so I would rather allow the capitalisation of RV.

> After some investigation, I suggest removing this validation since the
> validation is useless for a proper DT and the recent ACPI patches[1] do
> not validate the ISA strings, so we will have the same behavior between
> DT and ACPI.

I dunno. I'd like to split that function in 2 actually, but I would
like the ACPI stuff to land before doing so. I think for now, what might
be best is checking that it has a sufficient strlen in a separate patch,
earlier in your series, and making the check case-insensitive as you have
done already here.

Cheers,
Conor.
diff mbox series

Patch

diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 8400f0cc9704..531c76079b73 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -4,6 +4,7 @@ 
  */
 
 #include <linux/cpu.h>
+#include <linux/ctype.h>
 #include <linux/init.h>
 #include <linux/seq_file.h>
 #include <linux/of.h>
@@ -41,7 +42,7 @@  int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart)
 		pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart);
 		return -ENODEV;
 	}
-	if (isa[0] != 'r' || isa[1] != 'v') {
+	if (tolower(isa[0]) != 'r' || tolower(isa[1]) != 'v') {
 		pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa);
 		return -ENODEV;
 	}
@@ -228,7 +229,8 @@  static void print_isa(struct seq_file *f, const char *isa)
 
 	seq_puts(f, "isa\t\t: ");
 	/* Print the rv[64/32] part */
-	seq_write(f, isa, 4);
+	for (i = 0; i < 4; i++)
+		seq_putc(f, tolower(isa[i]));
 	for (i = 0; i < sizeof(base_riscv_exts); i++) {
 		if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a'))
 			/* Print only enabled the base ISA extensions */
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 59d58ee0f68d..c01dd144addc 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -120,10 +120,10 @@  void __init riscv_fill_hwcap(void)
 
 		temp = isa;
 #if IS_ENABLED(CONFIG_32BIT)
-		if (!strncmp(isa, "rv32", 4))
+		if (!strncasecmp(isa, "rv32", 4))
 			isa += 4;
 #elif IS_ENABLED(CONFIG_64BIT)
-		if (!strncmp(isa, "rv64", 4))
+		if (!strncasecmp(isa, "rv64", 4))
 			isa += 4;
 #endif
 		/* The riscv,isa DT property must start with rv64 or rv32 */
@@ -135,7 +135,7 @@  void __init riscv_fill_hwcap(void)
 			const char *ext_end = isa;
 			bool ext_long = false, ext_err = false;
 
-			switch (*ext) {
+			switch (tolower(*ext)) {
 			case 's':
 				/**
 				 * Workaround for invalid single-letter 's' & 'u'(QEMU).
@@ -143,7 +143,7 @@  void __init riscv_fill_hwcap(void)
 				 * not valid ISA extensions. It works until multi-letter
 				 * extension starting with "Su" appears.
 				 */
-				if (ext[-1] != '_' && ext[1] == 'u') {
+				if (ext[-1] != '_' && tolower(ext[1]) == 'u') {
 					++isa;
 					ext_err = true;
 					break;
@@ -154,7 +154,7 @@  void __init riscv_fill_hwcap(void)
 				ext_long = true;
 				/* Multi-letter extension must be delimited */
 				for (; *isa && *isa != '_'; ++isa)
-					if (unlikely(!islower(*isa)
+					if (unlikely(!isalpha(*isa)
 						     && !isdigit(*isa)))
 						ext_err = true;
 				/* Parse backwards */
@@ -166,7 +166,7 @@  void __init riscv_fill_hwcap(void)
 				/* Skip the minor version */
 				while (isdigit(*--ext_end))
 					;
-				if (ext_end[0] != 'p'
+				if (tolower(ext_end[0]) != 'p'
 				    || !isdigit(ext_end[-1])) {
 					/* Advance it to offset the pre-decrement */
 					++ext_end;
@@ -178,7 +178,7 @@  void __init riscv_fill_hwcap(void)
 				++ext_end;
 				break;
 			default:
-				if (unlikely(!islower(*ext))) {
+				if (unlikely(!isalpha(*ext))) {
 					ext_err = true;
 					break;
 				}
@@ -188,7 +188,7 @@  void __init riscv_fill_hwcap(void)
 				/* Skip the minor version */
 				while (isdigit(*++isa))
 					;
-				if (*isa != 'p')
+				if (tolower(*isa) != 'p')
 					break;
 				if (!isdigit(*++isa)) {
 					--isa;
@@ -205,7 +205,7 @@  void __init riscv_fill_hwcap(void)
 #define SET_ISA_EXT_MAP(name, bit)						\
 			do {							\
 				if ((ext_end - ext == sizeof(name) - 1) &&	\
-				     !memcmp(ext, name, sizeof(name) - 1) &&	\
+				     !strncasecmp(ext, name, sizeof(name) - 1) &&	\
 				     riscv_isa_extension_check(bit))		\
 					set_bit(bit, this_isa);			\
 			} while (false)						\
@@ -213,7 +213,7 @@  void __init riscv_fill_hwcap(void)
 			if (unlikely(ext_err))
 				continue;
 			if (!ext_long) {
-				int nr = *ext - 'a';
+				int nr = tolower(*ext) - 'a';
 
 				if (riscv_isa_extension_check(nr)) {
 					this_hwcap |= isa2hwcap[nr];