diff mbox series

[2/2] RISC-V: Extract base ISA from device tree

Message ID 8d7e1937f9476d14f64a53684b11bedd6f4ff88d.1644987761.git.research_trasio@irq.a4lg.com (mailing list archive)
State New, archived
Headers show
Series RISC-V: some improvements for Atish's framework (for v5) | expand

Commit Message

Tsukasa OI Feb. 16, 2022, 5:04 a.m. UTC
This commit replaces print_isa function to extract base ISA from device
tree ("riscv,isa").  It uses a subset of Tsukasa's riscv_fill_hwcap ISA
parser.

Design choices (1):

It constructs base ISA string from the original string (does not cut
the original string like Atish's).  This is to strip version numbers
(too minor to represent as major ISA) but this behavior may be
debatable.

Design choices (2):

It skips when single-letter 'S' or 'U' is encountered.  It improves
familiarity with regular ISA string.

This commit is intended to be squashed into Atish's isa_framework_v4
PATCH 6/6.

Signed-off-by: Tsukasa OI <research_trasio@irq.a4lg.com>
---
 arch/riscv/kernel/cpu.c | 83 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 74 insertions(+), 9 deletions(-)

Comments

Atish Patra Feb. 16, 2022, 6:01 a.m. UTC | #1
On Tue, Feb 15, 2022 at 9:04 PM Tsukasa OI <research_trasio@irq.a4lg.com> wrote:
>
> This commit replaces print_isa function to extract base ISA from device
> tree ("riscv,isa").  It uses a subset of Tsukasa's riscv_fill_hwcap ISA
> parser.
>
> Design choices (1):
>
> It constructs base ISA string from the original string (does not cut
> the original string like Atish's).  This is to strip version numbers
> (too minor to represent as major ISA) but this behavior may be
> debatable.
>

A simpler solution would be to just invoke
__riscv_isa_extension_available for all the base
extensions (imafdc) and print the base isa string.
We don't need to parse the ISA string again here.

> Design choices (2):
>
> It skips when single-letter 'S' or 'U' is encountered.  It improves
> familiarity with regular ISA string.
>

I don't think this is necessary because only Qemu appends 'S' & 'U' in
the isa string.
Your patch in Qemu will fix that for future releases of Qemu.
All other commonly used platforms (hardware/spike) already use the
correct ISA string.

> This commit is intended to be squashed into Atish's isa_framework_v4
> PATCH 6/6.
>
> Signed-off-by: Tsukasa OI <research_trasio@irq.a4lg.com>
> ---
>  arch/riscv/kernel/cpu.c | 83 ++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 74 insertions(+), 9 deletions(-)
>
> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> index 6f9660c0a973..3f9607a66d95 100644
> --- a/arch/riscv/kernel/cpu.c
> +++ b/arch/riscv/kernel/cpu.c
> @@ -101,17 +101,82 @@ static void print_isa_ext(struct seq_file *f)
>
>  static void print_isa(struct seq_file *f, const char *isa)
>  {
> -       char *ext_start;
> -       int isa_len = strlen(isa);
> -       int base_isa_len = isa_len;
> -
> -       ext_start = strnchr(isa, isa_len, '_');
> -       if (ext_start)
> -               base_isa_len = isa_len - strlen(ext_start);
> +       unsigned char parse_break = 0;
> +       char base_isa[RISCV_ISA_EXT_BASE + 5];
> +       char *base_isa_end = base_isa;
> +       const char *isa_end = isa;
> +#if IS_ENABLED(CONFIG_32BIT)
> +       if (!strncmp(isa, "rv32", 4))
> +               isa_end += 4;
> +#elif IS_ENABLED(CONFIG_64BIT)
> +       if (!strncmp(isa, "rv64", 4))
> +               isa_end += 4;
> +#endif
> +       if (isa != isa_end) {
> +               strncpy(base_isa, isa, isa_end - isa);
> +               base_isa_end += isa_end - isa;
> +               for (; *isa_end; ++isa_end) {
> +                       switch (*isa_end++) {
> +                       case 'x':
> +                       case 'z':
> +                               parse_break = 1;
> +                               break;
> +                       case 's':
> +                               if (*isa_end != 'u') {
> +                                       parse_break = 1;
> +                                       break;
> +                               }
> +                               /**
> +                                * Workaround for invalid single-letter 's'
> +                                * (QEMU).  Should break for valid ISA string.
> +                                */
> +                               fallthrough;
> +                       default:
> +                               if (unlikely(!islower(isa_end[-1])
> +                                   || base_isa_end == base_isa + sizeof(base_isa))) {
> +                                       parse_break = 2;
> +                                       break;
> +                               }
> +                               switch (isa_end[-1]) {
> +                               case 's':
> +                               case 'u':
> +                                       break;
> +                               default:
> +                                       *base_isa_end++ = isa_end[-1];
> +                               }
> +                               /* Skip version number */
> +                               if (!isdigit(*isa_end))
> +                                       break;
> +                               while (isdigit(*++isa_end))
> +                                       ;
> +                               if (*isa_end != 'p')
> +                                       break;
> +                               if (!isdigit(*++isa_end)) {
> +                                       --isa_end;
> +                                       break;
> +                               }
> +                               while (isdigit(*++isa_end))
> +                                       ;
> +                               break;
> +                       }
> +                       if (*isa_end != '_')
> +                               --isa_end;
> +                       if (parse_break)
> +                               break;
> +               }
> +       }
>
> -       /* Print only the base ISA as it is */
> +       /**
> +        * Print only the base ISA
> +        * (original ISA string as is if an error is encountered)
> +        */
>         seq_puts(f, "isa\t\t: ");
> -       seq_write(f, isa, base_isa_len);
> +       if (parse_break < 2) {
> +               seq_write(f, base_isa, base_isa_end - base_isa);
> +       } else {
> +               isa_end += strlen(isa_end);
> +               seq_write(f, isa, isa_end - isa);
> +       }
>         seq_puts(f, "\n");
>  }
>
> --
> 2.32.0
>
Tsukasa OI Feb. 16, 2022, 6:58 a.m. UTC | #2
On 2022/02/16 15:01, Atish Patra wrote:
> On Tue, Feb 15, 2022 at 9:04 PM Tsukasa OI <research_trasio@irq.a4lg.com> wrote:
>>
>> This commit replaces print_isa function to extract base ISA from device
>> tree ("riscv,isa").  It uses a subset of Tsukasa's riscv_fill_hwcap ISA
>> parser.
>>
>> Design choices (1):
>>
>> It constructs base ISA string from the original string (does not cut
>> the original string like Atish's).  This is to strip version numbers
>> (too minor to represent as major ISA) but this behavior may be
>> debatable.
>>
> 
> A simpler solution would be to just invoke
> __riscv_isa_extension_available for all the base
> extensions (imafdc) and print the base isa string.
> We don't need to parse the ISA string again here.

I agree that my patchset is ugly and simpler solution would be good.
(Having two parser for same string is definitely not the best idea.)
There are some testcases (some are not accepted by Spike's --isa option
and requires custom Device tree) for testing:

*   rv64imac
*   rv64imacsu      (invalid ISA string; test QEMU workaround)
*   rv64imafdch
*   rv64imafdcsuh   (invalid ISA string; test QEMU workaround)
*   rv64imafdc_svinval_svnapot
*   rv64imafdcsvinval_svnapot
*   rv64i2p1m2a2fdcsvinval
*   rv64i2p1_m_afdcsvinval

> 
>> Design choices (2):
>>
>> It skips when single-letter 'S' or 'U' is encountered.  It improves
>> familiarity with regular ISA string.
>>
> 
> I don't think this is necessary because only Qemu appends 'S' & 'U' in
> the isa string.
> Your patch in Qemu will fix that for future releases of Qemu.
> All other commonly used platforms (hardware/spike) already use the
> correct ISA string.

Yes, only QEMU.  It's not a serious problem like on cpufeature.c but would
be nice to remove them (when exposing to the usermode) to prevent confusion
on usermode side.

If you invoke __riscv_isa_extension_available to generate base ISA string,
would you exclude 'S' and 'U'?

> 
>> This commit is intended to be squashed into Atish's isa_framework_v4
>> PATCH 6/6.
>>
>> Signed-off-by: Tsukasa OI <research_trasio@irq.a4lg.com>
>> ---
>>  arch/riscv/kernel/cpu.c | 83 ++++++++++++++++++++++++++++++++++++-----
>>  1 file changed, 74 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
>> index 6f9660c0a973..3f9607a66d95 100644
>> --- a/arch/riscv/kernel/cpu.c
>> +++ b/arch/riscv/kernel/cpu.c
>> @@ -101,17 +101,82 @@ static void print_isa_ext(struct seq_file *f)
>>
>>  static void print_isa(struct seq_file *f, const char *isa)
>>  {
>> -       char *ext_start;
>> -       int isa_len = strlen(isa);
>> -       int base_isa_len = isa_len;
>> -
>> -       ext_start = strnchr(isa, isa_len, '_');
>> -       if (ext_start)
>> -               base_isa_len = isa_len - strlen(ext_start);
>> +       unsigned char parse_break = 0;
>> +       char base_isa[RISCV_ISA_EXT_BASE + 5];
>> +       char *base_isa_end = base_isa;
>> +       const char *isa_end = isa;
>> +#if IS_ENABLED(CONFIG_32BIT)
>> +       if (!strncmp(isa, "rv32", 4))
>> +               isa_end += 4;
>> +#elif IS_ENABLED(CONFIG_64BIT)
>> +       if (!strncmp(isa, "rv64", 4))
>> +               isa_end += 4;
>> +#endif
>> +       if (isa != isa_end) {
>> +               strncpy(base_isa, isa, isa_end - isa);
>> +               base_isa_end += isa_end - isa;
>> +               for (; *isa_end; ++isa_end) {
>> +                       switch (*isa_end++) {
>> +                       case 'x':
>> +                       case 'z':
>> +                               parse_break = 1;
>> +                               break;
>> +                       case 's':
>> +                               if (*isa_end != 'u') {
>> +                                       parse_break = 1;
>> +                                       break;
>> +                               }
>> +                               /**
>> +                                * Workaround for invalid single-letter 's'
>> +                                * (QEMU).  Should break for valid ISA string.
>> +                                */
>> +                               fallthrough;
>> +                       default:
>> +                               if (unlikely(!islower(isa_end[-1])
>> +                                   || base_isa_end == base_isa + sizeof(base_isa))) {
>> +                                       parse_break = 2;
>> +                                       break;
>> +                               }
>> +                               switch (isa_end[-1]) {
>> +                               case 's':
>> +                               case 'u':
>> +                                       break;
>> +                               default:
>> +                                       *base_isa_end++ = isa_end[-1];
>> +                               }
>> +                               /* Skip version number */
>> +                               if (!isdigit(*isa_end))
>> +                                       break;
>> +                               while (isdigit(*++isa_end))
>> +                                       ;
>> +                               if (*isa_end != 'p')
>> +                                       break;
>> +                               if (!isdigit(*++isa_end)) {
>> +                                       --isa_end;
>> +                                       break;
>> +                               }
>> +                               while (isdigit(*++isa_end))
>> +                                       ;
>> +                               break;
>> +                       }
>> +                       if (*isa_end != '_')
>> +                               --isa_end;
>> +                       if (parse_break)
>> +                               break;
>> +               }
>> +       }
>>
>> -       /* Print only the base ISA as it is */
>> +       /**
>> +        * Print only the base ISA
>> +        * (original ISA string as is if an error is encountered)
>> +        */
>>         seq_puts(f, "isa\t\t: ");
>> -       seq_write(f, isa, base_isa_len);
>> +       if (parse_break < 2) {
>> +               seq_write(f, base_isa, base_isa_end - base_isa);
>> +       } else {
>> +               isa_end += strlen(isa_end);
>> +               seq_write(f, isa, isa_end - isa);
>> +       }
>>         seq_puts(f, "\n");
>>  }
>>
>> --
>> 2.32.0
>>
> 
>
Atish Patra Feb. 16, 2022, 7:43 a.m. UTC | #3
On Tue, Feb 15, 2022 at 10:59 PM Tsukasa OI
<research_trasio@irq.a4lg.com> wrote:
>
> On 2022/02/16 15:01, Atish Patra wrote:
> > On Tue, Feb 15, 2022 at 9:04 PM Tsukasa OI <research_trasio@irq.a4lg.com> wrote:
> >>
> >> This commit replaces print_isa function to extract base ISA from device
> >> tree ("riscv,isa").  It uses a subset of Tsukasa's riscv_fill_hwcap ISA
> >> parser.
> >>
> >> Design choices (1):
> >>
> >> It constructs base ISA string from the original string (does not cut
> >> the original string like Atish's).  This is to strip version numbers
> >> (too minor to represent as major ISA) but this behavior may be
> >> debatable.
> >>
> >
> > A simpler solution would be to just invoke
> > __riscv_isa_extension_available for all the base
> > extensions (imafdc) and print the base isa string.
> > We don't need to parse the ISA string again here.
>
> I agree that my patchset is ugly and simpler solution would be good.
> (Having two parser for same string is definitely not the best idea.)
> There are some testcases (some are not accepted by Spike's --isa option
> and requires custom Device tree) for testing:
>
> *   rv64imac
> *   rv64imacsu      (invalid ISA string; test QEMU workaround)
> *   rv64imafdch
> *   rv64imafdcsuh   (invalid ISA string; test QEMU workaround)
> *   rv64imafdc_svinval_svnapot
> *   rv64imafdcsvinval_svnapot
> *   rv64i2p1m2a2fdcsvinval
> *   rv64i2p1_m_afdcsvinval
>

Thanks. This is a good set of test cases that covers most of the cases.

> >
> >> Design choices (2):
> >>
> >> It skips when single-letter 'S' or 'U' is encountered.  It improves
> >> familiarity with regular ISA string.
> >>
> >
> > I don't think this is necessary because only Qemu appends 'S' & 'U' in
> > the isa string.
> > Your patch in Qemu will fix that for future releases of Qemu.
> > All other commonly used platforms (hardware/spike) already use the
> > correct ISA string.
>
> Yes, only QEMU.  It's not a serious problem like on cpufeature.c but would
> be nice to remove them (when exposing to the usermode) to prevent confusion
> on usermode side.
>
> If you invoke __riscv_isa_extension_available to generate base ISA string,
> would you exclude 'S' and 'U'?
>

Yeah. That's easy. The possible valid base extensions are "IMAFHDQCBJPV".
(all the possible defined bits in misa except 'S' & 'U')
So we will invoke __riscv_isa_extension_available for those only.

> >
> >> This commit is intended to be squashed into Atish's isa_framework_v4
> >> PATCH 6/6.
> >>
> >> Signed-off-by: Tsukasa OI <research_trasio@irq.a4lg.com>
> >> ---
> >>  arch/riscv/kernel/cpu.c | 83 ++++++++++++++++++++++++++++++++++++-----
> >>  1 file changed, 74 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> >> index 6f9660c0a973..3f9607a66d95 100644
> >> --- a/arch/riscv/kernel/cpu.c
> >> +++ b/arch/riscv/kernel/cpu.c
> >> @@ -101,17 +101,82 @@ static void print_isa_ext(struct seq_file *f)
> >>
> >>  static void print_isa(struct seq_file *f, const char *isa)
> >>  {
> >> -       char *ext_start;
> >> -       int isa_len = strlen(isa);
> >> -       int base_isa_len = isa_len;
> >> -
> >> -       ext_start = strnchr(isa, isa_len, '_');
> >> -       if (ext_start)
> >> -               base_isa_len = isa_len - strlen(ext_start);
> >> +       unsigned char parse_break = 0;
> >> +       char base_isa[RISCV_ISA_EXT_BASE + 5];
> >> +       char *base_isa_end = base_isa;
> >> +       const char *isa_end = isa;
> >> +#if IS_ENABLED(CONFIG_32BIT)
> >> +       if (!strncmp(isa, "rv32", 4))
> >> +               isa_end += 4;
> >> +#elif IS_ENABLED(CONFIG_64BIT)
> >> +       if (!strncmp(isa, "rv64", 4))
> >> +               isa_end += 4;
> >> +#endif
> >> +       if (isa != isa_end) {
> >> +               strncpy(base_isa, isa, isa_end - isa);
> >> +               base_isa_end += isa_end - isa;
> >> +               for (; *isa_end; ++isa_end) {
> >> +                       switch (*isa_end++) {
> >> +                       case 'x':
> >> +                       case 'z':
> >> +                               parse_break = 1;
> >> +                               break;
> >> +                       case 's':
> >> +                               if (*isa_end != 'u') {
> >> +                                       parse_break = 1;
> >> +                                       break;
> >> +                               }
> >> +                               /**
> >> +                                * Workaround for invalid single-letter 's'
> >> +                                * (QEMU).  Should break for valid ISA string.
> >> +                                */
> >> +                               fallthrough;
> >> +                       default:
> >> +                               if (unlikely(!islower(isa_end[-1])
> >> +                                   || base_isa_end == base_isa + sizeof(base_isa))) {
> >> +                                       parse_break = 2;
> >> +                                       break;
> >> +                               }
> >> +                               switch (isa_end[-1]) {
> >> +                               case 's':
> >> +                               case 'u':
> >> +                                       break;
> >> +                               default:
> >> +                                       *base_isa_end++ = isa_end[-1];
> >> +                               }
> >> +                               /* Skip version number */
> >> +                               if (!isdigit(*isa_end))
> >> +                                       break;
> >> +                               while (isdigit(*++isa_end))
> >> +                                       ;
> >> +                               if (*isa_end != 'p')
> >> +                                       break;
> >> +                               if (!isdigit(*++isa_end)) {
> >> +                                       --isa_end;
> >> +                                       break;
> >> +                               }
> >> +                               while (isdigit(*++isa_end))
> >> +                                       ;
> >> +                               break;
> >> +                       }
> >> +                       if (*isa_end != '_')
> >> +                               --isa_end;
> >> +                       if (parse_break)
> >> +                               break;
> >> +               }
> >> +       }
> >>
> >> -       /* Print only the base ISA as it is */
> >> +       /**
> >> +        * Print only the base ISA
> >> +        * (original ISA string as is if an error is encountered)
> >> +        */
> >>         seq_puts(f, "isa\t\t: ");
> >> -       seq_write(f, isa, base_isa_len);
> >> +       if (parse_break < 2) {
> >> +               seq_write(f, base_isa, base_isa_end - base_isa);
> >> +       } else {
> >> +               isa_end += strlen(isa_end);
> >> +               seq_write(f, isa, isa_end - isa);
> >> +       }
> >>         seq_puts(f, "\n");
> >>  }
> >>
> >> --
> >> 2.32.0
> >>
> >
> >
Tsukasa OI Feb. 21, 2022, 1:42 p.m. UTC | #4
On 2022/02/16 16:43, Atish Patra wrote:
> On Tue, Feb 15, 2022 at 10:59 PM Tsukasa OI
> <research_trasio@irq.a4lg.com> wrote:
>>
>> On 2022/02/16 15:01, Atish Patra wrote:
>>> On Tue, Feb 15, 2022 at 9:04 PM Tsukasa OI <research_trasio@irq.a4lg.com> wrote:
>>>>
>>>> This commit replaces print_isa function to extract base ISA from device
>>>> tree ("riscv,isa").  It uses a subset of Tsukasa's riscv_fill_hwcap ISA
>>>> parser.
>>>>
>>>> Design choices (1):
>>>>
>>>> It constructs base ISA string from the original string (does not cut
>>>> the original string like Atish's).  This is to strip version numbers
>>>> (too minor to represent as major ISA) but this behavior may be
>>>> debatable.
>>>>
>>>
>>> A simpler solution would be to just invoke
>>> __riscv_isa_extension_available for all the base
>>> extensions (imafdc) and print the base isa string.
>>> We don't need to parse the ISA string again here.
>>
>> I agree that my patchset is ugly and simpler solution would be good.
>> (Having two parser for same string is definitely not the best idea.)
>> There are some testcases (some are not accepted by Spike's --isa option
>> and requires custom Device tree) for testing:
>>
>> *   rv64imac
>> *   rv64imacsu      (invalid ISA string; test QEMU workaround)
>> *   rv64imafdch
>> *   rv64imafdcsuh   (invalid ISA string; test QEMU workaround)
>> *   rv64imafdc_svinval_svnapot
>> *   rv64imafdcsvinval_svnapot
>> *   rv64i2p1m2a2fdcsvinval
>> *   rv64i2p1_m_afdcsvinval
>>
> 
> Thanks. This is a good set of test cases that covers most of the cases.
> 
>>>
>>>> Design choices (2):
>>>>
>>>> It skips when single-letter 'S' or 'U' is encountered.  It improves
>>>> familiarity with regular ISA string.
>>>>
>>>
>>> I don't think this is necessary because only Qemu appends 'S' & 'U' in
>>> the isa string.
>>> Your patch in Qemu will fix that for future releases of Qemu.
>>> All other commonly used platforms (hardware/spike) already use the
>>> correct ISA string.
>>
>> Yes, only QEMU.  It's not a serious problem like on cpufeature.c but would
>> be nice to remove them (when exposing to the usermode) to prevent confusion
>> on usermode side.
>>
>> If you invoke __riscv_isa_extension_available to generate base ISA string,
>> would you exclude 'S' and 'U'?
>>
> 
> Yeah. That's easy. The possible valid base extensions are "IMAFHDQCBJPV".
> (all the possible defined bits in misa except 'S' & 'U')
> So we will invoke __riscv_isa_extension_available for those only.

Order "IEMAFDQLCBKJTPVH" would be sufficient.
Note that some of them may be incompatible with Linux or currently has
no plan to be added as a single-letter extension.

"E"**: RV32E
     (incompatible with Linux?)
"L"**: Decimal floating point
     (placeholder is removed from the ISA specification)
"B"**: Bit manipulation
     (no single-letter extension plan but multiple subextensions)
"K"**: Cryptography
     (no single-letter extension plan but multiple subextensions)
"J"**: Extensions for dynamic languages
     (no single-letter extension but planned multiple subextensions)
"T"**: Transactional memory
     (placeholder is removed from the ISA specification)
"P"  : narrow SIMD using GPR
     (this order is ratified but "P" is not)
"H"  : Hypervisor (different position compared to your post)
     (this order is not ratified but likely placed here
      for compatibility with QEMU and Spike)

Tsukasa

> 
>>>
>>>> This commit is intended to be squashed into Atish's isa_framework_v4
>>>> PATCH 6/6.
>>>>
>>>> Signed-off-by: Tsukasa OI <research_trasio@irq.a4lg.com>
>>>> ---
>>>>  arch/riscv/kernel/cpu.c | 83 ++++++++++++++++++++++++++++++++++++-----
>>>>  1 file changed, 74 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
>>>> index 6f9660c0a973..3f9607a66d95 100644
>>>> --- a/arch/riscv/kernel/cpu.c
>>>> +++ b/arch/riscv/kernel/cpu.c
>>>> @@ -101,17 +101,82 @@ static void print_isa_ext(struct seq_file *f)
>>>>
>>>>  static void print_isa(struct seq_file *f, const char *isa)
>>>>  {
>>>> -       char *ext_start;
>>>> -       int isa_len = strlen(isa);
>>>> -       int base_isa_len = isa_len;
>>>> -
>>>> -       ext_start = strnchr(isa, isa_len, '_');
>>>> -       if (ext_start)
>>>> -               base_isa_len = isa_len - strlen(ext_start);
>>>> +       unsigned char parse_break = 0;
>>>> +       char base_isa[RISCV_ISA_EXT_BASE + 5];
>>>> +       char *base_isa_end = base_isa;
>>>> +       const char *isa_end = isa;
>>>> +#if IS_ENABLED(CONFIG_32BIT)
>>>> +       if (!strncmp(isa, "rv32", 4))
>>>> +               isa_end += 4;
>>>> +#elif IS_ENABLED(CONFIG_64BIT)
>>>> +       if (!strncmp(isa, "rv64", 4))
>>>> +               isa_end += 4;
>>>> +#endif
>>>> +       if (isa != isa_end) {
>>>> +               strncpy(base_isa, isa, isa_end - isa);
>>>> +               base_isa_end += isa_end - isa;
>>>> +               for (; *isa_end; ++isa_end) {
>>>> +                       switch (*isa_end++) {
>>>> +                       case 'x':
>>>> +                       case 'z':
>>>> +                               parse_break = 1;
>>>> +                               break;
>>>> +                       case 's':
>>>> +                               if (*isa_end != 'u') {
>>>> +                                       parse_break = 1;
>>>> +                                       break;
>>>> +                               }
>>>> +                               /**
>>>> +                                * Workaround for invalid single-letter 's'
>>>> +                                * (QEMU).  Should break for valid ISA string.
>>>> +                                */
>>>> +                               fallthrough;
>>>> +                       default:
>>>> +                               if (unlikely(!islower(isa_end[-1])
>>>> +                                   || base_isa_end == base_isa + sizeof(base_isa))) {
>>>> +                                       parse_break = 2;
>>>> +                                       break;
>>>> +                               }
>>>> +                               switch (isa_end[-1]) {
>>>> +                               case 's':
>>>> +                               case 'u':
>>>> +                                       break;
>>>> +                               default:
>>>> +                                       *base_isa_end++ = isa_end[-1];
>>>> +                               }
>>>> +                               /* Skip version number */
>>>> +                               if (!isdigit(*isa_end))
>>>> +                                       break;
>>>> +                               while (isdigit(*++isa_end))
>>>> +                                       ;
>>>> +                               if (*isa_end != 'p')
>>>> +                                       break;
>>>> +                               if (!isdigit(*++isa_end)) {
>>>> +                                       --isa_end;
>>>> +                                       break;
>>>> +                               }
>>>> +                               while (isdigit(*++isa_end))
>>>> +                                       ;
>>>> +                               break;
>>>> +                       }
>>>> +                       if (*isa_end != '_')
>>>> +                               --isa_end;
>>>> +                       if (parse_break)
>>>> +                               break;
>>>> +               }
>>>> +       }
>>>>
>>>> -       /* Print only the base ISA as it is */
>>>> +       /**
>>>> +        * Print only the base ISA
>>>> +        * (original ISA string as is if an error is encountered)
>>>> +        */
>>>>         seq_puts(f, "isa\t\t: ");
>>>> -       seq_write(f, isa, base_isa_len);
>>>> +       if (parse_break < 2) {
>>>> +               seq_write(f, base_isa, base_isa_end - base_isa);
>>>> +       } else {
>>>> +               isa_end += strlen(isa_end);
>>>> +               seq_write(f, isa, isa_end - isa);
>>>> +       }
>>>>         seq_puts(f, "\n");
>>>>  }
>>>>
>>>> --
>>>> 2.32.0
>>>>
>>>
>>>
> 
> 
>
Atish Patra Feb. 22, 2022, 6:10 p.m. UTC | #5
On Mon, Feb 21, 2022 at 5:42 AM Tsukasa OI <research_trasio@irq.a4lg.com> wrote:
>
> On 2022/02/16 16:43, Atish Patra wrote:
> > On Tue, Feb 15, 2022 at 10:59 PM Tsukasa OI
> > <research_trasio@irq.a4lg.com> wrote:
> >>
> >> On 2022/02/16 15:01, Atish Patra wrote:
> >>> On Tue, Feb 15, 2022 at 9:04 PM Tsukasa OI <research_trasio@irq.a4lg.com> wrote:
> >>>>
> >>>> This commit replaces print_isa function to extract base ISA from device
> >>>> tree ("riscv,isa").  It uses a subset of Tsukasa's riscv_fill_hwcap ISA
> >>>> parser.
> >>>>
> >>>> Design choices (1):
> >>>>
> >>>> It constructs base ISA string from the original string (does not cut
> >>>> the original string like Atish's).  This is to strip version numbers
> >>>> (too minor to represent as major ISA) but this behavior may be
> >>>> debatable.
> >>>>
> >>>
> >>> A simpler solution would be to just invoke
> >>> __riscv_isa_extension_available for all the base
> >>> extensions (imafdc) and print the base isa string.
> >>> We don't need to parse the ISA string again here.
> >>
> >> I agree that my patchset is ugly and simpler solution would be good.
> >> (Having two parser for same string is definitely not the best idea.)
> >> There are some testcases (some are not accepted by Spike's --isa option
> >> and requires custom Device tree) for testing:
> >>
> >> *   rv64imac
> >> *   rv64imacsu      (invalid ISA string; test QEMU workaround)
> >> *   rv64imafdch
> >> *   rv64imafdcsuh   (invalid ISA string; test QEMU workaround)
> >> *   rv64imafdc_svinval_svnapot
> >> *   rv64imafdcsvinval_svnapot
> >> *   rv64i2p1m2a2fdcsvinval
> >> *   rv64i2p1_m_afdcsvinval
> >>
> >
> > Thanks. This is a good set of test cases that covers most of the cases.
> >
> >>>
> >>>> Design choices (2):
> >>>>
> >>>> It skips when single-letter 'S' or 'U' is encountered.  It improves
> >>>> familiarity with regular ISA string.
> >>>>
> >>>
> >>> I don't think this is necessary because only Qemu appends 'S' & 'U' in
> >>> the isa string.
> >>> Your patch in Qemu will fix that for future releases of Qemu.
> >>> All other commonly used platforms (hardware/spike) already use the
> >>> correct ISA string.
> >>
> >> Yes, only QEMU.  It's not a serious problem like on cpufeature.c but would
> >> be nice to remove them (when exposing to the usermode) to prevent confusion
> >> on usermode side.
> >>
> >> If you invoke __riscv_isa_extension_available to generate base ISA string,
> >> would you exclude 'S' and 'U'?
> >>
> >
> > Yeah. That's easy. The possible valid base extensions are "IMAFHDQCBJPV".
> > (all the possible defined bits in misa except 'S' & 'U')
> > So we will invoke __riscv_isa_extension_available for those only.
>
> Order "IEMAFDQLCBKJTPVH" would be sufficient.
> Note that some of them may be incompatible with Linux or currently has
> no plan to be added as a single-letter extension.
>
> "E"**: RV32E
>      (incompatible with Linux?)

Yes. No need to check for this.

> "L"**: Decimal floating point
>      (placeholder is removed from the ISA specification)
> "B"**: Bit manipulation
>      (no single-letter extension plan but multiple subextensions)
> "K"**: Cryptography
>      (no single-letter extension plan but multiple subextensions)
> "J"**: Extensions for dynamic languages
>      (no single-letter extension but planned multiple subextensions)
> "T"**: Transactional memory
>      (placeholder is removed from the ISA specification)

That's why I did not include L & T as the ISA spec doesn't talk about
it anymore.
I am in favor of not including right now and add it later if any
future ratified spec includes it.

> "P"  : narrow SIMD using GPR
>      (this order is ratified but "P" is not)
> "H"  : Hypervisor (different position compared to your post)
>      (this order is not ratified but likely placed here
>       for compatibility with QEMU and Spike)
>

I thought it would be after "imafdqc". I will change it as per your suggestion.
We can always fix if the ratified spec will specify a different order.

This is what base ISA extension (single letter) will look like (for now)

"imafdqcbkjpvh"

I will send the patch shortly.

> Tsukasa
>
> >
> >>>
> >>>> This commit is intended to be squashed into Atish's isa_framework_v4
> >>>> PATCH 6/6.
> >>>>
> >>>> Signed-off-by: Tsukasa OI <research_trasio@irq.a4lg.com>
> >>>> ---
> >>>>  arch/riscv/kernel/cpu.c | 83 ++++++++++++++++++++++++++++++++++++-----
> >>>>  1 file changed, 74 insertions(+), 9 deletions(-)
> >>>>
> >>>> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> >>>> index 6f9660c0a973..3f9607a66d95 100644
> >>>> --- a/arch/riscv/kernel/cpu.c
> >>>> +++ b/arch/riscv/kernel/cpu.c
> >>>> @@ -101,17 +101,82 @@ static void print_isa_ext(struct seq_file *f)
> >>>>
> >>>>  static void print_isa(struct seq_file *f, const char *isa)
> >>>>  {
> >>>> -       char *ext_start;
> >>>> -       int isa_len = strlen(isa);
> >>>> -       int base_isa_len = isa_len;
> >>>> -
> >>>> -       ext_start = strnchr(isa, isa_len, '_');
> >>>> -       if (ext_start)
> >>>> -               base_isa_len = isa_len - strlen(ext_start);
> >>>> +       unsigned char parse_break = 0;
> >>>> +       char base_isa[RISCV_ISA_EXT_BASE + 5];
> >>>> +       char *base_isa_end = base_isa;
> >>>> +       const char *isa_end = isa;
> >>>> +#if IS_ENABLED(CONFIG_32BIT)
> >>>> +       if (!strncmp(isa, "rv32", 4))
> >>>> +               isa_end += 4;
> >>>> +#elif IS_ENABLED(CONFIG_64BIT)
> >>>> +       if (!strncmp(isa, "rv64", 4))
> >>>> +               isa_end += 4;
> >>>> +#endif
> >>>> +       if (isa != isa_end) {
> >>>> +               strncpy(base_isa, isa, isa_end - isa);
> >>>> +               base_isa_end += isa_end - isa;
> >>>> +               for (; *isa_end; ++isa_end) {
> >>>> +                       switch (*isa_end++) {
> >>>> +                       case 'x':
> >>>> +                       case 'z':
> >>>> +                               parse_break = 1;
> >>>> +                               break;
> >>>> +                       case 's':
> >>>> +                               if (*isa_end != 'u') {
> >>>> +                                       parse_break = 1;
> >>>> +                                       break;
> >>>> +                               }
> >>>> +                               /**
> >>>> +                                * Workaround for invalid single-letter 's'
> >>>> +                                * (QEMU).  Should break for valid ISA string.
> >>>> +                                */
> >>>> +                               fallthrough;
> >>>> +                       default:
> >>>> +                               if (unlikely(!islower(isa_end[-1])
> >>>> +                                   || base_isa_end == base_isa + sizeof(base_isa))) {
> >>>> +                                       parse_break = 2;
> >>>> +                                       break;
> >>>> +                               }
> >>>> +                               switch (isa_end[-1]) {
> >>>> +                               case 's':
> >>>> +                               case 'u':
> >>>> +                                       break;
> >>>> +                               default:
> >>>> +                                       *base_isa_end++ = isa_end[-1];
> >>>> +                               }
> >>>> +                               /* Skip version number */
> >>>> +                               if (!isdigit(*isa_end))
> >>>> +                                       break;
> >>>> +                               while (isdigit(*++isa_end))
> >>>> +                                       ;
> >>>> +                               if (*isa_end != 'p')
> >>>> +                                       break;
> >>>> +                               if (!isdigit(*++isa_end)) {
> >>>> +                                       --isa_end;
> >>>> +                                       break;
> >>>> +                               }
> >>>> +                               while (isdigit(*++isa_end))
> >>>> +                                       ;
> >>>> +                               break;
> >>>> +                       }
> >>>> +                       if (*isa_end != '_')
> >>>> +                               --isa_end;
> >>>> +                       if (parse_break)
> >>>> +                               break;
> >>>> +               }
> >>>> +       }
> >>>>
> >>>> -       /* Print only the base ISA as it is */
> >>>> +       /**
> >>>> +        * Print only the base ISA
> >>>> +        * (original ISA string as is if an error is encountered)
> >>>> +        */
> >>>>         seq_puts(f, "isa\t\t: ");
> >>>> -       seq_write(f, isa, base_isa_len);
> >>>> +       if (parse_break < 2) {
> >>>> +               seq_write(f, base_isa, base_isa_end - base_isa);
> >>>> +       } else {
> >>>> +               isa_end += strlen(isa_end);
> >>>> +               seq_write(f, isa, isa_end - isa);
> >>>> +       }
> >>>>         seq_puts(f, "\n");
> >>>>  }
> >>>>
> >>>> --
> >>>> 2.32.0
> >>>>
> >>>
> >>>
> >
> >
> >
diff mbox series

Patch

diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 6f9660c0a973..3f9607a66d95 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -101,17 +101,82 @@  static void print_isa_ext(struct seq_file *f)
 
 static void print_isa(struct seq_file *f, const char *isa)
 {
-	char *ext_start;
-	int isa_len = strlen(isa);
-	int base_isa_len = isa_len;
-
-	ext_start = strnchr(isa, isa_len, '_');
-	if (ext_start)
-		base_isa_len = isa_len - strlen(ext_start);
+	unsigned char parse_break = 0;
+	char base_isa[RISCV_ISA_EXT_BASE + 5];
+	char *base_isa_end = base_isa;
+	const char *isa_end = isa;
+#if IS_ENABLED(CONFIG_32BIT)
+	if (!strncmp(isa, "rv32", 4))
+		isa_end += 4;
+#elif IS_ENABLED(CONFIG_64BIT)
+	if (!strncmp(isa, "rv64", 4))
+		isa_end += 4;
+#endif
+	if (isa != isa_end) {
+		strncpy(base_isa, isa, isa_end - isa);
+		base_isa_end += isa_end - isa;
+		for (; *isa_end; ++isa_end) {
+			switch (*isa_end++) {
+			case 'x':
+			case 'z':
+				parse_break = 1;
+				break;
+			case 's':
+				if (*isa_end != 'u') {
+					parse_break = 1;
+					break;
+				}
+				/**
+				 * Workaround for invalid single-letter 's'
+				 * (QEMU).  Should break for valid ISA string.
+				 */
+				fallthrough;
+			default:
+				if (unlikely(!islower(isa_end[-1])
+				    || base_isa_end == base_isa + sizeof(base_isa))) {
+					parse_break = 2;
+					break;
+				}
+				switch (isa_end[-1]) {
+				case 's':
+				case 'u':
+					break;
+				default:
+					*base_isa_end++ = isa_end[-1];
+				}
+				/* Skip version number */
+				if (!isdigit(*isa_end))
+					break;
+				while (isdigit(*++isa_end))
+					;
+				if (*isa_end != 'p')
+					break;
+				if (!isdigit(*++isa_end)) {
+					--isa_end;
+					break;
+				}
+				while (isdigit(*++isa_end))
+					;
+				break;
+			}
+			if (*isa_end != '_')
+				--isa_end;
+			if (parse_break)
+				break;
+		}
+	}
 
-	/* Print only the base ISA as it is */
+	/**
+	 * Print only the base ISA
+	 * (original ISA string as is if an error is encountered)
+	 */
 	seq_puts(f, "isa\t\t: ");
-	seq_write(f, isa, base_isa_len);
+	if (parse_break < 2) {
+		seq_write(f, base_isa, base_isa_end - base_isa);
+	} else {
+		isa_end += strlen(isa_end);
+		seq_write(f, isa, isa_end - isa);
+	}
 	seq_puts(f, "\n");
 }