diff mbox series

[03/17] x86: Replace open-coded parity calculation with parity8()

Message ID 20250223164217.2139331-4-visitorckw@gmail.com (mailing list archive)
State New
Headers show
Series Introduce and use generic parity32/64 helper | expand

Commit Message

Kuan-Wei Chiu Feb. 23, 2025, 4:42 p.m. UTC
Refactor parity calculations to use the standard parity8() helper. This
change eliminates redundant implementations and improves code
efficiency.

Co-developed-by: Yu-Chun Lin <eleanor15x@gmail.com>
Signed-off-by: Yu-Chun Lin <eleanor15x@gmail.com>
Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
---
 arch/x86/kernel/bootflag.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

Comments

Uros Bizjak Feb. 24, 2025, 3:24 p.m. UTC | #1
On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
> Refactor parity calculations to use the standard parity8() helper. This
> change eliminates redundant implementations and improves code
> efficiency.

The patch improves parity assembly code in bootflag.o from:

   58:	89 de                	mov    %ebx,%esi
   5a:	b9 08 00 00 00       	mov    $0x8,%ecx
   5f:	31 d2                	xor    %edx,%edx
   61:	89 f0                	mov    %esi,%eax
   63:	89 d7                	mov    %edx,%edi
   65:	40 d0 ee             	shr    %sil
   68:	83 e0 01             	and    $0x1,%eax
   6b:	31 c2                	xor    %eax,%edx
   6d:	83 e9 01             	sub    $0x1,%ecx
   70:	75 ef                	jne    61 <sbf_init+0x51>
   72:	39 c7                	cmp    %eax,%edi
   74:	74 7f                	je     f5 <sbf_init+0xe5>
   76:

to:

   54:	89 d8                	mov    %ebx,%eax
   56:	ba 96 69 00 00       	mov    $0x6996,%edx
   5b:	c0 e8 04             	shr    $0x4,%al
   5e:	31 d8                	xor    %ebx,%eax
   60:	83 e0 0f             	and    $0xf,%eax
   63:	0f a3 c2             	bt     %eax,%edx
   66:	73 64                	jae    cc <sbf_init+0xbc>
   68:

which is faster and smaller (-10 bytes) code.

Reviewed-by: Uros Bizjak <ubizjak@gmail.com>

Thanks,
Uros.

> 
> Co-developed-by: Yu-Chun Lin <eleanor15x@gmail.com>
> Signed-off-by: Yu-Chun Lin <eleanor15x@gmail.com>
> Signed-off-by: Kuan-Wei Chiu <visitorckw@gmail.com>
> ---
>   arch/x86/kernel/bootflag.c | 18 +++---------------
>   1 file changed, 3 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
> index 3fed7ae58b60..314ff0e84900 100644
> --- a/arch/x86/kernel/bootflag.c
> +++ b/arch/x86/kernel/bootflag.c
> @@ -8,6 +8,7 @@
>   #include <linux/string.h>
>   #include <linux/spinlock.h>
>   #include <linux/acpi.h>
> +#include <linux/bitops.h>
>   #include <asm/io.h>
>   
>   #include <linux/mc146818rtc.h>
> @@ -20,26 +21,13 @@
>   
>   int sbf_port __initdata = -1;	/* set via acpi_boot_init() */
>   
> -static int __init parity(u8 v)
> -{
> -	int x = 0;
> -	int i;
> -
> -	for (i = 0; i < 8; i++) {
> -		x ^= (v & 1);
> -		v >>= 1;
> -	}
> -
> -	return x;
> -}
> -
>   static void __init sbf_write(u8 v)
>   {
>   	unsigned long flags;
>   
>   	if (sbf_port != -1) {
>   		v &= ~SBF_PARITY;
> -		if (!parity(v))
> +		if (!parity8(v))
>   			v |= SBF_PARITY;
>   
>   		printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n",
> @@ -70,7 +58,7 @@ static int __init sbf_value_valid(u8 v)
>   {
>   	if (v & SBF_RESERVED)		/* Reserved bits */
>   		return 0;
> -	if (!parity(v))
> +	if (!parity8(v))
>   		return 0;
>   
>   	return 1;
H. Peter Anvin Feb. 24, 2025, 9:55 p.m. UTC | #2
On 2/24/25 07:24, Uros Bizjak wrote:
> 
> 
> On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
>> Refactor parity calculations to use the standard parity8() helper. This
>> change eliminates redundant implementations and improves code
>> efficiency.
> 
> The patch improves parity assembly code in bootflag.o from:
> 
>    58:    89 de                    mov    %ebx,%esi
>    5a:    b9 08 00 00 00           mov    $0x8,%ecx
>    5f:    31 d2                    xor    %edx,%edx
>    61:    89 f0                    mov    %esi,%eax
>    63:    89 d7                    mov    %edx,%edi
>    65:    40 d0 ee                 shr    %sil
>    68:    83 e0 01                 and    $0x1,%eax
>    6b:    31 c2                    xor    %eax,%edx
>    6d:    83 e9 01                 sub    $0x1,%ecx
>    70:    75 ef                    jne    61 <sbf_init+0x51>
>    72:    39 c7                    cmp    %eax,%edi
>    74:    74 7f                    je     f5 <sbf_init+0xe5>
>    76:
> 
> to:
> 
>    54:    89 d8                    mov    %ebx,%eax
>    56:    ba 96 69 00 00           mov    $0x6996,%edx
>    5b:    c0 e8 04                 shr    $0x4,%al
>    5e:    31 d8                    xor    %ebx,%eax
>    60:    83 e0 0f                 and    $0xf,%eax
>    63:    0f a3 c2                 bt     %eax,%edx
>    66:    73 64                    jae    cc <sbf_init+0xbc>
>    68:
> 
> which is faster and smaller (-10 bytes) code.
> 

Of course, on x86, parity8() and parity16() can be implemented very simply:

(Also, the parity functions really ought to return bool, and be flagged 
__attribute_const__.)

static inline __attribute_const__ bool _arch_parity8(u8 val)
{
	bool parity;
	asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
	return parity;
}

static inline __attribute_const__ bool _arch_parity16(u16 val)
{
	bool parity;
	asm("xor %h0,%b0" : "=@ccnp" (parity), "+Q" (val));
	return parity;
}

In the generic algorithm, you probably should implement parity16() in 
terms of parity8(), parity32() in terms of parity16() and so on:

static inline __attribute_const__ bool parity16(u16 val)
{
#ifdef ARCH_HAS_PARITY16
	if (!__builtin_const_p(val))
		return _arch_parity16(val);
#endif
	return parity8(val ^ (val >> 8));
}

This picks up the architectural versions when available.

Furthermore, if a popcnt instruction is known to exist, then the parity 
is simply popcnt(x) & 1.

	-hpa
Uros Bizjak Feb. 24, 2025, 10:08 p.m. UTC | #3
On Mon, Feb 24, 2025 at 10:56 PM H. Peter Anvin <hpa@zytor.com> wrote:
>
> On 2/24/25 07:24, Uros Bizjak wrote:
> >
> >
> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
> >> Refactor parity calculations to use the standard parity8() helper. This
> >> change eliminates redundant implementations and improves code
> >> efficiency.
> >
> > The patch improves parity assembly code in bootflag.o from:
> >
> >    58:    89 de                    mov    %ebx,%esi
> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
> >    5f:    31 d2                    xor    %edx,%edx
> >    61:    89 f0                    mov    %esi,%eax
> >    63:    89 d7                    mov    %edx,%edi
> >    65:    40 d0 ee                 shr    %sil
> >    68:    83 e0 01                 and    $0x1,%eax
> >    6b:    31 c2                    xor    %eax,%edx
> >    6d:    83 e9 01                 sub    $0x1,%ecx
> >    70:    75 ef                    jne    61 <sbf_init+0x51>
> >    72:    39 c7                    cmp    %eax,%edi
> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
> >    76:
> >
> > to:
> >
> >    54:    89 d8                    mov    %ebx,%eax
> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
> >    5b:    c0 e8 04                 shr    $0x4,%al
> >    5e:    31 d8                    xor    %ebx,%eax
> >    60:    83 e0 0f                 and    $0xf,%eax
> >    63:    0f a3 c2                 bt     %eax,%edx
> >    66:    73 64                    jae    cc <sbf_init+0xbc>
> >    68:
> >
> > which is faster and smaller (-10 bytes) code.
> >
>
> Of course, on x86, parity8() and parity16() can be implemented very simply:
>
> (Also, the parity functions really ought to return bool, and be flagged
> __attribute_const__.)
>
> static inline __attribute_const__ bool _arch_parity8(u8 val)
> {
>         bool parity;
>         asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));

asm("test %0,%0" : "=@ccnp" (parity) : "q" (val));

because we are interested only in flags.

Uros.
Yury Norov Feb. 24, 2025, 10:17 p.m. UTC | #4
On Mon, Feb 24, 2025 at 01:55:28PM -0800, H. Peter Anvin wrote:
> On 2/24/25 07:24, Uros Bizjak wrote:
> > 
> > 
> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
> > > Refactor parity calculations to use the standard parity8() helper. This
> > > change eliminates redundant implementations and improves code
> > > efficiency.
> > 
> > The patch improves parity assembly code in bootflag.o from:
> > 
> >    58:    89 de                    mov    %ebx,%esi
> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
> >    5f:    31 d2                    xor    %edx,%edx
> >    61:    89 f0                    mov    %esi,%eax
> >    63:    89 d7                    mov    %edx,%edi
> >    65:    40 d0 ee                 shr    %sil
> >    68:    83 e0 01                 and    $0x1,%eax
> >    6b:    31 c2                    xor    %eax,%edx
> >    6d:    83 e9 01                 sub    $0x1,%ecx
> >    70:    75 ef                    jne    61 <sbf_init+0x51>
> >    72:    39 c7                    cmp    %eax,%edi
> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
> >    76:
> > 
> > to:
> > 
> >    54:    89 d8                    mov    %ebx,%eax
> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
> >    5b:    c0 e8 04                 shr    $0x4,%al
> >    5e:    31 d8                    xor    %ebx,%eax
> >    60:    83 e0 0f                 and    $0xf,%eax
> >    63:    0f a3 c2                 bt     %eax,%edx
> >    66:    73 64                    jae    cc <sbf_init+0xbc>
> >    68:
> > 
> > which is faster and smaller (-10 bytes) code.
> > 
> 
> Of course, on x86, parity8() and parity16() can be implemented very simply:
> 
> (Also, the parity functions really ought to return bool, and be flagged
> __attribute_const__.)

There was a discussion regarding return type when parity8() was added.
The integer type was taken over bool with a sort of consideration that
bool should be returned as an answer to some question, like parity_odd().

To me it's not a big deal. We can switch to boolean and describe in
comment what the 'true' means for the parity() function.
H. Peter Anvin Feb. 24, 2025, 10:18 p.m. UTC | #5
On February 24, 2025 2:08:05 PM PST, Uros Bizjak <ubizjak@gmail.com> wrote:
>On Mon, Feb 24, 2025 at 10:56 PM H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> On 2/24/25 07:24, Uros Bizjak wrote:
>> >
>> >
>> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
>> >> Refactor parity calculations to use the standard parity8() helper. This
>> >> change eliminates redundant implementations and improves code
>> >> efficiency.
>> >
>> > The patch improves parity assembly code in bootflag.o from:
>> >
>> >    58:    89 de                    mov    %ebx,%esi
>> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
>> >    5f:    31 d2                    xor    %edx,%edx
>> >    61:    89 f0                    mov    %esi,%eax
>> >    63:    89 d7                    mov    %edx,%edi
>> >    65:    40 d0 ee                 shr    %sil
>> >    68:    83 e0 01                 and    $0x1,%eax
>> >    6b:    31 c2                    xor    %eax,%edx
>> >    6d:    83 e9 01                 sub    $0x1,%ecx
>> >    70:    75 ef                    jne    61 <sbf_init+0x51>
>> >    72:    39 c7                    cmp    %eax,%edi
>> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
>> >    76:
>> >
>> > to:
>> >
>> >    54:    89 d8                    mov    %ebx,%eax
>> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
>> >    5b:    c0 e8 04                 shr    $0x4,%al
>> >    5e:    31 d8                    xor    %ebx,%eax
>> >    60:    83 e0 0f                 and    $0xf,%eax
>> >    63:    0f a3 c2                 bt     %eax,%edx
>> >    66:    73 64                    jae    cc <sbf_init+0xbc>
>> >    68:
>> >
>> > which is faster and smaller (-10 bytes) code.
>> >
>>
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>>
>> (Also, the parity functions really ought to return bool, and be flagged
>> __attribute_const__.)
>>
>> static inline __attribute_const__ bool _arch_parity8(u8 val)
>> {
>>         bool parity;
>>         asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
>
>asm("test %0,%0" : "=@ccnp" (parity) : "q" (val));
>
>because we are interested only in flags.
>
>Uros.
>

Same thing, really, but yes, using test is cleaner.
H. Peter Anvin Feb. 24, 2025, 10:21 p.m. UTC | #6
On February 24, 2025 2:17:29 PM PST, Yury Norov <yury.norov@gmail.com> wrote:
>On Mon, Feb 24, 2025 at 01:55:28PM -0800, H. Peter Anvin wrote:
>> On 2/24/25 07:24, Uros Bizjak wrote:
>> > 
>> > 
>> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
>> > > Refactor parity calculations to use the standard parity8() helper. This
>> > > change eliminates redundant implementations and improves code
>> > > efficiency.
>> > 
>> > The patch improves parity assembly code in bootflag.o from:
>> > 
>> >    58:    89 de                    mov    %ebx,%esi
>> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
>> >    5f:    31 d2                    xor    %edx,%edx
>> >    61:    89 f0                    mov    %esi,%eax
>> >    63:    89 d7                    mov    %edx,%edi
>> >    65:    40 d0 ee                 shr    %sil
>> >    68:    83 e0 01                 and    $0x1,%eax
>> >    6b:    31 c2                    xor    %eax,%edx
>> >    6d:    83 e9 01                 sub    $0x1,%ecx
>> >    70:    75 ef                    jne    61 <sbf_init+0x51>
>> >    72:    39 c7                    cmp    %eax,%edi
>> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
>> >    76:
>> > 
>> > to:
>> > 
>> >    54:    89 d8                    mov    %ebx,%eax
>> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
>> >    5b:    c0 e8 04                 shr    $0x4,%al
>> >    5e:    31 d8                    xor    %ebx,%eax
>> >    60:    83 e0 0f                 and    $0xf,%eax
>> >    63:    0f a3 c2                 bt     %eax,%edx
>> >    66:    73 64                    jae    cc <sbf_init+0xbc>
>> >    68:
>> > 
>> > which is faster and smaller (-10 bytes) code.
>> > 
>> 
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>> 
>> (Also, the parity functions really ought to return bool, and be flagged
>> __attribute_const__.)
>
>There was a discussion regarding return type when parity8() was added.
>The integer type was taken over bool with a sort of consideration that
>bool should be returned as an answer to some question, like parity_odd().
>
>To me it's not a big deal. We can switch to boolean and describe in
>comment what the 'true' means for the parity() function.

Bool is really the single-bit type, and gives the compiler more information. You could argue that the function really should be called parity_odd*() in general, but that's kind of excessive IMO.
Yury Norov Feb. 24, 2025, 10:30 p.m. UTC | #7
On Mon, Feb 24, 2025 at 02:21:13PM -0800, H. Peter Anvin wrote:
> On February 24, 2025 2:17:29 PM PST, Yury Norov <yury.norov@gmail.com> wrote:
> >On Mon, Feb 24, 2025 at 01:55:28PM -0800, H. Peter Anvin wrote:
> >> On 2/24/25 07:24, Uros Bizjak wrote:
> >> > 
> >> > 
> >> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
> >> > > Refactor parity calculations to use the standard parity8() helper. This
> >> > > change eliminates redundant implementations and improves code
> >> > > efficiency.
> >> > 
> >> > The patch improves parity assembly code in bootflag.o from:
> >> > 
> >> >    58:    89 de                    mov    %ebx,%esi
> >> >    5a:    b9 08 00 00 00           mov    $0x8,%ecx
> >> >    5f:    31 d2                    xor    %edx,%edx
> >> >    61:    89 f0                    mov    %esi,%eax
> >> >    63:    89 d7                    mov    %edx,%edi
> >> >    65:    40 d0 ee                 shr    %sil
> >> >    68:    83 e0 01                 and    $0x1,%eax
> >> >    6b:    31 c2                    xor    %eax,%edx
> >> >    6d:    83 e9 01                 sub    $0x1,%ecx
> >> >    70:    75 ef                    jne    61 <sbf_init+0x51>
> >> >    72:    39 c7                    cmp    %eax,%edi
> >> >    74:    74 7f                    je     f5 <sbf_init+0xe5>
> >> >    76:
> >> > 
> >> > to:
> >> > 
> >> >    54:    89 d8                    mov    %ebx,%eax
> >> >    56:    ba 96 69 00 00           mov    $0x6996,%edx
> >> >    5b:    c0 e8 04                 shr    $0x4,%al
> >> >    5e:    31 d8                    xor    %ebx,%eax
> >> >    60:    83 e0 0f                 and    $0xf,%eax
> >> >    63:    0f a3 c2                 bt     %eax,%edx
> >> >    66:    73 64                    jae    cc <sbf_init+0xbc>
> >> >    68:
> >> > 
> >> > which is faster and smaller (-10 bytes) code.
> >> > 
> >> 
> >> Of course, on x86, parity8() and parity16() can be implemented very simply:
> >> 
> >> (Also, the parity functions really ought to return bool, and be flagged
> >> __attribute_const__.)
> >
> >There was a discussion regarding return type when parity8() was added.
> >The integer type was taken over bool with a sort of consideration that
> >bool should be returned as an answer to some question, like parity_odd().
> >
> >To me it's not a big deal. We can switch to boolean and describe in
> >comment what the 'true' means for the parity() function.
> 
> Bool is really the single-bit type, and gives the compiler more information. You could argue that the function really should be called parity_odd*() in general, but that's kind of excessive IMO.

Yes, I could, but I will not. :) I also feel like bool looks more
natural here.
H. Peter Anvin Feb. 25, 2025, 3:36 a.m. UTC | #8
On 2/24/25 14:08, Uros Bizjak wrote:
> On Mon, Feb 24, 2025 at 10:56 PM H. Peter Anvin <hpa@zytor.com> wrote:
>>
>> On 2/24/25 07:24, Uros Bizjak wrote:
>>>
>>>
>>> On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:
>>>> Refactor parity calculations to use the standard parity8() helper. This
>>>> change eliminates redundant implementations and improves code
>>>> efficiency.
>>>
>>> The patch improves parity assembly code in bootflag.o from:
>>>
>>>     58:    89 de                    mov    %ebx,%esi
>>>     5a:    b9 08 00 00 00           mov    $0x8,%ecx
>>>     5f:    31 d2                    xor    %edx,%edx
>>>     61:    89 f0                    mov    %esi,%eax
>>>     63:    89 d7                    mov    %edx,%edi
>>>     65:    40 d0 ee                 shr    %sil
>>>     68:    83 e0 01                 and    $0x1,%eax
>>>     6b:    31 c2                    xor    %eax,%edx
>>>     6d:    83 e9 01                 sub    $0x1,%ecx
>>>     70:    75 ef                    jne    61 <sbf_init+0x51>
>>>     72:    39 c7                    cmp    %eax,%edi
>>>     74:    74 7f                    je     f5 <sbf_init+0xe5>
>>>     76:
>>>
>>> to:
>>>
>>>     54:    89 d8                    mov    %ebx,%eax
>>>     56:    ba 96 69 00 00           mov    $0x6996,%edx
>>>     5b:    c0 e8 04                 shr    $0x4,%al
>>>     5e:    31 d8                    xor    %ebx,%eax
>>>     60:    83 e0 0f                 and    $0xf,%eax
>>>     63:    0f a3 c2                 bt     %eax,%edx
>>>     66:    73 64                    jae    cc <sbf_init+0xbc>
>>>     68:
>>>
>>> which is faster and smaller (-10 bytes) code.
>>>
>>
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>>
>> (Also, the parity functions really ought to return bool, and be flagged
>> __attribute_const__.)
>>
>> static inline __attribute_const__ bool _arch_parity8(u8 val)
>> {
>>          bool parity;
>>          asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
> 
> asm("test %0,%0" : "=@ccnp" (parity) : "q" (val));
> 
> because we are interested only in flags.
> 

Also, needs to be %1,%1 (my mistake, thought flags outputs didn't count.)

Finally, this is kind of an obvious improvement:

  static void __init sbf_write(u8 v)
  {
         unsigned long flags;

         if (sbf_port != -1) {
-               v &= ~SBF_PARITY;
                 if (!parity(v))
-                       v |= SBF_PARITY;
+                       v ^= SBF_PARITY;

	-hpa
David Laight Feb. 25, 2025, 10:46 p.m. UTC | #9
On Mon, 24 Feb 2025 13:55:28 -0800
"H. Peter Anvin" <hpa@zytor.com> wrote:

> On 2/24/25 07:24, Uros Bizjak wrote:
> > 
> > 
> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:  
> >> Refactor parity calculations to use the standard parity8() helper. This
> >> change eliminates redundant implementations and improves code
> >> efficiency.  
...
> Of course, on x86, parity8() and parity16() can be implemented very simply:
> 
> (Also, the parity functions really ought to return bool, and be flagged 
> __attribute_const__.)
> 
> static inline __attribute_const__ bool _arch_parity8(u8 val)
> {
> 	bool parity;
> 	asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
> 	return parity;
> }
> 
> static inline __attribute_const__ bool _arch_parity16(u16 val)
> {
> 	bool parity;
> 	asm("xor %h0,%b0" : "=@ccnp" (parity), "+Q" (val));
> 	return parity;
> }

The same (with fixes) can be done for parity64() on 32bit.

> 
> In the generic algorithm, you probably should implement parity16() in 
> terms of parity8(), parity32() in terms of parity16() and so on:
> 
> static inline __attribute_const__ bool parity16(u16 val)
> {
> #ifdef ARCH_HAS_PARITY16
> 	if (!__builtin_const_p(val))
> 		return _arch_parity16(val);
> #endif
> 	return parity8(val ^ (val >> 8));
> }
> 
> This picks up the architectural versions when available.

Not the best way to do that.
Make the name in the #ifdef the same as the function and define
a default one if the architecture doesn't define one.
So:

static inline parity16(u16 val)
{
	return __builtin_const_p(val) ? _parity_const(val) : _parity16(val);
}

#ifndef _parity16
static inline _parity16(u15 val)
{
	return _parity8(val ^ (val >> 8));
}
#endif

You only need one _parity_const().

> 
> Furthermore, if a popcnt instruction is known to exist, then the parity 
> is simply popcnt(x) & 1.

Beware that some popcnt instructions are slow.

	David

> 
> 	-hpa
> 
>
H. Peter Anvin Feb. 26, 2025, 12:26 a.m. UTC | #10
On February 25, 2025 2:46:23 PM PST, David Laight <david.laight.linux@gmail.com> wrote:
>On Mon, 24 Feb 2025 13:55:28 -0800
>"H. Peter Anvin" <hpa@zytor.com> wrote:
>
>> On 2/24/25 07:24, Uros Bizjak wrote:
>> > 
>> > 
>> > On 23. 02. 25 17:42, Kuan-Wei Chiu wrote:  
>> >> Refactor parity calculations to use the standard parity8() helper. This
>> >> change eliminates redundant implementations and improves code
>> >> efficiency.  
>...
>> Of course, on x86, parity8() and parity16() can be implemented very simply:
>> 
>> (Also, the parity functions really ought to return bool, and be flagged 
>> __attribute_const__.)
>> 
>> static inline __attribute_const__ bool _arch_parity8(u8 val)
>> {
>> 	bool parity;
>> 	asm("and %0,%0" : "=@ccnp" (parity) : "q" (val));
>> 	return parity;
>> }
>> 
>> static inline __attribute_const__ bool _arch_parity16(u16 val)
>> {
>> 	bool parity;
>> 	asm("xor %h0,%b0" : "=@ccnp" (parity), "+Q" (val));
>> 	return parity;
>> }
>
>The same (with fixes) can be done for parity64() on 32bit.
>
>> 
>> In the generic algorithm, you probably should implement parity16() in 
>> terms of parity8(), parity32() in terms of parity16() and so on:
>> 
>> static inline __attribute_const__ bool parity16(u16 val)
>> {
>> #ifdef ARCH_HAS_PARITY16
>> 	if (!__builtin_const_p(val))
>> 		return _arch_parity16(val);
>> #endif
>> 	return parity8(val ^ (val >> 8));
>> }
>> 
>> This picks up the architectural versions when available.
>
>Not the best way to do that.
>Make the name in the #ifdef the same as the function and define
>a default one if the architecture doesn't define one.
>So:
>
>static inline parity16(u16 val)
>{
>	return __builtin_const_p(val) ? _parity_const(val) : _parity16(val);
>}
>
>#ifndef _parity16
>static inline _parity16(u15 val)
>{
>	return _parity8(val ^ (val >> 8));
>}
>#endif
>
>You only need one _parity_const().
>
>> 
>> Furthermore, if a popcnt instruction is known to exist, then the parity 
>> is simply popcnt(x) & 1.
>
>Beware that some popcnt instructions are slow.
>
>	David
>
>> 
>> 	-hpa
>> 
>> 
>

Seems more verbose than just #ifdef _arch_parity8 et al since the const and generic code cases are the same (which they aren't always.)

But that part is a good idea, especially since on at least *some* architectures like x86 doing: 

#define _arch_parity8(x) __builtin_parity(x)

... etc is entirely reasonable and lets gcc use an already available parity flag should one be available.

The inline wrapper, of course, takes care of the type mangling.
diff mbox series

Patch

diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c
index 3fed7ae58b60..314ff0e84900 100644
--- a/arch/x86/kernel/bootflag.c
+++ b/arch/x86/kernel/bootflag.c
@@ -8,6 +8,7 @@ 
 #include <linux/string.h>
 #include <linux/spinlock.h>
 #include <linux/acpi.h>
+#include <linux/bitops.h>
 #include <asm/io.h>
 
 #include <linux/mc146818rtc.h>
@@ -20,26 +21,13 @@ 
 
 int sbf_port __initdata = -1;	/* set via acpi_boot_init() */
 
-static int __init parity(u8 v)
-{
-	int x = 0;
-	int i;
-
-	for (i = 0; i < 8; i++) {
-		x ^= (v & 1);
-		v >>= 1;
-	}
-
-	return x;
-}
-
 static void __init sbf_write(u8 v)
 {
 	unsigned long flags;
 
 	if (sbf_port != -1) {
 		v &= ~SBF_PARITY;
-		if (!parity(v))
+		if (!parity8(v))
 			v |= SBF_PARITY;
 
 		printk(KERN_INFO "Simple Boot Flag at 0x%x set to 0x%x\n",
@@ -70,7 +58,7 @@  static int __init sbf_value_valid(u8 v)
 {
 	if (v & SBF_RESERVED)		/* Reserved bits */
 		return 0;
-	if (!parity(v))
+	if (!parity8(v))
 		return 0;
 
 	return 1;