
[v3,2/2] riscv: introduce asm/swab.h

Message ID 20250403-riscv-swab-v3-2-3bf705d80e33@iencinas.com (mailing list archive)
State New
Headers show
Series Implement endianness swap macros for RISC-V

Checks

Context Check Description
bjorn/pre-ci_am success Success
bjorn/build-rv32-defconfig success build-rv32-defconfig
bjorn/build-rv64-clang-allmodconfig fail build-rv64-clang-allmodconfig
bjorn/build-rv64-gcc-allmodconfig fail build-rv64-gcc-allmodconfig
bjorn/build-rv64-nommu-k210-defconfig success build-rv64-nommu-k210-defconfig
bjorn/build-rv64-nommu-k210-virt success build-rv64-nommu-k210-virt
bjorn/checkpatch warning checkpatch
bjorn/dtb-warn-rv64 success dtb-warn-rv64
bjorn/header-inline success header-inline
bjorn/kdoc success kdoc
bjorn/module-param success module-param
bjorn/verify-fixes success verify-fixes
bjorn/verify-signedoff success verify-signedoff

Commit Message

Ignacio Encinas April 3, 2025, 8:34 p.m. UTC
Implement endianness swap macros for RISC-V.

Use the rev8 instruction when Zbb is available. Otherwise, rely on the
default mask-and-shift implementation.

Signed-off-by: Ignacio Encinas <ignacio@iencinas.com>
---
 arch/riscv/include/asm/swab.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
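
For context: rev8 reverses all XLEN/8 bytes of its source register,
which is why the sub-register swaps below shift the result right. An
illustrative trace of the 32-bit case on RV64:

	x                          = 0x0000000011223344  /* zero-extended input */
	rev8 x                     = 0x4433221100000000  /* all 8 bytes reversed */
	x >> (BITS_PER_LONG - 32)  = 0x0000000044332211  /* the 32-bit swap */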

Comments

Arnd Bergmann April 4, 2025, 5:58 a.m. UTC | #1
On Thu, Apr 3, 2025, at 22:34, Ignacio Encinas wrote:
> +#define ARCH_SWAB(size) \
> +static __always_inline unsigned long __arch_swab##size(__u##size value) \
> +{									\
> +	unsigned long x = value;					\
> +									\
> +	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {            \
> +		asm volatile (".option push\n"				\
> +			      ".option arch,+zbb\n"			\
> +			      "rev8 %0, %1\n"				\
> +			      ".option pop\n"				\
> +			      : "=r" (x) : "r" (x));			\
> +		return x >> (BITS_PER_LONG - size);			\
> +	}                                                               \
> +	return  ___constant_swab##size(value);				\
> +}

I think the fallback should really just use the __builtin_bswap
helpers instead of the ___constant_swab variants. The output
would be the same, but you can skip patch 1/2.

I would also suggest dumbing down the macro a bit so you can
still find the definition with 'git grep __arch_swab64'. Ideally
just put the function body into a macro but leave the three
separate inline function definitions.
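
A sketch of that shape (illustrative only; ZBB_SWAB_BODY is a
hypothetical name, and note the discussion below about __builtin_bswap
emitting libcalls on RISC-V):

	/* Shared asm body in a macro, definitions kept greppable: */
	#define ZBB_SWAB_BODY(x)					\
		asm volatile (".option push\n"				\
			      ".option arch,+zbb\n"			\
			      "rev8 %0, %1\n"				\
			      ".option pop\n"				\
			      : "=r" (x) : "r" (x))

	static __always_inline __u32 __arch_swab32(__u32 value)
	{
		unsigned long x = value;

		if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {
			ZBB_SWAB_BODY(x);
			return x >> (BITS_PER_LONG - 32);
		}
		return __builtin_bswap32(value);
	}
	#define __arch_swab32 __arch_swab32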

     Arnd
Ben Dooks April 4, 2025, 3:47 p.m. UTC | #2
On 03/04/2025 21:34, Ignacio Encinas wrote:
> Implement endianness swap macros for RISC-V.
> 
> Use the rev8 instruction when Zbb is available. Otherwise, rely on the
> default mask-and-shift implementation.
> 
> Signed-off-by: Ignacio Encinas <ignacio@iencinas.com>
> ---
>   arch/riscv/include/asm/swab.h | 43 +++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 43 insertions(+)
> 
> diff --git a/arch/riscv/include/asm/swab.h b/arch/riscv/include/asm/swab.h
> new file mode 100644
> index 000000000000..7352e8405a99
> --- /dev/null
> +++ b/arch/riscv/include/asm/swab.h
> @@ -0,0 +1,43 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#ifndef _ASM_RISCV_SWAB_H
> +#define _ASM_RISCV_SWAB_H
> +
> +#include <linux/types.h>
> +#include <linux/compiler.h>
> +#include <asm/cpufeature-macros.h>
> +#include <asm/hwcap.h>
> +#include <asm-generic/swab.h>
> +
> +#if defined(CONFIG_RISCV_ISA_ZBB) && !defined(NO_ALTERNATIVE)
> +
> +#define ARCH_SWAB(size) \
> +static __always_inline unsigned long __arch_swab##size(__u##size value) \
> +{									\
> +	unsigned long x = value;					\
> +									\
> +	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {            \
> +		asm volatile (".option push\n"				\
> +			      ".option arch,+zbb\n"			\
> +			      "rev8 %0, %1\n"				\
> +			      ".option pop\n"				\
> +			      : "=r" (x) : "r" (x));			\
> +		return x >> (BITS_PER_LONG - size);			\
> +	}                                                               \
> +	return  ___constant_swab##size(value);				\
> +}
> +
> +#ifdef CONFIG_64BIT
> +ARCH_SWAB(64)
> +#define __arch_swab64 __arch_swab64
> +#endif
> +
> +ARCH_SWAB(32)
> +#define __arch_swab32 __arch_swab32
> +
> +ARCH_SWAB(16)
> +#define __arch_swab16 __arch_swab16
> +
> +#undef ARCH_SWAB
> +
> +#endif /* defined(CONFIG_RISCV_ISA_ZBB) && !defined(NO_ALTERNATIVE) */
> +#endif /* _ASM_RISCV_SWAB_H */
> 

I was having a look at this as well, using the alternatives macros.

It would be nice to have a __zbb_swab defined so that you could do some
timing checks with it, because it would be interesting to benchmark how
much these improve byteswapping.

How about:

#define ARCH_SWAB(size) \
static __always_inline unsigned long __zbb_swab##size(__u##size value) \
{								\
	unsigned long x = value;				\
								\
	asm volatile (".option push\n"				\
		      ".option arch,+zbb\n"			\
		      "rev8 %0, %1\n"				\
		      ".option pop\n"				\
		      : "=r" (x) : "r" (x));			\
	return x >> (BITS_PER_LONG - size);			\
}                                                               \
								\
static __always_inline unsigned long __arch_swab##size(__u##size value) \
{								\
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB))	\
		return __zbb_swab##size(value);			\
	return ___constant_swab##size(value);			\
}


We might need to define __zbb_swab##size to BUG or something if it
isn't available.
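
For example, something like this hypothetical stub:

	/* Illustrative only: trap if the Zbb path is reached without Zbb. */
	#define __zbb_swab32(x)	({ BUG(); 0UL; })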

Also, I wonder if it is possible to tell the build system that ZBB must
be present, so that it only emits the ZBB path when building a kernel
for a known-ZBB system.
Ben Dooks April 4, 2025, 3:54 p.m. UTC | #3
On 04/04/2025 06:58, Arnd Bergmann wrote:
> On Thu, Apr 3, 2025, at 22:34, Ignacio Encinas wrote:
>> +#define ARCH_SWAB(size) \
>> +static __always_inline unsigned long __arch_swab##size(__u##size value) \
>> +{									\
>> +	unsigned long x = value;					\
>> +									\
>> +	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {            \
>> +		asm volatile (".option push\n"				\
>> +			      ".option arch,+zbb\n"			\
>> +			      "rev8 %0, %1\n"				\
>> +			      ".option pop\n"				\
>> +			      : "=r" (x) : "r" (x));			\
>> +		return x >> (BITS_PER_LONG - size);			\
>> +	}                                                               \
>> +	return  ___constant_swab##size(value);				\
>> +}
> 
> I think the fallback should really just use the __builtin_bswap
> helpers instead of the ___constant_swab variants. The output
> would be the same, but you can skip patch 1/2.
> 
> I would also suggest dumbing down the macro a bit so you can
> still find the definition with 'git grep __arch_swab64'. Ideally
> just put the function body into a macro but leave the three
> separate inline function definitions.

I thought we explicitly disabled the builtins in gcc... I tried doing
this and just ended up with undefined calls from these sites.
Ben Dooks April 4, 2025, 3:55 p.m. UTC | #4
On 03/04/2025 21:34, Ignacio Encinas wrote:
> Implement endianness swap macros for RISC-V.
> 
> Use the rev8 instruction when Zbb is available. Otherwise, rely on the
> default mask-and-shift implementation.
> 
> Signed-off-by: Ignacio Encinas <ignacio@iencinas.com>
> ---
>   arch/riscv/include/asm/swab.h | 43 +++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 43 insertions(+)
> 
> diff --git a/arch/riscv/include/asm/swab.h b/arch/riscv/include/asm/swab.h
> new file mode 100644
> index 000000000000..7352e8405a99
> --- /dev/null
> +++ b/arch/riscv/include/asm/swab.h
> @@ -0,0 +1,43 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#ifndef _ASM_RISCV_SWAB_H
> +#define _ASM_RISCV_SWAB_H
> +
> +#include <linux/types.h>
> +#include <linux/compiler.h>
> +#include <asm/cpufeature-macros.h>
> +#include <asm/hwcap.h>
> +#include <asm-generic/swab.h>
> +
> +#if defined(CONFIG_RISCV_ISA_ZBB) && !defined(NO_ALTERNATIVE)
> +
> +#define ARCH_SWAB(size) \
> +static __always_inline unsigned long __arch_swab##size(__u##size value) \
> +{									\
> +	unsigned long x = value;					\
> +									\
> +	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {            \
> +		asm volatile (".option push\n"				\
> +			      ".option arch,+zbb\n"			\
> +			      "rev8 %0, %1\n"				\
> +			      ".option pop\n"				\
> +			      : "=r" (x) : "r" (x));			\
> +		return x >> (BITS_PER_LONG - size);			\
> +	}                                                               \
> +	return  ___constant_swab##size(value);				\
> +}
> +
> +#ifdef CONFIG_64BIT
> +ARCH_SWAB(64)
> +#define __arch_swab64 __arch_swab64
> +#endif

I suppose if we're not 64-bit we can't just rely on a 64-bit value
being in one register, so this'd need special casing here?

> +ARCH_SWAB(32)
> +#define __arch_swab32 __arch_swab32
> +
> +ARCH_SWAB(16)
> +#define __arch_swab16 __arch_swab16
> +
> +#undef ARCH_SWAB
> +
> +#endif /* defined(CONFIG_RISCV_ISA_ZBB) && !defined(NO_ALTERNATIVE) */
> +#endif /* _ASM_RISCV_SWAB_H */
>
Ignacio Encinas April 4, 2025, 5:35 p.m. UTC | #5
On 4/4/25 7:58, Arnd Bergmann wrote:
> On Thu, Apr 3, 2025, at 22:34, Ignacio Encinas wrote:
>> +#define ARCH_SWAB(size) \
>> +static __always_inline unsigned long __arch_swab##size(__u##size value) \
>> +{									\
>> +	unsigned long x = value;					\
>> +									\
>> +	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {            \
>> +		asm volatile (".option push\n"				\
>> +			      ".option arch,+zbb\n"			\
>> +			      "rev8 %0, %1\n"				\
>> +			      ".option pop\n"				\
>> +			      : "=r" (x) : "r" (x));			\
>> +		return x >> (BITS_PER_LONG - size);			\
>> +	}                                                               \
>> +	return  ___constant_swab##size(value);				\
>> +}

Hello Arnd!

> I think the fallback should really just use the __builtin_bswap
> helpers instead of the ___constant_swab variants. The output
> would be the same, but you can skip patch 1/2.

I tried, but that change causes build errors:

```
undefined reference to `__bswapsi2'

[...]

undefined reference to `__bswapdi2'
```

I tried working around those, but couldn't find a good solution. I'm a 
bit out of my depth here, but I "summarized" everything here [1]. Let me
know if I'm missing something.

[1] https://lore.kernel.org/linux-riscv/b3b59747-0484-4042-bdc4-c067688e3bfe@iencinas.com/

> I would also suggest dumbing down the macro a bit so you can
> still find the definition with 'git grep __arch_swab64'. Ideally
> just put the function body into a macro but leave the three
> separate inline function definitions.

Good point, thanks for bringing it up. Just to be sure, is this what you
had in mind? (Give or take formatting + naming of variables)

#define arch_swab(size, value) 						\
({                      						\
	unsigned long x = value;					\
									\
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {            \
		asm volatile (".option push\n"				\
			      ".option arch,+zbb\n"			\
			      "rev8 %0, %1\n"				\
			      ".option pop\n"				\
			      : "=r" (x) : "r" (x));			\
		x = x >> (BITS_PER_LONG - size);			\
	} else {                                                        \
		x = ___constant_swab##size(value);                      \
	}								\
	x;								\
})

static __always_inline unsigned long __arch_swab64(__u64 value) {
	return arch_swab(64, value);
}

Thanks!
Ignacio Encinas April 4, 2025, 5:53 p.m. UTC | #6
On 4/4/25 17:47, Ben Dooks wrote:
> I was having a look at this as well, using the alternatives macros.
> 
> It would be nice to have a __zbb_swab defined so that you could do some
> timing checks with it, because it would be interesting to benchmark how
> much these improve byteswapping.

I get your point, but isn't what you propose equivalent to benchmarking 
__arch_swab vs ___constant_swab? 
 
> Also, I wonder if it is possible to tell the build system that ZBB must
> be present, so that it only emits the ZBB path when building a kernel
> for a known-ZBB system.

You can disable ZBB instructions if you do RISCV_ISA_ZBB=n. 

config RISCV_ISA_ZBB
	bool "Zbb extension support for bit manipulation instructions"
	depends on TOOLCHAIN_HAS_ZBB
	depends on RISCV_ALTERNATIVE
	default y
	help
	   Adds support to dynamically detect the presence of the ZBB
	   extension (basic bit manipulation) and enable its usage.

	   The Zbb extension provides instructions to accelerate a number
	   of bit-specific operations (count bit population, sign extending,
	   bitrotation, etc).

	   If you don't know what to do here, say Y.

However, statically assuming ZBB instruction support is another beast
and I don't really have an informed opinion about this. Perhaps in a few
years?

Thanks for the review!
Ignacio Encinas April 4, 2025, 6:13 p.m. UTC | #7
On 4/4/25 17:55, Ben Dooks wrote:
> On 03/04/2025 21:34, Ignacio Encinas wrote:
>> +#ifdef CONFIG_64BIT
>> +ARCH_SWAB(64)
>> +#define __arch_swab64 __arch_swab64
>> +#endif
> 
> I suppose if we're not 64-bit we can't just rely on a 64-bit value
> being in one register, so this'd need special casing here?

Oops... I somehow decided that __arch_swab64 wasn't worth having for
CONFIG_32BIT. I can't tell how useful it is to have it, but it is
doable and already present in the codebase (include/uapi/linux/swab.h):

	__u32 h = val >> 32;
	__u32 l = val & ((1ULL << 32) - 1);
	return (((__u64)__fswab32(l)) << 32) | ((__u64)(__fswab32(h)));

I'll excuse myself on this one because I'm not sure I have ever used a
32-bit CPU (other than the very occasional and quick school project).

Thanks for catching this one! I'll make sure to add __arch_swab64 for
the 32BIT version mimicking the snippet from above.
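
A sketch of what that could look like (illustrative only; it reuses the
__arch_swab32 defined by this patch for each half):

	static __always_inline __u64 __arch_swab64(__u64 value)
	{
		__u32 h = value >> 32;
		__u32 l = value & ((1ULL << 32) - 1);

		return ((__u64)__arch_swab32(l) << 32) | (__u64)__arch_swab32(h);
	}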
Eric Biggers April 4, 2025, 7:28 p.m. UTC | #8
On Fri, Apr 04, 2025 at 04:47:52PM +0100, Ben Dooks wrote:
> On 03/04/2025 21:34, Ignacio Encinas wrote:
> > Implement endianness swap macros for RISC-V.
> > 
> > Use the rev8 instruction when Zbb is available. Otherwise, rely on the
> > default mask-and-shift implementation.
> > 
> > Signed-off-by: Ignacio Encinas <ignacio@iencinas.com>
> > ---
> >   arch/riscv/include/asm/swab.h | 43 +++++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 43 insertions(+)
> > 
> > diff --git a/arch/riscv/include/asm/swab.h b/arch/riscv/include/asm/swab.h
> > new file mode 100644
> > index 000000000000..7352e8405a99
> > --- /dev/null
> > +++ b/arch/riscv/include/asm/swab.h
> > @@ -0,0 +1,43 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +#ifndef _ASM_RISCV_SWAB_H
> > +#define _ASM_RISCV_SWAB_H
> > +
> > +#include <linux/types.h>
> > +#include <linux/compiler.h>
> > +#include <asm/cpufeature-macros.h>
> > +#include <asm/hwcap.h>
> > +#include <asm-generic/swab.h>
> > +
> > +#if defined(CONFIG_RISCV_ISA_ZBB) && !defined(NO_ALTERNATIVE)
> > +
> > +#define ARCH_SWAB(size) \
> > +static __always_inline unsigned long __arch_swab##size(__u##size value) \
> > +{									\
> > +	unsigned long x = value;					\
> > +									\
> > +	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {            \
> > +		asm volatile (".option push\n"				\
> > +			      ".option arch,+zbb\n"			\
> > +			      "rev8 %0, %1\n"				\
> > +			      ".option pop\n"				\
> > +			      : "=r" (x) : "r" (x));			\
> > +		return x >> (BITS_PER_LONG - size);			\
> > +	}                                                               \
> > +	return  ___constant_swab##size(value);				\
> > +}
> > +
> > +#ifdef CONFIG_64BIT
> > +ARCH_SWAB(64)
> > +#define __arch_swab64 __arch_swab64
> > +#endif
> > +
> > +ARCH_SWAB(32)
> > +#define __arch_swab32 __arch_swab32
> > +
> > +ARCH_SWAB(16)
> > +#define __arch_swab16 __arch_swab16
> > +
> > +#undef ARCH_SWAB
> > +
> > +#endif /* defined(CONFIG_RISCV_ISA_ZBB) && !defined(NO_ALTERNATIVE) */
> > +#endif /* _ASM_RISCV_SWAB_H */
> > 
> 
> I was having a look at this as well, using the alternatives macros.
> 
> It would be nice to have a __zbb_swab defined so that you could do some
> timing checks with it, because it would be interesting to benchmark how
> much these improve byteswapping.

FYI if you missed the previous discussion
(https://lore.kernel.org/linux-riscv/20250302220426.GC2079@quark.localdomain/),
currently the overhead caused by the slow generic byte-swapping on RISC-V is
easily visible in the CRC benchmark.  For example compare:

    crc32_le_benchmark: len=16384: 2440 MB/s

to
    
    crc32_be_benchmark: len=16384: 674 MB/s

But the main loops of crc32_le and crc32_be are basically the same, except
crc32_le does le64_to_cpu() (or le32_to_cpu()) on the data whereas crc32_be does
be64_to_cpu() (or be32_to_cpu()).  The above numbers came from a little-endian
CPU, where le*_to_cpu() is a no-op and be*_to_cpu() is a byte-swap.
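
In code terms (a simplified sketch, not the actual kernel CRC loop;
v holds the next 8 message bytes and crc_step is a hypothetical helper):

	/* Per 8-byte chunk, on a little-endian CPU: */
	crc_le = crc_step(crc_le, le64_to_cpu(v));	/* identity */
	crc_be = crc_step(crc_be, be64_to_cpu(v));	/* __swab64(v) */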

To reproduce this, build a kernel from the latest upstream with
CONFIG_CRC_KUNIT_TEST=y and CONFIG_CRC_BENCHMARK=y, boot it on a CPU that has
the Zbc extension, and check dmesg for the benchmark results.

This patch should mostly close the difference, though I don't currently have
hardware to confirm that myself.

- Eric

Patch

diff --git a/arch/riscv/include/asm/swab.h b/arch/riscv/include/asm/swab.h
new file mode 100644
index 000000000000..7352e8405a99
--- /dev/null
+++ b/arch/riscv/include/asm/swab.h
@@ -0,0 +1,43 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_RISCV_SWAB_H
+#define _ASM_RISCV_SWAB_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/cpufeature-macros.h>
+#include <asm/hwcap.h>
+#include <asm-generic/swab.h>
+
+#if defined(CONFIG_RISCV_ISA_ZBB) && !defined(NO_ALTERNATIVE)
+
+#define ARCH_SWAB(size) \
+static __always_inline unsigned long __arch_swab##size(__u##size value) \
+{									\
+	unsigned long x = value;					\
+									\
+	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBB)) {            \
+		asm volatile (".option push\n"				\
+			      ".option arch,+zbb\n"			\
+			      "rev8 %0, %1\n"				\
+			      ".option pop\n"				\
+			      : "=r" (x) : "r" (x));			\
+		return x >> (BITS_PER_LONG - size);			\
+	}                                                               \
+	return  ___constant_swab##size(value);				\
+}
+
+#ifdef CONFIG_64BIT
+ARCH_SWAB(64)
+#define __arch_swab64 __arch_swab64
+#endif
+
+ARCH_SWAB(32)
+#define __arch_swab32 __arch_swab32
+
+ARCH_SWAB(16)
+#define __arch_swab16 __arch_swab16
+
+#undef ARCH_SWAB
+
+#endif /* defined(CONFIG_RISCV_ISA_ZBB) && !defined(NO_ALTERNATIVE) */
+#endif /* _ASM_RISCV_SWAB_H */