Message ID | 20230809115516.214537-10-ajones@ventanamicro.com (mailing list archive) |
---|---|
State | Superseded |
Commit | 090925374690a40b3ac96348874333f331184f4c |
Headers | show |
Series | RISC-V: Enable cbo.zero in usermode | expand |
Context | Check | Description |
---|---|---|
conchuod/cover_letter | success | Series has a cover letter |
conchuod/tree_selection | success | Guessed tree name to be for-next at HEAD 174e8ac0272d |
conchuod/fixes_present | success | Fixes tag not required for -next series |
conchuod/maintainers_pattern | success | MAINTAINERS pattern errors before the patch: 4 and now 4 |
conchuod/verify_signedoff | success | Signed-off-by tag matches author and committer |
conchuod/kdoc | success | Errors and warnings before: 0 this patch: 0 |
conchuod/build_rv64_clang_allmodconfig | success | Errors and warnings before: 2810 this patch: 2810 |
conchuod/module_param | success | Was 0 now: 0 |
conchuod/build_rv64_gcc_allmodconfig | fail | Errors and warnings before: 15877 this patch: 15878 |
conchuod/build_rv32_defconfig | success | Build OK |
conchuod/dtb_warn_rv64 | success | Errors and warnings before: 12 this patch: 12 |
conchuod/header_inline | success | No static functions without inline keyword in header files |
conchuod/checkpatch | warning | CHECK: Consider using #include <linux/cpufeature.h> instead of <asm/cpufeature.h> |
conchuod/build_rv64_nommu_k210_defconfig | success | Build OK |
conchuod/verify_fixes | success | No Fixes tag |
conchuod/build_rv64_nommu_virt_defconfig | success | Build OK |
On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > When Zicboz is present, enable its instruction (cbo.zero) in > usermode by setting its respective senvcfg bit. We don't bother > trying to set this bit per-task, which would also require an > interface for tasks to request enabling and/or disabling. Instead, > permanently set the bit for each hart which has the extension when > bringing it online. > > Signed-off-by: Andrew Jones <ajones@ventanamicro.com> > --- > arch/riscv/include/asm/cpufeature.h | 2 ++ > arch/riscv/include/asm/csr.h | 1 + > arch/riscv/include/asm/hwcap.h | 16 ++++++++++++++++ > arch/riscv/kernel/cpufeature.c | 6 ++++++ > arch/riscv/kernel/setup.c | 4 ++++ > arch/riscv/kernel/smpboot.c | 4 ++++ > 6 files changed, 33 insertions(+) > > diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h > index 23fed53b8815..788fd575c21a 100644 > --- a/arch/riscv/include/asm/cpufeature.h > +++ b/arch/riscv/include/asm/cpufeature.h > @@ -30,4 +30,6 @@ DECLARE_PER_CPU(long, misaligned_access_speed); > /* Per-cpu ISA extensions. */ > extern struct riscv_isainfo hart_isa[NR_CPUS]; > > +void riscv_user_isa_enable(void); > + > #endif > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h > index 7bac43a3176e..e187e76e3df4 100644 > --- a/arch/riscv/include/asm/csr.h > +++ b/arch/riscv/include/asm/csr.h > @@ -273,6 +273,7 @@ > #define CSR_SIE 0x104 > #define CSR_STVEC 0x105 > #define CSR_SCOUNTEREN 0x106 > +#define CSR_SENVCFG 0x10a > #define CSR_SSCRATCH 0x140 > #define CSR_SEPC 0x141 > #define CSR_SCAUSE 0x142 > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > index f041bfa7f6a0..4929faecb75f 100644 > --- a/arch/riscv/include/asm/hwcap.h > +++ b/arch/riscv/include/asm/hwcap.h > @@ -66,6 +66,7 @@ > #ifndef __ASSEMBLY__ > > #include <linux/jump_label.h> > +#include <asm/cpufeature.h> > > unsigned long riscv_get_elf_hwcap(void); > > @@ -130,6 +131,21 @@ riscv_has_extension_unlikely(const unsigned long ext) > return true; > } > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext) > +{ > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > + return true; > + > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > +} > + > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext) > +{ > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > + return true; > + > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > +} Another way to do this would be to add a parameter to riscv_has_extension_*() (as there are very few users), then these new functions can turn around and call those with the new parameter set to hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The only advantage to it I can argue is it keeps the code flows more unified. -Evan
On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote: > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote: ... > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext) > > +{ > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > > + return true; > > + > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > +} > > + > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext) > > +{ > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > > + return true; > > + > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > +} > > Another way to do this would be to add a parameter to > riscv_has_extension_*() (as there are very few users), then these new > functions can turn around and call those with the new parameter set to > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The > only advantage to it I can argue is it keeps the code flows more > unified. > I like unification, but I think I'd prefer we create wrappers and try to avoid callers needing to construct hart_isa[].isa parameters themselves. I'm also not a big fan of the NULL parameter needed when riscv_isa_extension_available() is invoked for the riscv_isa bitmap. So we need: 1. check if an extension is in riscv_isa 2. check if an extension is in a bitmap provided by the caller 3. check if an extension is in this cpu's isa bitmap 4. check if an extension is in the isa bitmap of a cpu provided by the caller The only one we can optimize with alternatives is (1), so it definitely gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can also get wrappers which first try the optimized (1), like I have above. Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers for (4) static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) { if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) return true; return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); } static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) { if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) return true; return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); } and then use smp_processor_id() directly in the callers that need to check this_cpu's extensions. For case (2), I'd advocate we rename __riscv_isa_extension_available() to riscv_has_extension() and drop the riscv_isa_extension_available() macro in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and others that rely on the pasting. And, ideally, we'd convert most riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely(). Thanks, drew
On Wed, Aug 09, 2023 at 06:58:15PM +0200, Andrew Jones wrote: > On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote: > > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote: > ... > > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext) > > > +{ > > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > > > + return true; > > > + > > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > > +} > > > + > > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext) > > > +{ > > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > > > + return true; > > > + > > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > > +} > > > > Another way to do this would be to add a parameter to > > riscv_has_extension_*() (as there are very few users), then these new > > functions can turn around and call those with the new parameter set to > > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The > > only advantage to it I can argue is it keeps the code flows more > > unified. > > > > I like unification, but I think I'd prefer we create wrappers and > try to avoid callers needing to construct hart_isa[].isa parameters > themselves. I'm also not a big fan of the NULL parameter needed when > riscv_isa_extension_available() is invoked for the riscv_isa bitmap. > So we need: > > 1. check if an extension is in riscv_isa > 2. check if an extension is in a bitmap provided by the caller > 3. check if an extension is in this cpu's isa bitmap > 4. check if an extension is in the isa bitmap of a cpu provided by the > caller > > The only one we can optimize with alternatives is (1), so it definitely > gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can > also get wrappers which first try the optimized (1), like I have above. > Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers > for (4) > > static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) > { > if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > return true; > > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > } > > static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) > { > if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) Why are you gating on CONFIG_RISCV_ALTERNATIVE here? > return true; > > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > } > > and then use smp_processor_id() directly in the callers that need > to check this_cpu's extensions. > > For case (2), I'd advocate we rename __riscv_isa_extension_available() to > riscv_has_extension() and drop the riscv_isa_extension_available() macro > in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and > others that rely on the pasting. > And, ideally, we'd convert most > riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely(). > I'm also not a big fan of the NULL parameter needed when > riscv_isa_extension_available() is invoked for the riscv_isa bitmap Rather than actually act on my concerns about __riscv_isa_extension_available(), I've been busy devoting my spare time to playing MMOs with the excuse of not wanting to fiddle further with cpufeature.c et al until Palmer merged the new DT property stuff, but splitting out your case 1 above seems like it would really help there. The NULL argument case is the one I think has the potential to be a footgun in the face of config options. Split out we can document that purpose of each function & hopefully have one set of functions that deals with "this extension was detected to be present in the hardware" and one that does "this extension was detected & supported by this particular kernel". I'll try to take a proper look at this series tomorrow :) Cheers, Conor.
On Wed, Aug 9, 2023 at 9:58 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote: > > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote: > ... > > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext) > > > +{ > > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > > > + return true; > > > + > > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > > +} > > > + > > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext) > > > +{ > > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > > > + return true; > > > + > > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > > +} > > > > Another way to do this would be to add a parameter to > > riscv_has_extension_*() (as there are very few users), then these new > > functions can turn around and call those with the new parameter set to > > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The > > only advantage to it I can argue is it keeps the code flows more > > unified. > > > > I like unification, but I think I'd prefer we create wrappers and > try to avoid callers needing to construct hart_isa[].isa parameters > themselves. I'm also not a big fan of the NULL parameter needed when > riscv_isa_extension_available() is invoked for the riscv_isa bitmap. > So we need: > > 1. check if an extension is in riscv_isa > 2. check if an extension is in a bitmap provided by the caller > 3. check if an extension is in this cpu's isa bitmap > 4. check if an extension is in the isa bitmap of a cpu provided by the > caller > > The only one we can optimize with alternatives is (1), so it definitely > gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can > also get wrappers which first try the optimized (1), like I have above. > Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers > for (4) > > static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) > { > if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > return true; > > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > } > > static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) > { > if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > return true; > > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > } > > and then use smp_processor_id() directly in the callers that need > to check this_cpu's extensions. > > For case (2), I'd advocate we rename __riscv_isa_extension_available() to > riscv_has_extension() and drop the riscv_isa_extension_available() macro > in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and > others that rely on the pasting. And, ideally, we'd convert most > riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely(). Sounds ok to me! -Evan
On Wed, Aug 09, 2023 at 07:12:58PM +0100, Conor Dooley wrote: > On Wed, Aug 09, 2023 at 06:58:15PM +0200, Andrew Jones wrote: > > On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote: > > > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > ... > > > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext) > > > > +{ > > > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > > > > + return true; > > > > + > > > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > > > +} > > > > + > > > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext) > > > > +{ > > > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > > > > + return true; > > > > + > > > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > > > +} > > > > > > Another way to do this would be to add a parameter to > > > riscv_has_extension_*() (as there are very few users), then these new > > > functions can turn around and call those with the new parameter set to > > > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The > > > only advantage to it I can argue is it keeps the code flows more > > > unified. > > > > > > > I like unification, but I think I'd prefer we create wrappers and > > try to avoid callers needing to construct hart_isa[].isa parameters > > themselves. I'm also not a big fan of the NULL parameter needed when > > riscv_isa_extension_available() is invoked for the riscv_isa bitmap. > > So we need: > > > > 1. check if an extension is in riscv_isa > > 2. check if an extension is in a bitmap provided by the caller > > 3. check if an extension is in this cpu's isa bitmap > > 4. check if an extension is in the isa bitmap of a cpu provided by the > > caller > > > > The only one we can optimize with alternatives is (1), so it definitely > > gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can > > also get wrappers which first try the optimized (1), like I have above. > > Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers > > for (4) > > > > static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) > > { > > if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > > return true; > > > > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > > } > > > > static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) > > { > > if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > > Why are you gating on CONFIG_RISCV_ALTERNATIVE here? This ensures we remove the riscv_has_extension_[un]likely() call when that call would end up using its __riscv_isa_extension_available(NULL, ext) fallback. If that fallback where to return false, then we'd still need to make the __riscv_isa_extension_available(hart_isa[cpu].isa, ext) call, doubling the cost. Whereas, when we gate on CONFIG_RISCV_ALTERNATIVE, we know that riscv_has_extension_[un]likely() will use an alternative to check the global set of extensions. When the extension is there, the compiler ensures that everything vanishes. When it's not, we'll fallback to a single search of the cpu's isa bitmap. > > > return true; > > > > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > > } > > > > and then use smp_processor_id() directly in the callers that need > > to check this_cpu's extensions. > > > > For case (2), I'd advocate we rename __riscv_isa_extension_available() to > > riscv_has_extension() and drop the riscv_isa_extension_available() macro > > in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and > > others that rely on the pasting. > > > And, ideally, we'd convert most > > riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely(). > > > I'm also not a big fan of the NULL parameter needed when > > riscv_isa_extension_available() is invoked for the riscv_isa bitmap > > Rather than actually act on my concerns about > __riscv_isa_extension_available(), I've been busy devoting my spare > time to playing MMOs with the excuse of not wanting to fiddle further > with cpufeature.c et al until Palmer merged the new DT property stuff, > but splitting out your case 1 above seems like it would really help > there. The NULL argument case is the one I think has the potential to > be a footgun in the face of config options. > Split out we can document that purpose of each function & hopefully > have one set of functions that deals with "this extension was detected > to be present in the hardware" and one that does "this extension was > detected & supported by this particular kernel". Sounds good to me! > > I'll try to take a proper look at this series tomorrow :) > Thanks! drew
On Thu, Aug 10, 2023 at 09:31:54AM +0200, Andrew Jones wrote: > On Wed, Aug 09, 2023 at 07:12:58PM +0100, Conor Dooley wrote: > > On Wed, Aug 09, 2023 at 06:58:15PM +0200, Andrew Jones wrote: > > > On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote: > > > > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > > ... > > > > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext) > > > > > +{ > > > > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > > > > > + return true; > > > > > + > > > > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > > > > +} > > > > > + > > > > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext) > > > > > +{ > > > > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > > > > > + return true; > > > > > + > > > > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > > > > +} > > > > > > > > Another way to do this would be to add a parameter to > > > > riscv_has_extension_*() (as there are very few users), then these new > > > > functions can turn around and call those with the new parameter set to > > > > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The > > > > only advantage to it I can argue is it keeps the code flows more > > > > unified. > > > > > > > > > > I like unification, but I think I'd prefer we create wrappers and > > > try to avoid callers needing to construct hart_isa[].isa parameters > > > themselves. I'm also not a big fan of the NULL parameter needed when > > > riscv_isa_extension_available() is invoked for the riscv_isa bitmap. > > > So we need: > > > > > > 1. check if an extension is in riscv_isa > > > 2. check if an extension is in a bitmap provided by the caller > > > 3. check if an extension is in this cpu's isa bitmap > > > 4. check if an extension is in the isa bitmap of a cpu provided by the > > > caller > > > > > > The only one we can optimize with alternatives is (1), so it definitely > > > gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can > > > also get wrappers which first try the optimized (1), like I have above. > > > Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers > > > for (4) > > > > > > static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) > > > { > > > if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > > > return true; > > > > > > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > > > } > > > > > > static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) > > > { > > > if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > > > > Why are you gating on CONFIG_RISCV_ALTERNATIVE here? > > This ensures we remove the riscv_has_extension_[un]likely() call > when that call would end up using its > __riscv_isa_extension_available(NULL, ext) fallback. If that fallback > where to return false, then we'd still need to make the > __riscv_isa_extension_available(hart_isa[cpu].isa, ext) call, doubling > the cost. Whereas, when we gate on CONFIG_RISCV_ALTERNATIVE, we know that > riscv_has_extension_[un]likely() will use an alternative to check the > global set of extensions. When the extension is there, the compiler > ensures that everything vanishes. When it's not, we'll fallback to a > single search of the cpu's isa bitmap. Right, that is what I suspected that you were trying to accomplish here. I was not just not entirely sure whether it was or you'd just missed the fallback path. In my original mail I was just going to say "Please add a comment here as to why you want to avoid the fallback", but figured I should figure out your intent first! Just to note, alternatives are available on all !XIP kernels now, so it's only in the case that the fallback path will be activated. > > > return true; > > > > > > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > > > } > > > > > > and then use smp_processor_id() directly in the callers that need > > > to check this_cpu's extensions. > > > > > > For case (2), I'd advocate we rename __riscv_isa_extension_available() to > > > riscv_has_extension() and drop the riscv_isa_extension_available() macro > > > in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and > > > others that rely on the pasting. > > > > > And, ideally, we'd convert most > > > riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely(). > > > > > I'm also not a big fan of the NULL parameter needed when > > > riscv_isa_extension_available() is invoked for the riscv_isa bitmap > > > > Rather than actually act on my concerns about > > __riscv_isa_extension_available(), I've been busy devoting my spare > > time to playing MMOs with the excuse of not wanting to fiddle further > > with cpufeature.c et al until Palmer merged the new DT property stuff, > > but splitting out your case 1 above seems like it would really help > > there. The NULL argument case is the one I think has the potential to > > be a footgun in the face of config options. > > Split out we can document that purpose of each function & hopefully > > have one set of functions that deals with "this extension was detected > > to be present in the hardware" and one that does "this extension was > > detected & supported by this particular kernel". > > Sounds good to me! I figure said change should be independent of what's going on in this series?
On Thu, Aug 10, 2023 at 10:34:33AM +0100, Conor Dooley wrote: > On Thu, Aug 10, 2023 at 09:31:54AM +0200, Andrew Jones wrote: > > On Wed, Aug 09, 2023 at 07:12:58PM +0100, Conor Dooley wrote: > > > On Wed, Aug 09, 2023 at 06:58:15PM +0200, Andrew Jones wrote: > > > > On Wed, Aug 09, 2023 at 09:00:35AM -0700, Evan Green wrote: > > > > > On Wed, Aug 9, 2023 at 4:55 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > > > ... > > > > > > +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext) > > > > > > +{ > > > > > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > > > > > > + return true; > > > > > > + > > > > > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > > > > > +} > > > > > > + > > > > > > +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext) > > > > > > +{ > > > > > > + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > > > > > > + return true; > > > > > > + > > > > > > + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); > > > > > > +} > > > > > > > > > > Another way to do this would be to add a parameter to > > > > > riscv_has_extension_*() (as there are very few users), then these new > > > > > functions can turn around and call those with the new parameter set to > > > > > hart_isa[smp_processor_id()].isa. It's a tossup, so up to you. The > > > > > only advantage to it I can argue is it keeps the code flows more > > > > > unified. > > > > > > > > > > > > > I like unification, but I think I'd prefer we create wrappers and > > > > try to avoid callers needing to construct hart_isa[].isa parameters > > > > themselves. I'm also not a big fan of the NULL parameter needed when > > > > riscv_isa_extension_available() is invoked for the riscv_isa bitmap. > > > > So we need: > > > > > > > > 1. check if an extension is in riscv_isa > > > > 2. check if an extension is in a bitmap provided by the caller > > > > 3. check if an extension is in this cpu's isa bitmap > > > > 4. check if an extension is in the isa bitmap of a cpu provided by the > > > > caller > > > > > > > > The only one we can optimize with alternatives is (1), so it definitely > > > > gets wrappers (riscv_has_extension_likely/unlikely()). (3) and (4) can > > > > also get wrappers which first try the optimized (1), like I have above. > > > > Actually (3)'s wrapper could be based on (4)'s, or only provide wrappers > > > > for (4) > > > > > > > > static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext) > > > > { > > > > if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) > > > > return true; > > > > > > > > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > > > > } > > > > > > > > static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext) > > > > { > > > > if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) > > > > > > Why are you gating on CONFIG_RISCV_ALTERNATIVE here? > > > > This ensures we remove the riscv_has_extension_[un]likely() call > > when that call would end up using its > > __riscv_isa_extension_available(NULL, ext) fallback. If that fallback > > where to return false, then we'd still need to make the > > __riscv_isa_extension_available(hart_isa[cpu].isa, ext) call, doubling > > the cost. Whereas, when we gate on CONFIG_RISCV_ALTERNATIVE, we know that > > riscv_has_extension_[un]likely() will use an alternative to check the > > global set of extensions. When the extension is there, the compiler > > ensures that everything vanishes. When it's not, we'll fallback to a > > single search of the cpu's isa bitmap. > > Right, that is what I suspected that you were trying to accomplish here. > I was not just not entirely sure whether it was or you'd just missed the > fallback path. In my original mail I was just going to say "Please add a > comment here as to why you want to avoid the fallback", but figured I > should figure out your intent first! Thanks, I'll add a comment for v2. > > Just to note, alternatives are available on all !XIP kernels now, so > it's only in the case that the fallback path will be activated. > > > > > return true; > > > > > > > > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > > > > } > > > > > > > > and then use smp_processor_id() directly in the callers that need > > > > to check this_cpu's extensions. > > > > > > > > For case (2), I'd advocate we rename __riscv_isa_extension_available() to > > > > riscv_has_extension() and drop the riscv_isa_extension_available() macro > > > > in order to avoid having some calls with RISCV_ISA_EXT_* spelled out and > > > > others that rely on the pasting. > > > > > > > And, ideally, we'd convert most > > > > riscv_has_extension(NULL, ext) calls to riscv_has_extension_[un]likely(). > > > > > > > I'm also not a big fan of the NULL parameter needed when > > > > riscv_isa_extension_available() is invoked for the riscv_isa bitmap > > > > > > Rather than actually act on my concerns about > > > __riscv_isa_extension_available(), I've been busy devoting my spare > > > time to playing MMOs with the excuse of not wanting to fiddle further > > > with cpufeature.c et al until Palmer merged the new DT property stuff, > > > but splitting out your case 1 above seems like it would really help > > > there. The NULL argument case is the one I think has the potential to > > > be a footgun in the face of config options. > > > Split out we can document that purpose of each function & hopefully > > > have one set of functions that deals with "this extension was detected > > > to be present in the hardware" and one that does "this extension was > > > detected & supported by this particular kernel". > > > > Sounds good to me! > > I figure said change should be independent of what's going on in this > series? Yeah, I considered doing it as part of this series, but then decided to only start down the path with two new wrappers, which, for v2, I'll change to the cpu parameter taking variant. The rest of the rework would probably be best to do as a separate series, which I can start soon. Thanks, drew
On Thu, Aug 10, 2023 at 12:54:56PM +0200, Andrew Jones wrote: > On Thu, Aug 10, 2023 at 10:34:33AM +0100, Conor Dooley wrote: > > I figure said change should be independent of what's going on in this > > series? > > Yeah, I considered doing it as part of this series, but then decided to > only start down the path with two new wrappers, which, for v2, I'll > change to the cpu parameter taking variant. The rest of the rework > would probably be best to do as a separate series, which I can start > soon. If you're offering, who am I to refuse!
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 23fed53b8815..788fd575c21a 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -30,4 +30,6 @@ DECLARE_PER_CPU(long, misaligned_access_speed); /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; +void riscv_user_isa_enable(void); + #endif diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 7bac43a3176e..e187e76e3df4 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -273,6 +273,7 @@ #define CSR_SIE 0x104 #define CSR_STVEC 0x105 #define CSR_SCOUNTEREN 0x106 +#define CSR_SENVCFG 0x10a #define CSR_SSCRATCH 0x140 #define CSR_SEPC 0x141 #define CSR_SCAUSE 0x142 diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index f041bfa7f6a0..4929faecb75f 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -66,6 +66,7 @@ #ifndef __ASSEMBLY__ #include <linux/jump_label.h> +#include <asm/cpufeature.h> unsigned long riscv_get_elf_hwcap(void); @@ -130,6 +131,21 @@ riscv_has_extension_unlikely(const unsigned long ext) return true; } +static __always_inline bool riscv_this_cpu_has_extension_likely(const unsigned long ext) +{ + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext)) + return true; + + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); +} + +static __always_inline bool riscv_this_cpu_has_extension_unlikely(const unsigned long ext) +{ + if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext)) + return true; + + return __riscv_isa_extension_available(hart_isa[smp_processor_id()].isa, ext); +} #endif #endif /* _ASM_RISCV_HWCAP_H */ diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 31843e9cc80c..fc0bf300acc7 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -391,6 +391,12 @@ unsigned long riscv_get_elf_hwcap(void) return hwcap; } +void riscv_user_isa_enable(void) +{ + if (riscv_this_cpu_has_extension_unlikely(RISCV_ISA_EXT_ZICBOZ)) + csr_set(CSR_SENVCFG, ENVCFG_CBZE); +} + #ifdef CONFIG_RISCV_ALTERNATIVE /* * Alternative patch sites consider 48 bits when determining when to patch diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 971fe776e2f8..2f053f0763a1 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -25,6 +25,7 @@ #include <asm/acpi.h> #include <asm/alternative.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/early_ioremap.h> #include <asm/pgtable.h> @@ -308,9 +309,12 @@ void __init setup_arch(char **cmdline_p) riscv_fill_hwcap(); init_rt_signal_env(); apply_boot_alternatives(); + if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) riscv_noncoherent_supported(); + + riscv_user_isa_enable(); } static int __init topology_init(void) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index f4d6acb38dd0..502b04abda0b 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -25,6 +25,8 @@ #include <linux/of.h> #include <linux/sched/task_stack.h> #include <linux/sched/mm.h> + +#include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/irq.h> #include <asm/mmu_context.h> @@ -252,6 +254,8 @@ asmlinkage __visible void smp_callin(void) elf_hwcap &= ~COMPAT_HWCAP_ISA_V; } + riscv_user_isa_enable(); + /* * Remote TLB flushes are ignored while the CPU is offline, so emit * a local TLB flush right now just in case.
When Zicboz is present, enable its instruction (cbo.zero) in usermode by setting its respective senvcfg bit. We don't bother trying to set this bit per-task, which would also require an interface for tasks to request enabling and/or disabling. Instead, permanently set the bit for each hart which has the extension when bringing it online. Signed-off-by: Andrew Jones <ajones@ventanamicro.com> --- arch/riscv/include/asm/cpufeature.h | 2 ++ arch/riscv/include/asm/csr.h | 1 + arch/riscv/include/asm/hwcap.h | 16 ++++++++++++++++ arch/riscv/kernel/cpufeature.c | 6 ++++++ arch/riscv/kernel/setup.c | 4 ++++ arch/riscv/kernel/smpboot.c | 4 ++++ 6 files changed, 33 insertions(+)