Message ID | 20240319215915.832127-6-samuel.holland@sifive.com (mailing list archive) |
---|---|
State | RFC |
Headers | show |
Series | riscv: Userspace pointer masking and tagged address ABI | expand |
On Tue, Mar 19, 2024 at 2:59 PM Samuel Holland via lists.riscv.org <samuel.holland=sifive.com@lists.riscv.org> wrote: > > Some envcfg bits need to be controlled on a per-thread basis, such as > the pointer masking mode. However, the envcfg CSR value cannot simply be > stored in struct thread_struct, because some hardware may implement a > different subset of envcfg CSR bits is across CPUs. As a result, we need > to combine the per-CPU and per-thread bits whenever we switch threads. > Why not do something like this diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index b3400517b0a9..01ba87954da2 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -202,6 +202,8 @@ #define ENVCFG_CBIE_FLUSH _AC(0x1, UL) #define ENVCFG_CBIE_INV _AC(0x3, UL) #define ENVCFG_FIOM _AC(0x1, UL) +/* by default all threads should be able to zero cache */ +#define ENVCFG_BASE ENVCFG_CBZE /* Smstateen bits */ #define SMSTATEEN0_AIA_IMSIC_SHIFT 58 diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 4f21d970a129..2420123444c4 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -152,6 +152,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, else regs->status |= SR_UXL_64; #endif + current->thread_info.envcfg = ENVCFG_BASE; } And instead of context switching in `_switch_to`, In `entry.S` pick up `envcfg` from `thread_info` and write it into CSR. This construction avoids - declaring per cpu riscv_cpu_envcfg - syncing up - collection of *envcfg bits. > Signed-off-by: Samuel Holland <samuel.holland@sifive.com> > --- > > arch/riscv/include/asm/cpufeature.h | 2 ++ > arch/riscv/include/asm/processor.h | 1 + > arch/riscv/include/asm/switch_to.h | 12 ++++++++++++ > arch/riscv/kernel/cpufeature.c | 4 +++- > 4 files changed, 18 insertions(+), 1 deletion(-) > > diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h > index 0bd11862b760..b1ad8d0b4599 100644 > --- a/arch/riscv/include/asm/cpufeature.h > +++ b/arch/riscv/include/asm/cpufeature.h > @@ -33,6 +33,8 @@ DECLARE_PER_CPU(long, misaligned_access_speed); > /* Per-cpu ISA extensions. */ > extern struct riscv_isainfo hart_isa[NR_CPUS]; > > +DECLARE_PER_CPU(unsigned long, riscv_cpu_envcfg); > + > void riscv_user_isa_enable(void); > > #ifdef CONFIG_RISCV_MISALIGNED > diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > index a8509cc31ab2..06b87402a4d8 100644 > --- a/arch/riscv/include/asm/processor.h > +++ b/arch/riscv/include/asm/processor.h > @@ -118,6 +118,7 @@ struct thread_struct { > unsigned long s[12]; /* s[0]: frame pointer */ > struct __riscv_d_ext_state fstate; > unsigned long bad_cause; > + unsigned long envcfg; > u32 riscv_v_flags; > u32 vstate_ctrl; > struct __riscv_v_ext_state vstate; > diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h > index 7efdb0584d47..256a354a5c4a 100644 > --- a/arch/riscv/include/asm/switch_to.h > +++ b/arch/riscv/include/asm/switch_to.h > @@ -69,6 +69,17 @@ static __always_inline bool has_fpu(void) { return false; } > #define __switch_to_fpu(__prev, __next) do { } while (0) > #endif > > +static inline void sync_envcfg(struct task_struct *task) > +{ > + csr_write(CSR_ENVCFG, this_cpu_read(riscv_cpu_envcfg) | task->thread.envcfg); > +} > + > +static inline void __switch_to_envcfg(struct task_struct *next) > +{ > + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) I've seen `riscv_cpu_has_extension_unlikely` generating branchy code even if ALTERNATIVES was turned on. Can you check disasm on your end as well. IMHO, `entry.S` is a better place to pick up *envcfg. > + sync_envcfg(next); > +} > + > extern struct task_struct *__switch_to(struct task_struct *, > struct task_struct *); > > @@ -80,6 +91,7 @@ do { \ > __switch_to_fpu(__prev, __next); \ > if (has_vector()) \ > __switch_to_vector(__prev, __next); \ > + __switch_to_envcfg(__next); \ > ((last) = __switch_to(__prev, __next)); \ > } while (0) > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > index d1846aab1f78..32aaaf41f8a8 100644 > --- a/arch/riscv/kernel/cpufeature.c > +++ b/arch/riscv/kernel/cpufeature.c > @@ -44,6 +44,8 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; > /* Per-cpu ISA extensions. */ > struct riscv_isainfo hart_isa[NR_CPUS]; > > +DEFINE_PER_CPU(unsigned long, riscv_cpu_envcfg); > + > /* Performance information */ > DEFINE_PER_CPU(long, misaligned_access_speed); > > @@ -978,7 +980,7 @@ arch_initcall(check_unaligned_access_all_cpus); > void riscv_user_isa_enable(void) > { > if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) > - csr_set(CSR_ENVCFG, ENVCFG_CBZE); > + this_cpu_or(riscv_cpu_envcfg, ENVCFG_CBZE); > } > > #ifdef CONFIG_RISCV_ALTERNATIVE > -- > 2.43.1 > > > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this group. > View/Reply Online (#659): https://lists.riscv.org/g/tech-j-ext/message/659 > Mute This Topic: https://lists.riscv.org/mt/105033914/7300952 > Group Owner: tech-j-ext+owner@lists.riscv.org > Unsubscribe: https://lists.riscv.org/g/tech-j-ext/unsub [debug@rivosinc.com] > -=-=-=-=-=-=-=-=-=-=-=- > >
Hi Deepak, On 2024-03-19 6:55 PM, Deepak Gupta wrote: > On Tue, Mar 19, 2024 at 2:59 PM Samuel Holland via lists.riscv.org > <samuel.holland=sifive.com@lists.riscv.org> wrote: >> >> Some envcfg bits need to be controlled on a per-thread basis, such as >> the pointer masking mode. However, the envcfg CSR value cannot simply be >> stored in struct thread_struct, because some hardware may implement a >> different subset of envcfg CSR bits is across CPUs. As a result, we need >> to combine the per-CPU and per-thread bits whenever we switch threads. >> > > Why not do something like this > > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h > index b3400517b0a9..01ba87954da2 100644 > --- a/arch/riscv/include/asm/csr.h > +++ b/arch/riscv/include/asm/csr.h > @@ -202,6 +202,8 @@ > #define ENVCFG_CBIE_FLUSH _AC(0x1, UL) > #define ENVCFG_CBIE_INV _AC(0x3, UL) > #define ENVCFG_FIOM _AC(0x1, UL) > +/* by default all threads should be able to zero cache */ > +#define ENVCFG_BASE ENVCFG_CBZE Linux does not assume Sstrict, so without Zicboz being present in DT/ACPI, we have no idea what the CBZE bit does--there's no guarantee it has the standard meaning--so it's not safe to set the bit unconditionally. If that policy changes, we could definitely simplify the code. > /* Smstateen bits */ > #define SMSTATEEN0_AIA_IMSIC_SHIFT 58 > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c > index 4f21d970a129..2420123444c4 100644 > --- a/arch/riscv/kernel/process.c > +++ b/arch/riscv/kernel/process.c > @@ -152,6 +152,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, > else > regs->status |= SR_UXL_64; > #endif > + current->thread_info.envcfg = ENVCFG_BASE; > } > > And instead of context switching in `_switch_to`, > In `entry.S` pick up `envcfg` from `thread_info` and write it into CSR. The immediate reason is that writing envcfg in ret_from_exception() adds cycles to every IRQ and system call exit, even though most of them will not change the envcfg value. This is especially the case when returning from an IRQ/exception back to S-mode, since envcfg has zero effect there. The CSRs that are read/written in entry.S are generally those where the value can be updated by hardware, as part of taking an exception. But envcfg never changes on its own. The kernel knows exactly when its value will change, and those places are: 1) Task switch, i.e. switch_to() 2) execve(), i.e. start_thread() or flush_thread() 3) A system call that specifically affects a feature controlled by envcfg So that's where this series writes it. There are a couple of minor tradeoffs about when exactly to do the write: - We could drop the sync_envcfg() calls outside of switch_to() by reading the current CSR value when scheduling out a thread, but again that adds overhead to the fast path to remove a tiny bit of code in the prctl() handlers. - We don't need to write envcfg when switching to a kernel thread, only when switching to a user thread, because kernel threads never leave S-mode, so envcfg doesn't affect them. But checking the thread type takes many more instructions than just writing the CSR. Overall, the optimal implementation will approximate the rule of only writing envcfg when its value changes. > This construction avoids > - declaring per cpu riscv_cpu_envcfg This is really a separate concern than when we write envcfg. The per-CPU variable is only necessary to support hardware where a subset of harts support Zicboz. Since the riscv_cpu_has_extension_[un]likely() helpers were added specifically for Zicboz, I assume this is an important use case, and dropping support for this hardware would be a regression. After all, hwprobe() allows userspace to see that Zicboz is implemented at a per-CPU level. Maybe Andrew can weigh in on that. If we decide to enable Zicboz only when all harts support it, or we decide it's safe to attempt to set the envcfg.CBZE bit on harts that do not declare support for Zicboz, then we could drop the percpu variable. > - syncing up > - collection of *envcfg bits. > > >> Signed-off-by: Samuel Holland <samuel.holland@sifive.com> >> --- >> >> arch/riscv/include/asm/cpufeature.h | 2 ++ >> arch/riscv/include/asm/processor.h | 1 + >> arch/riscv/include/asm/switch_to.h | 12 ++++++++++++ >> arch/riscv/kernel/cpufeature.c | 4 +++- >> 4 files changed, 18 insertions(+), 1 deletion(-) >> >> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h >> index 0bd11862b760..b1ad8d0b4599 100644 >> --- a/arch/riscv/include/asm/cpufeature.h >> +++ b/arch/riscv/include/asm/cpufeature.h >> @@ -33,6 +33,8 @@ DECLARE_PER_CPU(long, misaligned_access_speed); >> /* Per-cpu ISA extensions. */ >> extern struct riscv_isainfo hart_isa[NR_CPUS]; >> >> +DECLARE_PER_CPU(unsigned long, riscv_cpu_envcfg); >> + >> void riscv_user_isa_enable(void); >> >> #ifdef CONFIG_RISCV_MISALIGNED >> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h >> index a8509cc31ab2..06b87402a4d8 100644 >> --- a/arch/riscv/include/asm/processor.h >> +++ b/arch/riscv/include/asm/processor.h >> @@ -118,6 +118,7 @@ struct thread_struct { >> unsigned long s[12]; /* s[0]: frame pointer */ >> struct __riscv_d_ext_state fstate; >> unsigned long bad_cause; >> + unsigned long envcfg; >> u32 riscv_v_flags; >> u32 vstate_ctrl; >> struct __riscv_v_ext_state vstate; >> diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h >> index 7efdb0584d47..256a354a5c4a 100644 >> --- a/arch/riscv/include/asm/switch_to.h >> +++ b/arch/riscv/include/asm/switch_to.h >> @@ -69,6 +69,17 @@ static __always_inline bool has_fpu(void) { return false; } >> #define __switch_to_fpu(__prev, __next) do { } while (0) >> #endif >> >> +static inline void sync_envcfg(struct task_struct *task) >> +{ >> + csr_write(CSR_ENVCFG, this_cpu_read(riscv_cpu_envcfg) | task->thread.envcfg); >> +} >> + >> +static inline void __switch_to_envcfg(struct task_struct *next) >> +{ >> + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) > > I've seen `riscv_cpu_has_extension_unlikely` generating branchy code > even if ALTERNATIVES was turned on. > Can you check disasm on your end as well. IMHO, `entry.S` is a better > place to pick up *envcfg. The branchiness is sort of expected, since that function is implemented by switching on/off a branch instruction, so the alternate code is necessarily a separate basic block. It's a tradeoff so we don't have to write assembly code for every bit of code that depends on an extension. However, the cost should be somewhat lowered since the branch is unconditional and so entirely predictable. If the branch turns out to be problematic for performance, then we could use ALTERNATIVE directly in sync_envcfg() to NOP out the CSR write. >> + sync_envcfg(next); >> +} >> + >> extern struct task_struct *__switch_to(struct task_struct *, >> struct task_struct *); >> >> @@ -80,6 +91,7 @@ do { \ >> __switch_to_fpu(__prev, __next); \ >> if (has_vector()) \ >> __switch_to_vector(__prev, __next); \ >> + __switch_to_envcfg(__next); \ >> ((last) = __switch_to(__prev, __next)); \ >> } while (0) >> >> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c >> index d1846aab1f78..32aaaf41f8a8 100644 >> --- a/arch/riscv/kernel/cpufeature.c >> +++ b/arch/riscv/kernel/cpufeature.c >> @@ -44,6 +44,8 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; >> /* Per-cpu ISA extensions. */ >> struct riscv_isainfo hart_isa[NR_CPUS]; >> >> +DEFINE_PER_CPU(unsigned long, riscv_cpu_envcfg); >> + >> /* Performance information */ >> DEFINE_PER_CPU(long, misaligned_access_speed); >> >> @@ -978,7 +980,7 @@ arch_initcall(check_unaligned_access_all_cpus); >> void riscv_user_isa_enable(void) >> { >> if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) >> - csr_set(CSR_ENVCFG, ENVCFG_CBZE); >> + this_cpu_or(riscv_cpu_envcfg, ENVCFG_CBZE); If we drop the percpu variable, this becomes if (riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOZ)) current->thread.envcfg |= ENVCFG_CBZE; since the init thread's envcfg gets copied to all other threads via fork(), and we can drop the call to riscv_user_isa_enable() from smp_callin(). Or if we decide CBZE is always safe to set, then the function is even simpler: current->thread.envcfg = ENVCFG_CBZE; Regards, Samuel >> } >> >> #ifdef CONFIG_RISCV_ALTERNATIVE >> -- >> 2.43.1
Hi Samuel, Thanks for your response. On Tue, Mar 19, 2024 at 7:21 PM Samuel Holland <samuel.holland@sifive.com> wrote: > > Hi Deepak, > > On 2024-03-19 6:55 PM, Deepak Gupta wrote: > > On Tue, Mar 19, 2024 at 2:59 PM Samuel Holland via lists.riscv.org > > <samuel.holland=sifive.com@lists.riscv.org> wrote: > >> > >> Some envcfg bits need to be controlled on a per-thread basis, such as > >> the pointer masking mode. However, the envcfg CSR value cannot simply be > >> stored in struct thread_struct, because some hardware may implement a > >> different subset of envcfg CSR bits is across CPUs. As a result, we need > >> to combine the per-CPU and per-thread bits whenever we switch threads. > >> > > > > Why not do something like this > > > > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h > > index b3400517b0a9..01ba87954da2 100644 > > --- a/arch/riscv/include/asm/csr.h > > +++ b/arch/riscv/include/asm/csr.h > > @@ -202,6 +202,8 @@ > > #define ENVCFG_CBIE_FLUSH _AC(0x1, UL) > > #define ENVCFG_CBIE_INV _AC(0x3, UL) > > #define ENVCFG_FIOM _AC(0x1, UL) > > +/* by default all threads should be able to zero cache */ > > +#define ENVCFG_BASE ENVCFG_CBZE > > Linux does not assume Sstrict, so without Zicboz being present in DT/ACPI, we > have no idea what the CBZE bit does--there's no guarantee it has the standard > meaning--so it's not safe to set the bit unconditionally. If that policy > changes, we could definitely simplify the code. > Yeah, it makes sense. > > /* Smstateen bits */ > > #define SMSTATEEN0_AIA_IMSIC_SHIFT 58 > > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c > > index 4f21d970a129..2420123444c4 100644 > > --- a/arch/riscv/kernel/process.c > > +++ b/arch/riscv/kernel/process.c > > @@ -152,6 +152,7 @@ void start_thread(struct pt_regs *regs, unsigned long pc, > > else > > regs->status |= SR_UXL_64; > > #endif > > + current->thread_info.envcfg = ENVCFG_BASE; > > } > > > > And instead of context switching in `_switch_to`, > > In `entry.S` pick up `envcfg` from `thread_info` and write it into CSR. > > The immediate reason is that writing envcfg in ret_from_exception() adds cycles > to every IRQ and system call exit, even though most of them will not change the > envcfg value. This is especially the case when returning from an IRQ/exception > back to S-mode, since envcfg has zero effect there. > > The CSRs that are read/written in entry.S are generally those where the value > can be updated by hardware, as part of taking an exception. But envcfg never > changes on its own. The kernel knows exactly when its value will change, and > those places are: > > 1) Task switch, i.e. switch_to() > 2) execve(), i.e. start_thread() or flush_thread() > 3) A system call that specifically affects a feature controlled by envcfg Yeah I was optimizing for a single place to write instead of sprinkling at multiple places. But I see your argument. That's fine. > > So that's where this series writes it. There are a couple of minor tradeoffs > about when exactly to do the write: > > - We could drop the sync_envcfg() calls outside of switch_to() by reading the > current CSR value when scheduling out a thread, but again that adds overhead > to the fast path to remove a tiny bit of code in the prctl() handlers. > - We don't need to write envcfg when switching to a kernel thread, only when > switching to a user thread, because kernel threads never leave S-mode, so > envcfg doesn't affect them. But checking the thread type takes many more > instructions than just writing the CSR. > > Overall, the optimal implementation will approximate the rule of only writing > envcfg when its value changes. > > > This construction avoids > > - declaring per cpu riscv_cpu_envcfg > > This is really a separate concern than when we write envcfg. The per-CPU > variable is only necessary to support hardware where a subset of harts support > Zicboz. Since the riscv_cpu_has_extension_[un]likely() helpers were added > specifically for Zicboz, I assume this is an important use case, and dropping > support for this hardware would be a regression. After all, hwprobe() allows > userspace to see that Zicboz is implemented at a per-CPU level. Maybe Andrew can > weigh in on that. I am not sure of the practicality of this heterogeneity for Zicboz and for that matter any of the upcoming features that'll be enabled via senvcfg (control flow integrity, pointer masking, etc). As an example if cache zeroing instructions are used by app binary, I expect it to be used in following manner - Explicitly inserting cbo.zero by application developer - Some compiler flag which ensures that structures larger than cache line gets zeroed by cbo.zero In either of the cases, the developer is not expecting to target it to a specific hart on SoC and instead expect it to work. There might be libraries (installed via sudo apt get) with cache zero support in them which may run in different address spaces. Should the library be aware of the CPU on which it's running. Now whoever is running these binaries should be aware which CPUs they get assigned to in order to avoid faults? That seems excessive, doesn't it? > > If we decide to enable Zicboz only when all harts support it, or we decide it's > safe to attempt to set the envcfg.CBZE bit on harts that do not declare support > for Zicboz, then we could drop the percpu variable. > > > - syncing up > > - collection of *envcfg bits. > > > > > >> Signed-off-by: Samuel Holland <samuel.holland@sifive.com> > >> --- > >> > >> arch/riscv/include/asm/cpufeature.h | 2 ++ > >> arch/riscv/include/asm/processor.h | 1 + > >> arch/riscv/include/asm/switch_to.h | 12 ++++++++++++ > >> arch/riscv/kernel/cpufeature.c | 4 +++- > >> 4 files changed, 18 insertions(+), 1 deletion(-) > >> > >> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h > >> index 0bd11862b760..b1ad8d0b4599 100644 > >> --- a/arch/riscv/include/asm/cpufeature.h > >> +++ b/arch/riscv/include/asm/cpufeature.h > >> @@ -33,6 +33,8 @@ DECLARE_PER_CPU(long, misaligned_access_speed); > >> /* Per-cpu ISA extensions. */ > >> extern struct riscv_isainfo hart_isa[NR_CPUS]; > >> > >> +DECLARE_PER_CPU(unsigned long, riscv_cpu_envcfg); > >> + > >> void riscv_user_isa_enable(void); > >> > >> #ifdef CONFIG_RISCV_MISALIGNED > >> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h > >> index a8509cc31ab2..06b87402a4d8 100644 > >> --- a/arch/riscv/include/asm/processor.h > >> +++ b/arch/riscv/include/asm/processor.h > >> @@ -118,6 +118,7 @@ struct thread_struct { > >> unsigned long s[12]; /* s[0]: frame pointer */ > >> struct __riscv_d_ext_state fstate; > >> unsigned long bad_cause; > >> + unsigned long envcfg; > >> u32 riscv_v_flags; > >> u32 vstate_ctrl; > >> struct __riscv_v_ext_state vstate; > >> diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h > >> index 7efdb0584d47..256a354a5c4a 100644 > >> --- a/arch/riscv/include/asm/switch_to.h > >> +++ b/arch/riscv/include/asm/switch_to.h > >> @@ -69,6 +69,17 @@ static __always_inline bool has_fpu(void) { return false; } > >> #define __switch_to_fpu(__prev, __next) do { } while (0) > >> #endif > >> > >> +static inline void sync_envcfg(struct task_struct *task) > >> +{ > >> + csr_write(CSR_ENVCFG, this_cpu_read(riscv_cpu_envcfg) | task->thread.envcfg); > >> +} > >> + > >> +static inline void __switch_to_envcfg(struct task_struct *next) > >> +{ > >> + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) > > > > I've seen `riscv_cpu_has_extension_unlikely` generating branchy code > > even if ALTERNATIVES was turned on. > > Can you check disasm on your end as well. IMHO, `entry.S` is a better > > place to pick up *envcfg. > > The branchiness is sort of expected, since that function is implemented by > switching on/off a branch instruction, so the alternate code is necessarily a > separate basic block. It's a tradeoff so we don't have to write assembly code > for every bit of code that depends on an extension. However, the cost should be > somewhat lowered since the branch is unconditional and so entirely predictable. > > If the branch turns out to be problematic for performance, then we could use > ALTERNATIVE directly in sync_envcfg() to NOP out the CSR write. Yeah I lean towards using alternatives directly. > > >> + sync_envcfg(next); > >> +} >
On Tue, Mar 19, 2024 at 09:20:59PM -0500, Samuel Holland wrote: > On 2024-03-19 6:55 PM, Deepak Gupta wrote: > > On Tue, Mar 19, 2024 at 2:59 PM Samuel Holland via lists.riscv.org > > <samuel.holland=sifive.com@lists.riscv.org> wrote: > >> > >> Some envcfg bits need to be controlled on a per-thread basis, such as > >> the pointer masking mode. However, the envcfg CSR value cannot simply be > >> stored in struct thread_struct, because some hardware may implement a > >> different subset of envcfg CSR bits is across CPUs. As a result, we need > >> to combine the per-CPU and per-thread bits whenever we switch threads. > >> > > > > Why not do something like this > > > > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h > > index b3400517b0a9..01ba87954da2 100644 > > --- a/arch/riscv/include/asm/csr.h > > +++ b/arch/riscv/include/asm/csr.h > > @@ -202,6 +202,8 @@ > > #define ENVCFG_CBIE_FLUSH _AC(0x1, UL) > > #define ENVCFG_CBIE_INV _AC(0x3, UL) > > #define ENVCFG_FIOM _AC(0x1, UL) > > +/* by default all threads should be able to zero cache */ > > +#define ENVCFG_BASE ENVCFG_CBZE > > Linux does not assume Sstrict, so without Zicboz being present in DT/ACPI, we > have no idea what the CBZE bit does--there's no guarantee it has the standard > meaning--so it's not safe to set the bit unconditionally. If that policy > changes, we could definitely simplify the code. The wording for that "extension", if two lines in the profiles doc makes something an extension is: "No non-conforming extensions are present. Attempts to execute unimplemented opcodes or access unimplemented CSRs in the standard or reserved encoding spaces raises an illegal instruction exception that results in a contained trap to the supervisor-mode trap handler." I know we have had new extensions come along and mark previously fair game interrupts for vendors as out of bounds. I wonder if there's a risk of that happening with CSRs or opcodes too (or maybe it has happened and I cannot recall). Going back to the interrupts - is the Andes PMU non-conforming because it uses an interrupt that was declared as vendor usable but is now part of the standard space because of AIA? If it is, then the meaning of Sstrict could vary wildly based on the set of extensions (and their versions for specs). That sounds like a lot of fun.
> > > > > > And instead of context switching in `_switch_to`, > > > In `entry.S` pick up `envcfg` from `thread_info` and write it into CSR. > > > > The immediate reason is that writing envcfg in ret_from_exception() adds cycles > > to every IRQ and system call exit, even though most of them will not change the > > envcfg value. This is especially the case when returning from an IRQ/exception > > back to S-mode, since envcfg has zero effect there. > > > > The CSRs that are read/written in entry.S are generally those where the value > > can be updated by hardware, as part of taking an exception. But envcfg never > > changes on its own. The kernel knows exactly when its value will change, and > > those places are: > > > > 1) Task switch, i.e. switch_to() > > 2) execve(), i.e. start_thread() or flush_thread() > > 3) A system call that specifically affects a feature controlled by envcfg > > Yeah I was optimizing for a single place to write instead of > sprinkling at multiple places. > But I see your argument. That's fine. > Because this is RFC and we are discussing it. I thought a little bit more about this. If we were to go with the above approach that essentially requires whenever a envcfg bit changes, `sync_envcfg` has to be called to reflect the correct value. What if some of these features enable/disable are exposed to `ptrace` (gdb, etc use cases) for enable/disable. How will syncing work then ? I can see the reasoning behind saving some cycles during trap return. But `senvcfg` is not actually a user state, it controls the execution environment configuration for user mode. I think the best place for this CSR to be written is trap return and writing at a single place from a single image on stack reduces chances of bugs and errors. And allows `senvcfg` features to be exposed to other kernel flows (like `ptrace`) We can figure out ways on how to optimize in trap return path to avoid writing it if we entered and exiting on the same task.
On 2024-03-19 11:39 PM, Deepak Gupta wrote: >>>> --- a/arch/riscv/include/asm/switch_to.h >>>> +++ b/arch/riscv/include/asm/switch_to.h >>>> @@ -69,6 +69,17 @@ static __always_inline bool has_fpu(void) { return false; } >>>> #define __switch_to_fpu(__prev, __next) do { } while (0) >>>> #endif >>>> >>>> +static inline void sync_envcfg(struct task_struct *task) >>>> +{ >>>> + csr_write(CSR_ENVCFG, this_cpu_read(riscv_cpu_envcfg) | task->thread.envcfg); >>>> +} >>>> + >>>> +static inline void __switch_to_envcfg(struct task_struct *next) >>>> +{ >>>> + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) >>> >>> I've seen `riscv_cpu_has_extension_unlikely` generating branchy code >>> even if ALTERNATIVES was turned on. >>> Can you check disasm on your end as well. IMHO, `entry.S` is a better >>> place to pick up *envcfg. >> >> The branchiness is sort of expected, since that function is implemented by >> switching on/off a branch instruction, so the alternate code is necessarily a >> separate basic block. It's a tradeoff so we don't have to write assembly code >> for every bit of code that depends on an extension. However, the cost should be >> somewhat lowered since the branch is unconditional and so entirely predictable. >> >> If the branch turns out to be problematic for performance, then we could use >> ALTERNATIVE directly in sync_envcfg() to NOP out the CSR write. > > Yeah I lean towards using alternatives directly. One thing to note here: we can't use alternatives directly if the behavior needs to be different on different harts (i.e. a subset of harts implement the envcfg CSR). I think we need some policy about which ISA extensions are allowed to be asymmetric across harts, or else we add too much complexity. Regards, Samuel
Hi Deepak, On 2024-03-20 6:27 PM, Deepak Gupta wrote: >>>> And instead of context switching in `_switch_to`, >>>> In `entry.S` pick up `envcfg` from `thread_info` and write it into CSR. >>> >>> The immediate reason is that writing envcfg in ret_from_exception() adds cycles >>> to every IRQ and system call exit, even though most of them will not change the >>> envcfg value. This is especially the case when returning from an IRQ/exception >>> back to S-mode, since envcfg has zero effect there. >>> >>> The CSRs that are read/written in entry.S are generally those where the value >>> can be updated by hardware, as part of taking an exception. But envcfg never >>> changes on its own. The kernel knows exactly when its value will change, and >>> those places are: >>> >>> 1) Task switch, i.e. switch_to() >>> 2) execve(), i.e. start_thread() or flush_thread() >>> 3) A system call that specifically affects a feature controlled by envcfg >> >> Yeah I was optimizing for a single place to write instead of >> sprinkling at multiple places. >> But I see your argument. That's fine. >> > > Because this is RFC and we are discussing it. I thought a little bit > more about this. Thanks for your comments and the discussion! I know several in-progress features depend on envcfg, so hopefully we can agree on a design acceptable to everyone. > If we were to go with the above approach that essentially requires > whenever a envcfg bit changes, `sync_envcfg` > has to be called to reflect the correct value. sync_envcfg() is only needed if the task being updated is `current`. Would it be more acceptable if this happened inside a helper function? Something like: static inline void envcfg_update_bits(struct task_struct *task, unsigned long mask, unsigned long val) { unsigned long envcfg; envcfg = (task->thread.envcfg & ~mask) | val; task->thread.envcfg = envcfg; if (task == current) csr_write(CSR_ENVCFG, this_cpu_read(riscv_cpu_envcfg) | envcfg); } > What if some of these features enable/disable are exposed to `ptrace` > (gdb, etc use cases) for enable/disable. > How will syncing work then ? ptrace_check_attach() ensures the tracee is scheduled out while a ptrace operation is running, so there is no need to sync anything. Any changes to task->thread.envcfg are written to the CSR when the tracee is scheduled back in. > I can see the reasoning behind saving some cycles during trap return. > But `senvcfg` is not actually a user state, it > controls the execution environment configuration for user mode. I > think the best place for this CSR to be written is > trap return and writing at a single place from a single image on stack > reduces chances of bugs and errors. And allows > `senvcfg` features to be exposed to other kernel flows (like `ptrace`) If ptrace is accessing a process, then task->thread.envcfg is always up to date. The only complication is that the per-CPU bits need to be ORed back in to get the real CSR value for another process, but this again is unrelated to whether the CSR is written in switch_to() or ret_from_exception(). > We can figure out ways on how to optimize in trap return path to avoid > writing it if we entered and exiting on the same > task. Optimizing out the CSR write when the task did not switch requires knowing if the current task's envcfg was changed during this trip to S-mode... and this starts looking similar to sync_envcfg(). Regards, Samuel
On Tue, Mar 19, 2024 at 09:20:59PM -0500, Samuel Holland wrote: ... > This is really a separate concern than when we write envcfg. The per-CPU > variable is only necessary to support hardware where a subset of harts support > Zicboz. Since the riscv_cpu_has_extension_[un]likely() helpers were added > specifically for Zicboz, I assume this is an important use case, and dropping > support for this hardware would be a regression. After all, hwprobe() allows > userspace to see that Zicboz is implemented at a per-CPU level. Maybe Andrew can > weigh in on that. > Hi Samuel, I've approached Zicboz the same way I would approach all extensions, which is to be per-hart. I'm not currently aware of a platform that is / will be composed of harts where some have Zicboz and others don't, but there's nothing stopping a platform like that from being built. I realize this adds complexity that we may not want to manage in Linux without an actual use case requiring it. I wouldn't be opposed to keeping things simple for now, only bringing in complexity when needed (for this extension or for a future extension with envcfg bits), but we should ensure we make it clear that we're making those simplifications now based on assumptions, and we may need to change things later. Thanks, drew
On Tue, Mar 19, 2024 at 09:39:52PM -0700, Deepak Gupta wrote: ... > I am not sure of the practicality of this heterogeneity for Zicboz and > for that matter any of the upcoming > features that'll be enabled via senvcfg (control flow integrity, > pointer masking, etc). > > As an example if cache zeroing instructions are used by app binary, I > expect it to be used in following > manner > > - Explicitly inserting cbo.zero by application developer > - Some compiler flag which ensures that structures larger than cache > line gets zeroed by cbo.zero > > In either of the cases, the developer is not expecting to target it to > a specific hart on SoC and instead expect it to work. > There might be libraries (installed via sudo apt get) with cache zero > support in them which may run in different address spaces. > Should the library be aware of the CPU on which it's running. Now > whoever is running these binaries should be aware which CPUs > they get assigned to in order to avoid faults? > > That seems excessive, doesn't it? > It might be safe to assume extensions like Zicboz will be on all harts if any, but I wouldn't expect all extensions in the future to be present on all available harts. For example, some Arm big.LITTLE boards only have virt extensions on big CPUs. When a VMM wants to launch a guest it must be aware of which CPUs it will use for the VCPU threads. For riscv, we have the which-cpus variant of the hwprobe syscall to try and make this type of thing easier to manage, but I agree it will still be a pain for software since it will need to make that query and then set its affinity, which is something it hasn't needed to do before. Thanks, drew
On Fri, Mar 22, 2024 at 1:09 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > On Tue, Mar 19, 2024 at 09:39:52PM -0700, Deepak Gupta wrote: > ... > > I am not sure of the practicality of this heterogeneity for Zicboz and > > for that matter any of the upcoming > > features that'll be enabled via senvcfg (control flow integrity, > > pointer masking, etc). > > > > As an example if cache zeroing instructions are used by app binary, I > > expect it to be used in following > > manner > > > > - Explicitly inserting cbo.zero by application developer > > - Some compiler flag which ensures that structures larger than cache > > line gets zeroed by cbo.zero > > > > In either of the cases, the developer is not expecting to target it to > > a specific hart on SoC and instead expect it to work. > > There might be libraries (installed via sudo apt get) with cache zero > > support in them which may run in different address spaces. > > Should the library be aware of the CPU on which it's running. Now > > whoever is running these binaries should be aware which CPUs > > they get assigned to in order to avoid faults? > > > > That seems excessive, doesn't it? > > > > It might be safe to assume extensions like Zicboz will be on all harts if > any, but I wouldn't expect all extensions in the future to be present on > all available harts. For example, some Arm big.LITTLE boards only have > virt extensions on big CPUs. When a VMM wants to launch a guest it must > be aware of which CPUs it will use for the VCPU threads. For riscv, we > have the which-cpus variant of the hwprobe syscall to try and make this > type of thing easier to manage, but I agree it will still be a pain for > software since it will need to make that query and then set its affinity, > which is something it hasn't needed to do before. > Sure, the future may be a world where heterogeneous ISA is a thing. But that's not the present. Let's not try to build for something which doesn't exist. It has been (heterogeneous ISA) tried earlier many times and mostly have fallen flat (remember on Intel alder lake, Intel had to ship a ucode patch to disable AVX512 exactly for same reason) https://www.anandtech.com/show/17047/the-intel-12th-gen-core-i912900k-review-hybrid-performance-brings-hybrid-complexity/2 As and when ISA features get enabled, they get compiled into libraries/binaries and end user many times use things like `taskset` to set affinity without even realizing there is some weirdness going on under the hood. For majority of use cases -- heterogeneous ISA doesn't make sense. Sure if someone is willing to build a custom SoC with heterogeneous ISA for their strict usecase, they control their software and hardware and thus they can do that. But littering linux kernel to support wierd usecases and putting a burden of that on majority of usecases and software is not wise. If something like this has to be done, I expect first that it doesn't force end users to learn about ISA differences between harts on their system and then figure out which installed packages have which ISA features compiled in. This is like walking on eggshells from the end user perspective. Sure, end user can be extremely intelligent / smart and figure it all out but that population is rare and that rare population can develop their custom kernel and libc patches to do something like this. This is a good science project to support heterogeneous ISA but practically not viable unless there is a high level end user use case. > Thanks, > drew
On Thu, Mar 21, 2024 at 5:13 PM Samuel Holland <samuel.holland@sifive.com> wrote: > > On 2024-03-19 11:39 PM, Deepak Gupta wrote: > >>>> --- a/arch/riscv/include/asm/switch_to.h > >>>> +++ b/arch/riscv/include/asm/switch_to.h > >>>> @@ -69,6 +69,17 @@ static __always_inline bool has_fpu(void) { return false; } > >>>> #define __switch_to_fpu(__prev, __next) do { } while (0) > >>>> #endif > >>>> > >>>> +static inline void sync_envcfg(struct task_struct *task) > >>>> +{ > >>>> + csr_write(CSR_ENVCFG, this_cpu_read(riscv_cpu_envcfg) | task->thread.envcfg); > >>>> +} > >>>> + > >>>> +static inline void __switch_to_envcfg(struct task_struct *next) > >>>> +{ > >>>> + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) > >>> > >>> I've seen `riscv_cpu_has_extension_unlikely` generating branchy code > >>> even if ALTERNATIVES was turned on. > >>> Can you check disasm on your end as well. IMHO, `entry.S` is a better > >>> place to pick up *envcfg. > >> > >> The branchiness is sort of expected, since that function is implemented by > >> switching on/off a branch instruction, so the alternate code is necessarily a > >> separate basic block. It's a tradeoff so we don't have to write assembly code > >> for every bit of code that depends on an extension. However, the cost should be > >> somewhat lowered since the branch is unconditional and so entirely predictable. > >> > >> If the branch turns out to be problematic for performance, then we could use > >> ALTERNATIVE directly in sync_envcfg() to NOP out the CSR write. > > > > Yeah I lean towards using alternatives directly. > > One thing to note here: we can't use alternatives directly if the behavior needs > to be different on different harts (i.e. a subset of harts implement the envcfg > CSR). I think we need some policy about which ISA extensions are allowed to be > asymmetric across harts, or else we add too much complexity. As I've responded on the same thread . We are adding too much complexity by assuming that heterogeneous ISA exists (which it doesn't today). And even if it exists, it wouldn't work. Nobody wants to spend a lot of time figuring out which harts have which ISA and which packages are compiled with which ISA. Most of the end users do `sudo apt get install blah blah` And then expect it to just work. It doesn't work for other architectures and even when someone tried, they had to disable certain ISA features to make sure that all cores have the same ISA feature (search AVX12 Intel Alder Lake Disable). > > Regards, > Samuel >
On Fri, Mar 22, 2024 at 10:13:48AM -0700, Deepak Gupta wrote: > On Thu, Mar 21, 2024 at 5:13 PM Samuel Holland > <samuel.holland@sifive.com> wrote: > > > > On 2024-03-19 11:39 PM, Deepak Gupta wrote: > > >>>> --- a/arch/riscv/include/asm/switch_to.h > > >>>> +++ b/arch/riscv/include/asm/switch_to.h > > >>>> @@ -69,6 +69,17 @@ static __always_inline bool has_fpu(void) { return false; } > > >>>> #define __switch_to_fpu(__prev, __next) do { } while (0) > > >>>> #endif > > >>>> > > >>>> +static inline void sync_envcfg(struct task_struct *task) > > >>>> +{ > > >>>> + csr_write(CSR_ENVCFG, this_cpu_read(riscv_cpu_envcfg) | task->thread.envcfg); > > >>>> +} > > >>>> + > > >>>> +static inline void __switch_to_envcfg(struct task_struct *next) > > >>>> +{ > > >>>> + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) > > >>> > > >>> I've seen `riscv_cpu_has_extension_unlikely` generating branchy code > > >>> even if ALTERNATIVES was turned on. > > >>> Can you check disasm on your end as well. IMHO, `entry.S` is a better > > >>> place to pick up *envcfg. > > >> > > >> The branchiness is sort of expected, since that function is implemented by > > >> switching on/off a branch instruction, so the alternate code is necessarily a > > >> separate basic block. It's a tradeoff so we don't have to write assembly code > > >> for every bit of code that depends on an extension. However, the cost should be > > >> somewhat lowered since the branch is unconditional and so entirely predictable. > > >> > > >> If the branch turns out to be problematic for performance, then we could use > > >> ALTERNATIVE directly in sync_envcfg() to NOP out the CSR write. > > > > > > Yeah I lean towards using alternatives directly. > > > > One thing to note here: we can't use alternatives directly if the behavior needs > > to be different on different harts (i.e. a subset of harts implement the envcfg > > CSR). I think we need some policy about which ISA extensions are allowed to be > > asymmetric across harts, or else we add too much complexity. > > As I've responded on the same thread . We are adding too much > complexity by assuming > that heterogeneous ISA exists (which it doesn't today). And even if it > exists, it wouldn't work. > Nobody wants to spend a lot of time figuring out which harts have > which ISA and which > packages are compiled with which ISA. Most of the end users do `sudo > apt get install blah blah` > And then expect it to just work. That will still work if the applications and libraries installed are heterogeneous-platform aware, i.e. they do the figuring out which harts have which extensions themselves. Applications/libraries should already be probing for ISA extensions before using them. It's not a huge leap to also check which harts support those extensions and then ensure affinity is set appropriately. > It doesn't work for other > architectures and even when someone > tried, they had to disable certain ISA features to make sure that all > cores have the same ISA feature > (search AVX12 Intel Alder Lake Disable). The RISC-V software ecosystem is still being developed. We have an opportunity to drop assumptions made by other architectures. As I said in a different reply, it's reasonable for Linux to not add the complexity until a use case comes along that Linux would like to support, but I think it would be premature for Linux to put a stake in the sand. So, how about we add code that confirms Zicboz is on all harts. If any hart does not have it, then we complain loudly and disable it on all the other harts. If it was just a hardware description bug, then it'll get fixed. If there's actually a platform which doesn't have Zicboz on all harts, then, when the issue is reported, we can decide to not support it, support it with defconfig, or support it under a Kconfig guard which must be enabled by the user. Thanks, drew
On Sat, Mar 23, 2024 at 2:35 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > On Fri, Mar 22, 2024 at 10:13:48AM -0700, Deepak Gupta wrote: > > > > Yeah I lean towards using alternatives directly. > > > > > > One thing to note here: we can't use alternatives directly if the behavior needs > > > to be different on different harts (i.e. a subset of harts implement the envcfg > > > CSR). I think we need some policy about which ISA extensions are allowed to be > > > asymmetric across harts, or else we add too much complexity. > > > > As I've responded on the same thread . We are adding too much > > complexity by assuming > > that heterogeneous ISA exists (which it doesn't today). And even if it > > exists, it wouldn't work. > > Nobody wants to spend a lot of time figuring out which harts have > > which ISA and which > > packages are compiled with which ISA. Most of the end users do `sudo > > apt get install blah blah` > > And then expect it to just work. > > That will still work if the applications and libraries installed are > heterogeneous-platform aware, i.e. they do the figuring out which harts > have which extensions themselves. Applications/libraries should already > be probing for ISA extensions before using them. It's not a huge leap to > also check which harts support those extensions and then ensure affinity > is set appropriately. How ? It's a single image of a library that will be loaded in multiple address spaces. You expect all code pages to do COW for multiple address spaces or expect to have per task variables to choose different code paths in the library based on address space its running in ? On top of that, the library/application developer doesn't know how the end user is going to use them. End users (sysadmin, etc) just might use taskset to put affinity on tasks without being aware. I just don't see the motivation in an application developer/library developer to do something like this. No application/library developer has time for this. Putting a lot of burden on application developers is mostly a nuisance considering they don't have to think about these nuisance when they expect the same code to be deployed on non-riscv architectures. One good example of putting unnecessary burden on app/library developer is Intel SGX This is exactly the reason Intel SGX failed. Application developers don't have time to develop confidential compute version of the application for a specific CPU while on other CPUs carry a different version of application. But at the same time virtual machine confidential compute is better approach where all complicated decision making is delegated to operating system developer and application/library developers are empowered to only think about their stuff. > > > It doesn't work for other > > architectures and even when someone > > tried, they had to disable certain ISA features to make sure that all > > cores have the same ISA feature > > (search AVX12 Intel Alder Lake Disable). > > The RISC-V software ecosystem is still being developed. We have an > opportunity to drop assumptions made by other architectures. It doesn't mean that it should try to make the same mistakes which others have done. If there is a motivation and use case from end user perspective, please provide. Otherwise no point doing something which is just a science thought exercise and no concrete use case. Please note that these arguments are against Heterogeneous ISA on cores. From power and efficiency perspective cores can still be heterogeneous. > > > As I said in a different reply, it's reasonable for Linux to not add the > complexity until a use case comes along that Linux would like to support, > but I think it would be premature for Linux to put a stake in the sand. > > So, how about we add code that confirms Zicboz is on all harts. If any > hart does not have it, then we complain loudly and disable it on all > the other harts. If it was just a hardware description bug, then it'll > get fixed. If there's actually a platform which doesn't have Zicboz > on all harts, then, when the issue is reported, we can decide to not > support it, support it with defconfig, or support it under a Kconfig > guard which must be enabled by the user. > > Thanks, > drew
On Tue, Mar 19, 2024 at 7:21 PM Samuel Holland <samuel.holland@sifive.com> wrote: > > > else > > regs->status |= SR_UXL_64; > > #endif > > + current->thread_info.envcfg = ENVCFG_BASE; > > } > > > > And instead of context switching in `_switch_to`, > > In `entry.S` pick up `envcfg` from `thread_info` and write it into CSR. > > The immediate reason is that writing envcfg in ret_from_exception() adds cycles > to every IRQ and system call exit, even though most of them will not change the > envcfg value. This is especially the case when returning from an IRQ/exception > back to S-mode, since envcfg has zero effect there. > A quick observation: So I tried this on my setup. When I put `senvcfg` writes in `__switch_to ` path, qemu suddenly just tanks and takes a lot of time to boot up as opposed to when `senvcfg` was in trap return path. In my case entire userspace (all processes) have cfi enabled for them via `senvcfg` and it gets context switched. Not sure it's specific to my setup. I don't think it should be an issue on actual hardware. Still debugging why it slows down my qemu drastically when same writes to same CSR are moved from `ret_from_exception` to `switch_to`
On Wed, Mar 27, 2024 at 06:58:45PM -0700, Deepak Gupta via lists.riscv.org wrote: >On Tue, Mar 19, 2024 at 7:21 PM Samuel Holland ><samuel.holland@sifive.com> wrote: >> >> > else >> > regs->status |= SR_UXL_64; >> > #endif >> > + current->thread_info.envcfg = ENVCFG_BASE; >> > } >> > >> > And instead of context switching in `_switch_to`, >> > In `entry.S` pick up `envcfg` from `thread_info` and write it into CSR. >> >> The immediate reason is that writing envcfg in ret_from_exception() adds cycles >> to every IRQ and system call exit, even though most of them will not change the >> envcfg value. This is especially the case when returning from an IRQ/exception >> back to S-mode, since envcfg has zero effect there. >> > >A quick observation: >So I tried this on my setup. When I put `senvcfg` writes in >`__switch_to ` path, qemu suddenly >just tanks and takes a lot of time to boot up as opposed to when >`senvcfg` was in trap return path. >In my case entire userspace (all processes) have cfi enabled for them >via `senvcfg` and it gets >context switched. Not sure it's specific to my setup. I don't think it >should be an issue on actual >hardware. > >Still debugging why it slows down my qemu drastically when same writes >to same CSR >are moved from `ret_from_exception` to `switch_to` Nevermind and sorry for the bother. An issue on my setup. > > >-=-=-=-=-=-=-=-=-=-=-=- >Links: You receive all messages sent to this group. >View/Reply Online (#680): https://lists.riscv.org/g/tech-j-ext/message/680 >Mute This Topic: https://lists.riscv.org/mt/105033914/7300952 >Group Owner: tech-j-ext+owner@lists.riscv.org >Unsubscribe: https://lists.riscv.org/g/tech-j-ext/unsub [debug@rivosinc.com] >-=-=-=-=-=-=-=-=-=-=-=- > >
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 0bd11862b760..b1ad8d0b4599 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -33,6 +33,8 @@ DECLARE_PER_CPU(long, misaligned_access_speed); /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; +DECLARE_PER_CPU(unsigned long, riscv_cpu_envcfg); + void riscv_user_isa_enable(void); #ifdef CONFIG_RISCV_MISALIGNED diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index a8509cc31ab2..06b87402a4d8 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -118,6 +118,7 @@ struct thread_struct { unsigned long s[12]; /* s[0]: frame pointer */ struct __riscv_d_ext_state fstate; unsigned long bad_cause; + unsigned long envcfg; u32 riscv_v_flags; u32 vstate_ctrl; struct __riscv_v_ext_state vstate; diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h index 7efdb0584d47..256a354a5c4a 100644 --- a/arch/riscv/include/asm/switch_to.h +++ b/arch/riscv/include/asm/switch_to.h @@ -69,6 +69,17 @@ static __always_inline bool has_fpu(void) { return false; } #define __switch_to_fpu(__prev, __next) do { } while (0) #endif +static inline void sync_envcfg(struct task_struct *task) +{ + csr_write(CSR_ENVCFG, this_cpu_read(riscv_cpu_envcfg) | task->thread.envcfg); +} + +static inline void __switch_to_envcfg(struct task_struct *next) +{ + if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_XLINUXENVCFG)) + sync_envcfg(next); +} + extern struct task_struct *__switch_to(struct task_struct *, struct task_struct *); @@ -80,6 +91,7 @@ do { \ __switch_to_fpu(__prev, __next); \ if (has_vector()) \ __switch_to_vector(__prev, __next); \ + __switch_to_envcfg(__next); \ ((last) = __switch_to(__prev, __next)); \ } while (0) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index d1846aab1f78..32aaaf41f8a8 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -44,6 +44,8 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; /* Per-cpu ISA extensions. */ struct riscv_isainfo hart_isa[NR_CPUS]; +DEFINE_PER_CPU(unsigned long, riscv_cpu_envcfg); + /* Performance information */ DEFINE_PER_CPU(long, misaligned_access_speed); @@ -978,7 +980,7 @@ arch_initcall(check_unaligned_access_all_cpus); void riscv_user_isa_enable(void) { if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ)) - csr_set(CSR_ENVCFG, ENVCFG_CBZE); + this_cpu_or(riscv_cpu_envcfg, ENVCFG_CBZE); } #ifdef CONFIG_RISCV_ALTERNATIVE
Some envcfg bits need to be controlled on a per-thread basis, such as the pointer masking mode. However, the envcfg CSR value cannot simply be stored in struct thread_struct, because some hardware may implement a different subset of envcfg CSR bits is across CPUs. As a result, we need to combine the per-CPU and per-thread bits whenever we switch threads. Signed-off-by: Samuel Holland <samuel.holland@sifive.com> --- arch/riscv/include/asm/cpufeature.h | 2 ++ arch/riscv/include/asm/processor.h | 1 + arch/riscv/include/asm/switch_to.h | 12 ++++++++++++ arch/riscv/kernel/cpufeature.c | 4 +++- 4 files changed, 18 insertions(+), 1 deletion(-)