@@ -557,6 +557,20 @@ config RISCV_ISA_V_MEMMOVE_THRESHOLD
Prefer using vectorized memmove() when the workload size exceeds this
value.
+config RISCV_ISA_V_PREEMPTIVE
+ bool "Run kernel-mode Vector with kernel preemption"
+ depends on PREEMPTION
+ depends on RISCV_ISA_V
+ default y
+ help
+ Usually, in-kernel SIMD routines are run with preemption disabled.
+ Functions which invoke long-running SIMD thus must yield the core's
+ vector unit to avoid blocking other tasks for too long.
+
+ This config allows the kernel to run SIMD without explicitly disabling
+ preemption. Enabling this config results in higher memory consumption
+ due to the allocation of a per-task kernel Vector context.
+
config TOOLCHAIN_HAS_ZBB
bool
default y
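
For context (not part of the patch): the pattern the help text above alludes to looks roughly like the sketch below. The routine name, chunk size, and loop are made up; kernel_vector_begin()/kernel_vector_end() and cond_resched() are the real interfaces. On the non-preemptible path the work has to be chunked so the vector unit is yielded periodically:

  /* Illustrative only: chunked processing while kernel_vector_begin()
   * disables preemption. */
  static void example_vec_op(u8 *dst, const u8 *src, size_t len)
  {
          while (len) {
                  size_t chunk = min_t(size_t, len, SZ_64K);      /* arbitrary */

                  kernel_vector_begin();
                  /* ... vector loop over 'chunk' bytes ... */
                  kernel_vector_end();

                  dst += chunk;
                  src += chunk;
                  len -= chunk;
                  cond_resched();
          }
  }

With RISCV_ISA_V_PREEMPTIVE=y (and the per-task kernel_vstate allocated), the whole length can sit inside a single begin/end pair, since preemption stays enabled in between.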
@@ -81,11 +81,32 @@ struct pt_regs;
* activation of this state disables the preemption. On a non-RT kernel, it
* also disable bh. Currently only 0 and 1 are valid value for this field.
* Other values are reserved for future uses.
+ * - bits 8-15 are used for tracking preemptible kernel-mode Vector, when
+ *   RISCV_ISA_V_PREEMPTIVE is set. Calling kernel_vector_begin() does not
+ *   disable preemption if the thread's kernel_vstate.datap is allocated.
+ *   Instead, the kernel adds 1 to this field. Then the trap entry/exit code
+ *   knows if we are entering/exiting the context that owns preempt_v.
+ *     - 0: the task is not using preempt_v
+ *     - 1: the task is actively using, and owns, preempt_v
+ *     - >1: the task was using preempt_v, but then took a trap within. Thus,
+ *       the task does not own preempt_v. Any use of Vector will have to save
+ *       preempt_v, if dirty, and fall back to non-preemptible kernel-mode
+ *       Vector.
+ * - bit 30: the in-kernel preempt_v context has been saved and needs to be
+ *   restored when returning to the context that owns preempt_v.
+ * - bit 31: the in-kernel preempt_v context is dirty, as signaled by the
+ *   trap entry code. Any context switch out of the current task needs to
+ *   save it to the task's in-kernel V context. Also, any trap nesting on
+ *   top of preempt_v that requests to use V needs a save.
*/
-#define RISCV_KERNEL_MODE_V_MASK 0xff
+#define RISCV_KERNEL_MODE_V_MASK 0x000000ff
+#define RISCV_PREEMPT_V_MASK 0x0000ff00
-#define RISCV_KERNEL_MODE_V 0x1
+#define RISCV_KERNEL_MODE_V 0x00000001
+#define RISCV_PREEMPT_V 0x00000100
+#define RISCV_PREEMPT_V_DIRTY 0x80000000
+#define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000
/* CPU-specific state of a task */
struct thread_struct {
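
As a cross-check of the riscv_v_flags layout documented above, a hypothetical decode helper (not part of the patch) would read the fields like this:

  /* Hypothetical helper, illustrating the bit layout only. */
  static inline void riscv_v_flags_dump(u32 flags)
  {
          u32 kernel_v = flags & RISCV_KERNEL_MODE_V_MASK;        /* bits 0-7  */
          u32 preempt_v = (flags & RISCV_PREEMPT_V_MASK) >> 8;    /* bits 8-15 */
          bool restore = flags & RISCV_PREEMPT_V_NEED_RESTORE;    /* bit 30    */
          bool dirty = flags & RISCV_PREEMPT_V_DIRTY;             /* bit 31    */

          /* preempt_v: 0 = not used, 1 = owned, >1 = trap nested on top */
          pr_debug("kernel V=%u preempt_v=%u dirty=%d need_restore=%d\n",
                   kernel_v, preempt_v, dirty, restore);
  }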
@@ -99,6 +120,7 @@ struct thread_struct {
u32 vstate_ctrl;
struct __riscv_v_ext_state vstate;
unsigned long align_ctl;
+ struct __riscv_v_ext_state kernel_vstate;
};
/* Whitelist the fstate from the task_struct for hardened usercopy */
@@ -12,6 +12,7 @@
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>
+#include <linux/thread_info.h>
#ifdef CONFIG_RISCV_ISA_V
/*
@@ -26,12 +27,27 @@ static __must_check inline bool may_use_simd(void)
/*
* RISCV_KERNEL_MODE_V is only set while preemption is disabled,
* and is clear whenever preemption is enabled.
- *
- * Kernel-mode Vector temperarily disables bh. So we must not return
- * true on irq_disabled(). Otherwise we would fail the lockdep check
- * calling local_bh_enable()
*/
- return !in_hardirq() && !in_nmi() && !irqs_disabled() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
+ if (in_hardirq() || in_nmi())
+ return false;
+
+ /*
+ * Nesting is achieved in preempt_v by spreading the control for
+ * preemptible and non-preemptible kernel-mode Vector into two fields.
+ * Always try to match with preempt_v if the kernel V-context exists.
+ * Then, fall back to checking non-preempt_v if nesting happens, or if
+ * the config is not set.
+ */
+ if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) {
+ if (!riscv_preempt_v_started(current))
+ return true;
+ }
+ /*
+ * Non-preemptible kernel-mode Vector temporarily disables bh. So we
+ * must not return true on irqs_disabled(). Otherwise we would fail the
+ * lockdep check when calling local_bh_enable().
+ */
+ return !irqs_disabled() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
}
#else /* ! CONFIG_RISCV_ISA_V */
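
A caller-side sketch of how may_use_simd() is meant to be used (the xor helpers are placeholders, not functions added by this patch): since kernel_vector_begin() BUG()s when Vector cannot be used, callers gate the vector path and keep a scalar fallback:

  /* Illustrative caller; do_xor_vector()/do_xor_scalar() are made up. */
  static void do_xor(unsigned long *dst, const unsigned long *src, size_t n)
  {
          if (may_use_simd()) {
                  kernel_vector_begin();
                  do_xor_vector(dst, src, n);
                  kernel_vector_end();
          } else {
                  do_xor_scalar(dst, src, n);
          }
  }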
@@ -28,6 +28,7 @@ void get_cpu_vector_context(void);
void put_cpu_vector_context(void);
void riscv_v_thread_free(struct task_struct *tsk);
void __init riscv_v_setup_ctx_cache(void);
+void riscv_v_thread_alloc(struct task_struct *tsk);
static inline void riscv_v_ctx_cnt_add(u32 offset)
{
@@ -212,14 +213,63 @@ static inline void riscv_v_vstate_set_restore(struct task_struct *task,
}
}
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static inline bool riscv_preempt_v_dirty(struct task_struct *task)
+{
+ u32 val = READ_ONCE(task->thread.riscv_v_flags);
+
+ return !!(val & RISCV_PREEMPT_V_DIRTY);
+}
+
+static inline bool riscv_preempt_v_restore(struct task_struct *task)
+{
+ u32 val = READ_ONCE(task->thread.riscv_v_flags);
+
+ return !!(val & RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_preempt_v_clear_dirty(struct task_struct *task)
+{
+ barrier();
+ task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_set_restore(struct task_struct *task)
+{
+ barrier();
+ task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE;
+}
+
+static inline bool riscv_preempt_v_started(struct task_struct *task)
+{
+ return !!(READ_ONCE(task->thread.riscv_v_flags) & RISCV_PREEMPT_V_MASK);
+}
+#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */
+static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; }
+#define riscv_preempt_v_clear_dirty(tsk) do {} while (0)
+#define riscv_preempt_v_set_restore(tsk) do {} while (0)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
static inline void __switch_to_vector(struct task_struct *prev,
struct task_struct *next)
{
struct pt_regs *regs;
- regs = task_pt_regs(prev);
- riscv_v_vstate_save(&prev->thread.vstate, regs);
- riscv_v_vstate_set_restore(next, task_pt_regs(next));
+ if (riscv_preempt_v_dirty(prev)) {
+ __riscv_v_vstate_save(&prev->thread.kernel_vstate,
+ prev->thread.kernel_vstate.datap);
+ riscv_preempt_v_clear_dirty(prev);
+ } else {
+ regs = task_pt_regs(prev);
+ riscv_v_vstate_save(&prev->thread.vstate, regs);
+ }
+
+ if (riscv_preempt_v_started(next))
+ riscv_preempt_v_set_restore(next);
+ else
+ riscv_v_vstate_set_restore(next, task_pt_regs(next));
}
void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
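
Spelled out, the cases the __switch_to_vector() hunk above handles (a summary of the code, not an addition to it):

  /*
   * switch-out (prev):
   *   - preempt_v dirty: save live V regs into prev->thread.kernel_vstate,
   *     then clear RISCV_PREEMPT_V_DIRTY
   *   - otherwise:       the usual user-mode save via riscv_v_vstate_save()
   * switch-in (next):
   *   - preempt_v started: only set RISCV_PREEMPT_V_NEED_RESTORE; the restore
   *     itself is deferred until execution returns to the owning context
   *     (riscv_v_context_nesting_end())
   *   - otherwise:         the usual lazy user-mode restore via
   *                        riscv_v_vstate_set_restore()
   */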
@@ -243,6 +293,7 @@ static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
#define riscv_v_vstate_on(regs) do {} while (0)
#define riscv_v_thread_free(tsk) do {} while (0)
#define riscv_v_setup_ctx_cache() do {} while (0)
+#define riscv_v_thread_alloc(tsk) do {} while (0)
#endif /* CONFIG_RISCV_ISA_V */
@@ -83,6 +83,10 @@ SYM_CODE_START(handle_exception)
/* Load the kernel shadow call stack pointer if coming from userspace */
scs_load_current_if_task_changed s5
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+ move a0, sp
+ call riscv_v_context_nesting_start
+#endif
move a0, sp /* pt_regs */
la ra, ret_from_exception
@@ -138,6 +142,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
*/
csrw CSR_SCRATCH, tp
1:
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+ move a0, sp
+ call riscv_v_context_nesting_end
+#endif
REG_L a0, PT_STATUS(sp)
/*
* The current load reservation is effectively part of the processor's
@@ -50,6 +50,111 @@ void put_cpu_vector_context(void)
preempt_enable();
}
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static inline void riscv_preempt_v_set_dirty(void)
+{
+ current->thread.riscv_v_flags |= RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_reset_flags(void)
+{
+ current->thread.riscv_v_flags &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_preempt_v_depth_inc(void)
+{
+ riscv_v_ctx_cnt_add(RISCV_PREEMPT_V);
+}
+
+static inline void riscv_preempt_v_depth_dec(void)
+{
+ riscv_v_ctx_cnt_sub(RISCV_PREEMPT_V);
+}
+
+static inline u32 riscv_preempt_v_get_depth(void)
+{
+ return riscv_v_ctx_cnt() & RISCV_PREEMPT_V_MASK;
+}
+
+#define PREEMPT_V_FIRST_DEPTH RISCV_PREEMPT_V
+static int riscv_v_stop_kernel_context(void)
+{
+ if (riscv_preempt_v_get_depth() != PREEMPT_V_FIRST_DEPTH)
+ return 1;
+
+ riscv_preempt_v_depth_dec();
+ return 0;
+}
+
+static int riscv_v_start_kernel_context(bool *is_nested)
+{
+ struct __riscv_v_ext_state *vstate = &current->thread.kernel_vstate;
+
+ if (!vstate->datap)
+ return -ENOENT;
+
+ if (riscv_preempt_v_started(current)) {
+ WARN_ON(riscv_preempt_v_get_depth() == PREEMPT_V_FIRST_DEPTH);
+ if (riscv_preempt_v_dirty(current)) {
+ get_cpu_vector_context();
+ __riscv_v_vstate_save(vstate, vstate->datap);
+ riscv_preempt_v_clear_dirty(current);
+ put_cpu_vector_context();
+ }
+ get_cpu_vector_context();
+ riscv_preempt_v_set_restore(current);
+ *is_nested = true;
+ return 0;
+ }
+
+ get_cpu_vector_context();
+ riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+ put_cpu_vector_context();
+
+ riscv_preempt_v_depth_inc();
+ return 0;
+}
+
+/* low-level V context handling code, called with irq disabled */
+asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
+{
+ int depth;
+
+ if (!riscv_preempt_v_started(current))
+ return;
+
+ depth = riscv_preempt_v_get_depth();
+ if (depth == PREEMPT_V_FIRST_DEPTH && (regs->status & SR_VS) == SR_VS_DIRTY)
+ riscv_preempt_v_set_dirty();
+
+ riscv_preempt_v_depth_inc();
+}
+
+asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs)
+{
+ struct __riscv_v_ext_state *vstate = &current->thread.kernel_vstate;
+ u32 depth;
+
+ lockdep_assert_irqs_disabled();
+
+ if (!riscv_preempt_v_started(current))
+ return;
+
+ riscv_preempt_v_depth_dec();
+ depth = riscv_preempt_v_get_depth();
+ if (depth == PREEMPT_V_FIRST_DEPTH) {
+ if (riscv_preempt_v_restore(current)) {
+ __riscv_v_vstate_restore(vstate, vstate->datap);
+ __riscv_v_vstate_clean(regs);
+ }
+ riscv_preempt_v_reset_flags();
+ }
+}
+#else
+#define riscv_v_start_kernel_context(nested) (-ENOENT)
+#define riscv_v_stop_kernel_context() (-ENOENT)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
/*
* kernel_vector_begin(): obtain the CPU vector registers for use by the calling
* context
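
To tie the pieces together, a hedged walk-through of the preempt_v depth (bits 8-15 of riscv_v_flags, counted here in units of RISCV_PREEMPT_V) for a task that takes one interrupt while its vector state is live; the nested softirq user in the middle is hypothetical:

  /*
   * kernel_vector_begin() -> riscv_v_start_kernel_context():   depth 0 -> 1
   *     ... vector code runs, sstatus.VS may become dirty ...
   * trap entry, riscv_v_context_nesting_start():
   *     depth == 1 and VS dirty -> set RISCV_PREEMPT_V_DIRTY;  depth 1 -> 2
   *         (a softirq using Vector here finds preempt_v started: it saves
   *          the dirty preempt_v image to kernel_vstate, sets NEED_RESTORE,
   *          and runs on the non-preemptible path)
   * trap exit, riscv_v_context_nesting_end():                  depth 2 -> 1
   *     back at the first depth: restore kernel_vstate if NEED_RESTORE,
   *     mark VS clean, clear DIRTY/NEED_RESTORE
   * kernel_vector_end() -> riscv_v_stop_kernel_context():      depth 1 -> 0
   */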
@@ -65,14 +170,20 @@ void put_cpu_vector_context(void)
*/
void kernel_vector_begin(void)
{
+ bool nested = false;
+
if (WARN_ON(!has_vector()))
return;
BUG_ON(!may_use_simd());
- get_cpu_vector_context();
+ if (riscv_v_start_kernel_context(&nested)) {
+ get_cpu_vector_context();
+ riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+ }
- riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+ if (!nested)
+ riscv_v_vstate_set_restore(current, task_pt_regs(current));
riscv_v_enable();
}
@@ -92,10 +203,10 @@ void kernel_vector_end(void)
if (WARN_ON(!has_vector()))
return;
- riscv_v_vstate_set_restore(current, task_pt_regs(current));
-
riscv_v_disable();
- put_cpu_vector_context();
+ if (riscv_v_stop_kernel_context()) {
+ put_cpu_vector_context();
+ }
}
EXPORT_SYMBOL_GPL(kernel_vector_end);
@@ -188,6 +188,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
*dst = *src;
/* clear entire V context, including datap for a new task */
memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+ memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
return 0;
}
@@ -223,6 +224,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.s[0] = 0;
}
p->thread.riscv_v_flags = 0;
+ if (has_vector())
+ riscv_v_thread_alloc(p);
p->thread.ra = (unsigned long)ret_from_fork;
p->thread.sp = (unsigned long)childregs; /* kernel sp */
return 0;
@@ -22,6 +22,9 @@
static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE);
static struct kmem_cache *riscv_v_user_cachep;
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static struct kmem_cache *riscv_v_kernel_cachep;
+#endif
unsigned long riscv_v_vsize __read_mostly;
EXPORT_SYMBOL_GPL(riscv_v_vsize);
@@ -53,6 +56,11 @@ void __init riscv_v_setup_ctx_cache(void)
riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
riscv_v_vsize, 16, SLAB_PANIC,
0, riscv_v_vsize, NULL);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+ riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx",
+ riscv_v_vsize, 16,
+ SLAB_PANIC, NULL);
+#endif
}
static bool insn_is_vector(u32 insn_buf)
@@ -88,24 +96,35 @@ static bool insn_is_vector(u32 insn_buf)
return false;
}
-static int riscv_v_thread_zalloc(void)
+static int riscv_v_thread_zalloc(struct kmem_cache *cache,
+ struct __riscv_v_ext_state *ctx)
{
void *datap;
- datap = kmem_cache_zalloc(riscv_v_user_cachep, GFP_KERNEL);
+ datap = kmem_cache_zalloc(cache, GFP_KERNEL);
if (!datap)
return -ENOMEM;
- current->thread.vstate.datap = datap;
- memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_ext_state,
- datap));
+ ctx->datap = datap;
+ memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap));
return 0;
}
+void riscv_v_thread_alloc(struct task_struct *tsk)
+{
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+ riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate);
+#endif
+}
+
void riscv_v_thread_free(struct task_struct *tsk)
{
if (tsk->thread.vstate.datap)
kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+ if (tsk->thread.kernel_vstate.datap)
+ kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap);
+#endif
}
#define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)
@@ -177,7 +196,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
* context where VS has been off. So, try to allocate the user's V
* context and resume execution.
*/
- if (riscv_v_thread_zalloc()) {
+ if (riscv_v_thread_zalloc(riscv_v_user_cachep, &current->thread.vstate)) {
force_sig(SIGBUS);
return true;
}
Add kernel_vstate to keep track of kernel-mode Vector registers when a
trap-introduced context switch happens. Also, provide riscv_v_flags to
let the context save/restore routines track context status. Context
tracking happens whenever the core starts its in-kernel Vector
execution. An active (dirty) kernel task's V context will be saved to
memory whenever a trap-introduced context switch happens, or when a
softirq, which happens to nest on top of it, uses Vector. Context
restoring happens when execution transfers back to the original kernel
context where it first enabled preempt_v.

Also, provide a config CONFIG_RISCV_ISA_V_PREEMPTIVE to give users an
option to disable preemptible kernel-mode Vector at build time. Users
with constrained memory may want to disable this config, as preemptible
kernel-mode Vector needs extra space for tracking each thread's
kernel-mode V context. Users might also want to disable it if all
kernel-mode Vector code is time-sensitive and cannot tolerate context
switch overhead.

Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
---
Changelog v6:
 - re-write patch to handle context nesting for softirqs
 - drop thread flag and track context instead in riscv_v_flags
 - refine some asm code and constrain it into C functions
 - preallocate V context for preempt_v
 - Return non-zero in riscv_v_start_kernel_context with non-preemptible
   kernel-mode Vector
Changelog v4:
 - dropped from v4
Changelog v3:
 - Guard vstate_save with {get,put}_cpu_vector_context
 - Add comments on preventions of nesting V contexts
 - remove warnings in context switch when trap's reg is not present (Conor)
 - refactor code (Björn)
Changelog v2:
 - fix build fail when compiling without RISCV_ISA_V (Conor)
 - 's/TIF_RISCV_V_KMV/TIF_RISCV_V_KERNEL_MODE' and add comment (Conor)
 - merge Kconfig patch into this one (Conor)
 - 's/CONFIG_RISCV_ISA_V_PREEMPTIVE_KMV/CONFIG_RISCV_ISA_V_PREEMPTIVE/' (Conor)
 - fix some typos (Conor)
 - enclose assembly with RISCV_ISA_V_PREEMPTIVE
 - change riscv_v_vstate_ctrl_config_kmv() to
   kernel_vector_allow_preemption() for better understanding (Conor)
 - 's/riscv_v_kmv_preempitble/kernel_vector_preemptible/'
---
 arch/riscv/Kconfig                     |  14 +++
 arch/riscv/include/asm/processor.h     |  26 +++++-
 arch/riscv/include/asm/simd.h          |  26 +++++-
 arch/riscv/include/asm/vector.h        |  57 +++++++++++-
 arch/riscv/kernel/entry.S              |   8 ++
 arch/riscv/kernel/kernel_mode_vector.c | 121 ++++++++++++++++++++++++-
 arch/riscv/kernel/process.c            |   3 +
 arch/riscv/kernel/vector.c             |  31 +++++--
 8 files changed, 265 insertions(+), 21 deletions(-)