@@ -109,19 +109,13 @@
REG_L \dst, 0(\dst)
.endm
-#ifdef CONFIG_SHADOW_CALL_STACK
-/* gp is used as the shadow call stack pointer instead */
-.macro load_global_pointer
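+/* Load __per_cpu_offset[cpu] of the current task into gp; clobbers \tmp */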
+.macro load_pcpu_off_gp tmp
+ REG_L \tmp, TASK_TI_CPU(tp)
+	slli \tmp, \tmp, 3	/* scale cpu index by sizeof(unsigned long) */
+ la gp, __per_cpu_offset
+ add gp, gp, \tmp
+ REG_L gp, 0(gp)
.endm
-#else
-/* load __global_pointer to gp */
-.macro load_global_pointer
-.option push
-.option norelax
- la gp, __global_pointer$
-.option pop
-.endm
-#endif /* CONFIG_SHADOW_CALL_STACK */
/* save all GPs except x1 ~ x5 */
.macro save_from_x6_to_x31
new file mode 100644
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_PERCPU_H
+#define __ASM_PERCPU_H
+
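+/* Stash this CPU's offset in gp so later per-CPU accesses avoid a memory load. */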
+static inline void set_my_cpu_offset(unsigned long off)
+{
+ asm volatile("addi gp, %0, 0" :: "r" (off));
+}
+
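+/* Read the per-CPU offset back from gp. */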
+static inline unsigned long __kern_my_cpu_offset(void)
+{
+ unsigned long off;
+
+	asm ("mv %0, gp" : "=r" (off));
+ return off;
+}
+
+#define __my_cpu_offset __kern_my_cpu_offset()
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_PERCPU_H */
+
@@ -36,6 +36,7 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]);
OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]);
OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]);
+ OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags);
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
@@ -77,8 +77,8 @@ SYM_CODE_START(handle_exception)
*/
csrw CSR_SCRATCH, x0
- /* Load the global pointer */
- load_global_pointer
+	/* load __per_cpu_offset[cpu] into gp */
+ load_pcpu_off_gp t6
/* Load the kernel shadow call stack pointer if coming from userspace */
scs_load_current_if_task_changed s5
@@ -110,9 +110,6 @@ relocate_enable_mmu:
la a0, .Lsecondary_park
csrw CSR_TVEC, a0
- /* Reload the global pointer */
- load_global_pointer
-
/*
* Switch to kernel page tables. A full fence is necessary in order to
* avoid using the trampoline translations, which are only correct for
@@ -131,9 +128,6 @@ secondary_start_sbi:
csrw CSR_IE, zero
csrw CSR_IP, zero
- /* Load the global pointer */
- load_global_pointer
-
/*
* Disable FPU & VECTOR to detect illegal usage of
* floating point or vector in kernel space
@@ -228,9 +222,6 @@ SYM_CODE_START(_start_kernel)
csrr a0, CSR_MHARTID
#endif /* CONFIG_RISCV_M_MODE */
- /* Load the global pointer */
- load_global_pointer
-
/*
* Disable FPU & VECTOR to detect illegal usage of
* floating point or vector in kernel space
@@ -41,6 +41,11 @@
static DECLARE_COMPLETION(cpu_running);
+void __init smp_prepare_boot_cpu(void)
+{
+ set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+}
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
int cpuid;
@@ -212,6 +217,8 @@ asmlinkage __visible void smp_callin(void)
struct mm_struct *mm = &init_mm;
unsigned int curr_cpuid = smp_processor_id();
+ set_my_cpu_offset(per_cpu_offset(curr_cpuid));
+
if (has_vector()) {
/*
* Return as early as possible so the hart with a mismatching
@@ -60,8 +60,6 @@ SYM_FUNC_START(__cpu_suspend_enter)
SYM_FUNC_END(__cpu_suspend_enter)
SYM_TYPED_FUNC_START(__cpu_resume_enter)
- /* Load the global pointer */
- load_global_pointer
#ifdef CONFIG_MMU
/* Save A0 and A1 */
Compared to directly fetching the per-CPU offset from memory (or cache),
using the global pointer (gp) to store the per-CPU offset saves one
memory access. When compiling the kernel, the following flags need to be
specified explicitly:

  export KCFLAGS="... -mno-relax"
  export KAFLAGS="... -mno-relax"

Signed-off-by: Yunhui Cui <cuiyunhui@bytedance.com>
---
 arch/riscv/include/asm/asm.h       | 18 ++++++------------
 arch/riscv/include/asm/percpu.h    | 24 ++++++++++++++++++++++++
 arch/riscv/kernel/asm-offsets.c    |  1 +
 arch/riscv/kernel/entry.S          |  4 ++--
 arch/riscv/kernel/head.S           |  9 ---------
 arch/riscv/kernel/smpboot.c        |  7 +++++++
 arch/riscv/kernel/suspend_entry.S  |  2 --
 7 files changed, 40 insertions(+), 25 deletions(-)
 create mode 100644 arch/riscv/include/asm/percpu.h
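For illustration only (not part of this patch): a minimal sketch of what
the gp-based offset buys a generic per-CPU access. The names demo_counter
and demo_bump are made up for the example.

  #include <linux/percpu.h>

  /* Hypothetical per-CPU counter, for illustration only. */
  static DEFINE_PER_CPU(unsigned long, demo_counter);

  static void demo_bump(void)
  {
          /*
           * The generic this_cpu_inc() resolves the per-CPU address via
           * __my_cpu_offset, which this patch turns into a single
           * "mv %0, gp", so the offset comes from a register instead of
           * a load from the __per_cpu_offset[] array in memory.
           */
          this_cpu_inc(demo_counter);
  }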