@@ -1244,7 +1244,7 @@ SYM_CODE_START(rewind_stack_and_make_dead)
xorl %ebp, %ebp
movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi
- leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
+ leal -PTREGS_SIZE(%esi), %esp
call make_task_dead
1: jmp 1b
@@ -178,6 +178,8 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
[EVENT_TYPE_OTHER] = fred_syscall_slow
};
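+ /* Record where the user-mode registers were saved, for task_pt_regs(). */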
+ current->thread_info.user_pt_regs = regs;
+
/*
* FRED employs a two-level event dispatch mechanism, with
* the first-level on the type of an event and the second-level
@@ -12,6 +12,9 @@
/* Check that the stack and regs on entry from user mode are sane. */
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
{
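+ /*
+ * fred_entry_from_user() records this on FRED systems; handle the
+ * IDT-based entry paths here.
+ */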
+ if (!cpu_feature_enabled(X86_FEATURE_FRED))
+ current->thread_info.user_pt_regs = regs;
+
if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
/*
* Make sure that the entry code gave us a sensible EFLAGS
@@ -626,17 +626,11 @@ static inline void spin_lock_prefetch(const void *x)
prefetchw(x);
}
-#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
- TOP_OF_KERNEL_STACK_PADDING)
+#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack))
-#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
+#define task_top_of_stack(task) ((unsigned long)task_stack_page(task) + THREAD_SIZE)
-#define task_pt_regs(task) \
-({ \
- unsigned long __ptr = (unsigned long)task_stack_page(task); \
- __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
- ((struct pt_regs *)__ptr) - 1; \
-})
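+/*
+ * The location of the user-mode pt_regs is recorded in thread_info at
+ * fork time and on each entry from user mode.
+ */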
+#define task_pt_regs(task) ((task)->thread_info.user_pt_regs)
#ifdef CONFIG_X86_32
#define INIT_THREAD { \
@@ -72,8 +72,7 @@ static inline void update_task_stack(struct task_struct *task)
/*
* Will use WRMSRNS/WRMSRLIST for performance once it's upstreamed.
*/
- wrmsrl(MSR_IA32_FRED_RSP0,
- task_top_of_stack(task) + TOP_OF_KERNEL_STACK_PADDING);
+ wrmsrl(MSR_IA32_FRED_RSP0, task_top_of_stack(task));
} else if (cpu_feature_enabled(X86_FEATURE_XENPV)) {
/* Xen PV enters the kernel on the thread stack. */
load_sp0(task_top_of_stack(task));
@@ -13,42 +13,6 @@
#include <asm/percpu.h>
#include <asm/types.h>
-/*
- * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
- * reserve at the top of the kernel stack. We do it because of a nasty
- * 32-bit corner case. On x86_32, the hardware stack frame is
- * variable-length. Except for vm86 mode, struct pt_regs assumes a
- * maximum-length frame. If we enter from CPL 0, the top 8 bytes of
- * pt_regs don't actually exist. Ordinarily this doesn't matter, but it
- * does in at least one case:
- *
- * If we take an NMI early enough in SYSENTER, then we can end up with
- * pt_regs that extends above sp0. On the way out, in the espfix code,
- * we can read the saved SS value, but that value will be above sp0.
- * Without this offset, that can result in a page fault. (We are
- * careful that, in this case, the value we read doesn't matter.)
- *
- * In vm86 mode, the hardware frame is much longer still, so add 16
- * bytes to make room for the real-mode segments.
- *
- * x86-64 has a fixed-length stack frame, but it depends on whether
- * or not FRED is enabled. Future versions of FRED might make this
- * dynamic, but for now it is always 2 words longer.
- */
-#ifdef CONFIG_X86_32
-# ifdef CONFIG_VM86
-# define TOP_OF_KERNEL_STACK_PADDING 16
-# else
-# define TOP_OF_KERNEL_STACK_PADDING 8
-# endif
-#else /* x86-64 */
-# ifdef CONFIG_X86_FRED
-# define TOP_OF_KERNEL_STACK_PADDING (2*8)
-# else
-# define TOP_OF_KERNEL_STACK_PADDING 0
-# endif
-#endif
-
/*
* low level task data that entry.S needs immediate access to
* - this struct should fit entirely inside of one cache line
@@ -56,6 +20,7 @@
*/
#ifndef __ASSEMBLY__
struct task_struct;
+struct pt_regs;
#include <asm/cpufeature.h>
#include <linux/atomic.h>
@@ -66,11 +31,14 @@ struct thread_info {
#ifdef CONFIG_SMP
u32 cpu; /* current CPU */
#endif
+ struct pt_regs *user_pt_regs; /* user-mode register save area */
};
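+/* For the init task, user pt_regs sit at the top of the init stack. */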
+#define INIT_TASK_PT_REGS ((struct pt_regs *)TOP_OF_INIT_STACK - 1)
#define INIT_THREAD_INFO(tsk) \
{ \
.flags = 0, \
+ .user_pt_regs = INIT_TASK_PT_REGS, \
}
#else /* !__ASSEMBLY__ */
@@ -240,6 +208,7 @@ static inline int arch_within_stack_frames(const void * const stack,
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+extern void arch_init_user_pt_regs(struct task_struct *tsk);
extern void arch_release_task_struct(struct task_struct *tsk);
extern void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
@@ -517,8 +517,7 @@ SYM_DATA_END(initial_page_table)
* reliably detect the end of the stack.
*/
SYM_DATA(initial_stack,
- .long init_thread_union + THREAD_SIZE -
- SIZEOF_PTREGS - TOP_OF_KERNEL_STACK_PADDING)
+ .long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS)
__INITRODATA
int_msg:
@@ -98,6 +98,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
return 0;
}
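+/* Point a new task's user pt_regs at the top of its kernel stack. */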
+void arch_init_user_pt_regs(struct task_struct *tsk)
+{
+ tsk->thread_info.user_pt_regs = (struct pt_regs *)task_top_of_stack(tsk) - 1;
+}
+
#ifdef CONFIG_X86_64
void arch_release_task_struct(struct task_struct *tsk)
{
@@ -958,6 +958,10 @@ int __weak arch_dup_task_struct(struct task_struct *dst,
return 0;
}
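+/* Overridden by architectures that track the user-mode pt_regs location. */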
+void __weak arch_init_user_pt_regs(struct task_struct *tsk)
+{
+}
+
void set_task_stack_end_magic(struct task_struct *tsk)
{
unsigned long *stackend;
@@ -985,6 +989,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_tsk;
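+ /* The child has a fresh stack; record where its user pt_regs will live. */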
+ arch_init_user_pt_regs(tsk);
+
#ifdef CONFIG_THREAD_INFO_IN_TASK
refcount_set(&tsk->stack_refcount, 1);
#endif