@@ -97,6 +97,32 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8
+/*
+ * __call_ext_ptregs - Helper macro to call into C with extended pt_regs
+ * @cfunc: C function to be called; handed verbatim to 'call', so an indirect
+ *         operand such as *%rax is also accepted
+ * @annotate_retpoline_safe: required; when 1, ANNOTATE_RETPOLINE_SAFE is
+ *         emitted directly in front of the call
+ *
+ * This will ensure that extended_pt_regs is added and removed as needed during
+ * a call into C code.
+ *
+ * With CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS, EXTENDED_PT_REGS_SIZE bytes are
+ * reserved below the current %rsp right before the call and released as soon
+ * as it returns. Without it the macro is just the (optionally annotated)
+ * call. No register other than %rsp is modified by the macro itself, so a
+ * pt_regs pointer loaded into %rdi beforehand is passed through untouched.
+ */
+.macro __call_ext_ptregs cfunc annotate_retpoline_safe:req
+#ifdef CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS
+ /* add space for extended_pt_regs */
+ subq $EXTENDED_PT_REGS_SIZE, %rsp
+#endif
+ .if \annotate_retpoline_safe == 1
+ ANNOTATE_RETPOLINE_SAFE
+ .endif
+ call \cfunc
+#ifdef CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS
+ /* remove space for extended_pt_regs */
+ addq $EXTENDED_PT_REGS_SIZE, %rsp
+#endif
+.endm
+
+/*
+ * call_ext_ptregs - __call_ext_ptregs without the retpoline annotation
+ * @cfunc: C function to be called
+ *
+ * Convenience form for call sites that do not need ANNOTATE_RETPOLINE_SAFE;
+ * it simply passes annotate_retpoline_safe=0.
+ */
+.macro call_ext_ptregs cfunc
+ __call_ext_ptregs \cfunc, annotate_retpoline_safe=0
+.endm
+
.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
.if \save_ret
pushq %rsi /* pt_regs->si */
@@ -19,6 +19,7 @@
#include <linux/nospec.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#include <linux/pkeys.h>
#ifdef CONFIG_XEN_PV
#include <xen/xen-ops.h>
@@ -34,6 +35,7 @@
#include <asm/io_bitmap.h>
#include <asm/syscall.h>
#include <asm/irq_stack.h>
+#include <asm/pks.h>
#ifdef CONFIG_X86_64
__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
@@ -214,6 +216,60 @@ SYSCALL_DEFINE0(ni_syscall)
return -ENOSYS;
}
+#ifdef CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS
+
+/*
+ * show_extended_regs_oops - print the PKRS value stashed next to @regs
+ * @regs: pt_regs of the faulting context, embedded in a struct extended_pt_regs
+ * @error_code: page fault error code
+ *
+ * Emits a "PKRS:" line only when the CPU has X86_FEATURE_PKS enabled and
+ * @error_code has X86_PF_PK set; otherwise nothing is printed.
+ */
+void show_extended_regs_oops(struct pt_regs *regs, unsigned long error_code)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_PKS) || !(error_code & X86_PF_PK))
+		return;
+
+	pr_alert("PKRS: 0x%x\n", extended_pt_regs(regs)->thread_pkrs);
+}
+
+/*
+ * PKRS is a per-logical-processor MSR which overlays additional protection for
+ * pages which have been mapped with a protection key.
+ *
+ * The register is not maintained with XSAVE so we have to maintain the MSR
+ * value in software during context switch and exception handling.
+ *
+ * Context switches save the MSR in the task struct thus taking that value to
+ * other processors if necessary.
+ *
+ * To protect against exceptions having access to this memory we save the
+ * current thread's value and set the PKRS value to be used during the
+ * exception.
+ *
+ * @regs: pt_regs of the interrupted context; must be embedded in a
+ *        struct extended_pt_regs, i.e. the handler must have been entered
+ *        through call_ext_ptregs/__call_ext_ptregs
+ * @val: PKRS value to install while the exception is handled
+ *
+ * Does nothing when X86_FEATURE_PKS is not enabled.
+ *
+ * NOTE(review): this is invoked from noinstr entry code after
+ * instrumentation_end() (irqentry_enter(), irqentry_nmi_enter()) but is not
+ * itself marked noinstr - confirm this is acceptable to objtool, and that
+ * write_pkrs() is safe to call from that context.
+ */
+void pkrs_save_set_irq(struct pt_regs *regs, u32 val)
+{
+ struct extended_pt_regs *ept_regs;
+
+ /*
+  * The asm entry macro reserves exactly EXTENDED_PT_REGS_SIZE bytes in
+  * front of pt_regs; fail the build if the C layout ever disagrees.
+  */
+ BUILD_BUG_ON(sizeof(struct extended_pt_regs)
+ != EXTENDED_PT_REGS_SIZE
+ + sizeof(struct pt_regs));
+
+ if (!cpu_feature_enabled(X86_FEATURE_PKS))
+ return;
+
+ /* Stash the task's saved_pkrs (not a fresh MSR read) before switching. */
+ ept_regs = extended_pt_regs(regs);
+ ept_regs->thread_pkrs = current->thread.saved_pkrs;
+ write_pkrs(val);
+}
+
+/*
+ * pkrs_restore_irq - undo pkrs_save_set_irq() on exception exit
+ * @regs: the same pt_regs that was passed to pkrs_save_set_irq(); must be
+ *        embedded in a struct extended_pt_regs
+ *
+ * Writes the stashed thread_pkrs back through write_pkrs() and stores the
+ * same value in current->thread.saved_pkrs. Does nothing when
+ * X86_FEATURE_PKS is not enabled.
+ *
+ * NOTE(review): called from noinstr code outside an
+ * instrumentation_begin()/instrumentation_end() section
+ * (irqentry_nmi_exit(), xen_pv_evtchn_do_upcall()) but not marked noinstr -
+ * confirm this is acceptable to objtool.
+ */
+void pkrs_restore_irq(struct pt_regs *regs)
+{
+ struct extended_pt_regs *ept_regs;
+
+ if (!cpu_feature_enabled(X86_FEATURE_PKS))
+ return;
+
+ ept_regs = extended_pt_regs(regs);
+ write_pkrs(ept_regs->thread_pkrs);
+ current->thread.saved_pkrs = ept_regs->thread_pkrs;
+}
+
+#endif /* CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS */
+
#ifdef CONFIG_XEN_PV
#ifndef CONFIG_PREEMPTION
/*
@@ -270,6 +326,8 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
inhcall = get_and_clear_inhcall();
if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
+ /* Normally called by irqentry_exit, we must restore pkrs here */
+ pkrs_restore_irq(regs);
instrumentation_begin();
irqentry_exit_cond_resched();
instrumentation_end();
@@ -331,7 +331,7 @@ SYM_CODE_END(ret_from_fork)
movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
.endif
- call \cfunc
+ call_ext_ptregs \cfunc
jmp error_return
.endm
@@ -434,7 +434,7 @@ SYM_CODE_START(\asmsym)
movq %rsp, %rdi /* pt_regs pointer */
- call \cfunc
+ call_ext_ptregs \cfunc
jmp paranoid_exit
@@ -495,7 +495,7 @@ SYM_CODE_START(\asmsym)
* stack.
*/
movq %rsp, %rdi /* pt_regs pointer */
- call vc_switch_off_ist
+ call_ext_ptregs vc_switch_off_ist
movq %rax, %rsp /* Switch to new stack */
UNWIND_HINT_REGS
@@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym)
movq %rsp, %rdi /* pt_regs pointer */
- call \cfunc
+ call_ext_ptregs \cfunc
/*
* No need to switch back to the IST stack. The current stack is either
@@ -541,7 +541,7 @@ SYM_CODE_START(\asmsym)
movq %rsp, %rdi /* pt_regs pointer into first argument */
movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/
movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */
- call \cfunc
+ call_ext_ptregs \cfunc
jmp paranoid_exit
@@ -780,7 +780,7 @@ SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback)
movq %rdi, %rsp /* we don't return, adjust the stack frame */
UNWIND_HINT_REGS
- call xen_pv_evtchn_do_upcall
+ call_ext_ptregs xen_pv_evtchn_do_upcall
jmp error_return
SYM_CODE_END(exc_xen_hypervisor_callback)
@@ -986,7 +986,7 @@ SYM_CODE_START_LOCAL(error_entry)
/* Put us onto the real thread stack. */
popq %r12 /* save return addr in %12 */
movq %rsp, %rdi /* arg0 = pt_regs pointer */
- call sync_regs
+ call_ext_ptregs sync_regs
movq %rax, %rsp /* switch stack */
ENCODE_FRAME_POINTER
pushq %r12
@@ -1041,7 +1041,7 @@ SYM_CODE_START_LOCAL(error_entry)
* as if we faulted immediately after IRET.
*/
mov %rsp, %rdi
- call fixup_bad_iret
+ call_ext_ptregs fixup_bad_iret
mov %rax, %rsp
jmp .Lerror_entry_from_usermode_after_swapgs
SYM_CODE_END(error_entry)
@@ -1147,7 +1147,7 @@ SYM_CODE_START(asm_exc_nmi)
movq %rsp, %rdi
movq $-1, %rsi
- call exc_nmi
+ call_ext_ptregs exc_nmi
/*
* Return back to user mode. We must *not* do the normal exit
@@ -1183,6 +1183,8 @@ SYM_CODE_START(asm_exc_nmi)
* +---------------------------------------------------------+
* | pt_regs |
* +---------------------------------------------------------+
+ * | (Optionally) extended_pt_regs |
+ * +---------------------------------------------------------+
*
* The "original" frame is used by hardware. Before re-enabling
* NMIs, we need to be done with it, and we need to leave enough
@@ -1359,7 +1361,7 @@ end_repeat_nmi:
movq %rsp, %rdi
movq $-1, %rsi
- call exc_nmi
+ call_ext_ptregs exc_nmi
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
@@ -136,7 +136,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
.Lsysenter_flags_fixed:
movq %rsp, %rdi
- call do_SYSENTER_32
+ call_ext_ptregs do_SYSENTER_32
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
"jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
@@ -253,7 +253,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
UNWIND_HINT_REGS
movq %rsp, %rdi
- call do_fast_syscall_32
+ call_ext_ptregs do_fast_syscall_32
/* XEN PV guests always use IRET path */
ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \
"jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
@@ -410,6 +410,6 @@ SYM_CODE_START(entry_INT80_compat)
cld
movq %rsp, %rdi
- call do_int80_syscall_32
+ call_ext_ptregs do_int80_syscall_32
jmp swapgs_restore_regs_and_return_to_usermode
SYM_CODE_END(entry_INT80_compat)
@@ -4,11 +4,27 @@
#ifdef CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS
+/*
+ * struct extended_pt_regs - pt_regs plus extra state stored just below it
+ *
+ * The members in front of @pt_regs occupy the stack space that the
+ * __call_ext_ptregs entry macro reserves (EXTENDED_PT_REGS_SIZE bytes), so
+ * their combined size must stay equal to EXTENDED_PT_REGS_SIZE; this is
+ * enforced by a BUILD_BUG_ON() in pkrs_save_set_irq().
+ */
+struct extended_pt_regs {
+ /* Thread PKRS value stashed on exception entry, reinstated on exit */
+ u32 thread_pkrs;
+ /* Keep stack 8 byte aligned */
+ u32 pad;
+ /* Must remain the last member: container_of() is used to get from it to here */
+ struct pt_regs pt_regs;
+};
+
void setup_pks(void);
+/*
+ * Map a pt_regs pointer to the struct extended_pt_regs that embeds it.
+ * Only meaningful for a @regs that really lives inside such a struct.
+ */
+static inline struct extended_pt_regs *extended_pt_regs(struct pt_regs *regs)
+{
+	struct extended_pt_regs *ext;
+
+	ext = container_of(regs, struct extended_pt_regs, pt_regs);
+	return ext;
+}
+
+void show_extended_regs_oops(struct pt_regs *regs, unsigned long error_code);
+
#else /* !CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS */
static inline void setup_pks(void) { }
+/* No extended register state without supervisor pkeys: nothing to print. */
+static inline void show_extended_regs_oops(struct pt_regs *regs,
+ unsigned long error_code) { }
#endif /* CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS */
@@ -53,4 +53,6 @@
# define X86_CR3_PTI_PCID_USER_BIT 11
#endif
+/*
+ * Bytes reserved on the stack in front of pt_regs by __call_ext_ptregs.
+ * Must equal sizeof(struct extended_pt_regs) - sizeof(struct pt_regs);
+ * pkrs_save_set_irq() has a BUILD_BUG_ON() checking this.
+ */
+#define EXTENDED_PT_REGS_SIZE 8
+
#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
@@ -319,8 +319,7 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb)
movq %rsp, %rdi
movq ORIG_RAX(%rsp), %rsi
movq initial_vc_handler(%rip), %rax
- ANNOTATE_RETPOLINE_SAFE
- call *%rax
+ __call_ext_ptregs *%rax, annotate_retpoline_safe=1
/* Unwind pt_regs */
POP_REGS
@@ -397,7 +396,7 @@ SYM_CODE_START_LOCAL(early_idt_handler_common)
UNWIND_HINT_REGS
movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */
- call do_early_exception
+ call_ext_ptregs do_early_exception
decl early_recursion_flag(%rip)
jmp restore_regs_and_return_to_kernel
@@ -421,7 +420,7 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb)
/* Call C handler */
movq %rsp, %rdi
movq ORIG_RAX(%rsp), %rsi
- call do_vc_no_ghcb
+ call_ext_ptregs do_vc_no_ghcb
/* Unwind pt_regs */
POP_REGS
@@ -32,6 +32,7 @@
#include <asm/pgtable_areas.h> /* VMALLOC_START, ... */
#include <asm/kvm_para.h> /* kvm_handle_async_pf */
#include <asm/vdso.h> /* fixup_vdso_exception() */
+#include <asm/pks.h>
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -547,6 +548,8 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
(error_code & X86_PF_PK) ? "protection keys violation" :
"permissions violation");
+ show_extended_regs_oops(regs, error_code);
+
if (!(error_code & X86_PF_USER) && user_mode(regs)) {
struct desc_ptr idt, gdt;
u16 ldtr, tr;
@@ -50,4 +50,21 @@ static inline void copy_init_pkru_to_fpregs(void)
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
+
+/*
+ * PKRS save/restore hooks used by the generic entry code around exception
+ * handling. They are real functions with supervisor pkeys and empty stubs
+ * without.
+ */
+#ifdef CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS
+
+/* Stash the thread's PKRS next to @regs and install @val. */
+void pkrs_save_set_irq(struct pt_regs *regs, u32 val);
+/* Reinstate the PKRS value stashed next to @regs. */
+void pkrs_restore_irq(struct pt_regs *regs);
+
+#else /* !CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS */
+
+/*
+ * Callers pass INIT_PKRS_VALUE unconditionally, so provide a fallback
+ * definition to keep them compiling when nothing else defines it.
+ */
+#ifndef INIT_PKRS_VALUE
+#define INIT_PKRS_VALUE 0
+#endif
+
+static inline void pkrs_save_set_irq(struct pt_regs *regs, u32 val) { }
+static inline void pkrs_restore_irq(struct pt_regs *regs) { }
+
+#endif /* CONFIG_ARCH_ENABLE_SUPERVISOR_PKEYS */
+
#endif /* _LINUX_PKEYS_H */
@@ -5,6 +5,7 @@
#include <linux/highmem.h>
#include <linux/livepatch.h>
#include <linux/audit.h>
+#include <linux/pkeys.h>
#include "common.h"
@@ -363,7 +364,7 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
instrumentation_end();
ret.exit_rcu = true;
- return ret;
+ goto done;
}
/*
@@ -378,6 +379,8 @@ noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
trace_hardirqs_off_finish();
instrumentation_end();
+done:
+ pkrs_save_set_irq(regs, INIT_PKRS_VALUE);
return ret;
}
@@ -403,7 +406,12 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
/* Check whether this returns to user mode */
if (user_mode(regs)) {
irqentry_exit_to_user_mode(regs);
- } else if (!regs_irqs_disabled(regs)) {
+ return;
+ }
+
+ pkrs_restore_irq(regs);
+
+ if (!regs_irqs_disabled(regs)) {
/*
* If RCU was not watching on entry this needs to be done
* carefully and needs the same ordering of lockdep/tracing
@@ -457,11 +465,13 @@ irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs)
ftrace_nmi_enter();
instrumentation_end();
+ pkrs_save_set_irq(regs, INIT_PKRS_VALUE);
return irq_state;
}
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state)
{
+ pkrs_restore_irq(regs);
instrumentation_begin();
ftrace_nmi_exit();
if (irq_state.lockdep) {