@@ -67,6 +67,8 @@ boolean_param("smep", opt_smep);
static bool_t __initdata opt_smap = 1;
boolean_param("smap", opt_smap);
+unsigned long __read_mostly cr4_pv32_mask;
+
/* Boot dom0 in pvh mode */
static bool_t __initdata opt_dom0pvh;
boolean_param("dom0pvh", opt_dom0pvh);
@@ -1364,6 +1366,8 @@ void __init noreturn __start_xen(unsigne
if ( cpu_has_smap )
set_in_cr4(X86_CR4_SMAP);
+ cr4_pv32_mask = mmu_cr4_features & (X86_CR4_SMEP | X86_CR4_SMAP);
+
if ( cpu_has_fsgsbase )
set_in_cr4(X86_CR4_FSGSBASE);
@@ -1500,7 +1504,10 @@ void __init noreturn __start_xen(unsigne
* copy_from_user().
*/
if ( cpu_has_smap )
+ {
+ cr4_pv32_mask &= ~X86_CR4_SMAP;
write_cr4(read_cr4() & ~X86_CR4_SMAP);
+ }
printk("%sNX (Execute Disable) protection %sactive\n",
cpu_has_nx ? XENLOG_INFO : XENLOG_WARNING "Warning: ",
@@ -1517,7 +1524,10 @@ void __init noreturn __start_xen(unsigne
panic("Could not set up DOM0 guest OS");
if ( cpu_has_smap )
+ {
write_cr4(read_cr4() | X86_CR4_SMAP);
+ cr4_pv32_mask |= X86_CR4_SMAP;
+ }
/* Scrub RAM that is still free and so may go to an unprivileged domain. */
scrub_heap_pages();
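
The setup.c hunks above introduce cr4_pv32_mask: the subset of X86_CR4_SMEP/X86_CR4_SMAP that Xen wants set while it runs and that therefore has to be toggled around 32-bit PV guest execution. The mask is derived from mmu_cr4_features once the features are enabled, and it is kept in step with the temporary CR4.SMAP drop around dom0 construction; note the ordering, which keeps the mask a subset of the live CR4 at all times (the mask shrinks before CR4.SMAP is cleared, and grows only after CR4.SMAP is set again). A minimal, standalone C sketch of that invariant, with hypothetical helper names:

/*
 * Minimal sketch (hypothetical helpers): cr4_pv32_mask holds exactly those
 * of SMEP/SMAP that the entry paths may legitimately OR back into CR4, so
 * it must never contain a bit the hypervisor has deliberately turned off.
 */
#define X86_CR4_SMEP (1UL << 20)
#define X86_CR4_SMAP (1UL << 21)

static unsigned long mmu_cr4_features;      /* stand-in for Xen's copy */
static unsigned long cr4_pv32_mask;

static void init_cr4_pv32_mask(void)
{
    cr4_pv32_mask = mmu_cr4_features & (X86_CR4_SMEP | X86_CR4_SMAP);
}

/* Around dom0 construction, CR4.SMAP is dropped so copy_from_user() works. */
static void smap_disable_for_dom0_build(void)
{
    cr4_pv32_mask &= ~X86_CR4_SMAP;         /* shrink the mask first ...     */
    /* write_cr4(read_cr4() & ~X86_CR4_SMAP);   ... then clear CR4.SMAP      */
}

static void smap_reenable_after_dom0_build(void)
{
    /* write_cr4(read_cr4() | X86_CR4_SMAP);    set CR4.SMAP first ...       */
    cr4_pv32_mask |= X86_CR4_SMAP;          /* ... then grow the mask        */
}
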
@@ -135,6 +135,7 @@ void __dummy__(void)
OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs);
OFFSET(CPUINFO_processor_id, struct cpu_info, processor_id);
OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu);
+ OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
BLANK();
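
The asm-offsets hunk exports CPUINFO_cr4, the offset of a per-CPU cached CR4 value in struct cpu_info; the cr4 field itself is added elsewhere in the patch and is not part of this excerpt. With the offset available, the entry/exit code can read and update the intended CR4 through %rsp-relative addressing instead of an expensive mov from %cr4. A sketch of the asm-offsets technique (not Xen's exact macros; the struct shape here is assumed):

#include <stddef.h>

struct cpu_info {                 /* assumed shape; the real struct is larger */
    /* ... guest_cpu_user_regs, processor_id, current_vcpu, ... */
    unsigned long cr4;            /* cached CR4, added by this patch */
};

/* The file is compiled to assembly and the ==> ... <== markers are scraped
 * into a generated header of numeric constants usable from .S files. */
#define DEFINE(sym, val) \
    asm volatile ( "\n.ascii \"==>#define " #sym " %0 <==\"" : : "i" (val) )
#define OFFSET(sym, str, mem) DEFINE(sym, offsetof(str, mem))

void __dummy__(void)
{
    OFFSET(CPUINFO_cr4, struct cpu_info, cr4);
}
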
@@ -16,14 +16,16 @@ ENTRY(compat_hypercall)
ASM_CLAC
pushq $0
SAVE_VOLATILE type=TRAP_syscall compat=1
+ CR4_PV32_RESTORE
cmpb $0,untrusted_msi(%rip)
UNLIKELY_START(ne, msi_check)
movl $HYPERCALL_VECTOR,%edi
call check_for_unexpected_msi
- LOAD_C_CLOBBERED
+ LOAD_C_CLOBBERED compat=1 ax=0
UNLIKELY_END(msi_check)
+ movl UREGS_rax(%rsp),%eax
GET_CURRENT(%rbx)
cmpl $NR_hypercalls,%eax
@@ -33,7 +35,6 @@ UNLIKELY_END(msi_check)
pushq UREGS_rbx(%rsp); pushq %rcx; pushq %rdx; pushq %rsi; pushq %rdi
pushq UREGS_rbp+5*8(%rsp)
leaq compat_hypercall_args_table(%rip),%r10
- movl %eax,%eax
movl $6,%ecx
subb (%r10,%rax,1),%cl
movq %rsp,%rdi
@@ -48,7 +49,6 @@ UNLIKELY_END(msi_check)
#define SHADOW_BYTES 16 /* Shadow EIP + shadow hypercall # */
#else
/* Relocate argument registers and zero-extend to 64 bits. */
- movl %eax,%eax /* Hypercall # */
xchgl %ecx,%esi /* Arg 2, Arg 4 */
movl %edx,%edx /* Arg 3 */
movl %edi,%r8d /* Arg 5 */
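
The compat hypercall hunks rely on a basic x86-64 rule: writing a 32-bit register zero-extends into the full 64-bit register. The hypercall number is now always loaded with a 32-bit movl (directly, or via LOAD_C_CLOBBERED compat=1), so the two explicit "movl %eax,%eax" zero-extension instructions become redundant and can be dropped. A tiny standalone demonstration of that rule (GCC/Clang inline asm, illustrative only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t out;
    uint32_t in = 0x11;

    asm ("movq $-1, %0\n\t"   /* poison all 64 bits of the register */
         "movl %1, %k0"       /* a 32-bit write clears bits 63:32   */
         : "=&r" (out) : "rm" (in));
    printf("%#llx\n", (unsigned long long)out);   /* prints 0x11 */
    return 0;
}

At the msi_check site this pairs with LOAD_C_CLOBBERED compat=1 ax=0: %rax is left untouched inside the unlikely block, and the zero-extended hypercall number is reloaded into %eax right after UNLIKELY_END on both paths.
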
@@ -174,10 +174,61 @@ compat_bad_hypercall:
/* %rbx: struct vcpu, interrupts disabled */
ENTRY(compat_restore_all_guest)
ASSERT_INTERRUPTS_DISABLED
+.Lcr4_orig:
+ ASM_NOP8 /* testb $3,UREGS_cs(%rsp) */
+ ASM_NOP2 /* jpe .Lcr4_alt_end */
+ ASM_NOP8 /* mov CPUINFO_cr4...(%rsp), %rax */
+ ASM_NOP6 /* and $..., %rax */
+ ASM_NOP8 /* mov %rax, CPUINFO_cr4...(%rsp) */
+ ASM_NOP3 /* mov %rax, %cr4 */
+.Lcr4_orig_end:
+ .pushsection .altinstr_replacement, "ax"
+.Lcr4_alt:
+ testb $3,UREGS_cs(%rsp)
+ jpe .Lcr4_alt_end
+ mov CPUINFO_cr4-CPUINFO_guest_cpu_user_regs(%rsp), %rax
+ and $~(X86_CR4_SMEP|X86_CR4_SMAP), %rax
+ mov %rax, CPUINFO_cr4-CPUINFO_guest_cpu_user_regs(%rsp)
+ mov %rax, %cr4
+.Lcr4_alt_end:
+ .section .altinstructions, "a"
+ altinstruction_entry .Lcr4_orig, .Lcr4_alt, X86_FEATURE_SMEP, \
+ (.Lcr4_orig_end - .Lcr4_orig), \
+ (.Lcr4_alt_end - .Lcr4_alt)
+ altinstruction_entry .Lcr4_orig, .Lcr4_alt, X86_FEATURE_SMAP, \
+ (.Lcr4_orig_end - .Lcr4_orig), \
+ (.Lcr4_alt_end - .Lcr4_alt)
+ .popsection
RESTORE_ALL adj=8 compat=1
.Lft0: iretq
_ASM_PRE_EXTABLE(.Lft0, handle_exception)
+/* This mustn't modify registers other than %rax. */
+ENTRY(cr4_pv32_restore)
+ push %rdx
+ GET_CPUINFO_FIELD(cr4, %rdx)
+ mov (%rdx), %rax
+ test $X86_CR4_SMEP|X86_CR4_SMAP,%eax
+ jnz 0f
+ or cr4_pv32_mask(%rip), %rax
+ mov %rax, %cr4
+ mov %rax, (%rdx)
+ pop %rdx
+ ret
+0:
+#ifndef NDEBUG
+ /* Check that _all_ of the bits intended to be set actually are. */
+ mov %cr4, %rax
+ and cr4_pv32_mask(%rip), %rax
+ cmp cr4_pv32_mask(%rip), %rax
+ je 1f
+ BUG
+1:
+#endif
+ pop %rdx
+ xor %eax, %eax
+ ret
+
/* %rdx: trap_bounce, %rbx: struct vcpu */
ENTRY(compat_post_handle_exception)
testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
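
The hunk above does two things. On the way out to 32-bit PV guest context, compat_restore_all_guest gains a block of NOPs (each annotated with the instruction it stands in for) that boot-time alternatives patching overwrites, when SMEP or SMAP exists, with code clearing the two bits both in the cached CPUINFO_cr4 slot and in %cr4. The "jpe" exploits the parity flag left by "testb $3,UREGS_cs(%rsp)": an RPL-1 selector (32-bit PV kernel) has odd parity, so the clearing runs, while RPL 3 (guest user space, which SMEP/SMAP do not constrain) has even parity and skips it. On the way back in, cr4_pv32_restore re-enables the bits and keeps the cache coherent. Its logic, as a self-contained C model (stand-in names, not Xen's real helpers):

#include <assert.h>

#define X86_CR4_SMEP (1UL << 20)
#define X86_CR4_SMAP (1UL << 21)

static unsigned long cr4_pv32_mask = X86_CR4_SMEP | X86_CR4_SMAP;
static unsigned long live_cr4;      /* stand-in for %cr4          */
static unsigned long cached_cr4;    /* stand-in for cpu_info->cr4 */

/*
 * Returns the new CR4 value if SMEP/SMAP had to be re-enabled, or 0 if they
 * were already on when the hypervisor was re-entered (the NMI/#MC race that
 * handle_exception deals with further down).
 */
unsigned long cr4_pv32_restore_model(void)
{
    unsigned long cr4 = cached_cr4;

    if ( !(cr4 & (X86_CR4_SMEP | X86_CR4_SMAP)) )
    {
        cr4 |= cr4_pv32_mask;
        live_cr4 = cr4;             /* mov %rax, %cr4   */
        cached_cr4 = cr4;           /* mov %rax, (%rdx) */
        return cr4;
    }

    /* Debug builds verify that _all_ intended bits really are set. */
    assert( (live_cr4 & cr4_pv32_mask) == cr4_pv32_mask );
    return 0;
}
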
@@ -190,6 +241,7 @@ ENTRY(compat_post_handle_exception)
/* See lstar_enter for entry register state. */
ENTRY(cstar_enter)
sti
+ CR4_PV32_RESTORE
movq 8(%rsp),%rax /* Restore %rax. */
movq $FLAT_KERNEL_SS,8(%rsp)
pushq %r11
@@ -225,6 +277,7 @@ UNLIKELY_END(compat_syscall_gpf)
jmp .Lcompat_bounce_exception
ENTRY(compat_sysenter)
+ CR4_PV32_RESTORE
movq VCPU_trap_ctxt(%rbx),%rcx
cmpb $TRAP_gp_fault,UREGS_entry_vector(%rsp)
movzwl VCPU_sysenter_sel(%rbx),%eax
@@ -238,6 +291,7 @@ ENTRY(compat_sysenter)
jmp compat_test_all_events
ENTRY(compat_int80_direct_trap)
+ CR4_PV32_RESTORE
call compat_create_bounce_frame
jmp compat_test_all_events
@@ -434,6 +434,7 @@ ENTRY(dom_crash_sync_extable)
ENTRY(common_interrupt)
SAVE_ALL CLAC
+ CR4_PV32_RESTORE
movq %rsp,%rdi
callq do_IRQ
jmp ret_from_intr
@@ -454,13 +455,67 @@ ENTRY(page_fault)
GLOBAL(handle_exception)
SAVE_ALL CLAC
handle_exception_saved:
+ GET_CURRENT(%rbx)
testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%rsp)
jz exception_with_ints_disabled
+
+.Lcr4_pv32_orig:
+ jmp .Lcr4_pv32_done
+ .skip (.Lcr4_pv32_alt_end - .Lcr4_pv32_alt) - (. - .Lcr4_pv32_orig), 0xcc
+ .pushsection .altinstr_replacement, "ax"
+.Lcr4_pv32_alt:
+ mov VCPU_domain(%rbx),%rax
+.Lcr4_pv32_alt_end:
+ .section .altinstructions, "a"
+ altinstruction_entry .Lcr4_pv32_orig, .Lcr4_pv32_alt, \
+ X86_FEATURE_SMEP, \
+ (.Lcr4_pv32_alt_end - .Lcr4_pv32_alt), \
+ (.Lcr4_pv32_alt_end - .Lcr4_pv32_alt)
+ altinstruction_entry .Lcr4_pv32_orig, .Lcr4_pv32_alt, \
+ X86_FEATURE_SMAP, \
+ (.Lcr4_pv32_alt_end - .Lcr4_pv32_alt), \
+ (.Lcr4_pv32_alt_end - .Lcr4_pv32_alt)
+ .popsection
+
+ testb $3,UREGS_cs(%rsp)
+ jz .Lcr4_pv32_done
+ cmpb $0,DOMAIN_is_32bit_pv(%rax)
+ je .Lcr4_pv32_done
+ call cr4_pv32_restore
+ /*
+ * An NMI or #MC may occur between clearing CR4.SMEP / CR4.SMAP in
+ * compat_restore_all_guest and it actually returning to guest
+ * context, in which case the guest would run with the two features
+ * enabled. The only bad thing that can happen from this is a kernel-mode
+ * #PF which the guest doesn't expect. Rather than trying to make the
+ * NMI/#MC exit path honor the intended CR4 setting, simply check
+ * whether the wrong CR4 was in use when the #PF occurred, and exit
+ * back to the guest (which will in turn clear the two CR4 bits) to
+ * re-execute the instruction. If we get back here, the CR4 bits
+ * should then be found clear (unless another NMI/#MC occurred at
+ * exactly the right time), and we'll continue processing the
+ * exception as normal.
+ */
+ test %rax,%rax
+ jnz .Lcr4_pv32_done
+ /*
+ * The below effectively is
+ * if ( regs->entry_vector == TRAP_page_fault &&
+ * (regs->error_code & PFEC_page_present) &&
+ * !(regs->error_code & ~(PFEC_page_present|PFEC_write_access|PFEC_insn_fetch)) )
+ * goto compat_test_all_events;
+ */
+ mov $PFEC_page_present,%al
+ cmpb $TRAP_page_fault,UREGS_entry_vector(%rsp)
+ jne .Lcr4_pv32_done
+ xor UREGS_error_code(%rsp),%eax
+ test $~(PFEC_write_access|PFEC_insn_fetch),%eax
+ jz compat_test_all_events
+.Lcr4_pv32_done:
sti
1: movq %rsp,%rdi
movzbl UREGS_entry_vector(%rsp),%eax
leaq exception_table(%rip),%rdx
- GET_CURRENT(%rbx)
PERFC_INCR(exceptions, %rax, %rbx)
callq *(%rdx,%rax,8)
testb $3,UREGS_cs(%rsp)
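
In handle_exception the new prologue is a "jmp .Lcr4_pv32_done" that alternatives patching turns into "mov VCPU_domain(%rbx),%rax" when SMEP or SMAP is present (GET_CURRENT(%rbx) is hoisted above the check for that reason). The code then decides whether the exception should be handled normally or whether it is a spurious SMEP/SMAP-induced #PF caused by the NMI/#MC window described in the comment, in which case it returns straight to the guest via compat_test_all_events so the faulting instruction is retried with CR4 fixed up. Roughly, in C (stub types and illustrative names; cr4_pv32_restore_model() is the sketch shown earlier):

#include <stdbool.h>
#include <stdint.h>

#define TRAP_page_fault    14
#define PFEC_page_present  (1u << 0)
#define PFEC_write_access  (1u << 1)
#define PFEC_insn_fetch    (1u << 4)

struct regs_model {
    unsigned int cs_rpl;        /* low two bits of the saved CS */
    unsigned int entry_vector;
    uint32_t     error_code;
};

unsigned long cr4_pv32_restore_model(void);   /* from the earlier sketch */

/* true: bounce back to the guest and retry; false: handle the exception. */
static bool spurious_pv32_fault(const struct regs_model *regs,
                                bool curr_is_32bit_pv)
{
    if ( !regs->cs_rpl || !curr_is_32bit_pv )
        return false;                    /* not 32-bit PV guest context */
    if ( cr4_pv32_restore_model() )
        return false;                    /* CR4 was as intended         */
    /* Only a present-page #PF with at most write/insn-fetch set qualifies. */
    return regs->entry_vector == TRAP_page_fault &&
           (regs->error_code & ~(PFEC_write_access | PFEC_insn_fetch)) ==
            PFEC_page_present;
}
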
@@ -590,6 +645,7 @@ ENTRY(nmi)
movl $TRAP_nmi,4(%rsp)
handle_ist_exception:
SAVE_ALL CLAC
+ CR4_PV32_RESTORE
testb $3,UREGS_cs(%rsp)
jz 1f
/* Interrupted guest context. Copy the context to stack bottom. */
@@ -209,6 +209,16 @@ void ret_from_intr(void);
#define ASM_STAC ASM_AC(STAC)
#define ASM_CLAC ASM_AC(CLAC)
+
+#define CR4_PV32_RESTORE \
+ 667: ASM_NOP5; \
+ .pushsection .altinstr_replacement, "ax"; \
+ 668: call cr4_pv32_restore; \
+ .section .altinstructions, "a"; \
+ altinstruction_entry 667b, 668b, X86_FEATURE_SMEP, 5, 5; \
+ altinstruction_entry 667b, 668b, X86_FEATURE_SMAP, 5, 5; \
+ .popsection
+
#else
static always_inline void clac(void)
{
@@ -308,14 +318,18 @@ static always_inline void stac(void)
*
* For the way it is used in RESTORE_ALL, this macro must preserve EFLAGS.ZF.
*/
-.macro LOAD_C_CLOBBERED compat=0
+.macro LOAD_C_CLOBBERED compat=0 ax=1
.if !\compat
movq UREGS_r11(%rsp),%r11
movq UREGS_r10(%rsp),%r10
movq UREGS_r9(%rsp),%r9
movq UREGS_r8(%rsp),%r8
-.endif
+.if \ax
movq UREGS_rax(%rsp),%rax
+.endif
+.elseif \ax
+ movl UREGS_rax(%rsp),%eax
+.endif
movq UREGS_rcx(%rsp),%rcx
movq UREGS_rdx(%rsp),%rdx
movq UREGS_rsi(%rsp),%rsi
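
LOAD_C_CLOBBERED now takes an ax= parameter, and in compat mode restores %eax with a 32-bit load (which zero-extends) rather than a 64-bit one. What the two compat-mode forms expand to, shown as a standalone snippet (the UREGS_* values here are placeholders so this assembles on its own; they are not Xen's frame layout):

        .set  UREGS_rax, 0x00
        .set  UREGS_rcx, 0x08
        .set  UREGS_rdx, 0x10
        .set  UREGS_rsi, 0x18

        /* LOAD_C_CLOBBERED compat=1 (ax defaults to 1): */
        movl  UREGS_rax(%rsp),%eax        /* zero-extends into %rax */
        movq  UREGS_rcx(%rsp),%rcx
        movq  UREGS_rdx(%rsp),%rdx
        movq  UREGS_rsi(%rsp),%rsi
        /* ... remaining C-clobbered registers ... */

        /* LOAD_C_CLOBBERED compat=1 ax=0 (the msi_check path): %rax is left
         * for the caller, which reloads the hypercall number afterwards. */
        movq  UREGS_rcx(%rsp),%rcx
        movq  UREGS_rdx(%rsp),%rdx
        movq  UREGS_rsi(%rsp),%rsi
        /* ... remaining C-clobbered registers ... */
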
@@ -134,12 +134,12 @@
#define TF_kernel_mode (1<<_TF_kernel_mode)
/* #PF error code values. */
-#define PFEC_page_present (1U<<0)
-#define PFEC_write_access (1U<<1)
-#define PFEC_user_mode (1U<<2)
-#define PFEC_reserved_bit (1U<<3)
-#define PFEC_insn_fetch (1U<<4)
-#define PFEC_prot_key (1U<<5)
+#define PFEC_page_present (_AC(1,U) << 0)
+#define PFEC_write_access (_AC(1,U) << 1)
+#define PFEC_user_mode (_AC(1,U) << 2)
+#define PFEC_reserved_bit (_AC(1,U) << 3)
+#define PFEC_insn_fetch (_AC(1,U) << 4)
+#define PFEC_prot_key (_AC(1,U) << 5)
/* Internally used only flags. */
#define PFEC_page_paged (1U<<16)
#define PFEC_page_shared (1U<<17)
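
The PFEC_* constants are now used as assembler immediates in the handle_exception hunk, and a "U" suffix is not valid in a GAS expression, hence the switch to _AC(). The usual const.h pattern this relies on, as a sketch (Xen's header is equivalent):

/* In C, _AC(1,U) pastes the suffix and yields 1U; when the same header is
 * pulled into a .S file with __ASSEMBLY__ defined, it yields a plain 1. */
#ifdef __ASSEMBLY__
#define _AC(X, Y)   X
#else
#define __AC(X, Y)  (X##Y)
#define _AC(X, Y)   __AC(X, Y)
#endif

#define MY_PFEC_page_present (_AC(1, U) << 0)   /* usable from both C and asm */

This is presumably why the internal-only PFEC_page_paged / PFEC_page_shared flags are left untouched: they are never referenced from assembly.
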