[RFC,6/7] x86/entry: Use atomic-IST-entry for VC

Message ID 20230403140605.540512-7-jiangshanlai@gmail.com
State New, archived
Series x86/entry: Atomic stack switching for IST

Commit Message

Lai Jiangshan April 3, 2023, 2:06 p.m. UTC
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
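
Switch the #VC exception from the open-coded idtentry_vc entry stub to
the common atomic-IST-entry code by enabling its DEFINE_IDENTIFY_IST()
entry. With atomic stack switching in place, a nested #VC exception can
no longer clobber an interrupted #VC handler's stack, so the ad-hoc
protection machinery becomes unnecessary and is removed:

  - the idtentry_vc asm macro and vc_switch_off_ist(), which switched
    off the IST stack right after entry,
  - __sev_es_ist_enter()/__sev_es_ist_exit(), which shifted the #VC IST
    entry around NMI handling,
  - the VC2 fall-back stack and its vc_from_invalid_context() check.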

Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
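Note for reviewers, not part of the commit message: the conversion
itself is the one-line DEFINE_IDENTIFY_IST() change in ist_entry.c;
everything else removes the old #VC stack juggling. As a rough sketch
of what that change enables (this is NOT the actual macro expansion
from earlier in this series; the parameter list and the bounds check
are assumptions made up for illustration), the generated predicate for
#VC is assumed to look roughly like:

/*
 * Illustrative sketch only -- not the real DEFINE_IDENTIFY_IST() body.
 * Assumed shape: report whether the CPU delivered the current
 * exception on the per-CPU #VC IST stack, and if so where that stack
 * tops out, gated on CONFIG_AMD_MEM_ENCRYPT as this patch now does.
 */
static __always_inline bool identify_ist_vmm_communication(
		const struct pt_regs *regs, struct cea_exception_stacks *stacks,
		unsigned long *ist_top)
{
	if (!IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
		return false;
	/* Is the interrupted SP within [bottom, top) of the VC stack? */
	if (regs->sp < CEA_ESTACK_BOT(stacks, VC) ||
	    regs->sp >= CEA_ESTACK_TOP(stacks, VC))
		return false;
	*ist_top = CEA_ESTACK_TOP(stacks, VC);
	return true;
}

With the third argument flipped from false to
IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT), the atomic-IST-entry code starts
recognising #VC as one of the IST vectors it handles, replicating in C
what idtentry_vc previously did in assembly.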
 arch/x86/entry/entry_64.S             |  83 --------------------
 arch/x86/entry/ist_entry.c            |   2 +-
 arch/x86/include/asm/cpu_entry_area.h |   3 -
 arch/x86/include/asm/idtentry.h       |   2 +-
 arch/x86/include/asm/sev.h            |  14 ----
 arch/x86/include/asm/traps.h          |   1 -
 arch/x86/kernel/dumpstack_64.c        |   4 +-
 arch/x86/kernel/nmi.c                 |   8 --
 arch/x86/kernel/sev.c                 | 108 --------------------------
 arch/x86/kernel/traps.c               |  43 ----------
 arch/x86/mm/cpu_entry_area.c          |   1 -
 11 files changed, 3 insertions(+), 266 deletions(-)

Patch

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index e4ddc793f841..187d42efd288 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -621,89 +621,6 @@  _ASM_NOKPROBE(\asmsym)
 SYM_CODE_END(\asmsym)
 .endm
 
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-/**
- * idtentry_vc - Macro to generate entry stub for #VC
- * @vector:		Vector number
- * @asmsym:		ASM symbol for the entry point
- * @cfunc:		C function to be called
- *
- * The macro emits code to set up the kernel context for #VC. The #VC handler
- * runs on an IST stack and needs to be able to cause nested #VC exceptions.
- *
- * To make this work the #VC entry code tries its best to pretend it doesn't use
- * an IST stack by switching to the task stack if coming from user-space (which
- * includes early SYSCALL entry path) or back to the stack in the IRET frame if
- * entered from kernel-mode.
- *
- * If entered from kernel-mode the return stack is validated first, and if it is
- * not safe to use (e.g. because it points to the entry stack) the #VC handler
- * will switch to a fall-back stack (VC2) and call a special handler function.
- *
- * The macro is only used for one vector, but it is planned to be extended in
- * the future for the #HV exception.
- */
-.macro idtentry_vc vector asmsym cfunc
-SYM_CODE_START(\asmsym)
-	UNWIND_HINT_IRET_REGS
-	ENDBR
-	ASM_CLAC
-	cld
-
-	/*
-	 * If the entry is from userspace, switch stacks and treat it as
-	 * a normal entry.
-	 */
-	testb	$3, CS-ORIG_RAX(%rsp)
-	jnz	.Lfrom_usermode_switch_stack_\@
-
-	PUSH_AND_CLEAR_REGS
-	UNWIND_HINT_REGS
-	ENCODE_FRAME_POINTER
-
-	/*
-	 * paranoid_entry returns SWAPGS flag for paranoid_exit in EBX.
-	 * EBX == 0 -> SWAPGS, EBX == 1 -> no SWAPGS
-	 */
-	call	paranoid_entry
-
-	/*
-	 * Switch off the IST stack to make it free for nested exceptions. The
-	 * vc_switch_off_ist() function will switch back to the interrupted
-	 * stack if it is safe to do so. If not it switches to the VC fall-back
-	 * stack.
-	 */
-	movq	%rsp, %rdi		/* pt_regs pointer */
-	call	vc_switch_off_ist
-	movq	%rax, %rsp		/* Switch to new stack */
-
-	ENCODE_FRAME_POINTER
-	UNWIND_HINT_REGS
-
-	/* Update pt_regs */
-	movq	ORIG_RAX(%rsp), %rsi	/* get error code into 2nd argument*/
-	movq	$-1, ORIG_RAX(%rsp)	/* no syscall to restart */
-
-	movq	%rsp, %rdi		/* pt_regs pointer */
-
-	call	kernel_\cfunc
-
-	/*
-	 * No need to switch back to the IST stack. The current stack is either
-	 * identical to the stack in the IRET frame or the VC fall-back stack,
-	 * so it is definitely mapped even with PTI enabled.
-	 */
-	jmp	paranoid_exit
-
-	/* Switch to the regular task stack */
-.Lfrom_usermode_switch_stack_\@:
-	idtentry_body user_\cfunc, has_error_code=1
-
-_ASM_NOKPROBE(\asmsym)
-SYM_CODE_END(\asmsym)
-.endm
-#endif
-
 /*
  * Double fault entry. Straight paranoid. No checks from which context
  * this comes because for the espfix induced #DF this would do the wrong
diff --git a/arch/x86/entry/ist_entry.c b/arch/x86/entry/ist_entry.c
index 946b3b537bd5..c0cbd4527033 100644
--- a/arch/x86/entry/ist_entry.c
+++ b/arch/x86/entry/ist_entry.c
@@ -44,7 +44,7 @@  static __always_inline bool identify_ist_##sym_name(				\
 DEFINE_IDENTIFY_IST(NMI, nmi, true)
 DEFINE_IDENTIFY_IST(DB, debug, true)
 DEFINE_IDENTIFY_IST(MCE, machine_check, IS_ENABLED(CONFIG_X86_MCE))
-DEFINE_IDENTIFY_IST(VC, vmm_communication, false)
+DEFINE_IDENTIFY_IST(VC, vmm_communication, IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT))
 
 static __always_inline bool identify_ist(
 		const struct pt_regs *regs, struct cea_exception_stacks *stacks,
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index a373e8c37e25..618aa698eb82 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -30,8 +30,6 @@ 
 	char	MCE_stack[EXCEPTION_STKSZ];			\
 	char	VC_stack_guard[guardsize];			\
 	char	VC_stack[optional_stack_size];			\
-	char	VC2_stack_guard[guardsize];			\
-	char	VC2_stack[optional_stack_size];			\
 	char	IST_stack_guard[guardsize];			\
 	char	IST_stack[IST_MAIN_STKSZ];			\
 	char	IST_top_guard[guardsize];			\
@@ -55,7 +53,6 @@  enum exception_stack_ordering {
 	ESTACK_DB,
 	ESTACK_MCE,
 	ESTACK_VC,
-	ESTACK_VC2,
 	ESTACK_IST,
 	N_EXCEPTION_STACKS
 };
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 01f3152ffe82..5f3250e589ec 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -466,7 +466,7 @@  __visible noinstr void func(struct pt_regs *regs,			\
 	DECLARE_IDTENTRY(vector, func)
 
 # define DECLARE_IDTENTRY_VC(vector, func)				\
-	idtentry_vc vector asm_##func func
+	idtentry_ist vector asm_##func kernel_##func user_##func has_error_code=1 stack_offset=CEA_stacks_VC
 
 #else
 # define DECLARE_IDTENTRY_MCE(vector, func)				\
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index ebc271bb6d8e..ce554b3a818d 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -135,18 +135,6 @@  struct snp_secrets_page_layout {
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
 extern struct static_key_false sev_es_enable_key;
-extern void __sev_es_ist_enter(struct pt_regs *regs);
-extern void __sev_es_ist_exit(void);
-static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
-{
-	if (static_branch_unlikely(&sev_es_enable_key))
-		__sev_es_ist_enter(regs);
-}
-static __always_inline void sev_es_ist_exit(void)
-{
-	if (static_branch_unlikely(&sev_es_enable_key))
-		__sev_es_ist_exit();
-}
 extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh);
 extern void __sev_es_nmi_complete(void);
 static __always_inline void sev_es_nmi_complete(void)
@@ -198,8 +186,6 @@  bool snp_init(struct boot_params *bp);
 void __init __noreturn snp_abort(void);
 int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err);
 #else
-static inline void sev_es_ist_enter(struct pt_regs *regs) { }
-static inline void sev_es_ist_exit(void) { }
 static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
 static inline void sev_es_nmi_complete(void) { }
 static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 47ecfff2c83d..dc0da530f951 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -15,7 +15,6 @@  asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs);
 asmlinkage __visible notrace
 struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs);
 void __init trap_init(void);
-asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs);
 #endif
 
 extern bool ibt_selftest(void);
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 3413b23fa9f1..b7ef2685f63b 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -25,7 +25,6 @@  static const char * const exception_stack_names[] = {
 		[ ESTACK_DB	]	= "#DB",
 		[ ESTACK_MCE	]	= "#MC",
 		[ ESTACK_VC	]	= "#VC",
-		[ ESTACK_VC2	]	= "#VC2",
 		[ ESTACK_IST	]	= "#IST",
 };
 
@@ -89,7 +88,6 @@  struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
 	EPAGERANGE(DB),
 	EPAGERANGE(MCE),
 	EPAGERANGE(VC),
-	EPAGERANGE(VC2),
 	EPAGERANGE(IST),
 };
 
@@ -100,7 +98,7 @@  static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
 	struct pt_regs *regs;
 	unsigned int k;
 
-	BUILD_BUG_ON(N_EXCEPTION_STACKS != 7);
+	BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
 
 	begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
 	/*
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 776f4b1e395b..bafd0c7ca5b7 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -514,12 +514,6 @@  DEFINE_IDTENTRY_RAW(exc_nmi)
 	}
 nmi_restart:
 
-	/*
-	 * Needs to happen before DR7 is accessed, because the hypervisor can
-	 * intercept DR7 reads/writes, turning those into #VC exceptions.
-	 */
-	sev_es_ist_enter(regs);
-
 	this_cpu_write(nmi_dr7, local_db_save());
 
 	irq_state = irqentry_nmi_enter(regs);
@@ -544,8 +538,6 @@  DEFINE_IDTENTRY_RAW(exc_nmi)
 
 	local_db_restore(this_cpu_read(nmi_dr7));
 
-	sev_es_ist_exit();
-
 	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
 		write_cr2(this_cpu_read(nmi_cr2));
 	if (this_cpu_dec_return(nmi_state))
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 679026a640ef..74d55786c353 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -122,77 +122,6 @@  struct sev_config {
 
 static struct sev_config sev_cfg __read_mostly;
 
-static __always_inline bool on_vc_stack(struct pt_regs *regs)
-{
-	unsigned long sp = regs->sp;
-
-	/* User-mode RSP is not trusted */
-	if (user_mode(regs))
-		return false;
-
-	/* SYSCALL gap still has user-mode RSP */
-	if (ip_within_syscall_gap(regs))
-		return false;
-
-	return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
-}
-
-/*
- * This function handles the case when an NMI is raised in the #VC
- * exception handler entry code, before the #VC handler has switched off
- * its IST stack. In this case, the IST entry for #VC must be adjusted,
- * so that any nested #VC exception will not overwrite the stack
- * contents of the interrupted #VC handler.
- *
- * The IST entry is adjusted unconditionally so that it can be also be
- * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
- * nested sev_es_ist_exit() call may adjust back the IST entry too
- * early.
- *
- * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
- * on the NMI IST stack, as they are only called from NMI handling code
- * right now.
- */
-void noinstr __sev_es_ist_enter(struct pt_regs *regs)
-{
-	unsigned long old_ist, new_ist;
-
-	/* Read old IST entry */
-	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
-
-	/*
-	 * If NMI happened while on the #VC IST stack, set the new IST
-	 * value below regs->sp, so that the interrupted stack frame is
-	 * not overwritten by subsequent #VC exceptions.
-	 */
-	if (on_vc_stack(regs))
-		new_ist = regs->sp;
-
-	/*
-	 * Reserve additional 8 bytes and store old IST value so this
-	 * adjustment can be unrolled in __sev_es_ist_exit().
-	 */
-	new_ist -= sizeof(old_ist);
-	*(unsigned long *)new_ist = old_ist;
-
-	/* Set new IST entry */
-	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
-}
-
-void noinstr __sev_es_ist_exit(void)
-{
-	unsigned long ist;
-
-	/* Read IST entry */
-	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
-
-	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
-		return;
-
-	/* Read back old IST entry and write it to the TSS */
-	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
-}
-
 /*
  * Nothing shall interrupt this code path while holding the per-CPU
  * GHCB. The backup GHCB is only for NMIs interrupting this path.
@@ -1841,26 +1770,6 @@  static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
 	}
 }
 
-static __always_inline bool is_vc2_stack(unsigned long sp)
-{
-	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
-}
-
-static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
-{
-	unsigned long sp, prev_sp;
-
-	sp      = (unsigned long)regs;
-	prev_sp = regs->sp;
-
-	/*
-	 * If the code was already executing on the VC2 stack when the #VC
-	 * happened, let it proceed to the normal handling routine. This way the
-	 * code executing on the VC2 stack can cause #VC exceptions to get handled.
-	 */
-	return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
-}
-
 static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
 {
 	struct ghcb_state state;
@@ -1930,23 +1839,6 @@  DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
 {
 	irqentry_state_t irq_state;
 
-	/*
-	 * With the current implementation it is always possible to switch to a
-	 * safe stack because #VC exceptions only happen at known places, like
-	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
-	 * also happen with code instrumentation when the hypervisor intercepts
-	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
-	 * exceptions currently also only happen in safe places.
-	 *
-	 * But keep this here in case the noinstr annotations are violated due
-	 * to bug elsewhere.
-	 */
-	if (unlikely(vc_from_invalid_context(regs))) {
-		instrumentation_begin();
-		panic("Can't handle #VC exception from unsupported context\n");
-		instrumentation_end();
-	}
-
 	/*
 	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
 	 */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d317dc3d06a3..6c697c175f7a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -864,49 +864,6 @@  asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
 	return regs;
 }
 
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *regs)
-{
-	unsigned long sp, *stack;
-	struct stack_info info;
-	struct pt_regs *regs_ret;
-
-	/*
-	 * In the SYSCALL entry path the RSP value comes from user-space - don't
-	 * trust it and switch to the current kernel stack
-	 */
-	if (ip_within_syscall_gap(regs)) {
-		sp = this_cpu_read(pcpu_hot.top_of_stack);
-		goto sync;
-	}
-
-	/*
-	 * From here on the RSP value is trusted. Now check whether entry
-	 * happened from a safe stack. Not safe are the entry or unknown stacks,
-	 * use the fall-back stack instead in this case.
-	 */
-	sp    = regs->sp;
-	stack = (unsigned long *)sp;
-
-	if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY ||
-	    info.type > STACK_TYPE_EXCEPTION_LAST)
-		sp = __this_cpu_ist_top_va(VC2);
-
-sync:
-	/*
-	 * Found a safe stack - switch to it as if the entry didn't happen via
-	 * IST stack. The code below only copies pt_regs, the real switch happens
-	 * in assembly code.
-	 */
-	sp = ALIGN_DOWN(sp, 8) - sizeof(*regs_ret);
-
-	regs_ret = (struct pt_regs *)sp;
-	*regs_ret = *regs;
-
-	return regs_ret;
-}
-#endif
-
 asmlinkage __visible noinstr struct pt_regs *fixup_bad_iret(struct pt_regs *bad_regs)
 {
 	struct pt_regs tmp, *new_stack;
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 62341cb819ab..7df1301ec343 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -153,7 +153,6 @@  static void __init percpu_setup_exception_stacks(unsigned int cpu)
 	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
 		if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) {
 			cea_map_stack(VC);
-			cea_map_stack(VC2);
 		}
 	}
 }