diff mbox series

[v5,28/34] x86/fred: fixup fault on ERETU by jumping to fred_entrypoint_user

Message ID 20230307023946.14516-29-xin3.li@intel.com (mailing list archive)
State New, archived
Headers show
Series x86: enable FRED for x86-64 | expand

Commit Message

Li, Xin3 March 7, 2023, 2:39 a.m. UTC
If the stack frame contains an invalid user context (e.g. due to invalid SS,
a non-canonical RIP, etc.) the ERETU instruction will trap (#SS or #GP).

From a Linux point of view, this really should be considered a user space
failure, so use the standard fault fixup mechanism to intercept the fault,
fix up the exception frame, and redirect execution to fred_entrypoint_user.
The end result is that it appears just as if the hardware had taken the
exception immediately after completing the transition to user space.

Suggested-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
---
 arch/x86/entry/entry_64_fred.S             |  8 +++++--
 arch/x86/include/asm/extable_fixup_types.h |  4 +++-
 arch/x86/mm/extable.c                      | 28 ++++++++++++++++++++++
 3 files changed, 37 insertions(+), 3 deletions(-)

Comments

Lai Jiangshan March 17, 2023, 9:39 a.m. UTC | #1
> +#ifdef CONFIG_X86_FRED
> +static bool ex_handler_eretu(const struct exception_table_entry *fixup,
> +                            struct pt_regs *regs, unsigned long error_code)
> +{
> +       struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, ip));
> +       unsigned short ss = uregs->ss;
> +       unsigned short cs = uregs->cs;
> +
> +       fred_info(uregs)->edata = fred_event_data(regs);
> +       uregs->ssx = regs->ssx;
> +       uregs->ss = ss;
> +       uregs->csx = regs->csx;
> +       uregs->current_stack_level = 0;
> +       uregs->cs = cs;

Hello

If the ERETU instruction had tried to return from NMI to ring3 and just faulted,
is NMI still blocked?

We know that IRET unconditionally enables NMI, but I can't find any clue in the
FRED's manual.

In the pseudocode of ERETU in the manual, it seems that NMI is only enabled when
ERETU succeeds with bit28 in csx set.  If so, this code will fail to reenable
NMI if bit28 is not explicitly re-set in csx.

Thanks,
Lai

> +
> +       /* Copy error code to uregs and adjust stack pointer accordingly */
> +       uregs->orig_ax = error_code;
> +       regs->sp -= 8;
> +
> +       return ex_handler_default(fixup, regs);
> +}
Andrew Cooper March 17, 2023, 9:55 a.m. UTC | #2
On 17/03/2023 9:39 am, Lai Jiangshan wrote:
>> +#ifdef CONFIG_X86_FRED
>> +static bool ex_handler_eretu(const struct exception_table_entry *fixup,
>> +                            struct pt_regs *regs, unsigned long error_code)
>> +{
>> +       struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, ip));
>> +       unsigned short ss = uregs->ss;
>> +       unsigned short cs = uregs->cs;
>> +
>> +       fred_info(uregs)->edata = fred_event_data(regs);
>> +       uregs->ssx = regs->ssx;
>> +       uregs->ss = ss;
>> +       uregs->csx = regs->csx;
>> +       uregs->current_stack_level = 0;
>> +       uregs->cs = cs;
> Hello
>
> If the ERETU instruction had tried to return from NMI to ring3 and just faulted,
> is NMI still blocked?
>
> We know that IRET unconditionally enables NMI, but I can't find any clue in the
> FRED's manual.
>
> In the pseudocode of ERETU in the manual, it seems that NMI is only enabled when
> ERETU succeeds with bit28 in csx set.  If so, this code will fail to reenable
> NMI if bit28 is not explicitly re-set in csx.

IRET clearing NMI blocking is the source of an immense amount of grief,
and ultimately the reason why Linux and others can't use supervisor
shadow stacks at the moment.

Changing this property, so NMIs only get unblocked on successful
execution of an ERET{S,U}, was a key demand of the FRED spec.

i.e. until you have successfully ERET*'d, you're still logically in the
NMI handler and NMIs need to remain blocked even when handling the #GP
from a bad ERET.

~Andrew
Lai Jiangshan March 17, 2023, 1:02 p.m. UTC | #3
On Fri, Mar 17, 2023 at 5:56 PM <andrew.cooper3@citrix.com> wrote:
>
> On 17/03/2023 9:39 am, Lai Jiangshan wrote:
> >> +#ifdef CONFIG_X86_FRED
> >> +static bool ex_handler_eretu(const struct exception_table_entry *fixup,
> >> +                            struct pt_regs *regs, unsigned long error_code)
> >> +{
> >> +       struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, ip));
> >> +       unsigned short ss = uregs->ss;
> >> +       unsigned short cs = uregs->cs;
> >> +
> >> +       fred_info(uregs)->edata = fred_event_data(regs);
> >> +       uregs->ssx = regs->ssx;
> >> +       uregs->ss = ss;
> >> +       uregs->csx = regs->csx;
> >> +       uregs->current_stack_level = 0;
> >> +       uregs->cs = cs;
> > Hello
> >
> > If the ERETU instruction had tried to return from NMI to ring3 and just faulted,
> > is NMI still blocked?
> >
> > We know that IRET unconditionally enables NMI, but I can't find any clue in the
> > FRED's manual.
> >
> > In the pseudocode of ERETU in the manual, it seems that NMI is only enabled when
> > ERETU succeeds with bit28 in csx set.  If so, this code will fail to reenable
> > NMI if bit28 is not explicitly re-set in csx.
>
> IRET clearing NMI blocking is the source of an immense amount of grief,
> and ultimately the reason why Linux and others can't use supervisor
> shadow stacks at the moment.
>
> Changing this property, so NMIs only get unblocked on successful
> execution of an ERET{S,U}, was a key demand of the FRED spec.
>
> i.e. until you have successfully ERET*'d, you're still logically in the
> NMI handler and NMIs need to remain blocked even when handling the #GP
> from a bad ERET.
>

Handling of the #GP for a bad ERETU can be rescheduled. It is not
OK to reschedule with NMI blocked.

I think "regs->nmi = 1;" (not uregs->nmi) can fix the problem.
H. Peter Anvin March 17, 2023, 9 p.m. UTC | #4
On March 17, 2023 2:55:44 AM PDT, andrew.cooper3@citrix.com wrote:
>On 17/03/2023 9:39 am, Lai Jiangshan wrote:
>>> +#ifdef CONFIG_X86_FRED
>>> +static bool ex_handler_eretu(const struct exception_table_entry *fixup,
>>> +                            struct pt_regs *regs, unsigned long error_code)
>>> +{
>>> +       struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, ip));
>>> +       unsigned short ss = uregs->ss;
>>> +       unsigned short cs = uregs->cs;
>>> +
>>> +       fred_info(uregs)->edata = fred_event_data(regs);
>>> +       uregs->ssx = regs->ssx;
>>> +       uregs->ss = ss;
>>> +       uregs->csx = regs->csx;
>>> +       uregs->current_stack_level = 0;
>>> +       uregs->cs = cs;
>> Hello
>>
>> If the ERETU instruction had tried to return from NMI to ring3 and just faulted,
>> is NMI still blocked?
>>
>> We know that IRET unconditionally enables NMI, but I can't find any clue in the
>> FRED's manual.
>>
>> In the pseudocode of ERETU in the manual, it seems that NMI is only enabled when
>> ERETU succeeds with bit28 in csx set.  If so, this code will fail to reenable
>> NMI if bit28 is not explicitly re-set in csx.
>
>IRET clearing NMI blocking is the source of an immense amount of grief,
>and ultimately the reason why Linux and others can't use supervisor
>shadow stacks at the moment.
>
>Changing this property, so NMIs only get unblocked on successful
>execution of an ERET{S,U}, was a key demand of the FRED spec.
>
>i.e. until you have successfully ERET*'d, you're still logically in the
>NMI handler and NMIs need to remain blocked even when handling the #GP
>from a bad ERET.
>
>~Andrew

This is correct.
H. Peter Anvin March 17, 2023, 9:23 p.m. UTC | #5
On March 17, 2023 6:02:52 AM PDT, Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>On Fri, Mar 17, 2023 at 5:56 PM <andrew.cooper3@citrix.com> wrote:
>>
>> On 17/03/2023 9:39 am, Lai Jiangshan wrote:
>> >> +#ifdef CONFIG_X86_FRED
>> >> +static bool ex_handler_eretu(const struct exception_table_entry *fixup,
>> >> +                            struct pt_regs *regs, unsigned long error_code)
>> >> +{
>> >> +       struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, ip));
>> >> +       unsigned short ss = uregs->ss;
>> >> +       unsigned short cs = uregs->cs;
>> >> +
>> >> +       fred_info(uregs)->edata = fred_event_data(regs);
>> >> +       uregs->ssx = regs->ssx;
>> >> +       uregs->ss = ss;
>> >> +       uregs->csx = regs->csx;
>> >> +       uregs->current_stack_level = 0;
>> >> +       uregs->cs = cs;
>> > Hello
>> >
>> > If the ERETU instruction had tried to return from NMI to ring3 and just faulted,
>> > is NMI still blocked?
>> >
>> > We know that IRET unconditionally enables NMI, but I can't find any clue in the
>> > FRED's manual.
>> >
>> > In the pseudocode of ERETU in the manual, it seems that NMI is only enabled when
>> > ERETU succeeds with bit28 in csx set.  If so, this code will fail to reenable
>> > NMI if bit28 is not explicitly re-set in csx.
>>
>> IRET clearing NMI blocking is the source of an immense amount of grief,
>> and ultimately the reason why Linux and others can't use supervisor
>> shadow stacks at the moment.
>>
>> Changing this property, so NMIs only get unblocked on successful
>> execution of an ERET{S,U}, was a key demand of the FRED spec.
>>
>> i.e. until you have successfully ERET*'d, you're still logically in the
>> NMI handler and NMIs need to remain blocked even when handling the #GP
>> from a bad ERET.
>>
>
>Handling of the #GP for a bad ERETU can be rescheduled. It is not
>OK to reschedule with NMI blocked.
>
>I think "regs->nmi = 1;" (not uregs->nmi) can fix the problem.
>

You are quite correct, since what we want here is to emulate having taken the fault in user space – which meant that NMI would have been re-enabled by the never-executed return.

I think the "best" solution is:

regs->nmi = uregs->nmi;
uregs->nmi = 0;

... as enabling NMI is expected to have a performance penalty (being the less common case, an implementation which has a performance difference at all would want to optimize the non-NMI case), and I believe the compiler should be able to at least mostly fold those operations into ones it is doing anyway.
Li, Xin3 March 18, 2023, 7:55 a.m. UTC | #6
> > +#ifdef CONFIG_X86_FRED
> > +static bool ex_handler_eretu(const struct exception_table_entry *fixup,
> > +                            struct pt_regs *regs, unsigned long
> > +error_code) {
> > +       struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs,
> ip));
> > +       unsigned short ss = uregs->ss;
> > +       unsigned short cs = uregs->cs;
> > +
> > +       fred_info(uregs)->edata = fred_event_data(regs);
> > +       uregs->ssx = regs->ssx;
> > +       uregs->ss = ss;
> > +       uregs->csx = regs->csx;
> > +       uregs->current_stack_level = 0;
> > +       uregs->cs = cs;
> 
> Hello
> 
> If the ERETU instruction had tried to return from NMI to ring3 and just faulted, is
> NMI still blocked?

Do you mean the NMI FRED stack frame contains an invalid ring3 context?
diff mbox series

Patch

diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index 1fb765fd3871..027ef8f1e600 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -5,8 +5,10 @@ 
  * The actual FRED entry points.
  */
 #include <linux/linkage.h>
-#include <asm/errno.h>
+#include <asm/asm.h>
 #include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/export.h>
 #include <asm/fred.h>
 
 #include "calling.h"
@@ -38,7 +40,9 @@  SYM_CODE_START_NOALIGN(fred_entrypoint_user)
 	call	fred_entry_from_user
 SYM_INNER_LABEL(fred_exit_user, SYM_L_GLOBAL)
 	FRED_EXIT
-	ERETU
+1:	ERETU
+
+	_ASM_EXTABLE_TYPE(1b, fred_entrypoint_user, EX_TYPE_ERETU)
 SYM_CODE_END(fred_entrypoint_user)
 
 /*
diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h
index 991e31cfde94..1585c798a02f 100644
--- a/arch/x86/include/asm/extable_fixup_types.h
+++ b/arch/x86/include/asm/extable_fixup_types.h
@@ -64,6 +64,8 @@ 
 #define	EX_TYPE_UCOPY_LEN4		(EX_TYPE_UCOPY_LEN | EX_DATA_IMM(4))
 #define	EX_TYPE_UCOPY_LEN8		(EX_TYPE_UCOPY_LEN | EX_DATA_IMM(8))
 
-#define EX_TYPE_ZEROPAD			20 /* longword load with zeropad on fault */
+#define	EX_TYPE_ZEROPAD			20 /* longword load with zeropad on fault */
+
+#define	EX_TYPE_ERETU			21
 
 #endif
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 60814e110a54..88a2c419ce8b 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -6,6 +6,7 @@ 
 #include <xen/xen.h>
 
 #include <asm/fpu/api.h>
+#include <asm/fred.h>
 #include <asm/sev.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
@@ -195,6 +196,29 @@  static bool ex_handler_ucopy_len(const struct exception_table_entry *fixup,
 	return ex_handler_uaccess(fixup, regs, trapnr);
 }
 
+#ifdef CONFIG_X86_FRED
+static bool ex_handler_eretu(const struct exception_table_entry *fixup,
+			     struct pt_regs *regs, unsigned long error_code)
+{
+	struct pt_regs *uregs = (struct pt_regs *)(regs->sp - offsetof(struct pt_regs, ip));
+	unsigned short ss = uregs->ss;
+	unsigned short cs = uregs->cs;
+
+	fred_info(uregs)->edata = fred_event_data(regs);
+	uregs->ssx = regs->ssx;
+	uregs->ss = ss;
+	uregs->csx = regs->csx;
+	uregs->current_stack_level = 0;
+	uregs->cs = cs;
+
+	/* Copy error code to uregs and adjust stack pointer accordingly */
+	uregs->orig_ax = error_code;
+	regs->sp -= 8;
+
+	return ex_handler_default(fixup, regs);
+}
+#endif
+
 int ex_get_fixup_type(unsigned long ip)
 {
 	const struct exception_table_entry *e = search_exception_tables(ip);
@@ -272,6 +296,10 @@  int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
 		return ex_handler_ucopy_len(e, regs, trapnr, reg, imm);
 	case EX_TYPE_ZEROPAD:
 		return ex_handler_zeropad(e, regs, fault_addr);
+#ifdef CONFIG_X86_FRED
+	case EX_TYPE_ERETU:
+		return ex_handler_eretu(e, regs, error_code);
+#endif
 	}
 	BUG();
 }